ILIAS  trunk Revision v11.0_alpha-1749-g1a06bdef097
All Data Structures Namespaces Files Functions Variables Enumerations Enumerator Modules Pages
LevenshteinTransformation.php
Go to the documentation of this file.
1 <?php
2 
19 declare(strict_types=1);
20 
22 
27 use ilStr;
28 
30 {
33 
/** String that every transformed value is compared against. */
private string $primary_string;

/** Upper bound for an accepted distance; 0 switches the limit off. */
private int $maximum_distance;

/** Cost charged for each inserted character. */
private float $cost_insertion;

/** Cost charged for each replaced character. */
private float $cost_replacement;

/** Cost charged for each deleted character. */
private float $cost_deletion;

/**
 * Stores the reference string and the parameters of the distance calculation.
 *
 * @param string $primary_string   string the transformed values are compared against
 * @param int    $maximum_distance largest accepted distance; 0 means no limit is applied
 * @param float  $cost_insertion   cost of inserting one character
 * @param float  $cost_replacement cost of replacing one character
 * @param float  $cost_deletion    cost of deleting one character
 */
public function __construct(
    string $primary_string = '',
    int $maximum_distance = 0,
    float $cost_insertion = 1.0,
    float $cost_replacement = 1.0,
    float $cost_deletion = 1.0
) {
    // Edit-operation weights.
    $this->cost_deletion = $cost_deletion;
    $this->cost_replacement = $cost_replacement;
    $this->cost_insertion = $cost_insertion;

    // Comparison target and cut-off.
    $this->maximum_distance = $maximum_distance;
    $this->primary_string = $primary_string;
}
65 
/**
 * Computes the weighted Levenshtein distance between the primary string
 * and the given secondary string.
 *
 * The distance is the cheapest sequence of deletions, insertions and
 * replacements that turns the primary string into the secondary string,
 * priced with the costs passed to the constructor. The early exits assume
 * that all three costs are non-negative.
 *
 * @param string $secondary_string string to compare with the primary string
 * @return float the distance, or -1.0 if a maximum distance is configured
 *               (non-zero) and the distance exceeds it
 */
protected function levenshtein(string $secondary_string): float
{
    $primary_characters = $this->stringToCharacterArray($this->primary_string);
    $secondary_characters = $this->stringToCharacterArray($secondary_string);
    $primary_length = count($primary_characters);
    $secondary_length = count($secondary_characters);

    // A maximum distance of 0 means "no limit".
    $is_limited = $this->maximum_distance !== 0;

    // Cheapest conceivable transformation: the length gap has to be closed
    // by insertions (secondary is longer) or deletions (primary is longer).
    // The gap is weighted with the configured cost, so costs below 1 do not
    // reject strings that are still within the limit.
    $length_difference = $secondary_length - $primary_length;
    $minimum_cost = $length_difference >= 0
        ? $length_difference * $this->cost_insertion
        : -$length_difference * $this->cost_deletion;
    if ($is_limited && $minimum_cost > $this->maximum_distance) {
        return -1.0;
    }

    // Row 0: building each prefix of the secondary string from nothing.
    $previous_row = [0.0];
    for ($j = 1; $j <= $secondary_length; $j++) {
        $previous_row[$j] = $j * $this->cost_insertion;
    }

    // Only the previous row is ever read, so two rolling rows replace the
    // full cost matrix.
    for ($i = 0; $i < $primary_length; $i++) {
        // Column 0: deleting the first $i + 1 characters of the primary string.
        $current_row = [($i + 1) * $this->cost_deletion];
        for ($j = 0; $j < $secondary_length; $j++) {
            $replacement = $primary_characters[$i] === $secondary_characters[$j]
                ? 0.0
                : $this->cost_replacement;
            $current_row[$j + 1] = min(
                $previous_row[$j + 1] + $this->cost_deletion,
                $current_row[$j] + $this->cost_insertion,
                $previous_row[$j] + $replacement
            );
        }

        // No later cell can be cheaper than the cheapest cell of this row,
        // so the limit cannot be met any more.
        if ($is_limited && min($current_row) > $this->maximum_distance) {
            return -1.0;
        }
        $previous_row = $current_row;
    }

    $distance = $previous_row[$secondary_length];
    if ($is_limited && $distance > $this->maximum_distance) {
        return -1.0;
    }

    return $distance;
}
121 
/**
 * Splits a string into a zero-indexed list of single-character strings.
 *
 * Length and slicing are delegated to ilStr::strLen() and ilStr::subStr()
 * (presumably multibyte-aware - confirm against ilStr).
 *
 * @param string $string_to_convert string to split
 * @return array list of one-character substrings in original order
 */
private function stringToCharacterArray(string $string_to_convert): array
{
    $characters = [];
    $total = ilStr::strLen($string_to_convert);

    $position = 0;
    while ($position < $total) {
        $characters[] = ilStr::subStr($string_to_convert, $position, 1);
        $position++;
    }

    return $characters;
}
137 
/**
 * Returns the Levenshtein distance between the primary string and $from.
 *
 * @param mixed $from value to compare; must be a string
 * @return float result of levenshtein() for the given string
 * @throws InvalidArgumentException if $from is not a string
 */
public function transform($from): float
{
    if (is_string($from)) {
        return $this->levenshtein($from);
    }

    throw new InvalidArgumentException(__METHOD__ . " the argument is not a string.");
}
155 }
static subStr(string $a_str, int $a_start, ?int $a_length=null)
Definition: class.ilStr.php:24
static strLen(string $a_string)
Definition: class.ilStr.php:63
__construct(string $primary_string='', int $maximum_distance=0, float $cost_insertion=1.0, float $cost_replacement=1.0, float $cost_deletion=1.0)
This constructor allows the Levenshtein distance function to be parameterized.
A transformation is a function from one datatype to another.
levenshtein(string $secondary_string)
Levenshtein function alternative code as mentioned in the bug report: https://mantis.ilias.de/view.php?id=17861 Original code under MIT-License: https://github.com/GordonLesti/levenshtein/blob/master/src/Levenshtein.php.
transform($from)
The transform method checks whether the $from variable contains a string; otherwise an InvalidArgumentException is thrown.
stringToCharacterArray(string $string_to_convert)
Helper function for levenshtein distance calculation, used to convert strings into character arrays...