ILIAS  release_7 Revision v7.30-3-g800a261c036
LevenshteinTransformation.php
Go to the documentation of this file.
1 <?php
2 
19 declare(strict_types=1);
20 
22 
27 use ilStr;
28 
30 {
33 
/** @var string String every input is compared against; assigned from the constructor argument of the same name. */
35  private $primary_string;
/** @var float Cost charged for each inserted character; assigned by the constructor. */
39  private $cost_insertion;
/** @var float Cost charged for each deleted character; assigned by the constructor. */
43  private $cost_deletion;
// NOTE(review): the constructor also writes $this->maximum_distance and $this->cost_replacement;
// their declarations are not visible in this excerpt - confirm they are declared in the class.
44 
/**
 * Configures the weighted Levenshtein distance computation.
 *
 * @param string $primary_string    string every later input is compared against
 * @param int    $maximum_distance  upper bound for the distance; 0 disables the bound
 * @param float  $cost_insertion    cost charged per inserted character
 * @param float  $cost_replacement  cost charged per replaced character
 * @param float  $cost_deletion     cost charged per deleted character
 */
public function __construct(
    string $primary_string = "",
    int $maximum_distance = 0,
    float $cost_insertion = 1.0,
    float $cost_replacement = 1.0,
    float $cost_deletion = 1.0
) {
    // Comparison target and cut-off.
    $this->primary_string = $primary_string;
    $this->maximum_distance = $maximum_distance;

    // Per-operation weights.
    $this->cost_deletion = $cost_deletion;
    $this->cost_replacement = $cost_replacement;
    $this->cost_insertion = $cost_insertion;
}
70 
/**
 * Computes the weighted Levenshtein distance between the primary string and
 * $secondary_string, i.e. the cheapest sequence of insertions, deletions and
 * replacements that turns the primary string into the secondary one.
 *
 * A maximum distance of 0 means "no limit". With a limit set, the computation
 * stops as soon as the limit can no longer be met.
 *
 * The early exits assume that all three costs are non-negative.
 *
 * @param string $secondary_string string to compare with the primary string
 * @return float the weighted distance, or -1.0 if it exceeds the maximum distance
 */
protected function levenshtein(string $secondary_string): float
{
    $primary_characters = $this->stringToCharacterArray($this->primary_string);
    $secondary_characters = $this->stringToCharacterArray($secondary_string);
    $primary_length = count($primary_characters);
    $secondary_length = count($secondary_characters);
    $is_limited = $this->maximum_distance != 0;

    if ($is_limited) {
        // Bridging a length difference of d needs at least d deletions (primary longer)
        // or d insertions (secondary longer). Weight that lower bound with the configured
        // cost; comparing the raw character count is only valid for unit costs.
        $length_difference = $primary_length - $secondary_length;
        $minimum_cost = $length_difference > 0
            ? $length_difference * $this->cost_deletion
            : -$length_difference * $this->cost_insertion;
        if ($minimum_cost > $this->maximum_distance) {
            return -1.0;
        }
    }

    // Row 0: build each prefix of the secondary string from an empty primary prefix.
    $previous_row = [0.0];
    for ($j = 1; $j <= $secondary_length; $j++) {
        $previous_row[$j] = $j * $this->cost_insertion;
    }

    // Only the previous row is ever read, so two rows suffice instead of the full matrix.
    for ($i = 0; $i < $primary_length; $i++) {
        // Column 0: delete the first $i + 1 primary characters to reach an empty string.
        $current_row = [($i + 1) * $this->cost_deletion];
        for ($j = 0; $j < $secondary_length; $j++) {
            $replacement_cost = $primary_characters[$i] === $secondary_characters[$j]
                ? 0.0
                : $this->cost_replacement;
            $current_row[$j + 1] = min(
                $previous_row[$j + 1] + $this->cost_deletion,
                $current_row[$j] + $this->cost_insertion,
                $previous_row[$j] + $replacement_cost
            );
        }

        // Every edit path passes through this row, so its minimum bounds the final result from below.
        if ($is_limited && min($current_row) > $this->maximum_distance) {
            return -1.0;
        }
        $previous_row = $current_row;
    }

    $distance = $previous_row[$secondary_length];
    if ($is_limited && $distance > $this->maximum_distance) {
        return -1.0;
    }
    return $distance;
}
126 
/**
 * Splits a string into a zero-indexed array of one-character substrings.
 *
 * Length and substrings are obtained via ilStr::strLen() and ilStr::subStr()
 * (presumably multibyte-aware - confirm against class.ilStr.php).
 *
 * @param string $string_to_convert string to split
 * @return string[] the characters of the string, in order
 */
private function stringToCharacterArray(string $string_to_convert): array
{
    $characters = [];
    $total = ilStr::strLen($string_to_convert);
    $position = 0;
    while ($position < $total) {
        $characters[] = ilStr::subStr($string_to_convert, $position, 1);
        $position++;
    }
    return $characters;
}
142 
/**
 * Returns the Levenshtein distance between the configured primary string and $from.
 *
 * @param mixed $from value to compare; must be a string
 * @return float the result of levenshtein() for $from
 * @throws InvalidArgumentException if $from is not a string
 */
public function transform($from): float
{
    // Strings go straight to the distance computation.
    if (is_string($from)) {
        return $this->levenshtein($from);
    }

    // Anything else is rejected.
    throw new InvalidArgumentException(__METHOD__ . " the argument is not a string.");
}
162 }
static strLen($a_string)
Definition: class.ilStr.php:78
static subStr($a_str, $a_start, $a_length=null)
Definition: class.ilStr.php:15
$index
Definition: metadata.php:128
__construct(string $primary_string="", int $maximum_distance=0, float $cost_insertion=1.0, float $cost_replacement=1.0, float $cost_deletion=1.0)
This constructor allows the Levenshtein distance function to be parameterized.
A transformation is a function from one datatype to another.
levenshtein(string $secondary_string)
Alternative Levenshtein implementation, as mentioned in the bug report (https://mantis.ilias.de/view.php?id=17861). Original code under the MIT License: https://github.com/GordonLesti/levenshtein/blob/master/src/Levenshtein.php
transform($from)
The transform method checks whether the $from variable contains a string; otherwise an InvalidArgumentException is thrown.
stringToCharacterArray(string $string_to_convert)
Helper function for levenshtein distance calculation, used to convert strings into character arrays...
$i
Definition: metadata.php:24