ILIAS  trunk Revision v11.0_alpha-3011-gc6b235a2e85
LevenshteinTransformation.php
Go to the documentation of this file.
1<?php
2
19declare(strict_types=1);
20
22
26use InvalidArgumentException;
27use ilStr;
28
30{
33
/** Reference string every transformed value is compared against. */
private string $primary_string;

/** Upper bound for the accepted distance; 0 switches the bound off. */
private int $maximum_distance;

/** Cost charged for inserting one character. */
private float $cost_insertion;

/** Cost charged for replacing one character by a different one. */
private float $cost_replacement;

/** Cost charged for deleting one character. */
private float $cost_deletion;

/**
 * Configures the distance calculation.
 *
 * @param string $primary_string   string the later inputs are measured against
 * @param int    $maximum_distance largest distance that is still reported; 0 means "no limit"
 * @param float  $cost_insertion   cost of a single insertion
 * @param float  $cost_replacement cost of a single replacement
 * @param float  $cost_deletion    cost of a single deletion
 */
public function __construct(
    string $primary_string = '',
    int $maximum_distance = 0,
    float $cost_insertion = 1.0,
    float $cost_replacement = 1.0,
    float $cost_deletion = 1.0
) {
    // Edit costs first, then the comparison target and its limit.
    $this->cost_insertion = $cost_insertion;
    $this->cost_replacement = $cost_replacement;
    $this->cost_deletion = $cost_deletion;

    $this->primary_string = $primary_string;
    $this->maximum_distance = $maximum_distance;
}

65
/**
 * Calculates the weighted Levenshtein distance between the primary string
 * and the given secondary string.
 *
 * The distance is the cheapest sequence of insertions, deletions and
 * replacements (each weighted with the configured cost) that turns the
 * primary string into the secondary string. Both strings are compared
 * character by character as produced by stringToCharacterArray().
 *
 * If a maximum distance other than 0 is configured and the distance exceeds
 * it, -1.0 is returned instead of the distance. The early exits below rely on
 * the configured costs being non-negative.
 *
 * @param string $secondary_string string to compare with the primary string
 * @return float the weighted distance, or -1.0 if it exceeds the maximum distance
 */
protected function levenshtein(string $secondary_string): float
{
    $primary_string_array = $this->stringToCharacterArray($this->primary_string);
    $secondary_string_array = $this->stringToCharacterArray($secondary_string);
    $primary_string_length = count($primary_string_array);
    $secondary_string_length = count($secondary_string_array);
    $is_limited = $this->maximum_distance !== 0;

    // The length difference forces at least that many insertions (secondary is
    // longer) or deletions (primary is longer). If already this weighted lower
    // bound exceeds the maximum distance, the calculation can be skipped.
    if ($is_limited) {
        $length_difference = $secondary_string_length - $primary_string_length;
        $minimum_cost = $length_difference >= 0
            ? $length_difference * $this->cost_insertion
            : -$length_difference * $this->cost_deletion;
        if ($minimum_cost > $this->maximum_distance) {
            return -1.0;
        }
    }

    // Row for the empty primary prefix: only insertions are possible.
    $previous_row = [0.0];
    for ($j = 1; $j <= $secondary_string_length; $j++) {
        $previous_row[$j] = $j * $this->cost_insertion;
    }

    // Only the previous row is ever read, so two rows are sufficient.
    for ($i = 0; $i < $primary_string_length; $i++) {
        // Column for the empty secondary prefix: only deletions are possible.
        $current_row = [($i + 1) * $this->cost_deletion];
        for ($j = 0; $j < $secondary_string_length; $j++) {
            $substitution_cost = $primary_string_array[$i] === $secondary_string_array[$j]
                ? 0.0
                : $this->cost_replacement;
            $current_row[$j + 1] = min(
                $previous_row[$j + 1] + $this->cost_deletion,
                $current_row[$j] + $this->cost_insertion,
                $previous_row[$j] + $substitution_cost
            );
        }
        // Every later value is derived from this row by adding costs, so once
        // the cheapest cell is above the limit the result will be as well.
        if ($is_limited && min($current_row) > $this->maximum_distance) {
            return -1.0;
        }
        $previous_row = $current_row;
    }

    $distance = $previous_row[$secondary_string_length];
    if ($is_limited && $distance > $this->maximum_distance) {
        return -1.0;
    }
    return $distance;
}
121
/**
 * Splits a string into a list of its characters, using the ilStr helpers
 * for measuring the string and extracting each character.
 *
 * @param string $string_to_convert string to split
 * @return array list of one-character strings, indexed from 0
 */
private function stringToCharacterArray(string $string_to_convert): array
{
    $characters = [];
    $total = ilStr::strLen($string_to_convert);
    $position = 0;
    while ($position < $total) {
        // Appending keeps the keys sequential, matching the character position.
        $characters[] = ilStr::subStr($string_to_convert, $position, 1);
        $position++;
    }
    return $characters;
}
137
/**
 * Returns the Levenshtein distance between the configured primary string
 * and the given value.
 *
 * @param mixed $from value to compare; must be a string
 * @return float result of levenshtein() for the given string
 * @throws InvalidArgumentException if $from is not a string
 */
public function transform($from): float
{
    if (is_string($from)) {
        return $this->levenshtein($from);
    }

    throw new InvalidArgumentException(__METHOD__ . " the argument is not a string.");
}
155}
transform($from)
The transform method checks whether the $from variable contains a string; otherwise an InvalidArgumentException is thrown.
__construct(string $primary_string='', int $maximum_distance=0, float $cost_insertion=1.0, float $cost_replacement=1.0, float $cost_deletion=1.0)
This constructor allows the Levenshtein distance function to be parameterized.
levenshtein(string $secondary_string)
Levenshtein function alternative code as mentioned in the bug report: https://mantis....
stringToCharacterArray(string $string_to_convert)
Helper function for levenshtein distance calculation, used to convert strings into character arrays.
This file is part of ILIAS, a powerful learning management system published by ILIAS open source e-Le...
Definition: class.ilStr.php:20
static subStr(string $a_str, int $a_start, ?int $a_length=null)
Definition: class.ilStr.php:21
static strLen(string $a_string)
Definition: class.ilStr.php:60
A transformation is a function from one datatype to another.