ILIAS  trunk Revision v11.0_alpha-2645-g16283d3b3f8
Tokenizer.php
Go to the documentation of this file.
1 <?php
2 
19 declare(strict_types=1);
20 
22 
25 
26 class Tokenizer
27 {
28  public static array $operators = [
29  Operators::ADDITION,
30  Operators::SUBTRACTION,
31  Operators::MULTIPLICATION,
32  Operators::DIVISION,
33  Operators::POWER
34  ];
35 
36  public static array $functions = [
37  Functions::SUM,
38  Functions::AVERAGE,
39  Functions::MIN,
41  ];
42 
43  public const FIELD_OPENER = '[[';
44 
49  protected function valuesToTokens(array $return): array
50  {
51  return array_map(function (string $token): Token {
52  if ($this->isMathToken($token)) {
53  return new MathToken(trim($token));
54  }
55  return new Token(trim($token));
56  }, $return);
57  }
58 
64  public function tokenize(string $expression): array
65  {
66  $expression = ltrim($expression, '=');
67  $expression = trim($expression);
68 
69  $matches = [];
70  //Match all & inside [] (e.g. [[Metadaten & OER]])
71  preg_match_all("/\[\[[^\]]*&[^\]]*\]\]/ui", $expression, $matches);
72  $matches_inside_brackets = $matches[0];
73  $replace_random = sha1("replacement_string");
74 
75  //Replace those & with a set of unprobable chars, to be ignored by the following selection of tokens
76  foreach ($matches_inside_brackets as $match) {
77  if (!$match) {
78  continue;
79  }
80  $match_save = str_replace("&", $replace_random, $match);
81  $expression = str_replace($match, $match_save, $expression);
82  }
83 
84  //var_dump($expression);
85  preg_match_all("/([^\\\\&]|\\\\&)*/ui", $expression, $matches);
86  $results = $matches[0];
87 
88  $return = [];
89  foreach ($results as $result) {
90  if (!$result) {
91  continue;
92  }
93  $replace = str_ireplace('\&', '&', $result);
94 
95  //Replace those & before replaced chars back
96  $return[] = str_replace($replace_random, "&", $replace);
97  }
98 
99  return $this->valuesToTokens($return);
100  }
101 
102  private function isMathToken(string $token): bool
103  {
104  $operators = array_map(
105  static fn(Operators $operator): string => $operator->value,
106  self::$operators
107  );
108 
109  $functions = array_map(
110  static fn(Functions $functions): string => $functions->value,
111  self::$functions
112  );
113 
114  $result = (bool) preg_match(
115  '#(\\' . implode("|\\", $operators) . '|' . implode('|', $functions) . ')#',
116  $token
117  );
118 
119  return $result;
120  }
121 
127  public function tokenizeMath(string $math_expression): array
128  {
129  $operators = array_map(
130  static fn(Operators $operator): string => $operator->value,
131  self::$operators
132  );
133  $pattern = '#((^\[\[)[\d\.]+)|(\(|\)|\\' . implode("|\\", $operators) . ')#';
134  $tokens = preg_split($pattern, $math_expression, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
135 
136  return $this->valuesToTokens($tokens);
137  }
138 }
Token
The string representation of these tokens must not occur in the names of metadata elements...
Definition: Token.php:27
tokenizeMath(string $math_expression)
Generate tokens for a math expression.
Definition: Tokenizer.php:127
$token
Definition: xapitoken.php:70
tokenize(string $expression)
Split expression by & (ignore escaped &-symbols with backslash)
Definition: Tokenizer.php:64
$results