ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
FormulaParser.php
Go to the documentation of this file.
1 <?php
2 
4 
28 {
29  // Character constants
30  const QUOTE_DOUBLE = '"';
31  const QUOTE_SINGLE = '\'';
32  const BRACKET_CLOSE = ']';
33  const BRACKET_OPEN = '[';
34  const BRACE_OPEN = '{';
35  const BRACE_CLOSE = '}';
36  const PAREN_OPEN = '(';
37  const PAREN_CLOSE = ')';
38  const SEMICOLON = ';';
39  const WHITESPACE = ' ';
40  const COMMA = ',';
41  const ERROR_START = '#';
42 
43  const OPERATORS_SN = '+-';
44  const OPERATORS_INFIX = '+-*/^&=><';
45  const OPERATORS_POSTFIX = '%';
46 
52  private $formula;
53 
59  private $tokens = [];
60 
66  public function __construct($pFormula = '')
67  {
68  // Check parameters
69  if ($pFormula === null) {
70  throw new Exception('Invalid parameter passed: formula');
71  }
72 
73  // Initialise values
74  $this->formula = trim($pFormula);
75  // Parse!
76  $this->parseToTokens();
77  }
78 
84  public function getFormula()
85  {
86  return $this->formula;
87  }
88 
94  public function getToken(int $pId = 0): FormulaToken
95  {
96  if (isset($this->tokens[$pId])) {
97  return $this->tokens[$pId];
98  }
99 
100  throw new Exception("Token with id $pId does not exist.");
101  }
102 
108  public function getTokenCount()
109  {
110  return count($this->tokens);
111  }
112 
118  public function getTokens()
119  {
120  return $this->tokens;
121  }
122 
126  private function parseToTokens(): void
127  {
128  // No attempt is made to verify formulas; assumes formulas are derived from Excel, where
129  // they can only exist if valid; stack overflows/underflows sunk as nulls without exceptions.
130 
131  // Check if the formula has a valid starting =
132  $formulaLength = strlen($this->formula);
133  if ($formulaLength < 2 || $this->formula[0] != '=') {
134  return;
135  }
136 
137  // Helper variables
138  $tokens1 = $tokens2 = $stack = [];
139  $inString = $inPath = $inRange = $inError = false;
140  $token = $previousToken = $nextToken = null;
141 
142  $index = 1;
143  $value = '';
144 
145  $ERRORS = ['#NULL!', '#DIV/0!', '#VALUE!', '#REF!', '#NAME?', '#NUM!', '#N/A'];
146  $COMPARATORS_MULTI = ['>=', '<=', '<>'];
147 
148  while ($index < $formulaLength) {
149  // state-dependent character evaluation (order is important)
150 
151  // double-quoted strings
152  // embeds are doubled
153  // end marks token
154  if ($inString) {
155  if ($this->formula[$index] == self::QUOTE_DOUBLE) {
156  if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_DOUBLE)) {
157  $value .= self::QUOTE_DOUBLE;
158  ++$index;
159  } else {
160  $inString = false;
162  $value = '';
163  }
164  } else {
165  $value .= $this->formula[$index];
166  }
167  ++$index;
168 
169  continue;
170  }
171 
172  // single-quoted strings (links)
173  // embeds are double
174  // end does not mark a token
175  if ($inPath) {
176  if ($this->formula[$index] == self::QUOTE_SINGLE) {
177  if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_SINGLE)) {
178  $value .= self::QUOTE_SINGLE;
179  ++$index;
180  } else {
181  $inPath = false;
182  }
183  } else {
184  $value .= $this->formula[$index];
185  }
186  ++$index;
187 
188  continue;
189  }
190 
191  // bracked strings (R1C1 range index or linked workbook name)
192  // no embeds (changed to "()" by Excel)
193  // end does not mark a token
194  if ($inRange) {
195  if ($this->formula[$index] == self::BRACKET_CLOSE) {
196  $inRange = false;
197  }
198  $value .= $this->formula[$index];
199  ++$index;
200 
201  continue;
202  }
203 
204  // error values
205  // end marks a token, determined from absolute list of values
206  if ($inError) {
207  $value .= $this->formula[$index];
208  ++$index;
209  if (in_array($value, $ERRORS)) {
210  $inError = false;
212  $value = '';
213  }
214 
215  continue;
216  }
217 
218  // scientific notation check
219  if (strpos(self::OPERATORS_SN, $this->formula[$index]) !== false) {
220  if (strlen($value) > 1) {
221  if (preg_match('/^[1-9]{1}(\\.\\d+)?E{1}$/', $this->formula[$index]) != 0) {
222  $value .= $this->formula[$index];
223  ++$index;
224 
225  continue;
226  }
227  }
228  }
229 
230  // independent character evaluation (order not important)
231 
232  // establish state-dependent character evaluations
233  if ($this->formula[$index] == self::QUOTE_DOUBLE) {
234  if (strlen($value) > 0) {
235  // unexpected
236  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
237  $value = '';
238  }
239  $inString = true;
240  ++$index;
241 
242  continue;
243  }
244 
245  if ($this->formula[$index] == self::QUOTE_SINGLE) {
246  if (strlen($value) > 0) {
247  // unexpected
248  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
249  $value = '';
250  }
251  $inPath = true;
252  ++$index;
253 
254  continue;
255  }
256 
257  if ($this->formula[$index] == self::BRACKET_OPEN) {
258  $inRange = true;
259  $value .= self::BRACKET_OPEN;
260  ++$index;
261 
262  continue;
263  }
264 
265  if ($this->formula[$index] == self::ERROR_START) {
266  if (strlen($value) > 0) {
267  // unexpected
268  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
269  $value = '';
270  }
271  $inError = true;
272  $value .= self::ERROR_START;
273  ++$index;
274 
275  continue;
276  }
277 
278  // mark start and end of arrays and array rows
279  if ($this->formula[$index] == self::BRACE_OPEN) {
280  if (strlen($value) > 0) {
281  // unexpected
282  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
283  $value = '';
284  }
285 
287  $tokens1[] = $tmp;
288  $stack[] = clone $tmp;
289 
291  $tokens1[] = $tmp;
292  $stack[] = clone $tmp;
293 
294  ++$index;
295 
296  continue;
297  }
298 
299  if ($this->formula[$index] == self::SEMICOLON) {
300  if (strlen($value) > 0) {
301  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
302  $value = '';
303  }
304 
305  $tmp = array_pop($stack);
306  $tmp->setValue('');
307  $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
308  $tokens1[] = $tmp;
309 
311  $tokens1[] = $tmp;
312 
314  $tokens1[] = $tmp;
315  $stack[] = clone $tmp;
316 
317  ++$index;
318 
319  continue;
320  }
321 
322  if ($this->formula[$index] == self::BRACE_CLOSE) {
323  if (strlen($value) > 0) {
324  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
325  $value = '';
326  }
327 
328  $tmp = array_pop($stack);
329  $tmp->setValue('');
330  $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
331  $tokens1[] = $tmp;
332 
333  $tmp = array_pop($stack);
334  $tmp->setValue('');
335  $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
336  $tokens1[] = $tmp;
337 
338  ++$index;
339 
340  continue;
341  }
342 
343  // trim white-space
344  if ($this->formula[$index] == self::WHITESPACE) {
345  if (strlen($value) > 0) {
346  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
347  $value = '';
348  }
350  ++$index;
351  while (($this->formula[$index] == self::WHITESPACE) && ($index < $formulaLength)) {
352  ++$index;
353  }
354 
355  continue;
356  }
357 
358  // multi-character comparators
359  if (($index + 2) <= $formulaLength) {
360  if (in_array(substr($this->formula, $index, 2), $COMPARATORS_MULTI)) {
361  if (strlen($value) > 0) {
362  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
363  $value = '';
364  }
365  $tokens1[] = new FormulaToken(substr($this->formula, $index, 2), FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_LOGICAL);
366  $index += 2;
367 
368  continue;
369  }
370  }
371 
372  // standard infix operators
373  if (strpos(self::OPERATORS_INFIX, $this->formula[$index]) !== false) {
374  if (strlen($value) > 0) {
375  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
376  $value = '';
377  }
378  $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORINFIX);
379  ++$index;
380 
381  continue;
382  }
383 
384  // standard postfix operators (only one)
385  if (strpos(self::OPERATORS_POSTFIX, $this->formula[$index]) !== false) {
386  if (strlen($value) > 0) {
387  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
388  $value = '';
389  }
390  $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX);
391  ++$index;
392 
393  continue;
394  }
395 
396  // start subexpression or function
397  if ($this->formula[$index] == self::PAREN_OPEN) {
398  if (strlen($value) > 0) {
400  $tokens1[] = $tmp;
401  $stack[] = clone $tmp;
402  $value = '';
403  } else {
405  $tokens1[] = $tmp;
406  $stack[] = clone $tmp;
407  }
408  ++$index;
409 
410  continue;
411  }
412 
413  // function, subexpression, or array parameters, or operand unions
414  if ($this->formula[$index] == self::COMMA) {
415  if (strlen($value) > 0) {
416  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
417  $value = '';
418  }
419 
420  $tmp = array_pop($stack);
421  $tmp->setValue('');
422  $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
423  $stack[] = $tmp;
424 
425  if ($tmp->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) {
427  } else {
428  $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT);
429  }
430  ++$index;
431 
432  continue;
433  }
434 
435  // stop subexpression
436  if ($this->formula[$index] == self::PAREN_CLOSE) {
437  if (strlen($value) > 0) {
438  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
439  $value = '';
440  }
441 
442  $tmp = array_pop($stack);
443  $tmp->setValue('');
444  $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
445  $tokens1[] = $tmp;
446 
447  ++$index;
448 
449  continue;
450  }
451 
452  // token accumulation
453  $value .= $this->formula[$index];
454  ++$index;
455  }
456 
457  // dump remaining accumulation
458  if (strlen($value) > 0) {
459  $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
460  }
461 
462  // move tokenList to new set, excluding unnecessary white-space tokens and converting necessary ones to intersections
463  $tokenCount = count($tokens1);
464  for ($i = 0; $i < $tokenCount; ++$i) {
465  $token = $tokens1[$i];
466  if (isset($tokens1[$i - 1])) {
467  $previousToken = $tokens1[$i - 1];
468  } else {
469  $previousToken = null;
470  }
471  if (isset($tokens1[$i + 1])) {
472  $nextToken = $tokens1[$i + 1];
473  } else {
474  $nextToken = null;
475  }
476 
477  if ($token === null) {
478  continue;
479  }
480 
481  if ($token->getTokenType() != FormulaToken::TOKEN_TYPE_WHITESPACE) {
482  $tokens2[] = $token;
483 
484  continue;
485  }
486 
487  if ($previousToken === null) {
488  continue;
489  }
490 
491  if (
492  !(
493  (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
494  (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
495  ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)
496  )
497  ) {
498  continue;
499  }
500 
501  if ($nextToken === null) {
502  continue;
503  }
504 
505  if (
506  !(
507  (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) ||
508  (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) ||
509  ($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)
510  )
511  ) {
512  continue;
513  }
514 
516  }
517 
518  // move tokens to final list, switching infix "-" operators to prefix when appropriate, switching infix "+" operators
519  // to noop when appropriate, identifying operand and infix-operator subtypes, and pulling "@" from function names
520  $this->tokens = [];
521 
522  $tokenCount = count($tokens2);
523  for ($i = 0; $i < $tokenCount; ++$i) {
524  $token = $tokens2[$i];
525  if (isset($tokens2[$i - 1])) {
526  $previousToken = $tokens2[$i - 1];
527  } else {
528  $previousToken = null;
529  }
530  if (isset($tokens2[$i + 1])) {
531  $nextToken = $tokens2[$i + 1];
532  } else {
533  $nextToken = null;
534  }
535 
536  if ($token === null) {
537  continue;
538  }
539 
540  if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '-') {
541  if ($i == 0) {
543  } elseif (
544  (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) &&
545  ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
546  (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) &&
547  ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
548  ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) ||
549  ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)
550  ) {
551  $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH);
552  } else {
554  }
555 
556  $this->tokens[] = $token;
557 
558  continue;
559  }
560 
561  if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '+') {
562  if ($i == 0) {
563  continue;
564  } elseif (
565  (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) &&
566  ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
567  (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) &&
568  ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
569  ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) ||
570  ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)
571  ) {
572  $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH);
573  } else {
574  continue;
575  }
576 
577  $this->tokens[] = $token;
578 
579  continue;
580  }
581 
582  if (
583  $token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX &&
584  $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING
585  ) {
586  if (strpos('<>=', substr($token->getValue(), 0, 1)) !== false) {
588  } elseif ($token->getValue() == '&') {
590  } else {
591  $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH);
592  }
593 
594  $this->tokens[] = $token;
595 
596  continue;
597  }
598 
599  if (
600  $token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND &&
601  $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING
602  ) {
603  if (!is_numeric($token->getValue())) {
604  if (strtoupper($token->getValue()) == 'TRUE' || strtoupper($token->getValue()) == 'FALSE') {
606  } else {
607  $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_RANGE);
608  }
609  } else {
610  $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_NUMBER);
611  }
612 
613  $this->tokens[] = $token;
614 
615  continue;
616  }
617 
618  if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) {
619  if (strlen($token->getValue()) > 0) {
620  if (substr($token->getValue(), 0, 1) == '@') {
621  $token->setValue(substr($token->getValue(), 1));
622  }
623  }
624  }
625 
626  $this->tokens[] = $token;
627  }
628  }
629 }
PARTLY BASED ON: Copyright (c) 2007 E.
$index
Definition: metadata.php:60
PARTLY BASED ON: Copyright (c) 2007 E.
$i
Definition: disco.tpl.php:19
__construct($pFormula='')
Create a new FormulaParser.