ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
FormulaParser.php
Go to the documentation of this file.
1<?php
2
4
28{
29 // Character constants
30 const QUOTE_DOUBLE = '"';
31 const QUOTE_SINGLE = '\'';
32 const BRACKET_CLOSE = ']';
33 const BRACKET_OPEN = '[';
34 const BRACE_OPEN = '{';
35 const BRACE_CLOSE = '}';
36 const PAREN_OPEN = '(';
37 const PAREN_CLOSE = ')';
38 const SEMICOLON = ';';
39 const WHITESPACE = ' ';
40 const COMMA = ',';
41 const ERROR_START = '#';
42
43 const OPERATORS_SN = '+-';
44 const OPERATORS_INFIX = '+-*/^&=><';
45 const OPERATORS_POSTFIX = '%';
46
52 private $formula;
53
59 private $tokens = [];
60
66 public function __construct($pFormula = '')
67 {
68 // Check parameters
69 if ($pFormula === null) {
70 throw new Exception('Invalid parameter passed: formula');
71 }
72
73 // Initialise values
74 $this->formula = trim($pFormula);
75 // Parse!
76 $this->parseToTokens();
77 }
78
84 public function getFormula()
85 {
86 return $this->formula;
87 }
88
94 public function getToken(int $pId = 0): FormulaToken
95 {
96 if (isset($this->tokens[$pId])) {
97 return $this->tokens[$pId];
98 }
99
100 throw new Exception("Token with id $pId does not exist.");
101 }
102
108 public function getTokenCount()
109 {
110 return count($this->tokens);
111 }
112
118 public function getTokens()
119 {
120 return $this->tokens;
121 }
122
126 private function parseToTokens(): void
127 {
128 // No attempt is made to verify formulas; assumes formulas are derived from Excel, where
129 // they can only exist if valid; stack overflows/underflows sunk as nulls without exceptions.
130
131 // Check if the formula has a valid starting =
132 $formulaLength = strlen($this->formula);
133 if ($formulaLength < 2 || $this->formula[0] != '=') {
134 return;
135 }
136
137 // Helper variables
138 $tokens1 = $tokens2 = $stack = [];
139 $inString = $inPath = $inRange = $inError = false;
140 $token = $previousToken = $nextToken = null;
141
142 $index = 1;
143 $value = '';
144
145 $ERRORS = ['#NULL!', '#DIV/0!', '#VALUE!', '#REF!', '#NAME?', '#NUM!', '#N/A'];
146 $COMPARATORS_MULTI = ['>=', '<=', '<>'];
147
148 while ($index < $formulaLength) {
149 // state-dependent character evaluation (order is important)
150
151 // double-quoted strings
152 // embeds are doubled
153 // end marks token
154 if ($inString) {
155 if ($this->formula[$index] == self::QUOTE_DOUBLE) {
156 if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_DOUBLE)) {
157 $value .= self::QUOTE_DOUBLE;
158 ++$index;
159 } else {
160 $inString = false;
162 $value = '';
163 }
164 } else {
165 $value .= $this->formula[$index];
166 }
167 ++$index;
168
169 continue;
170 }
171
172 // single-quoted strings (links)
173 // embeds are double
174 // end does not mark a token
175 if ($inPath) {
176 if ($this->formula[$index] == self::QUOTE_SINGLE) {
177 if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_SINGLE)) {
178 $value .= self::QUOTE_SINGLE;
179 ++$index;
180 } else {
181 $inPath = false;
182 }
183 } else {
184 $value .= $this->formula[$index];
185 }
186 ++$index;
187
188 continue;
189 }
190
191 // bracked strings (R1C1 range index or linked workbook name)
192 // no embeds (changed to "()" by Excel)
193 // end does not mark a token
194 if ($inRange) {
195 if ($this->formula[$index] == self::BRACKET_CLOSE) {
196 $inRange = false;
197 }
198 $value .= $this->formula[$index];
199 ++$index;
200
201 continue;
202 }
203
204 // error values
205 // end marks a token, determined from absolute list of values
206 if ($inError) {
207 $value .= $this->formula[$index];
208 ++$index;
209 if (in_array($value, $ERRORS)) {
210 $inError = false;
212 $value = '';
213 }
214
215 continue;
216 }
217
218 // scientific notation check
219 if (strpos(self::OPERATORS_SN, $this->formula[$index]) !== false) {
220 if (strlen($value) > 1) {
221 if (preg_match('/^[1-9]{1}(\\.\\d+)?E{1}$/', $this->formula[$index]) != 0) {
222 $value .= $this->formula[$index];
223 ++$index;
224
225 continue;
226 }
227 }
228 }
229
230 // independent character evaluation (order not important)
231
232 // establish state-dependent character evaluations
233 if ($this->formula[$index] == self::QUOTE_DOUBLE) {
234 if (strlen($value) > 0) {
235 // unexpected
236 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
237 $value = '';
238 }
239 $inString = true;
240 ++$index;
241
242 continue;
243 }
244
245 if ($this->formula[$index] == self::QUOTE_SINGLE) {
246 if (strlen($value) > 0) {
247 // unexpected
248 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
249 $value = '';
250 }
251 $inPath = true;
252 ++$index;
253
254 continue;
255 }
256
257 if ($this->formula[$index] == self::BRACKET_OPEN) {
258 $inRange = true;
259 $value .= self::BRACKET_OPEN;
260 ++$index;
261
262 continue;
263 }
264
265 if ($this->formula[$index] == self::ERROR_START) {
266 if (strlen($value) > 0) {
267 // unexpected
268 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
269 $value = '';
270 }
271 $inError = true;
272 $value .= self::ERROR_START;
273 ++$index;
274
275 continue;
276 }
277
278 // mark start and end of arrays and array rows
279 if ($this->formula[$index] == self::BRACE_OPEN) {
280 if (strlen($value) > 0) {
281 // unexpected
282 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
283 $value = '';
284 }
285
287 $tokens1[] = $tmp;
288 $stack[] = clone $tmp;
289
291 $tokens1[] = $tmp;
292 $stack[] = clone $tmp;
293
294 ++$index;
295
296 continue;
297 }
298
299 if ($this->formula[$index] == self::SEMICOLON) {
300 if (strlen($value) > 0) {
301 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
302 $value = '';
303 }
304
305 $tmp = array_pop($stack);
306 $tmp->setValue('');
307 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
308 $tokens1[] = $tmp;
309
311 $tokens1[] = $tmp;
312
314 $tokens1[] = $tmp;
315 $stack[] = clone $tmp;
316
317 ++$index;
318
319 continue;
320 }
321
322 if ($this->formula[$index] == self::BRACE_CLOSE) {
323 if (strlen($value) > 0) {
324 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
325 $value = '';
326 }
327
328 $tmp = array_pop($stack);
329 $tmp->setValue('');
330 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
331 $tokens1[] = $tmp;
332
333 $tmp = array_pop($stack);
334 $tmp->setValue('');
335 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
336 $tokens1[] = $tmp;
337
338 ++$index;
339
340 continue;
341 }
342
343 // trim white-space
344 if ($this->formula[$index] == self::WHITESPACE) {
345 if (strlen($value) > 0) {
346 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
347 $value = '';
348 }
350 ++$index;
351 while (($this->formula[$index] == self::WHITESPACE) && ($index < $formulaLength)) {
352 ++$index;
353 }
354
355 continue;
356 }
357
358 // multi-character comparators
359 if (($index + 2) <= $formulaLength) {
360 if (in_array(substr($this->formula, $index, 2), $COMPARATORS_MULTI)) {
361 if (strlen($value) > 0) {
362 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
363 $value = '';
364 }
366 $index += 2;
367
368 continue;
369 }
370 }
371
372 // standard infix operators
373 if (strpos(self::OPERATORS_INFIX, $this->formula[$index]) !== false) {
374 if (strlen($value) > 0) {
375 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
376 $value = '';
377 }
378 $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORINFIX);
379 ++$index;
380
381 continue;
382 }
383
384 // standard postfix operators (only one)
385 if (strpos(self::OPERATORS_POSTFIX, $this->formula[$index]) !== false) {
386 if (strlen($value) > 0) {
387 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
388 $value = '';
389 }
390 $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX);
391 ++$index;
392
393 continue;
394 }
395
396 // start subexpression or function
397 if ($this->formula[$index] == self::PAREN_OPEN) {
398 if (strlen($value) > 0) {
400 $tokens1[] = $tmp;
401 $stack[] = clone $tmp;
402 $value = '';
403 } else {
405 $tokens1[] = $tmp;
406 $stack[] = clone $tmp;
407 }
408 ++$index;
409
410 continue;
411 }
412
413 // function, subexpression, or array parameters, or operand unions
414 if ($this->formula[$index] == self::COMMA) {
415 if (strlen($value) > 0) {
416 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
417 $value = '';
418 }
419
420 $tmp = array_pop($stack);
421 $tmp->setValue('');
422 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
423 $stack[] = $tmp;
424
425 if ($tmp->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) {
427 } else {
428 $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT);
429 }
430 ++$index;
431
432 continue;
433 }
434
435 // stop subexpression
436 if ($this->formula[$index] == self::PAREN_CLOSE) {
437 if (strlen($value) > 0) {
438 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
439 $value = '';
440 }
441
442 $tmp = array_pop($stack);
443 $tmp->setValue('');
444 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
445 $tokens1[] = $tmp;
446
447 ++$index;
448
449 continue;
450 }
451
452 // token accumulation
453 $value .= $this->formula[$index];
454 ++$index;
455 }
456
457 // dump remaining accumulation
458 if (strlen($value) > 0) {
459 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
460 }
461
462 // move tokenList to new set, excluding unnecessary white-space tokens and converting necessary ones to intersections
463 $tokenCount = count($tokens1);
464 for ($i = 0; $i < $tokenCount; ++$i) {
465 $token = $tokens1[$i];
466 if (isset($tokens1[$i - 1])) {
467 $previousToken = $tokens1[$i - 1];
468 } else {
469 $previousToken = null;
470 }
471 if (isset($tokens1[$i + 1])) {
472 $nextToken = $tokens1[$i + 1];
473 } else {
474 $nextToken = null;
475 }
476
477 if ($token === null) {
478 continue;
479 }
480
481 if ($token->getTokenType() != FormulaToken::TOKEN_TYPE_WHITESPACE) {
482 $tokens2[] = $token;
483
484 continue;
485 }
486
487 if ($previousToken === null) {
488 continue;
489 }
490
491 if (
492 !(
493 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
494 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
495 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)
496 )
497 ) {
498 continue;
499 }
500
501 if ($nextToken === null) {
502 continue;
503 }
504
505 if (
506 !(
507 (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) ||
508 (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) ||
509 ($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)
510 )
511 ) {
512 continue;
513 }
514
516 }
517
518 // move tokens to final list, switching infix "-" operators to prefix when appropriate, switching infix "+" operators
519 // to noop when appropriate, identifying operand and infix-operator subtypes, and pulling "@" from function names
520 $this->tokens = [];
521
522 $tokenCount = count($tokens2);
523 for ($i = 0; $i < $tokenCount; ++$i) {
524 $token = $tokens2[$i];
525 if (isset($tokens2[$i - 1])) {
526 $previousToken = $tokens2[$i - 1];
527 } else {
528 $previousToken = null;
529 }
530 if (isset($tokens2[$i + 1])) {
531 $nextToken = $tokens2[$i + 1];
532 } else {
533 $nextToken = null;
534 }
535
536 if ($token === null) {
537 continue;
538 }
539
540 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '-') {
541 if ($i == 0) {
543 } elseif (
544 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) &&
545 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
546 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) &&
547 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
548 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) ||
549 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)
550 ) {
552 } else {
554 }
555
556 $this->tokens[] = $token;
557
558 continue;
559 }
560
561 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '+') {
562 if ($i == 0) {
563 continue;
564 } elseif (
565 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) &&
566 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
567 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) &&
568 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
569 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) ||
570 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)
571 ) {
573 } else {
574 continue;
575 }
576
577 $this->tokens[] = $token;
578
579 continue;
580 }
581
582 if (
584 $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING
585 ) {
586 if (strpos('<>=', substr($token->getValue(), 0, 1)) !== false) {
588 } elseif ($token->getValue() == '&') {
590 } else {
592 }
593
594 $this->tokens[] = $token;
595
596 continue;
597 }
598
599 if (
600 $token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND &&
601 $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING
602 ) {
603 if (!is_numeric($token->getValue())) {
604 if (strtoupper($token->getValue()) == 'TRUE' || strtoupper($token->getValue()) == 'FALSE') {
606 } else {
608 }
609 } else {
611 }
612
613 $this->tokens[] = $token;
614
615 continue;
616 }
617
618 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) {
619 if (strlen($token->getValue()) > 0) {
620 if (substr($token->getValue(), 0, 1) == '@') {
621 $token->setValue(substr($token->getValue(), 1));
622 }
623 }
624 }
625
626 $this->tokens[] = $token;
627 }
628 }
629}
An exception for terminatinating execution or to throw for unit testing.
PARTLY BASED ON: Copyright (c) 2007 E.
__construct($pFormula='')
Create a new FormulaParser.
PARTLY BASED ON: Copyright (c) 2007 E.
$i
Definition: disco.tpl.php:19
$index
Definition: metadata.php:60