ILIAS  Release_4_0_x_branch Revision 61816
 All Data Structures Namespaces Files Functions Variables Groups Pages
MakeWellFormed.php
Go to the documentation of this file.
1 <?php
2 
7 {
8 
/**
 * Token list being processed. execute() binds this by reference to its
 * working token array and unsets it again before returning.
 */
12  protected $tokens;
13 
/**
 * Index of the current token in $tokens. execute() binds this by
 * reference to its loop counter and unsets it again before returning.
 */
17  protected $t;
18 
/**
 * Stack of currently open start-tag tokens (the current nesting).
 * execute() binds this by reference to its local stack.
 */
22  protected $stack;
23 
/**
 * List of injector objects that are active for the current execute() run.
 */
27  protected $injectors;
28 
/**
 * Configuration object that was passed to execute().
 */
32  protected $config;
33 
/**
 * Context object that was passed to execute().
 */
37  protected $context;
38 
/**
 * Rewrites a token list so that tags are balanced and properly nested.
 *
 * The method walks the token list once, keeping a stack of the currently
 * open start tags. Along the way it:
 *  - converts start tags of elements whose content type is 'empty' into
 *    empty tokens, and splits empty tokens of non-empty elements into a
 *    start/end pair;
 *  - closes the parent element automatically when the parent does not
 *    allow the new child, re-opening formatting parents afterwards;
 *  - removes stray end tags, or turns them into text when
 *    Core.EscapeInvalidTags is set;
 *  - closes any intermediate open tags when an end tag matches an element
 *    further down the stack;
 *  - appends end tags for everything still open at the end of the input;
 *  - hands each text, element and end token to the active injectors.
 *
 * While running, the context variables 'CurrentNesting', 'InputIndex',
 * 'InputTokens' and 'CurrentToken' are registered; they are destroyed
 * again before a normal return.
 *
 * @param array  $tokens  List of token objects to process.
 * @param object $config  Configuration object; getHTMLDefinition(), get()
 *                        and getBatch() are called on it.
 * @param object $context Context object; get(), register() and destroy()
 *                        are called on it.
 * @return array The corrected token list.
 * @throws HTMLPurifier_Exception When a tag token of an unexpected type is
 *                                encountered, or an injector supplies an
 *                                invalid replacement to processToken().
 */
public function execute($tokens, $config, $context) {

    $definition = $config->getHTMLDefinition();

    // local variables
    $generator = new HTMLPurifier_Generator($config, $context);
    $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
    // NOTE(review): the second argument presumably suppresses the error
    // when no collector is registered - confirm against the context class
    $e = $context->get('ErrorCollector', true);
    $t = false; // token index
    $i = false; // injector index
    $token = false; // the current token
    $reprocess = false; // whether or not to reprocess the same token
    $stack = array();

    // member variables; bound by reference so that processToken(),
    // insertBefore(), remove() and swap() act on the very same token
    // list, index and nesting stack as the loop below
    $this->stack =& $stack;
    $this->t =& $t;
    $this->tokens =& $tokens;
    $this->config = $config;
    $this->context = $context;

    // context variables
    $context->register('CurrentNesting', $stack);
    $context->register('InputIndex', $t);
    $context->register('InputTokens', $tokens);
    $context->register('CurrentToken', $token);

    // -- begin INJECTOR --

    $this->injectors = array();

    $injectors = $config->getBatch('AutoFormat');
    $def_injectors = $definition->info_injector;
    $custom_injectors = $injectors['Custom'];
    unset($injectors['Custom']); // special case
    foreach ($injectors as $injector => $b) {
        // XXX: Fix with a legitimate lookup table of enabled filters
        if (strpos($injector, '.') !== false) {
            continue;
        }
        $injector = "HTMLPurifier_Injector_$injector";
        if (!$b) {
            continue;
        }
        $this->injectors[] = new $injector;
    }
    foreach ($def_injectors as $injector) {
        // assumed to be objects
        $this->injectors[] = $injector;
    }
    foreach ($custom_injectors as $injector) {
        if (is_string($injector)) {
            $injector = "HTMLPurifier_Injector_$injector";
            $injector = new $injector;
        }
        $this->injectors[] = $injector;
    }

    // Let every injector prepare itself; an injector whose prepare()
    // reports a problem is dropped with a warning. The surviving
    // injectors are collected into a fresh list instead of splicing the
    // list being iterated: splicing by the original index shifts all
    // later entries, so a second failing injector would cause the wrong
    // element to be removed.
    $prepared = array();
    foreach ($this->injectors as $injector) {
        $error = $injector->prepare($config, $context);
        if (!$error) {
            $prepared[] = $injector;
            continue;
        }
        trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
    }
    $this->injectors = $prepared;

    // -- end INJECTOR --

    // a note on punting:
    //      In order to reduce code duplication, whenever some code needs
    //      to make HTML changes in order to make things "correct", the
    //      new HTML gets sent through the purifier, regardless of its
    //      status. This means that if we add a start token, because it
    //      was totally necessary, we don't have to update nesting; we just
    //      punt ($reprocess = true; continue;) and it does that for us.

    // isset is in loop because $tokens size changes during loop exec
    for (
        $t = 0;
        $t == 0 || isset($tokens[$t - 1]);
        // only increment if we don't need to reprocess
        $reprocess ? $reprocess = false : $t++
    ) {

        // check for a rewind; $i still holds the index of the injector
        // that was visited last in the previous iteration
        if (is_int($i) && $i >= 0) {
            // possibility: disable rewinding if the current token has a
            // rewind set on it already. This would offer protection from
            // infinite loop, but might hinder some advanced rewinding.
            $rewind_to = $this->injectors[$i]->getRewind();
            if (is_int($rewind_to) && $rewind_to < $t) {
                if ($rewind_to < 0) {
                    $rewind_to = 0;
                }
                while ($t > $rewind_to) {
                    $t--;
                    $prev = $tokens[$t];
                    // indicate that other injectors should not process this token,
                    // but we need to reprocess it
                    unset($prev->skip[$i]);
                    $prev->rewind = $i;
                    // walking backwards over a tag undoes its effect on the stack
                    if ($prev instanceof HTMLPurifier_Token_Start) {
                        array_pop($this->stack);
                    } elseif ($prev instanceof HTMLPurifier_Token_End) {
                        $this->stack[] = $prev->start;
                    }
                }
            }
            $i = false;
        }

        // handle case of document end
        if (!isset($tokens[$t])) {
            // kill processing if stack is empty
            if (empty($this->stack)) {
                break;
            }

            // peek
            $top_nesting = array_pop($this->stack);
            $this->stack[] = $top_nesting;

            // send error
            if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
                $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
            }

            // append, don't splice, since this is the end
            $tokens[] = new HTMLPurifier_Token_End($top_nesting->name);

            // punt!
            $reprocess = true;
            continue;
        }

        $token = $tokens[$t];

        // quick-check: if it's not a tag, no need to process
        if (empty($token->is_tag)) {
            if ($token instanceof HTMLPurifier_Token_Text) {
                // the first injector that has not handled this token yet
                // gets it; the token is then reprocessed so the remaining
                // injectors get their turn as well
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) {
                        continue;
                    }
                    if ($token->rewind !== null && $token->rewind !== $i) {
                        continue;
                    }
                    $injector->handleText($token);
                    $this->processToken($token, $i);
                    $reprocess = true;
                    break;
                }
            }
            // another possibility is a comment
            continue;
        }

        if (isset($definition->info[$token->name])) {
            $type = $definition->info[$token->name]->child->type;
        } else {
            $type = false; // Type is unknown, treat accordingly
        }

        // quick tag checks: anything that's *not* an end tag
        $ok = false;
        if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
            // claims to be a start tag but is empty
            $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
            $ok = true;
        } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
            // claims to be empty but really is a start tag
            $this->swap(new HTMLPurifier_Token_End($token->name));
            $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
            // punt (since we had to modify the input stream in a non-trivial way)
            $reprocess = true;
            continue;
        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
            // real empty token
            $ok = true;
        } elseif ($token instanceof HTMLPurifier_Token_Start) {
            // start tag

            // ...unless they also have to close their parent
            if (!empty($this->stack)) {

                // peek at the innermost open element
                $parent = array_pop($this->stack);
                $this->stack[] = $parent;

                if (isset($definition->info[$parent->name])) {
                    $elements = $definition->info[$parent->name]->child->getAllowedElements($config);
                    $autoclose = !isset($elements[$token->name]);
                } else {
                    $autoclose = false;
                }

                // $autoclose can only be true when the parent is defined,
                // so the definition lookup below is safe
                $carryover = false;
                if ($autoclose && $definition->info[$parent->name]->formatting) {
                    $carryover = true;
                }

                if ($autoclose) {
                    // errors need to be updated
                    $new_token = new HTMLPurifier_Token_End($parent->name);
                    $new_token->start = $parent;
                    if ($carryover) {
                        // close the parent, keep the new tag, then re-open
                        // a copy of the parent inside it
                        $element = clone $parent;
                        $element->armor['MakeWellFormed_TagClosedError'] = true;
                        $element->carryover = true;
                        $this->processToken(array($new_token, $token, $element));
                    } else {
                        $this->insertBefore($new_token);
                    }
                    if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
                        if (!$carryover) {
                            $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                        } else {
                            $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
                        }
                    }
                    $reprocess = true;
                    continue;
                }

            }
            $ok = true;
        }

        if ($ok) {
            foreach ($this->injectors as $i => $injector) {
                if (isset($token->skip[$i])) {
                    continue;
                }
                if ($token->rewind !== null && $token->rewind !== $i) {
                    continue;
                }
                $injector->handleElement($token);
                $this->processToken($token, $i);
                $reprocess = true;
                break;
            }
            if (!$reprocess) {
                // ah, nothing interesting happened; do normal processing
                $this->swap($token);
                if ($token instanceof HTMLPurifier_Token_Start) {
                    $this->stack[] = $token;
                } elseif ($token instanceof HTMLPurifier_Token_End) {
                    throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
                }
            }
            continue;
        }

        // sanity check: we should be dealing with a closing tag
        if (!$token instanceof HTMLPurifier_Token_End) {
            throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
        }

        // make sure that we have something open
        if (empty($this->stack)) {
            if ($escape_invalid_tags) {
                if ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                }
                $this->swap(new HTMLPurifier_Token_Text(
                    $generator->generateFromToken($token)
                ));
            } else {
                $this->remove();
                if ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                }
            }
            $reprocess = true;
            continue;
        }

        // first, check for the simplest case: everything closes neatly.
        // Eventually, everything passes through here; if there are problems
        // we modify the input stream accordingly and then punt, so that
        // the tokens get processed again.
        $current_parent = array_pop($this->stack);
        if ($current_parent->name == $token->name) {
            $token->start = $current_parent;
            foreach ($this->injectors as $i => $injector) {
                if (isset($token->skip[$i])) {
                    continue;
                }
                if ($token->rewind !== null && $token->rewind !== $i) {
                    continue;
                }
                $injector->handleEnd($token);
                $this->processToken($token, $i);
                // the token will be reprocessed, so restore the stack
                $this->stack[] = $current_parent;
                $reprocess = true;
                break;
            }
            continue;
        }

        // okay, so we're trying to close the wrong tag

        // undo the previous pop
        $this->stack[] = $current_parent;

        // scroll back the entire nest, trying to find our tag.
        // (feature could be to specify how far you'd like to go)
        $size = count($this->stack);
        // -2 because -1 is the last element, but we already checked that
        $skipped_tags = false;
        for ($j = $size - 2; $j >= 0; $j--) {
            if ($this->stack[$j]->name == $token->name) {
                $skipped_tags = array_slice($this->stack, $j);
                break;
            }
        }

        // we didn't find the tag, so remove
        if ($skipped_tags === false) {
            if ($escape_invalid_tags) {
                $this->swap(new HTMLPurifier_Token_Text(
                    $generator->generateFromToken($token)
                ));
                if ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                }
            } else {
                $this->remove();
                if ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                }
            }
            $reprocess = true;
            continue;
        }

        // do errors, in REVERSE $j order: a,b,c with </a></b></c>
        $c = count($skipped_tags);
        if ($e) {
            for ($j = $c - 1; $j > 0; $j--) {
                // notice we exclude $j == 0, i.e. the current ending tag, from
                // the errors...
                if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
                }
            }
        }

        // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
        $replace = array($token);
        for ($j = 1; $j < $c; $j++) {
            // ...as well as from the insertions
            $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
            $new_token->start = $skipped_tags[$j];
            array_unshift($replace, $new_token);
            if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
                // formatting elements are re-opened after the end tag
                $element = clone $skipped_tags[$j];
                $element->carryover = true;
                $element->armor['MakeWellFormed_TagClosedError'] = true;
                $replace[] = $element;
            }
        }
        $this->processToken($replace);
        $reprocess = true;
        continue;
    }

    $context->destroy('CurrentNesting');
    $context->destroy('InputTokens');
    $context->destroy('InputIndex');
    $context->destroy('CurrentToken');

    unset($this->injectors, $this->stack, $this->tokens, $this->t);
    return $tokens;
}
385 
/**
 * Applies a replacement at the current token position.
 *
 * Accepted forms of $token:
 *  - an object:  replace the current token with that object;
 *  - an integer: delete that many tokens, starting at the current one;
 *  - false:      delete the current token;
 *  - an array:   array(number of tokens to delete, new token, ...); when
 *                the first element is not an integer, one token is
 *                deleted and every element is inserted.
 *
 * When $injector is greater than -1, every inserted token takes over the
 * skip map of the first removed token and is additionally marked to be
 * skipped by that injector.
 *
 * @param mixed $token    Replacement specification, see above.
 * @param int   $injector Index of the injector performing the change, or
 *                        -1 when no injector is involved.
 * @throws HTMLPurifier_Exception If $token has an unsupported type or
 *                                asks for zero tokens to be deleted.
 */
protected function processToken($token, $injector = -1) {

    // bring every accepted form into array(delete count, node, node, ...)
    if (is_object($token)) {
        $spec = array(1, $token);
    } elseif (is_int($token)) {
        $spec = array($token);
    } elseif ($token === false) {
        $spec = array(1);
    } elseif (is_array($token)) {
        $spec = $token;
        if (!is_int($spec[0])) {
            array_unshift($spec, 1);
        }
    } else {
        throw new HTMLPurifier_Exception('Invalid token type from injector');
    }
    if ($spec[0] === 0) {
        throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
    }

    // split off the delete count; what remains are the nodes to insert
    $count = array_shift($spec);
    $removed = array_splice($this->tokens, $this->t, $count, $spec);

    if (!($injector > -1)) {
        return;
    }

    // inherit the skips of the first removed token and add our own
    $skips = isset($removed[0]) ? $removed[0]->skip : array();
    $skips[$injector] = true;
    foreach ($spec as $node) {
        $node->skip = $skips;
    }

}
432 
/**
 * Inserts a token in front of the current token; the token that was
 * current moves one position further back, no token is removed.
 *
 * @param object $token Token to insert.
 */
private function insertBefore($token) {
    // wrap the token so array_splice() inserts it as a single element
    $insertion = array($token);
    $position = $this->t;
    array_splice($this->tokens, $position, 0, $insertion);
}
439 
/**
 * Deletes the current token from the token list; the following tokens
 * move up by one position.
 */
private function remove() {
    $position = $this->t;
    array_splice($this->tokens, $position, 1);
}
447 
/**
 * Overwrites the token at the current position with the given token.
 *
 * @param object $token Token that takes the place of the current one.
 */
private function swap($token) {
    $position = $this->t;
    $this->tokens[$position] = $token;
}
454 
455 }
456 
457 // vim: et sw=4 sts=4