ILIAS  Release_4_4_x_branch Revision 61816
 All Data Structures Namespaces Files Functions Variables Groups Pages
FixNesting.php
Go to the documentation of this file.
1 <?php
2 
48 {
49 
50  public function execute($tokens, $config, $context) {
51  //####################################################################//
52  // Pre-processing
53 
54  // get a copy of the HTML definition
55  $definition = $config->getHTMLDefinition();
56 
57  $excludes_enabled = !$config->get('Core.DisableExcludes');
58 
59  // insert implicit "parent" node, will be removed at end.
60  // DEFINITION CALL
61  $parent_name = $definition->info_parent;
62  array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
63  $tokens[] = new HTMLPurifier_Token_End($parent_name);
64 
65  // setup the context variable 'IsInline', for chameleon processing
66  // is 'false' when we are not inline, 'true' when it must always
67  // be inline, and an integer when it is inline for a certain
68  // branch of the document tree
69  $is_inline = $definition->info_parent_def->descendants_are_inline;
70  $context->register('IsInline', $is_inline);
71 
72  // setup error collector
73  $e =& $context->get('ErrorCollector', true);
74 
75  //####################################################################//
76  // Loop initialization
77 
78  // stack that contains the indexes of all parents,
79  // $stack[count($stack)-1] being the current parent
80  $stack = array();
81 
82  // stack that contains all elements that are excluded
83  // it is organized by parent elements, similar to $stack,
84  // but it is only populated when an element with exclusions is
85  // processed, i.e. there won't be empty exclusions.
86  $exclude_stack = array();
87 
88  // variable that contains the start token while we are processing
89  // nodes. This enables error reporting to do its job
90  $start_token = false;
91  $context->register('CurrentToken', $start_token);
92 
93  //####################################################################//
94  // Loop
95 
96  // iterate through all start nodes. Determining the start node
97  // is complicated so it has been omitted from the loop construct
98  for ($i = 0, $size = count($tokens) ; $i < $size; ) {
99 
100  //################################################################//
101  // Gather information on children
102 
103  // child token accumulator
104  $child_tokens = array();
105 
106  // scroll to the end of this node, report number, and collect
107  // all children
108  for ($j = $i, $depth = 0; ; $j++) {
109  if ($tokens[$j] instanceof HTMLPurifier_Token_Start) {
110  $depth++;
111  // skip token assignment on first iteration, this is the
112  // token we currently are on
113  if ($depth == 1) continue;
114  } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) {
115  $depth--;
116  // skip token assignment on last iteration, this is the
117  // end token of the token we're currently on
118  if ($depth == 0) break;
119  }
120  $child_tokens[] = $tokens[$j];
121  }
122 
123  // $i is index of start token
124  // $j is index of end token
125 
126  $start_token = $tokens[$i]; // to make token available via CurrentToken
127 
128  //################################################################//
129  // Gather information on parent
130 
131  // calculate parent information
132  if ($count = count($stack)) {
133  $parent_index = $stack[$count-1];
134  $parent_name = $tokens[$parent_index]->name;
135  if ($parent_index == 0) {
136  $parent_def = $definition->info_parent_def;
137  } else {
138  $parent_def = $definition->info[$parent_name];
139  }
140  } else {
141  // processing as if the parent were the "root" node
142  // unknown info, it won't be used anyway, in the future,
143  // we may want to enforce one element only (this is
144  // necessary for HTML Purifier to clean entire documents
145  $parent_index = $parent_name = $parent_def = null;
146  }
147 
148  // calculate context
149  if ($is_inline === false) {
150  // check if conditions make it inline
151  if (!empty($parent_def) && $parent_def->descendants_are_inline) {
152  $is_inline = $count - 1;
153  }
154  } else {
155  // check if we're out of inline
156  if ($count === $is_inline) {
157  $is_inline = false;
158  }
159  }
160 
161  //################################################################//
162  // Determine whether element is explicitly excluded SGML-style
163 
164  // determine whether or not element is excluded by checking all
165  // parent exclusions. The array should not be very large, two
166  // elements at most.
167  $excluded = false;
168  if (!empty($exclude_stack) && $excludes_enabled) {
169  foreach ($exclude_stack as $lookup) {
170  if (isset($lookup[$tokens[$i]->name])) {
171  $excluded = true;
172  // no need to continue processing
173  break;
174  }
175  }
176  }
177 
178  //################################################################//
179  // Perform child validation
180 
181  if ($excluded) {
182  // there is an exclusion, remove the entire node
183  $result = false;
184  $excludes = array(); // not used, but good to initialize anyway
185  } else {
186  // DEFINITION CALL
187  if ($i === 0) {
188  // special processing for the first node
189  $def = $definition->info_parent_def;
190  } else {
191  $def = $definition->info[$tokens[$i]->name];
192 
193  }
194 
195  if (!empty($def->child)) {
196  // have DTD child def validate children
197  $result = $def->child->validateChildren(
198  $child_tokens, $config, $context);
199  } else {
200  // weird, no child definition, get rid of everything
201  $result = false;
202  }
203 
204  // determine whether or not this element has any exclusions
205  $excludes = $def->excludes;
206  }
207 
208  // $result is now a bool or array
209 
210  //################################################################//
211  // Process result by interpreting $result
212 
213  if ($result === true || $child_tokens === $result) {
214  // leave the node as is
215 
216  // register start token as a parental node start
217  $stack[] = $i;
218 
219  // register exclusions if there are any
220  if (!empty($excludes)) $exclude_stack[] = $excludes;
221 
222  // move cursor to next possible start node
223  $i++;
224 
225  } elseif($result === false) {
226  // remove entire node
227 
228  if ($e) {
229  if ($excluded) {
230  $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
231  } else {
232  $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
233  }
234  }
235 
236  // calculate length of inner tokens and current tokens
237  $length = $j - $i + 1;
238 
239  // perform removal
240  array_splice($tokens, $i, $length);
241 
242  // update size
243  $size -= $length;
244 
245  // there is no start token to register,
246  // current node is now the next possible start node
247  // unless it turns out that we need to do a double-check
248 
249  // this is a rought heuristic that covers 100% of HTML's
250  // cases and 99% of all other cases. A child definition
251  // that would be tricked by this would be something like:
252  // ( | a b c) where it's all or nothing. Fortunately,
253  // our current implementation claims that that case would
254  // not allow empty, even if it did
255  if (!$parent_def->child->allow_empty) {
256  // we need to do a double-check [BACKTRACK]
257  $i = $parent_index;
258  array_pop($stack);
259  }
260 
261  // PROJECTED OPTIMIZATION: Process all children elements before
262  // reprocessing parent node.
263 
264  } else {
265  // replace node with $result
266 
267  // calculate length of inner tokens
268  $length = $j - $i - 1;
269 
270  if ($e) {
271  if (empty($result) && $length) {
272  $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
273  } else {
274  $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
275  }
276  }
277 
278  // perform replacement
279  array_splice($tokens, $i + 1, $length, $result);
280 
281  // update size
282  $size -= $length;
283  $size += count($result);
284 
285  // register start token as a parental node start
286  $stack[] = $i;
287 
288  // register exclusions if there are any
289  if (!empty($excludes)) $exclude_stack[] = $excludes;
290 
291  // move cursor to next possible start node
292  $i++;
293 
294  }
295 
296  //################################################################//
297  // Scroll to next start node
298 
299  // We assume, at this point, that $i is the index of the token
300  // that is the first possible new start point for a node.
301 
302  // Test if the token indeed is a start tag, if not, move forward
303  // and test again.
304  $size = count($tokens);
305  while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) {
306  if ($tokens[$i] instanceof HTMLPurifier_Token_End) {
307  // pop a token index off the stack if we ended a node
308  array_pop($stack);
309  // pop an exclusion lookup off exclusion stack if
310  // we ended node and that node had exclusions
311  if ($i == 0 || $i == $size - 1) {
312  // use specialized var if it's the super-parent
313  $s_excludes = $definition->info_parent_def->excludes;
314  } else {
315  $s_excludes = $definition->info[$tokens[$i]->name]->excludes;
316  }
317  if ($s_excludes) {
318  array_pop($exclude_stack);
319  }
320  }
321  $i++;
322  }
323 
324  }
325 
326  //####################################################################//
327  // Post-processing
328 
329  // remove implicit parent tokens at the beginning and end
330  array_shift($tokens);
331  array_pop($tokens);
332 
333  // remove context variables
334  $context->destroy('IsInline');
335  $context->destroy('CurrentToken');
336 
337  //####################################################################//
338  // Return
339 
340  return $tokens;
341 
342  }
343 
344 }
345 
346 // vim: et sw=4 sts=4