ILIAS  Release_4_0_x_branch Revision 61816
 All Data Structures Namespaces Files Functions Variables Groups Pages
FixNesting.php
Go to the documentation of this file.
1 <?php
2 
32 {
33 
34  public function execute($tokens, $config, $context) {
35  //####################################################################//
36  // Pre-processing
37 
38  // get a copy of the HTML definition
39  $definition = $config->getHTMLDefinition();
40 
41  // insert implicit "parent" node, will be removed at end.
42  // DEFINITION CALL
43  $parent_name = $definition->info_parent;
44  array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
45  $tokens[] = new HTMLPurifier_Token_End($parent_name);
46 
47  // setup the context variable 'IsInline', for chameleon processing
48  // is 'false' when we are not inline, 'true' when it must always
49  // be inline, and an integer when it is inline for a certain
50  // branch of the document tree
51  $is_inline = $definition->info_parent_def->descendants_are_inline;
52  $context->register('IsInline', $is_inline);
53 
54  // setup error collector
55  $e =& $context->get('ErrorCollector', true);
56 
57  //####################################################################//
58  // Loop initialization
59 
60  // stack that contains the indexes of all parents,
61  // $stack[count($stack)-1] being the current parent
62  $stack = array();
63 
64  // stack that contains all elements that are excluded
65  // it is organized by parent elements, similar to $stack,
66  // but it is only populated when an element with exclusions is
67  // processed, i.e. there won't be empty exclusions.
68  $exclude_stack = array();
69 
70  // variable that contains the start token while we are processing
71  // nodes. This enables error reporting to do its job
72  $start_token = false;
73  $context->register('CurrentToken', $start_token);
74 
75  //####################################################################//
76  // Loop
77 
78  // iterate through all start nodes. Determining the start node
79  // is complicated so it has been omitted from the loop construct
80  for ($i = 0, $size = count($tokens) ; $i < $size; ) {
81 
82  //################################################################//
83  // Gather information on children
84 
85  // child token accumulator
86  $child_tokens = array();
87 
88  // scroll to the end of this node, report number, and collect
89  // all children
90  for ($j = $i, $depth = 0; ; $j++) {
91  if ($tokens[$j] instanceof HTMLPurifier_Token_Start) {
92  $depth++;
93  // skip token assignment on first iteration, this is the
94  // token we currently are on
95  if ($depth == 1) continue;
96  } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) {
97  $depth--;
98  // skip token assignment on last iteration, this is the
99  // end token of the token we're currently on
100  if ($depth == 0) break;
101  }
102  $child_tokens[] = $tokens[$j];
103  }
104 
105  // $i is index of start token
106  // $j is index of end token
107 
108  $start_token = $tokens[$i]; // to make token available via CurrentToken
109 
110  //################################################################//
111  // Gather information on parent
112 
113  // calculate parent information
114  if ($count = count($stack)) {
115  $parent_index = $stack[$count-1];
116  $parent_name = $tokens[$parent_index]->name;
117  if ($parent_index == 0) {
118  $parent_def = $definition->info_parent_def;
119  } else {
120  $parent_def = $definition->info[$parent_name];
121  }
122  } else {
123  // processing as if the parent were the "root" node
124  // unknown info, it won't be used anyway, in the future,
125  // we may want to enforce one element only (this is
126  // necessary for HTML Purifier to clean entire documents
127  $parent_index = $parent_name = $parent_def = null;
128  }
129 
130  // calculate context
131  if ($is_inline === false) {
132  // check if conditions make it inline
133  if (!empty($parent_def) && $parent_def->descendants_are_inline) {
134  $is_inline = $count - 1;
135  }
136  } else {
137  // check if we're out of inline
138  if ($count === $is_inline) {
139  $is_inline = false;
140  }
141  }
142 
143  //################################################################//
144  // Determine whether element is explicitly excluded SGML-style
145 
146  // determine whether or not element is excluded by checking all
147  // parent exclusions. The array should not be very large, two
148  // elements at most.
149  $excluded = false;
150  if (!empty($exclude_stack)) {
151  foreach ($exclude_stack as $lookup) {
152  if (isset($lookup[$tokens[$i]->name])) {
153  $excluded = true;
154  // no need to continue processing
155  break;
156  }
157  }
158  }
159 
160  //################################################################//
161  // Perform child validation
162 
163  if ($excluded) {
164  // there is an exclusion, remove the entire node
165  $result = false;
166  $excludes = array(); // not used, but good to initialize anyway
167  } else {
168  // DEFINITION CALL
169  if ($i === 0) {
170  // special processing for the first node
171  $def = $definition->info_parent_def;
172  } else {
173  $def = $definition->info[$tokens[$i]->name];
174 
175  }
176 
177  if (!empty($def->child)) {
178  // have DTD child def validate children
179  $result = $def->child->validateChildren(
180  $child_tokens, $config, $context);
181  } else {
182  // weird, no child definition, get rid of everything
183  $result = false;
184  }
185 
186  // determine whether or not this element has any exclusions
187  $excludes = $def->excludes;
188  }
189 
190  // $result is now a bool or array
191 
192  //################################################################//
193  // Process result by interpreting $result
194 
195  if ($result === true || $child_tokens === $result) {
196  // leave the node as is
197 
198  // register start token as a parental node start
199  $stack[] = $i;
200 
201  // register exclusions if there are any
202  if (!empty($excludes)) $exclude_stack[] = $excludes;
203 
204  // move cursor to next possible start node
205  $i++;
206 
207  } elseif($result === false) {
208  // remove entire node
209 
210  if ($e) {
211  if ($excluded) {
212  $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
213  } else {
214  $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
215  }
216  }
217 
218  // calculate length of inner tokens and current tokens
219  $length = $j - $i + 1;
220 
221  // perform removal
222  array_splice($tokens, $i, $length);
223 
224  // update size
225  $size -= $length;
226 
227  // there is no start token to register,
228  // current node is now the next possible start node
229  // unless it turns out that we need to do a double-check
230 
231  // this is a rought heuristic that covers 100% of HTML's
232  // cases and 99% of all other cases. A child definition
233  // that would be tricked by this would be something like:
234  // ( | a b c) where it's all or nothing. Fortunately,
235  // our current implementation claims that that case would
236  // not allow empty, even if it did
237  if (!$parent_def->child->allow_empty) {
238  // we need to do a double-check
239  $i = $parent_index;
240  array_pop($stack);
241  }
242 
243  // PROJECTED OPTIMIZATION: Process all children elements before
244  // reprocessing parent node.
245 
246  } else {
247  // replace node with $result
248 
249  // calculate length of inner tokens
250  $length = $j - $i - 1;
251 
252  if ($e) {
253  if (empty($result) && $length) {
254  $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
255  } else {
256  $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
257  }
258  }
259 
260  // perform replacement
261  array_splice($tokens, $i + 1, $length, $result);
262 
263  // update size
264  $size -= $length;
265  $size += count($result);
266 
267  // register start token as a parental node start
268  $stack[] = $i;
269 
270  // register exclusions if there are any
271  if (!empty($excludes)) $exclude_stack[] = $excludes;
272 
273  // move cursor to next possible start node
274  $i++;
275 
276  }
277 
278  //################################################################//
279  // Scroll to next start node
280 
281  // We assume, at this point, that $i is the index of the token
282  // that is the first possible new start point for a node.
283 
284  // Test if the token indeed is a start tag, if not, move forward
285  // and test again.
286  $size = count($tokens);
287  while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) {
288  if ($tokens[$i] instanceof HTMLPurifier_Token_End) {
289  // pop a token index off the stack if we ended a node
290  array_pop($stack);
291  // pop an exclusion lookup off exclusion stack if
292  // we ended node and that node had exclusions
293  if ($i == 0 || $i == $size - 1) {
294  // use specialized var if it's the super-parent
295  $s_excludes = $definition->info_parent_def->excludes;
296  } else {
297  $s_excludes = $definition->info[$tokens[$i]->name]->excludes;
298  }
299  if ($s_excludes) {
300  array_pop($exclude_stack);
301  }
302  }
303  $i++;
304  }
305 
306  }
307 
308  //####################################################################//
309  // Post-processing
310 
311  // remove implicit parent tokens at the beginning and end
312  array_shift($tokens);
313  array_pop($tokens);
314 
315  // remove context variables
316  $context->destroy('IsInline');
317  $context->destroy('CurrentToken');
318 
319  //####################################################################//
320  // Return
321 
322  return $tokens;
323 
324  }
325 
326 }
327 
328 // vim: et sw=4 sts=4