ILIAS  release_5-2 Revision v5.2.25-18-g3f80b828510
RemoveForeignElements.php
Go to the documentation of this file.
1 <?php
2 
12 {
13 
/**
 * Removes all unrecognized tags from the list of tokens and filters comments.
 *
 * Makes a single pass over $tokens and builds a new array:
 *  - A tag token whose name has an entry in $definition->info_tag_transform
 *    is replaced by the result of that transform before anything else.
 *  - A tag token whose name is present in $definition->info is kept. If it is
 *    a Start or Empty token of an element that declares required attributes,
 *    its attributes are validated first and the token is skipped when a
 *    required attribute is still missing. For 'img' this check runs only
 *    when Core.RemoveInvalidImg is truthy.
 *  - Any other tag token is replaced by a text token holding its generated
 *    HTML when Core.EscapeInvalidTags is truthy; otherwise it is skipped, and
 *    if its name is a key of the hidden-elements map every following token
 *    up to the next tag of the same name is skipped as well.
 *  - A comment token is turned into a text token while inside a kept hidden
 *    element; otherwise it is kept (after hyphen cleanup) only when
 *    HTML.Trusted is set or it matches HTML.AllowedComments /
 *    HTML.AllowedCommentsRegexp, and skipped in every other case.
 *  - A text token is kept unchanged; every other kind of token is skipped.
 *
 * When Core.CollectErrors is truthy, notices/warnings/errors describing each
 * of these decisions are sent to the 'ErrorCollector' taken from $context.
 *
 * @param array $tokens  Tokens to filter. Presumably HTMLPurifier_Token
 *                       instances (TODO confirm); this method reads their
 *                       is_tag, name, attr and data members and writes armor.
 * @param object $config  Configuration; must provide getHTMLDefinition() and get().
 * @param object $context Context; must provide register(), get() and destroy().
 * @return array The tokens that were kept, in their original order.
 */
20  public function execute($tokens, $config, $context)
21  {
22  $definition = $config->getHTMLDefinition();
23  $generator = new HTMLPurifier_Generator($config, $context);
24  $result = array();
25 
26  $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
27  $remove_invalid_img = $config->get('Core.RemoveInvalidImg');
28 
29  // currently only used to determine if comments should be kept
30  $trusted = $config->get('HTML.Trusted');
31  $comment_lookup = $config->get('HTML.AllowedComments');
32  $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
    // comments only need individual inspection when an allow-list or an
    // allow-regexp has actually been configured
33  $check_comments = $comment_lookup !== array() || $comment_regexp !== null;
34 
35  $remove_script_contents = $config->get('Core.RemoveScriptContents');
36  $hidden_elements = $config->get('Core.HiddenElements');
37 
38  // remove script contents compatibility
    // an explicit true/false in Core.RemoveScriptContents overrides whatever
    // Core.HiddenElements says about 'script'; any other value leaves the
    // hidden-elements map untouched
39  if ($remove_script_contents === true) {
40  $hidden_elements['script'] = true;
41  } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
42  unset($hidden_elements['script']);
43  }
44 
45  $attr_validator = new HTMLPurifier_AttrValidator();
46 
47  // removes tokens until it reaches a closing tag with its value
    // (false when not skipping, otherwise the tag name being waited for)
48  $remove_until = false;
49 
50  // converts comments into text tokens when this is equal to a tag name
51  $textify_comments = false;
52 
    // NOTE(review): $token is handed to the context before the loop and then
    // reused as the loop variable. Presumably register() binds it by reference
    // so the context always sees the token being processed - confirm against
    // the context class before renaming or restructuring this variable.
53  $token = false;
54  $context->register('CurrentToken', $token);
55 
    // $e stays false unless error collection is enabled; every report below
    // is guarded by it
56  $e = false;
57  if ($config->get('Core.CollectErrors')) {
58  $e =& $context->get('ErrorCollector');
59  }
60 
61  foreach ($tokens as $token) {
    // While skipping, ignore everything except a tag named $remove_until.
    // That tag is not consumed here: it falls through to the normal handling
    // below, which decides whether skipping stops.
62  if ($remove_until) {
63  if (empty($token->is_tag) || $token->name !== $remove_until) {
64  continue;
65  }
66  }
67  if (!empty($token->is_tag)) {
68  // DEFINITION CALL
69 
70  // before any processing, try to transform the element
71  if (isset($definition->info_tag_transform[$token->name])) {
    // remember the pre-transform name; it is only used for the notice below
72  $original_name = $token->name;
73  // there is a transformation for this tag
74  // DEFINITION CALL
75  $token = $definition->
76  info_tag_transform[$token->name]->transform($token, $config, $context);
77  if ($e) {
78  $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
79  }
80  }
81 
82  if (isset($definition->info[$token->name])) {
83  // mostly everything's good, but
84  // we need to make sure required attributes are in order
85  if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
86  $definition->info[$token->name]->required_attr &&
87  ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
88  ) {
    // validate first, then check that every required attribute is still
    // set on the token afterwards
89  $attr_validator->validateToken($token, $config, $context);
90  $ok = true;
91  foreach ($definition->info[$token->name]->required_attr as $name) {
92  if (!isset($token->attr[$name])) {
93  $ok = false;
94  break;
95  }
96  }
97  if (!$ok) {
98  if ($e) {
    // $name still holds the attribute that broke out of the loop above
99  $e->send(
100  E_ERROR,
101  'Strategy_RemoveForeignElements: Missing required attribute',
102  $name
103  );
104  }
    // only this Start/Empty token is skipped here; this branch does not
    // touch the element's other tokens
105  continue;
106  }
    // NOTE(review): presumably signals a later attribute-validation pass that
    // this token was already validated - confirm where 'ValidateAttributes'
    // armor is read
107  $token->armor['ValidateAttributes'] = true;
108  }
109 
    // track whether we are inside a kept hidden element (e.g. an allowed
    // script): comments in there are converted to text further down
110  if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
111  $textify_comments = $token->name;
112  } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
113  $textify_comments = false;
114  }
115 
116  } elseif ($escape_invalid_tags) {
117  // invalid tag, generate HTML representation and insert in
118  if ($e) {
119  $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
120  }
121  $token = new HTMLPurifier_Token_Text(
122  $generator->generateFromToken($token)
123  );
124  } else {
125  // check if we need to destroy all of the tag's children
126  // CAN BE GENERICIZED
127  if (isset($hidden_elements[$token->name])) {
    // Start: begin (or keep) skipping until a tag with this name shows up.
    // Empty: leaves the skipping state unchanged.
    // Anything else: stop skipping.
128  if ($token instanceof HTMLPurifier_Token_Start) {
129  $remove_until = $token->name;
130  } elseif ($token instanceof HTMLPurifier_Token_Empty) {
131  // do nothing: we're still looking
132  } else {
133  $remove_until = false;
134  }
135  if ($e) {
136  $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
137  }
138  } else {
139  if ($e) {
140  $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
141  }
142  }
    // in both cases the unknown tag itself is not added to the result
143  continue;
144  }
145  } elseif ($token instanceof HTMLPurifier_Token_Comment) {
146  // textify comments in script tags when they are allowed
147  if ($textify_comments !== false) {
148  $data = $token->data;
149  $token = new HTMLPurifier_Token_Text($data);
150  } elseif ($trusted || $check_comments) {
151  // always cleanup comments
152  $trailing_hyphen = false;
    // the flag only feeds the notice below, so it is not computed when
    // errors are not being collected
153  if ($e) {
154  // perform check whether or not there's a trailing hyphen
155  if (substr($token->data, -1) == '-') {
156  $trailing_hyphen = true;
157  }
158  }
159  $token->data = rtrim($token->data, '-');
160  $found_double_hyphen = false;
    // repeat until no '--' is left: one str_replace pass can leave new
    // pairs behind (e.g. '----' becomes '--')
161  while (strpos($token->data, '--') !== false) {
162  $found_double_hyphen = true;
163  $token->data = str_replace('--', '-', $token->data);
164  }
    // keep the cleaned comment when trusted, when its trimmed text is a
    // non-empty entry of the allow-list, or when it matches the allow-regexp
165  if ($trusted || !empty($comment_lookup[trim($token->data)]) ||
166  ($comment_regexp !== null && preg_match($comment_regexp, trim($token->data)))) {
167  // OK good
168  if ($e) {
169  if ($trailing_hyphen) {
170  $e->send(
171  E_NOTICE,
172  'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
173  );
174  }
175  if ($found_double_hyphen) {
176  $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
177  }
178  }
179  } else {
180  if ($e) {
181  $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
182  }
183  continue;
184  }
185  } else {
186  // strip comments
187  if ($e) {
188  $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
189  }
190  continue;
191  }
192  } elseif ($token instanceof HTMLPurifier_Token_Text) {
    // text tokens are kept as they are (intentionally empty branch)
193  } else {
    // any other kind of token is not carried over
194  continue;
195  }
196  $result[] = $token;
197  }
198  if ($remove_until && $e) {
199  // we removed tokens until the end, throw error
200  $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
201  }
    // undo the register('CurrentToken', ...) call made before the loop
202  $context->destroy('CurrentToken');
203  return $result;
204  }
205 }
206 
207 // vim: et sw=4 sts=4
Concrete end token class.
Definition: End.php:10
$result
Generates HTML from tokens.
Definition: Generator.php:10
Validates the attributes of a token.
Concrete start token class.
Definition: Start.php:6
Supertype for classes that define a strategy for modifying/purifying tokens.
Definition: Strategy.php:12
Create styles array
The data for the language used.
Concrete empty token class.
Definition: Empty.php:6
Removes all unrecognized tags from the list of tokens.
Concrete text token class.
Definition: Text.php:12
Concrete comment token class.
Definition: Comment.php:6