ILIAS  Release_4_4_x_branch Revision 61816
 All Data Structures Namespaces Files Functions Variables Groups Pages
RemoveForeignElements.php
Go to the documentation of this file.
1 <?php
2 
12 {
13 
/**
 * Filters a token stream against the HTML definition obtained from $config.
 *
 * Tag tokens are first passed through any registered tag transform. Tags
 * that the definition knows are kept (provided their required attributes
 * survive validation); unknown tags are either re-emitted as escaped text
 * (Core.EscapeInvalidTags) or dropped, and for elements listed in
 * Core.HiddenElements everything up to the matching tag name is dropped
 * too. Comments are kept, cleaned, textified or stripped depending on
 * HTML.Trusted, HTML.AllowedComments and HTML.AllowedCommentsRegexp.
 * Text tokens pass through; any other token kind is discarded.
 *
 * @param array                $tokens  Tokens to filter, in document order.
 * @param HTMLPurifier_Config  $config  Source of the definition and directives.
 * @param HTMLPurifier_Context $context Receives 'CurrentToken' for the duration
 *                                      of the call and supplies 'ErrorCollector'
 *                                      when Core.CollectErrors is enabled.
 * @return array The tokens that survived filtering, in their original order.
 */
public function execute($tokens, $config, $context) {
    $htmlDef = $config->getHTMLDefinition();
    $tagPrinter = new HTMLPurifier_Generator($config, $context);
    $kept = array();

    $escapeForeign = $config->get('Core.EscapeInvalidTags');
    $dropInvalidImg = $config->get('Core.RemoveInvalidImg');

    // These three directives only influence how comments are treated.
    $trusted = $config->get('HTML.Trusted');
    $allowedComments = $config->get('HTML.AllowedComments');
    $allowedCommentPattern = $config->get('HTML.AllowedCommentsRegexp');
    $filterComments = $allowedComments !== array() || $allowedCommentPattern !== null;

    $stripScriptBody = $config->get('Core.RemoveScriptContents');
    $hidden = $config->get('Core.HiddenElements');

    // Core.RemoveScriptContents, when set to a strict boolean, overrides
    // whatever Core.HiddenElements says about <script>.
    if ($stripScriptBody === true) {
        $hidden['script'] = true;
    } elseif ($stripScriptBody === false && isset($hidden['script'])) {
        unset($hidden['script']);
    }

    $attrChecker = new HTMLPurifier_AttrValidator();

    // Tag name we are discarding tokens up to, or false when not discarding.
    $skipUntil = false;

    // Name of the allowed hidden element we are inside, or false; while set,
    // comments are converted to text tokens.
    $textifyInside = false;

    // NOTE(review): $token is presumably bound by reference here so that the
    // context observes every assignment made by the loop below - confirm
    // against HTMLPurifier_Context::register before renaming this variable.
    $token = false;
    $context->register('CurrentToken', $token);

    $errors = false;
    if ($config->get('Core.CollectErrors')) {
        $errors =& $context->get('ErrorCollector');
    }

    foreach ($tokens as $token) {
        // While discarding, only a tag bearing the awaited name gets through.
        if ($skipUntil && (empty($token->is_tag) || $token->name !== $skipUntil)) {
            continue;
        }

        // ---- non-tag tokens: comments, text, everything else ----
        if (empty($token->is_tag)) {
            if ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textifyInside !== false) {
                    // inside an allowed hidden element: comment becomes text
                    $token = new HTMLPurifier_Token_Text($token->data);
                } elseif (!$trusted && !$filterComments) {
                    // comments are not wanted at all
                    if ($errors) {
                        $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                } else {
                    // The trailing-hyphen flag is only tracked when there is
                    // a collector to report it to.
                    $hadTrailingHyphen = $errors && substr($token->data, -1) == '-';
                    $token->data = rtrim($token->data, '-');

                    // Collapse runs of hyphens until no "--" remains.
                    $hadDoubleHyphen = strpos($token->data, '--') !== false;
                    while (strpos($token->data, '--') !== false) {
                        $token->data = str_replace('--', '-', $token->data);
                    }

                    $trimmed = trim($token->data);
                    $permitted = $trusted
                        || !empty($allowedComments[$trimmed])
                        || ($allowedCommentPattern !== null && preg_match($allowedCommentPattern, $trimmed));

                    if (!$permitted) {
                        if ($errors) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    if ($errors) {
                        if ($hadTrailingHyphen) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                        }
                        if ($hadDoubleHyphen) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // neither tag, comment nor text: discard
                continue;
            }

            $kept[] = $token;
            continue;
        }

        // ---- tag tokens ----

        // Give the definition a chance to transform the tag before judging it.
        $nameBeforeTransform = $token->name;
        if (isset($htmlDef->info_tag_transform[$nameBeforeTransform])) {
            $token = $htmlDef->info_tag_transform[$nameBeforeTransform]
                ->transform($token, $config, $context);
            if ($errors) {
                $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $nameBeforeTransform);
            }
        }

        if (!isset($htmlDef->info[$token->name])) {
            if ($escapeForeign) {
                // unknown tag: keep it, but as its literal HTML text
                if ($errors) {
                    $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                }
                $token = new HTMLPurifier_Token_Text($tagPrinter->generateFromToken($token));
                $kept[] = $token;
                continue;
            }

            if (!isset($hidden[$token->name])) {
                if ($errors) {
                    $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                }
                continue;
            }

            // Unknown hidden element: a start tag begins discarding, an empty
            // tag leaves the state alone, anything else ends discarding.
            if ($token instanceof HTMLPurifier_Token_Start) {
                $skipUntil = $token->name;
            } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                $skipUntil = false;
            }
            if ($errors) {
                $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
            }
            continue;
        }

        // Known element. If it declares required attributes, validate now and
        // drop the tag when one of them is absent afterwards. <img> is only
        // subject to this when Core.RemoveInvalidImg is on.
        $opensElement = $token instanceof HTMLPurifier_Token_Start
            || $token instanceof HTMLPurifier_Token_Empty;
        if (
            $opensElement &&
            $htmlDef->info[$token->name]->required_attr &&
            ($dropInvalidImg || $token->name != 'img')
        ) {
            $attrChecker->validateToken($token, $config, $context);

            $complete = true;
            foreach ($htmlDef->info[$token->name]->required_attr as $attrName) {
                if (!isset($token->attr[$attrName])) {
                    $complete = false;
                    break;
                }
            }
            if (!$complete) {
                if ($errors) {
                    $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $attrName);
                }
                continue;
            }

            // Attributes were validated above; flag the token accordingly.
            $token->armor['ValidateAttributes'] = true;
        }

        // Track entry into / exit from an allowed hidden element.
        if (isset($hidden[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
            $textifyInside = $token->name;
        } elseif ($token->name === $textifyInside && $token instanceof HTMLPurifier_Token_End) {
            $textifyInside = false;
        }

        $kept[] = $token;
    }

    // Still discarding at end of input: the closing tag never arrived.
    if ($skipUntil && $errors) {
        $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skipUntil);
    }

    $context->destroy('CurrentToken');

    return $kept;
}
185 
186 }
187 
188 // vim: et sw=4 sts=4