ILIAS  release_5-1 Revision 5.0.0-5477-g43f3e3fab5f
Parse.php
Go to the documentation of this file.
1<?php
2
85require_once "Auth/OpenID.php";
86
88
// Modifier letters appended after the closing delimiter of every pattern
// this class assembles with sprintf("/%s/%s", ...).
92 var $_re_flags = "si";
93
98 "<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>";
99
// Template for a whole element. Starts with the tag name at a word boundary,
// where the tag name is not a namespace. tagMatcher() fills the first %s with
// the tag name and the second %s with the name(s) allowed to close it.
104 var $_tag_expr = "<%s\b(?!:)([^>]*?)(?:\/>|>(.*)(?:<\/?%s\s*>|\Z))";
105
// Matches one name=value attribute: group 1 is the name, group 2 the value
// (double-quoted, single-quoted, or a bare unquoted token).
106 var $_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)';
107
// Template for the start of an opening tag; openTag() fills in the tag name.
108 var $_open_tag_expr = "<%s\b";
// Template for either a closing tag (</name>) or a self-closed tag
// (<name ... />); closeTag() fills both %s slots with the same tag name.
109 var $_close_tag_expr = "<((\/%s\b)|(%s[^>\/]*\/))>";
110
112 {
113 $this->_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s",
114 $this->_re_flags);
115
116 $this->_entity_replacements = array(
117 'amp' => '&',
118 'lt' => '<',
119 'gt' => '>',
120 'quot' => '"'
121 );
122
123 $this->_attr_find = sprintf("/%s/%s",
124 $this->_attr_find,
125 $this->_re_flags);
126
127 $this->_removed_re = sprintf("/%s/%s",
128 $this->_removed_re,
129 $this->_re_flags);
130
131 $this->_ent_replace =
132 sprintf("&(%s);", implode("|",
133 $this->_entity_replacements));
134 }
135
/**
 * Returns a regular expression that will match a given tag in an SGML
 * string.
 *
 * The element template ($this->_tag_expr) has two placeholders: the tag
 * name and the name allowed in the closing position. When $close_tags is
 * given, the closing position accepts the tag itself or any of those
 * extra names.
 *
 * @param string     $tag_name   Name of the tag to match
 * @param array|null $close_tags Additional tag names that may end the element
 * @return string Delimited pattern with $this->_re_flags appended
 */
function tagMatcher($tag_name, $close_tags = null)
{
    $closer = $close_tags
        ? sprintf("(?:%s)", implode("|", array_merge(array($tag_name), $close_tags)))
        : $tag_name;

    $body = sprintf($this->_tag_expr, $tag_name, $closer);

    return sprintf("/%s/%s", $body, $this->_re_flags);
}
154
/**
 * Build the delimited pattern that finds the start of an opening tag.
 *
 * @param string $tag_name Tag name substituted into $this->_open_tag_expr
 * @return string Delimited pattern with $this->_re_flags appended
 */
function openTag($tag_name)
{
    return sprintf(
        "/%s/%s",
        sprintf($this->_open_tag_expr, $tag_name),
        $this->_re_flags
    );
}
160
/**
 * Build the delimited pattern that finds a closing (or self-closed) tag.
 *
 * The tag name is substituted into both placeholders of
 * $this->_close_tag_expr.
 *
 * @param string $tag_name Tag name to look for
 * @return string Delimited pattern with $this->_re_flags appended
 */
function closeTag($tag_name)
{
    return sprintf(
        "/%s/%s",
        sprintf($this->_close_tag_expr, $tag_name, $tag_name),
        $this->_re_flags
    );
}
166
/**
 * Locate the opening html tag in a document.
 *
 * @param string $s Text to search
 * @return int|false Byte offset at which the openTag('html') pattern first
 *                   matches, or false when it does not match or the
 *                   regex call fails
 */
function htmlBegin($s)
{
    $groups = array();
    $status = preg_match($this->openTag('html'), $s, $groups, PREG_OFFSET_CAPTURE);

    // With PREG_OFFSET_CAPTURE every entry is array(text, offset);
    // entry 0 is the whole match.
    return ($status === false || !$groups) ? false : $groups[0][1];
}
178
/**
 * Locate the closing html tag in a document.
 *
 * @param string $s Text to search
 * @return int|false Offset recorded for the LAST entry of the match array
 *                   produced by the closeTag('html') pattern (i.e. the
 *                   last capture group reported, not the whole match),
 *                   or false when nothing matches or the regex call fails
 */
function htmlEnd($s)
{
    $groups = array();
    $status = preg_match($this->closeTag('html'), $s, $groups, PREG_OFFSET_CAPTURE);

    if ($status === false || !$groups) {
        return false;
    }

    // Each entry is array(text, offset); take the offset of the final one.
    $last = end($groups);
    return $last[1];
}
190
/**
 * Build the pattern that matches the head element.
 *
 * Besides its own closing tag, a body or html tag is also accepted as the
 * end of the head element.
 *
 * @return string Delimited pattern produced by tagMatcher()
 */
function headFind()
{
    $terminators = array('body', 'html');

    return $this->tagMatcher('head', $terminators);
}
195
/**
 * Decode the character entities listed in $this->_entity_replacements.
 *
 * All entities are replaced in a single left-to-right pass, so text that a
 * replacement produces is never decoded again. (Replacing one entity after
 * another would turn "&amp;lt;" into "&lt;" and then into "<".)
 *
 * @param string $str Text possibly containing &name; entities
 * @return string Text with the known entities replaced
 */
function replaceEntities($str)
{
    $pairs = array();
    foreach ($this->_entity_replacements as $name => $char) {
        $pairs['&' . $name . ';'] = $char;
    }

    // Nothing configured: hand the input back untouched.
    if (!$pairs) {
        return $str;
    }

    // strtr() with an array never re-scans text it has already substituted.
    return strtr($str, $pairs);
}
203
/**
 * Strip one pair of enclosing quotes from a string.
 *
 * A value wrapped in double quotes or in single quotes is returned without
 * them; anything else is returned unchanged.
 *
 * @param string $str Raw attribute value
 * @return string Value without its enclosing quotes
 */
function removeQuotes($str)
{
    // Double-quoted form is tried first, then single-quoted.
    $wrappers = array('/^"(.*)"$/', "/^\'(.*)\'$/");

    foreach ($wrappers as $pattern) {
        $found = array();
        if (preg_match($pattern, $str, $found)) {
            return $found[1];
        }
    }

    return $str;
}
218
/**
 * Run a pattern against a text and report the matched substring.
 *
 * The multibyte regex functions are used when mb_ereg_search_init() is
 * callable; otherwise PCRE is used. Both paths now leave the same thing in
 * $match: the full matched text as a string. Previously the PCRE path
 * left the whole capture array there, which callers that treat $match as
 * a string could not use.
 *
 * NOTE(review): on the multibyte path the delimiters and the modifier
 * letters are cut off the pattern and are not passed to mb_ereg_search(),
 * so that path runs with whatever the mbregex default options are --
 * confirm this is acceptable for mixed-case markup.
 *
 * @param string $regexp Delimited pattern ending in $this->_re_flags
 * @param string $text   Text to search
 * @param string $match  Receives the full matched text on success
 * @return bool True when the pattern matched, false otherwise
 */
function match($regexp, $text, &$match)
{
    if (!is_callable('mb_ereg_search_init')) {
        $groups = array();
        if (!preg_match($regexp, $text, $groups)) {
            return false;
        }
        $match = $groups[0];
        return true;
    }

    // Drop the leading delimiter, the trailing delimiter and the flags.
    $regexp = substr($regexp, 1, strlen($regexp) - 2 - strlen($this->_re_flags));
    mb_ereg_search_init($text);
    if (!mb_ereg_search($regexp)) {
        return false;
    }

    $regs = mb_ereg_search_getregs();
    if (!is_array($regs)) {
        return false;
    }
    $match = $regs[0];
    return true;
}
233
248 {
249 $stripped = preg_replace($this->_removed_re,
250 "",
251 $html);
252
253 $html_begin = $this->htmlBegin($stripped);
254 $html_end = $this->htmlEnd($stripped);
255
256 if ($html_begin === false) {
257 return array();
258 }
259
260 if ($html_end === false) {
261 $html_end = strlen($stripped);
262 }
263
264 $stripped = substr($stripped, $html_begin,
265 $html_end - $html_begin);
266
267 // Workaround to prevent PREG_BACKTRACK_LIMIT_ERROR:
268 $old_btlimit = ini_set( 'pcre.backtrack_limit', -1 );
269
270 // Try to find the <HEAD> tag.
271 $head_re = $this->headFind();
272 $head_match = '';
273 if (!$this->match($head_re, $stripped, $head_match)) {
274 ini_set( 'pcre.backtrack_limit', $old_btlimit );
275 return array();
276 }
277
278 $link_data = array();
279 $link_matches = array();
280
281 if (!preg_match_all($this->_link_find, $head_match,
282 $link_matches)) {
283 ini_set( 'pcre.backtrack_limit', $old_btlimit );
284 return array();
285 }
286
287 foreach ($link_matches[0] as $link) {
288 $attr_matches = array();
289 preg_match_all($this->_attr_find, $link, $attr_matches);
290 $link_attrs = array();
291 foreach ($attr_matches[0] as $index => $full_match) {
292 $name = $attr_matches[1][$index];
293 $value = $this->replaceEntities(
294 $this->removeQuotes($attr_matches[2][$index]));
295
296 $link_attrs[strtolower($name)] = $value;
297 }
298 $link_data[] = $link_attrs;
299 }
300
301 ini_set( 'pcre.backtrack_limit', $old_btlimit );
302 return $link_data;
303 }
304
/**
 * Does $target_rel appear among the relationships in a rel attribute?
 *
 * The attribute is split on whitespace and every token is lower-cased
 * before comparison. $target_rel is compared as given (not lower-cased).
 * The comparison is a strict string comparison, so numeric-looking tokens
 * such as "1e1" and "10" are not treated as equal.
 *
 * @param string $rel_attr   Value of a rel attribute
 * @param string $target_rel Relationship to look for
 * @return int 1 when found, 0 otherwise
 */
function relMatches($rel_attr, $target_rel)
{
    $wanted = (string) $target_rel;

    foreach (preg_split("/\s+/", trim($rel_attr)) as $token) {
        if (strtolower($token) === $wanted) {
            return 1;
        }
    }

    return 0;
}
319
/**
 * Does this link have $target_rel as a relationship?
 *
 * @param array  $link_attrs Attribute map of one link tag (the 'rel' key
 *                           is read)
 * @param string $target_rel Relationship to look for
 * @return bool True when a non-empty rel attribute contains $target_rel
 */
function linkHasRel($link_attrs, $target_rel)
{
    // Class name spelled as it is elsewhere in this file (Auth_OpenID).
    $rel_attr = Auth_OpenID::arrayGet($link_attrs, 'rel', null);

    return ($rel_attr && $this->relMatches($rel_attr, $target_rel));
}
328
/**
 * Keep only the links that carry $target_rel as a relationship.
 *
 * @param array  $link_attrs_list List of link attribute maps
 * @param string $target_rel      Relationship to look for
 * @return array The matching attribute maps, in their original order
 */
function findLinksRel($link_attrs_list, $target_rel)
{
    $kept = array();

    foreach ($link_attrs_list as $candidate) {
        if (!$this->linkHasRel($candidate, $target_rel)) {
            continue;
        }
        $kept[] = $candidate;
    }

    return $kept;
}
343
/**
 * Return the href of the first link that has $target_rel as a
 * relationship.
 *
 * @param array  $link_attrs_list List of link attribute maps
 * @param string $target_rel      Relationship to look for
 * @return mixed Result of Auth_OpenID::arrayGet() for 'href' on the first
 *               matching link, or null when no link matches
 */
function findFirstHref($link_attrs_list, $target_rel)
{
    $candidates = $this->findLinksRel($link_attrs_list, $target_rel);

    return $candidates
        ? Auth_OpenID::arrayGet($candidates[0], 'href', null)
        : null;
}
357}
358
/**
 * Pull the server and delegate URLs out of an HTML document's link tags.
 *
 * @param string $html_text    HTML document text
 * @param string $server_rel   rel value identifying the server link
 * @param string $delegate_rel rel value identifying the delegate link
 * @return array|false array($delegate_url, $server_url), or false when no
 *                     server link is found; the delegate URL is whatever
 *                     findFirstHref() returns for $delegate_rel
 */
function Auth_OpenID_legacy_discover($html_text, $server_rel,
                                     $delegate_rel)
{
    $parser = new Auth_OpenID_Parse();
    $links = $parser->parseLinkAttrs($html_text);

    $server_url = $parser->findFirstHref($links, $server_rel);
    if ($server_url === null) {
        return false;
    }

    // The delegate is only looked up once a server link is known to exist.
    return array($parser->findFirstHref($links, $delegate_rel), $server_url);
}
377
$result
Auth_OpenID_legacy_discover($html_text, $server_rel, $delegate_rel)
Definition: Parse.php:359
This module implements a VERY limited parser that finds <link> tags in the head of HTML or XHTML docu...
Definition: Parse.php:87
$_removed_re
Stuff to remove before we start looking for tags.
Definition: Parse.php:97
findLinksRel($link_attrs_list, $target_rel)
Definition: Parse.php:329
removeQuotes($str)
Definition: Parse.php:204
findFirstHref($link_attrs_list, $target_rel)
Definition: Parse.php:344
openTag($tag_name)
Definition: Parse.php:155
tagMatcher($tag_name, $close_tags=null)
Returns a regular expression that will match a given tag in an SGML string.
Definition: Parse.php:140
relMatches($rel_attr, $target_rel)
Definition: Parse.php:305
replaceEntities($str)
Definition: Parse.php:196
linkHasRel($link_attrs, $target_rel)
Definition: Parse.php:320
$_re_flags
Specify some flags for use with regex matching.
Definition: Parse.php:92
closeTag($tag_name)
Definition: Parse.php:161
$_tag_expr
Starts with the tag name at a word boundary, where the tag name is not a namespace.
Definition: Parse.php:104
parseLinkAttrs($html)
Find all link tags in a string representing a HTML document and return a list of their attributes.
Definition: Parse.php:247
match($regexp, $text, &$match)
Definition: Parse.php:219
static arrayGet($arr, $key, $fallback=null)
Convenience function for getting array values.
Definition: OpenID.php:242
$html
Definition: example_001.php:87
$text
if(!is_array($argv)) $options