Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments.
371 {
372 global $wgUseTidy;
373
374 static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
375 $htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised;
376
377 wfProfileIn(__METHOD__);
378
379 if (!$staticInitialised) {
380 $htmlpairs = array( # Tags that must be closed
381 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
382 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
383 'strike', 'strong', 'tt', 'var', 'div', 'center',
384 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
385 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u'
386 );
387 $htmlsingle = array(
388 'br', 'hr', 'li', 'dt', 'dd'
389 );
390 $htmlsingleonly = array( # Elements that cannot have
close tags
391 'br', 'hr'
392 );
393 $htmlnest = array( # Tags that can be nested--??
394 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
395 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
396 );
397 $tabletags = array( # Can only appear inside table, we will
close them
398 'td', 'th', 'tr',
399 );
400 $htmllist = array( # Tags used by list
401 'ul','ol',
402 );
403 $listtags = array( # Tags that can appear in a list
404 'li',
405 );
406
407 $htmlsingleallowed = array_merge($htmlsingle, $tabletags);
408 $htmlelements = array_merge($htmlsingle, $htmlpairs, $htmlnest);
409
410 # Convert them all to hashtables for faster lookup
411 $vars = array( 'htmlpairs', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags',
412 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelements' );
413 foreach ($vars as $var) {
414 $$var = array_flip($$var);
415 }
416 $staticInitialised = true;
417 }
418
419 # Remove HTML comments
421 $bits = explode('<', $text);
422 $text = str_replace('>', '>', array_shift($bits));
423 if (!$wgUseTidy) {
424 $tagstack = $tablestack = array();
425 foreach ($bits as $x) {
426 $regs = array();
427 if (preg_match('!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs)) {
429 } else {
431 }
432
433 $badtag = 0 ;
434 if (isset($htmlelements[$t = strtolower($t)])) {
435 # Check our stack
436 if ($slash) {
437 # Closing a tag...
438 if (isset($htmlsingleonly[$t])) {
439 $badtag = 1;
440 } elseif (($ot = @array_pop($tagstack)) != $t) {
441 if (isset($htmlsingleallowed[$ot])) {
442 # Pop all elements with an optional close tag
443 # and see if we find a match below them
444 $optstack = array();
445 $optstack[] = $ot;
446 while ((($ot = @array_pop($tagstack)) != $t) &&
447 isset($htmlsingleallowed[$ot])) {
448 $optstack[] = $ot;
449 }
450 if ($t != $ot) {
451 # No match. Push the optinal elements back again
452 $badtag = 1;
453 while ($ot = @array_pop($optstack)) {
454 $tagstack[] = $ot;
455 }
456 }
457 } else {
458 @array_push($tagstack, $ot);
459 # <li> can be nested in <ul> or <ol>, skip those cases:
460 if (!(isset($htmllist[$ot]) && isset($listtags[$t]))) {
461 $badtag = 1;
462 }
463 }
464 } else {
465 if ($t == 'table') {
466 $tagstack = array_pop($tablestack);
467 }
468 }
469 $newparams = '';
470 } else {
471 # Keep track for later
472 if (isset($tabletags[$t]) &&
473 !in_array('table', $tagstack)) {
474 $badtag = 1;
475 } elseif (in_array($t, $tagstack) &&
476 !isset($htmlnest [$t ])) {
477 $badtag = 1 ;
478 # Is it a self closed htmlpair ? (bug 5487)
479 } elseif ($brace == '/>' &&
480 isset($htmlpairs[$t])) {
481 $badtag = 1;
482 } elseif (isset($htmlsingleonly[$t])) {
483 # Hack to force empty tag for uncloseable elements
484 $brace = '/>';
485 } elseif (isset($htmlsingle[$t])) {
486 # Hack to not close $htmlsingle tags
487 $brace = null;
488 } elseif (isset($tabletags[$t])
489 && in_array($t, $tagstack)) {
490
491 $text .= "</$t>";
492 } else {
493 if ($t == 'table') {
494 $tablestack[] = $tagstack;
495 $tagstack = array();
496 }
497 $tagstack[] = $t;
498 }
499
500 # Replace any variables or template parameters with
501 # plaintext results.
502 if (is_callable($processCallback)) {
503 call_user_func_array($processCallback, array( &
$params, $args ));
504 }
505
506 # Strip non-approved attributes from the tag
508 }
509 if (!$badtag) {
511 $close = ($brace == '/>' && !$slash) ? ' /' : '';
512 $text .= "<$slash$t$newparams$close>$rest";
513 continue;
514 }
515 }
516 $text .= '<' . str_replace('>', '>', $x);
517 }
518 # Close off any remaining tags
519 while (is_array($tagstack) && ($t = array_pop($tagstack))) {
520 $text .= "</$t>\n";
521 if ($t == 'table') {
522 $tagstack = array_pop($tablestack);
523 }
524 }
525 } else {
526 # this might be possible using tidy itself
527 foreach ($bits as $x) {
528 preg_match(
529 '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
530 $x,
531 $regs
532 );
534 if (isset($htmlelements[$t = strtolower($t)])) {
535 if (is_callable($processCallback)) {
536 call_user_func_array($processCallback, array( &
$params, $args ));
537 }
540 $text .= "<$slash$t$newparams$brace$rest";
541 } else {
542 $text .= '<' . str_replace('>', '>', $x);
543 }
544 }
static removeHTMLcomments($text)
Remove '<!--', '-->', and everything between.
static fixTagAttributes($text, $element)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML,...
if(! $DIC->user() ->getId()||!ilLTIConsumerAccess::hasCustomProviderCreationAccess()) $params