Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. 
  348    {
  349        global $wgUseTidy;
  350 
  351        static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
  352            $htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised;
  353 
  354        wfProfileIn(__METHOD__);
  355 
  356        if (!$staticInitialised) {
  357            $htmlpairs = array( # Tags that must be closed
  358                'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
  359                'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
  360                'strike', 'strong', 'tt', 'var', 'div', 'center',
  361                'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
  362                'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u'
  363            );
  364            $htmlsingle = array(
  365                'br', 'hr', 'li', 'dt', 'dd'
  366            );
  367            $htmlsingleonly = array( # Elements that cannot have close tags
  368                'br', 'hr'
  369            );
  370            $htmlnest = array( # Tags that can be nested--??
  371                'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
  372                'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
  373            );
  374            $tabletags = array( # Can only appear inside table, we will close them
  375                'td', 'th', 'tr',
  376            );
  377            $htmllist = array( # Tags used by list
  378                'ul','ol',
  379            );
  380            $listtags = array( # Tags that can appear in a list
  381                'li',
  382            );
  383 
  384            $htmlsingleallowed = array_merge($htmlsingle, $tabletags);
  385            $htmlelements = array_merge($htmlsingle, $htmlpairs, $htmlnest);
  386 
  387            # Convert them all to hashtables for faster lookup
  388            $vars = array( 'htmlpairs', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags',
  389                'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelements' );
  390            foreach ($vars as $var) {
  391                $$var = array_flip($$var);
  392            }
  393            $staticInitialised = true;
  394        }
  395 
  396        # Remove HTML comments
  398        $bits = explode(
'<', 
$text);
 
  399        $text = str_replace(
'>', 
'>', array_shift($bits));
 
  400        if (!$wgUseTidy) {
  401            $tagstack = $tablestack = array();
  402            foreach ($bits as 
$x) {
 
  403                $regs = array();
  404                if (preg_match(
'!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', 
$x, $regs)) {
 
  406                } else {
  408                }
  409 
  410                $badtag = 0 ;
  411                if (isset($htmlelements[
$t = strtolower(
$t)])) {
 
  412                    # Check our stack
  413                    if ($slash) {
  414                        # Closing a tag...
  415                        if (isset($htmlsingleonly[
$t])) {
 
  416                            $badtag = 1;
  417                        } elseif (($ot = @array_pop($tagstack)) != 
$t) {
 
  418                            if (isset($htmlsingleallowed[$ot])) {
  419                                # Pop all elements with an optional close tag
  420                                # and see if we find a match below them
  421                                $optstack = array();
  422                                array_push($optstack, $ot);
  423                                while ((($ot = @array_pop($tagstack)) != 
$t) &&
 
  424                                        isset($htmlsingleallowed[$ot])) {
  425                                    array_push($optstack, $ot);
  426                                }
  428                                    # No match. Push the optional elements back again
  429                                    $badtag = 1;
  430                                    while ($ot = @array_pop($optstack)) {
  431                                        array_push($tagstack, $ot);
  432                                    }
  433                                }
  434                            } else {
  435                                @array_push($tagstack, $ot);
  436                                # <li> can be nested in <ul> or <ol>, skip those cases:
  437                                if (!(isset($htmllist[$ot]) && isset($listtags[
$t]))) {
 
  438                                    $badtag = 1;
  439                                }
  440                            }
  441                        } else {
  443                                $tagstack = array_pop($tablestack);
  444                            }
  445                        }
  446                        $newparams = '';
  447                    } else {
  448                        # Keep track for later
  449                        if (isset($tabletags[
$t]) &&
 
  450                        !in_array('table', $tagstack)) {
  451                            $badtag = 1;
  452                        } elseif (in_array(
$t, $tagstack) &&
 
  453                        !isset($htmlnest [
$t ])) {
 
  454                            $badtag = 1 ;
  455                        # Is it a self closed htmlpair ? (bug 5487)
  456                        } elseif ($brace == '/>' &&
  457                        isset($htmlpairs[
$t])) {
 
  458                            $badtag = 1;
  459                        } elseif (isset($htmlsingleonly[
$t])) {
 
  460                            # Hack to force empty tag for uncloseable elements
  461                            $brace = '/>';
  462                        } elseif (isset($htmlsingle[
$t])) {
 
  463                            # Hack to not close $htmlsingle tags
  464                            $brace = null;
  465                        } elseif (isset($tabletags[
$t])
 
  466                        &&  in_array(
$t, $tagstack)) {
 
  467                            
  469                        } else {
  471                                array_push($tablestack, $tagstack);
  472                                $tagstack = array();
  473                            }
  474                            array_push($tagstack, 
$t);
 
  475                        }
  476 
  477                        # Replace any variables or template parameters with
  478                        # plaintext results.
  479                        if (is_callable($processCallback)) {
  480                            call_user_func_array($processCallback, array( &
$params, $args ));
 
  481                        }
  482 
  483                        # Strip non-approved attributes from the tag
  485                    }
  486                    if (!$badtag) {
  488                        $close = ($brace == '/>' && !$slash) ? ' /' : '';
  489                        $text .= 
"<$slash$t$newparams$close>$rest";
 
  490                        continue;
  491                    }
  492                }
  493                $text .= 
'<' . str_replace(
'>', 
'>', 
$x);
 
  494            }
  495            # Close off any remaining tags
  496            while (is_array($tagstack) && (
$t = array_pop($tagstack))) {
 
  499                    $tagstack = array_pop($tablestack);
  500                }
  501            }
  502        } else {
  503            # this might be possible using tidy itself
  504            foreach ($bits as 
$x) {
 
  505                preg_match(
  506                    '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
  508                    $regs
  509                );
  511                if (isset($htmlelements[
$t = strtolower(
$t)])) {
 
  512                    if (is_callable($processCallback)) {
  513                        call_user_func_array($processCallback, array( &
$params, $args ));
 
  514                    }
  517                    $text .= 
"<$slash$t$newparams$brace$rest";
 
  518                } else {
  519                    $text .= 
'<' . str_replace(
'>', 
'>', 
$x);
 
  520                }
  521            }
static removeHTMLcomments($text)
Remove '<!--', '-->', and everything between.
static fixTagAttributes($text, $element)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML,...