Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. 
  342                                                                                          {
  343                global $wgUseTidy;
  344 
  345                static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
  346                        $htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised;
  347 
  348                wfProfileIn( __METHOD__ );
  349 
  350                if ( !$staticInitialised ) {
  351 
  352                        $htmlpairs = array( # Tags that must be closed
  353                                'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
  354                                'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
  355                                'strike', 'strong', 'tt', 'var', 'div', 'center',
  356                                'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
  357                                'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u'
  358                        );
  359                        $htmlsingle = array(
  360                                'br', 'hr', 'li', 'dt', 'dd'
  361                        );
  362                        $htmlsingleonly = array( # Elements that cannot have close tags
  363                                'br', 'hr'
  364                        );
  365                        $htmlnest = array( # Tags that can be nested--??
  366                                'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
  367                                'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
  368                        );
  369                        $tabletags = array( # Can only appear inside table, we will close them
  370                                'td', 'th', 'tr',
  371                        );
  372                        $htmllist = array( # Tags used by list
  373                                'ul','ol',
  374                        );
  375                        $listtags = array( # Tags that can appear in a list
  376                                'li',
  377                        );
  378 
  379                        $htmlsingleallowed = array_merge( $htmlsingle, $tabletags );
  380                        $htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest );
  381 
  382                        # Convert them all to hashtables for faster lookup
  383                        $vars = array( 'htmlpairs', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', 
  384                                'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelements' );
  385                        foreach ( $vars as $var ) {
  386                                $$var = array_flip( $$var );
  387                        }
  388                        $staticInitialised = true;
  389                }
  390 
  391                # Remove HTML comments
  393                $bits = explode( 
'<', 
$text );
 
  394                $text = str_replace( 
'>', 
'>', array_shift( $bits ) );
 
  395                if(!$wgUseTidy) {
  396                        $tagstack = $tablestack = array();
  397                        foreach ( $bits as 
$x ) {
 
  398                                $regs = array();
  399                                if( preg_match( 
'!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', 
$x, $regs ) ) {
 
  401                                } else {
  403                                }
  404 
  405                                $badtag = 0 ;
  406                                if ( isset( $htmlelements[
$t = strtolower( 
$t )] ) ) {
 
  407                                        # Check our stack
  408                                        if ( $slash ) {
  409                                                # Closing a tag...
  410                                                if( isset( $htmlsingleonly[
$t] ) ) {
 
  411                                                        $badtag = 1;
  412                                                } elseif ( ( $ot = @array_pop( $tagstack ) ) != 
$t ) {
 
  413                                                        if ( isset( $htmlsingleallowed[$ot] ) ) {
  414                                                                # Pop all elements with an optional close tag
  415                                                                # and see if we find a match below them
  416                                                                $optstack = array();
  417                                                                array_push ($optstack, $ot);
  418                                                                while ( ( ( $ot = @array_pop( $tagstack ) ) != 
$t ) &&
 
  419                                                                                isset( $htmlsingleallowed[$ot] ) ) 
  420                                                                {
  421                                                                        array_push ($optstack, $ot);
  422                                                                }
  424                                                                        # No match. Push the optinal elements back again
  425                                                                        $badtag = 1;
  426                                                                        while ( $ot = @array_pop( $optstack ) ) {
  427                                                                                array_push( $tagstack, $ot );
  428                                                                        }
  429                                                                }
  430                                                        } else {
  431                                                                @array_push( $tagstack, $ot );
  432                                                                # <li> can be nested in <ul> or <ol>, skip those cases:
  433                                                                if(!(isset( $htmllist[$ot] ) && isset( $listtags[
$t] ) )) {
 
  434                                                                        $badtag = 1;
  435                                                                }
  436                                                        }
  437                                                } else {
  438                                                        if ( 
$t == 
'table' ) {
 
  439                                                                $tagstack = array_pop( $tablestack );
  440                                                        }
  441                                                }
  442                                                $newparams = '';
  443                                        } else {
  444                                                # Keep track for later
  445                                                if ( isset( $tabletags[
$t] ) &&
 
  446                                                ! in_array( 'table', $tagstack ) ) {
  447                                                        $badtag = 1;
  448                                                } 
else if ( in_array( 
$t, $tagstack ) &&
 
  449                                                ! isset( $htmlnest [
$t ] ) ) {
 
  450                                                        $badtag = 1 ;
  451                                                # Is it a self closed htmlpair ? (bug 5487)
  452                                                } else if( $brace == '/>' &&
  453                                                isset( $htmlpairs[
$t] ) ) {
 
  454                                                        $badtag = 1;
  455                                                } elseif( isset( $htmlsingleonly[
$t] ) ) {
 
  456                                                        # Hack to force empty tag for uncloseable elements
  457                                                        $brace = '/>';
  458                                                } 
else if( isset( $htmlsingle[
$t] ) ) {
 
  459                                                        # Hack to not close $htmlsingle tags
  460                                                        $brace = NULL;
  461                                                } 
else if( isset( $tabletags[
$t] )
 
  462                                                &&  in_array(
$t ,$tagstack) ) {
 
  463                                                        
  465                                                } else {
  466                                                        if ( 
$t == 
'table' ) {
 
  467                                                                array_push( $tablestack, $tagstack );
  468                                                                $tagstack = array();
  469                                                        }
  470                                                        array_push( $tagstack, 
$t );
 
  471                                                }
  472 
  473                                                # Replace any variables or template parameters with
  474                                                # plaintext results.
  475                                                if( is_callable( $processCallback ) ) {
  476                                                        call_user_func_array( $processCallback, array( &
$params, $args ) );
 
  477                                                }
  478 
  479                                                # Strip non-approved attributes from the tag
  481                                        }
  482                                        if ( ! $badtag ) {
  484                                                $close = ( $brace == '/>' && !$slash ) ? ' /' : '';
  485                                                $text .= 
"<$slash$t$newparams$close>$rest";
 
  486                                                continue;
  487                                        }
  488                                }
  489                                $text .= 
'<' . str_replace( 
'>', 
'>', 
$x);
 
  490                        }
  491                        # Close off any remaining tags
  492                        while ( is_array( $tagstack ) && (
$t = array_pop( $tagstack )) ) {
 
  494                                if ( 
$t == 
'table' ) { $tagstack = array_pop( $tablestack ); }
 
  495                        }
  496                } else {
  497                        # this might be possible using tidy itself
  498                        foreach ( $bits as 
$x ) {
 
  499                                preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
  502                                if ( isset( $htmlelements[
$t = strtolower( 
$t )] ) ) {
 
  503                                        if( is_callable( $processCallback ) ) {
  504                                                call_user_func_array( $processCallback, array( &
$params, $args ) );
 
  505                                        }
  508                                        $text .= 
"<$slash$t$newparams$brace$rest";
 
  509                                } else {
  510                                        $text .= 
'<' . str_replace( 
'>', 
'>', 
$x);
 
  511                                }
  512                        }
  513                }
  514                wfProfileOut( __METHOD__ );
static fixTagAttributes( $text, $element)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML,...
static removeHTMLcomments( $text)
Remove '<!--', '-->', and everything between.