44 $fulltag =
'</' . $tagname .
'>';
46 $fulltag =
'<' . $tagname;
47 if (is_array($attary) &&
sizeof($attary)) {
49 while (list($attname, $attvalue) = each($attary)) {
50 array_push($atts,
"$attname=$attvalue");
52 $fulltag .=
' ' . join(
' ', $atts);
71 $val = strtolower($val);
86 preg_match(
'/^(\s*)/s', substr($body, $offset), $matches);
87 if (
sizeof($matches[1])) {
88 $count = strlen($matches[1]);
107 $pos = strpos($body, $needle, $offset);
108 if ($pos ===
false) {
109 $pos = strlen($body);
131 $preg_rule =
'%^(.*?)(' . $reg .
')%s';
132 preg_match($preg_rule, substr($body, $offset), $matches);
133 if (!isset($matches[0]) || !$matches[0]) {
136 $retarr[0] = $offset + strlen($matches[1]);
137 $retarr[1] = $matches[1];
138 $retarr[2] = $matches[2];
159 if ($offset > strlen($body)) {
163 if ($lt == strlen($body)) {
172 if ($pos >= strlen($body)) {
173 return array(
false,
false,
false, $lt, strlen($body));
184 switch (substr($body, $pos, 1)) {
193 if (substr($body, $pos + 1, 2) ==
'--') {
194 $gt = strpos($body,
'-->', $pos);
200 return array(
false,
false,
false, $lt, $gt);
203 return array(
false,
false,
false, $lt, $gt);
219 if ($regary ==
false) {
220 return array(
false,
false,
false, $lt, strlen($body));
222 list($pos, $tagname, $match) = $regary;
223 $tagname = strtolower($tagname);
240 if (substr($body, $pos, 2) ==
'/>') {
245 $retary =
array(
false,
false,
false, $lt, $gt);
250 return array($tagname,
false, $tagtype, $lt, $pos);
256 if (!preg_match(
'/\s/', $match)) {
261 return array(
false,
false,
false, $lt, $gt);
275 while ($pos <= strlen($body)) {
277 if ($pos == strlen($body)) {
281 return array(
false,
false,
false, $lt, $pos);
288 if (preg_match(
'%^(\s*)(>|/>)%s', substr($body, $pos), $matches)) {
292 $pos += strlen($matches[1]);
293 if ($matches[2] ==
'/>') {
297 return array($tagname, $attary, $tagtype, $lt, $pos);
318 if ($regary ==
false) {
322 return array(
false,
false,
false, $lt, strlen($body));
324 list($pos, $attname, $match) = $regary;
325 $attname = strtolower($attname);
341 if (substr($body, $pos, 2) ==
'/>') {
346 $retary =
array(
false,
false,
false, $lt, $gt);
351 $attary{$attname} =
'"yes"';
352 return array($tagname, $attary, $tagtype, $lt, $pos);
359 $char = substr($body, $pos, 1);
377 $quot = substr($body, $pos, 1);
380 if ($regary ==
false) {
381 return array(
false,
false,
false, $lt, strlen($body));
383 list($pos, $attval, $match) = $regary;
385 $attary{$attname} =
'\'' . $attval .
'\'';
386 } elseif ($quot ==
'"') {
388 if ($regary ==
false) {
389 return array(
false,
false,
false, $lt, strlen($body));
391 list($pos, $attval, $match) = $regary;
393 $attary{$attname} =
'"' . $attval .
'"';
399 if ($regary ==
false) {
400 return array(
false,
false,
false, $lt, strlen($body));
402 list($pos, $attval, $match) = $regary;
406 $attval = preg_replace(
'/\"/s',
'"', $attval);
407 $attary{$attname} =
'"' . $attval .
'"';
409 } elseif (preg_match(
'|[\w/>]|', $char)) {
413 $attary{$attname} =
'"yes"';
419 return array(
false,
false,
false, $lt, $gt);
428 return array(
false,
false,
false, $lt, strlen($body));
441 preg_match_all($regex, $attvalue, $matches);
442 if (is_array($matches) &&
sizeof($matches[0]) > 0) {
444 for ($i = 0; $i <
sizeof($matches[0]); $i++) {
445 $numval = $matches[1][$i];
447 $numval = hexdec($numval);
449 $repl{$matches[0][$i]} = chr($numval);
451 $attvalue = strtr($attvalue, $repl);
470 if (strpos($attvalue,
'&') ===
false 471 && strpos($attvalue,
'\\') ===
false 477 $m = $m ||
tln_deent($attvalue,
'/\�*(\d+);*/s');
478 $m = $m ||
tln_deent($attvalue,
'/\�*((\d|[a-f])+);*/si',
true);
479 $m = $m ||
tln_deent($attvalue,
'/\\\\(\d+)/s',
true);
480 }
while ($m ==
true);
481 $attvalue = stripslashes($attvalue);
493 if (strcspn($attvalue,
"\t\r\n\0 ") != strlen($attvalue)) {
494 $attvalue = str_replace(
495 array(
"\t",
"\r",
"\n",
"\0",
" "),
496 array(
'',
'',
'',
'',
''),
521 $block_external_images
523 while (list($attname, $attvalue) = each($attary)) {
527 foreach ($rm_attnames as $matchtag => $matchattrs) {
528 if (preg_match($matchtag, $tagname)) {
529 foreach ($matchattrs as $matchattr) {
530 if (preg_match($matchattr, $attname)) {
531 unset($attary{$attname});
540 $oldattvalue = $attvalue;
542 if ($attname ==
'style' && $attvalue !== $oldattvalue) {
543 $attvalue =
"idiocy";
544 $attary{$attname} = $attvalue;
554 foreach ($bad_attvals as $matchtag => $matchattrs) {
555 if (preg_match($matchtag, $tagname)) {
556 foreach ($matchattrs as $matchattr => $valary) {
557 if (preg_match($matchattr, $attname)) {
563 list($valmatch, $valrepl) = $valary;
564 $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
565 if ($newvalue != $attvalue) {
566 $attary{$attname} = $newvalue;
567 $attvalue = $newvalue;
573 if ($attname ==
'style') {
574 if (preg_match(
'/[\0-\37\200-\377]+/', $attvalue)) {
575 $attary{$attname} =
'"disallowed character"';
577 preg_match_all(
"/url\s*\((.+)\)/si", $attvalue, $aMatch);
578 if (count($aMatch)) {
579 foreach($aMatch[1] as $sMatch) {
581 tln_fixurl($attname, $urlvalue, $trans_image_path, $block_external_images);
582 $attary{$attname} = str_replace($sMatch, $urlvalue, $attvalue);
590 foreach ($add_attr_to_tag as $matchtag => $addattary) {
591 if (preg_match($matchtag, $tagname)) {
592 $attary = array_merge($attary, $addattary);
598 function tln_fixurl($attname, &$attvalue, $trans_image_path, $block_external_images)
601 $attvalue = trim($attvalue);
602 if ($attvalue && ($attvalue[0] ==
'"'|| $attvalue[0] ==
"'")) {
604 $sQuote = $attvalue[0];
605 $attvalue = trim(substr($attvalue,1,-1));
614 if ($attvalue ==
'') {
615 $attvalue = $sQuote . $trans_image_path . $sQuote;
618 if (preg_match(
'/[\0-\37\200-\377]+/',$attvalue)) {
621 $attvalue = $sQuote .
'http://invalid-stuff-detected.example.com' . $sQuote;
624 $attvalue = $sQuote . $trans_image_path . $sQuote;
628 $aUrl = parse_url($attvalue);
629 if (isset($aUrl[
'scheme'])) {
630 switch(strtolower($aUrl[
'scheme'])) {
635 if ($attname !=
'href') {
636 if ($block_external_images ==
true) {
637 $attvalue = $sQuote . $trans_image_path . $sQuote;
639 if (!isset($aUrl[
'path'])) {
640 $attvalue = $sQuote . $trans_image_path . $sQuote;
644 $attvalue = $sQuote . $attvalue . $sQuote;
648 $attvalue = $sQuote . $attvalue . $sQuote;
651 $attvalue = $sQuote . $attvalue . $sQuote;
654 $attvalue = $sQuote . $trans_image_path . $sQuote;
658 if (!isset($aUrl[
'path']) || $aUrl[
'path'] != $trans_image_path) {
659 $$attvalue = $sQuote . $trans_image_path . $sQuote;
666 function tln_fixstyle($body, $pos, $trans_image_path, $block_external_images)
673 for ($i=$pos,$iCount=strlen($body);$i<$iCount;++$i) {
680 if ($sToken ==
'<') {
690 if (preg_match(
'/<\/\s*style\s*>/i',$sToken,$aMatch)) {
703 if ($sToken ==
'<') {
705 if (isset($body{$i+2}) && substr($body,$i,3) ==
'!--') {
706 $i = strpos($body,
'-->',$i+3);
725 if ($bSucces == FALSE){
726 return array(FALSE, strlen($body));
737 $content = preg_replace(
"|body(\s*\{.*?\})|si",
".bodyclass\\1", $content);
746 if (preg_match(
'/[\16-\37\200-\377]+/',$content)) {
747 $content =
'<!-- style block removed by html filter due to presence of 8bit characters -->';
748 return array($content, $newpos);
752 $content = preg_replace(
"/^\s*(@import.*)$/mi",
"\n<!-- @import rules forbidden -->\n",$content);
754 $content = preg_replace(
"/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i",
'url', $content);
755 preg_match_all(
"/url\s*\((.+)\)/si",$content,$aMatch);
756 if (count($aMatch)) {
757 $aValue = $aReplace =
array();
758 foreach($aMatch[1] as $sMatch) {
761 tln_fixurl(
'style',$urlvalue, $trans_image_path, $block_external_images);
763 $aReplace[] = $urlvalue;
765 $content = str_replace($aValue,$aReplace,$content);
771 $contentTemp = $content;
775 $match =
array(
'/\/\*.*\*\//',
783 $replace =
array(
'',
'idiocy',
'idiocy',
'idiocy',
'idiocy',
'idiocy',
'idiocy',
'');
784 $contentNew = preg_replace($match, $replace, $contentTemp);
785 if ($contentNew !== $contentTemp) {
786 $content = $contentNew;
788 return array($content, $newpos);
793 $divattary =
array(
'class' =>
"'bodyclass'");
795 $has_bgc_stl = $has_txt_stl =
false;
797 if (is_array($attary) &&
sizeof($attary) > 0){
798 foreach ($attary as $attname=>$attvalue){
799 $quotchar = substr($attvalue, 0, 1);
800 $attvalue = str_replace($quotchar,
"", $attvalue);
803 $styledef .=
"background-image: url('$trans_image_path'); ";
807 $styledef .=
"background-color: $attvalue; ";
811 $styledef .=
"color: $attvalue; ";
817 if ($has_bgc_stl && !$has_txt_stl) {
818 $styledef .=
"color: $text; ";
820 if (strlen($styledef) > 0){
821 $divattary{
"style"} =
"\"$styledef\"";
845 $rm_tags_with_content,
852 $block_external_images
857 $rm_tags = array_shift($tag_list);
858 @array_walk($tag_list,
'tln_casenormalize');
859 @array_walk($rm_tags_with_content,
'tln_casenormalize');
860 @array_walk($self_closing_tags,
'tln_casenormalize');
867 $open_tags =
array();
868 $trusted =
"<!-- begin tln_sanitized html -->\n";
869 $skip_content =
false;
874 $body = preg_replace(
'/&(\{.*?\};)/si',
'&\\1', $body);
875 while (($curtag =
tln_getnxtag($body, $curpos)) !=
false) {
876 list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
877 $free_content = substr($body, $curpos, $lt-$curpos);
881 if ($tagname ==
"style" && $tagtype == 1){
882 list($free_content, $curpos) =
883 tln_fixstyle($body, $gt+1, $trans_image_path, $block_external_images);
884 if ($free_content != FALSE){
885 if ( !empty($attary) ) {
892 $block_external_images
896 $trusted .= $free_content;
901 if ($skip_content ==
false){
902 $trusted .= $free_content;
904 if ($tagname !=
false) {
906 if ($skip_content == $tagname) {
911 $skip_content =
false;
913 if ($skip_content ==
false) {
914 if ($tagname ==
"body") {
917 if (isset($open_tags{$tagname}) &&
918 $open_tags{$tagname} > 0
920 $open_tags{$tagname}--;
930 if ($skip_content ==
false) {
936 && in_array($tagname, $self_closing_tags)
945 && in_array($tagname, $rm_tags_with_content)
947 $skip_content = $tagname;
949 if (($rm_tags ==
false 950 && in_array($tagname, $tag_list)) ||
952 && !in_array($tagname, $tag_list))
959 if ($tagname ==
"body"){
964 if (isset($open_tags{$tagname})) {
965 $open_tags{$tagname}++;
967 $open_tags{$tagname} = 1;
973 if (is_array($attary) &&
sizeof($attary) > 0) {
981 $block_external_images
988 if ($tagname !=
false && $skip_content ==
false) {
994 $trusted .= substr($body, $curpos, strlen($body) - $curpos);
995 if ($force_tag_closing ==
true) {
996 foreach ($open_tags as $tagname => $opentimes) {
997 while ($opentimes > 0) {
998 $trusted .=
'</' . $tagname .
'>';
1004 $trusted .=
"<!-- end tln_sanitized html -->\n";
1013 function HTMLFilter($body, $trans_image_path, $block_external_images =
false)
1030 $rm_tags_with_content =
array(
1040 $self_closing_tags =
array(
1048 $force_tag_closing =
true;
1050 $rm_attnames =
array(
1061 $bad_attvals =
array(
1064 "/^src|background/i" =>
1067 '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
1068 '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
1069 '/^([\'"])\s*about\s*:.*([\'"])/si' 1072 "\\1$trans_image_path\\2",
1073 "\\1$trans_image_path\\2",
1074 "\\1$trans_image_path\\2" 1077 "/^href|action/i" =>
1080 '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
1081 '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
1082 '/^([\'"])\s*about\s*:.*([\'"])/si' 1097 "/include-source/i",
1099 '/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i',
1100 '/url\s*\(\s*([\'"])\s*\S+script\s*:.*([\'"])\s*\)/si',
1101 '/url\s*\(\s*([\'"])\s*mocha\s*:.*([\'"])\s*\)/si',
1102 '/url\s*\(\s*([\'"])\s*about\s*:.*([\'"])\s*\)/si',
1103 '/(.*)\s*:\s*url\s*\(\s*([\'"]*)\s*\S+script\s*:.*([\'"]*)\s*\)/si' 1122 if ($block_external_images) {
1124 $bad_attvals{
'/.*/'}{
'/^src|background/i'}[0],
1125 '/^([\'\"])\s*https*:.*([\'\"])/si' 1128 $bad_attvals{
'/.*/'}{
'/^src|background/i'}[1],
1129 "\\1$trans_image_path\\1" 1132 $bad_attvals{
'/.*/'}{
'/^style/i'}[0],
1133 '/url\(([\'\"])\s*https*:.*([\'\"])\)/si' 1136 $bad_attvals{
'/.*/'}{
'/^style/i'}[1],
1137 "url(\\1$trans_image_path\\1)" 1141 $add_attr_to_tag =
array(
1143 array(
'target' =>
'"_blank"')
1149 $rm_tags_with_content,
1156 $block_external_images
tln_fixatts( $tagname, $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag, $trans_image_path, $block_external_images)
This function runs various checks against the attributes.
tln_deent(&$attvalue, $regex, $hex=false)
Translates entities into literal values so they can be checked.
tln_getnxtag($body, $offset)
This function looks for the next tag.
tln_skipspace($body, $offset)
This function skips any whitespace from the current position within a string and to the next non-whit...
tln_fixstyle($body, $pos, $trans_image_path, $block_external_images)
tln_sanitize( $body, $tag_list, $rm_tags_with_content, $self_closing_tags, $force_tag_closing, $rm_attnames, $bad_attvals, $add_attr_to_tag, $trans_image_path, $block_external_images)
tln_defang(&$attvalue)
This function checks attribute values for entity-encoded values and returns them translated into 8-bi...
tln_findnxstr($body, $offset, $needle)
This function looks for the next character within a string.
Create styles array
The data for the language used.
tln_findnxreg($body, $offset, $reg)
This function takes a PCRE-style regexp and tries to match it within the string.
tln_tagprint($tagname, $attary, $tagtype)
htmlfilter.inc This set of functions allows you to filter html in order to remove any malicious tags ...
HTMLFilter($body, $trans_image_path, $block_external_images=false)
tln_unspace(&$attvalue)
Kill any tabs, newlines, or carriage returns.
tln_fixurl($attname, &$attvalue, $trans_image_path, $block_external_images)
tln_casenormalize(&$val)
A small helper function to use with array_walk.
tln_body2div($attary, $trans_image_path)