ILIAS  release_5-2 Revision v5.2.25-18-g3f80b82851
htmlfilter.php
Go to the documentation of this file.
1 <?php
/**
 * Assemble the textual form of a tag from its parsed components.
 *
 * @param string $tagname Tag name to print.
 * @param mixed  $attary  Map of attribute name => value (values are printed
 *                        verbatim, so they must already carry their quotes);
 *                        ignored unless it is a non-empty array.
 * @param int    $tagtype 2 prints a closing tag, 3 a self-closing tag; any
 *                        other value prints an opening tag.
 * @return string The assembled tag.
 */
function tln_tagprint($tagname, $attary, $tagtype)
{
    if ($tagtype == 2) {
        return '</' . $tagname . '>';
    }

    $fulltag = '<' . $tagname;
    if (is_array($attary) && count($attary)) {
        $atts = array();
        // foreach rather than each(): each() is deprecated since PHP 7.2 and
        // removed in PHP 8.0, and foreach does not depend on the array's
        // internal pointer position.
        foreach ($attary as $attname => $attvalue) {
            $atts[] = "$attname=$attvalue";
        }
        $fulltag .= ' ' . implode(' ', $atts);
    }
    if ($tagtype == 3) {
        $fulltag .= ' /';
    }
    return $fulltag . '>';
}
61 
/**
 * Lower-case a value in place.
 *
 * Takes its argument by reference so it can be handed to array_walk() as a
 * callback.
 *
 * @param string $val Value to normalize; overwritten with its lower-case form.
 * @return void
 */
function tln_casenormalize(&$val)
{
    $lowered = strtolower($val);
    $val = $lowered;
}
73 
/**
 * Advance an offset past any run of whitespace.
 *
 * @param string $body   Text being scanned.
 * @param int    $offset Position to start from.
 * @return int The offset of the first non-whitespace character at or after
 *             $offset (unchanged when there is no whitespace to skip).
 */
function tln_skipspace($body, $offset)
{
    $matches = array();
    preg_match('/^(\s*)/s', (string) substr($body, $offset), $matches);
    // $matches[1] is a string: measure it with strlen(). The former
    // sizeof() call on it warns on PHP 7.2+ and throws a TypeError on PHP 8.
    if (isset($matches[1])) {
        $offset += strlen($matches[1]);
    }
    return $offset;
}
93 
/**
 * Locate the next occurrence of a substring.
 *
 * @param string $body   Text to search.
 * @param int    $offset Position to start searching from.
 * @param string $needle Substring to look for.
 * @return int Position of the next $needle, or strlen($body) when it does
 *             not occur at or after $offset.
 */
function tln_findnxstr($body, $offset, $needle)
{
    $found = strpos($body, $needle, $offset);
    return ($found === false) ? strlen($body) : $found;
}
113 
/**
 * Find the next place where a regular expression fragment matches.
 *
 * The fragment is embedded into '%^(.*?)(REG)%s', so it must not contain an
 * unescaped '%'.
 *
 * @param string $body   Text to search.
 * @param int    $offset Position to start searching from.
 * @param string $reg    PCRE fragment (without delimiters).
 * @return array|false false when nothing matches (or the overall match is
 *                     empty); otherwise array(
 *                       0 => absolute position in $body where the match starts,
 *                       1 => text between $offset and the match,
 *                       2 => the matched text
 *                     ).
 */
function tln_findnxreg($body, $offset, $reg)
{
    $matches = array();
    $preg_rule = '%^(.*?)(' . $reg . ')%s';
    preg_match($preg_rule, (string) substr($body, $offset), $matches);

    // Compare against '' explicitly: a plain truthiness test would also
    // reject a successful match whose whole text is the string "0".
    if (!isset($matches[0]) || $matches[0] === '') {
        return false;
    }

    return array(
        $offset + strlen($matches[1]),
        $matches[1],
        $matches[2],
    );
}
142 
/**
 * Find and parse the next tag in $body, starting at $offset.
 *
 * @param string $body   HTML text to scan.
 * @param int    $offset Position to start scanning from.
 * @return array|false false when $offset is past the end of $body or no '<'
 *                     remains. Otherwise a 5-element array:
 *                       0 => lower-cased tag name, or false if the construct
 *                            is not a usable tag (comment, declaration,
 *                            malformed or unterminated tag),
 *                       1 => map of lower-cased attribute name => quoted
 *                            value, or false when no attributes were parsed,
 *                       2 => tag type (1 opening, 2 closing, 3 self-closing),
 *                            or false,
 *                       3 => position of the opening '<',
 *                       4 => position where the construct ends (strlen($body)
 *                            when it is unterminated).
 */
function tln_getnxtag($body, $offset)
{
    if ($offset > strlen($body)) {
        return false;
    }
    $lt = tln_findnxstr($body, $offset, '<');
    if ($lt == strlen($body)) {
        return false;
    }

    // Whitespace between '<' and the tag name is skipped.
    $pos = tln_skipspace($body, $lt + 1);
    if ($pos >= strlen($body)) {
        return array(false, false, false, $lt, strlen($body));
    }

    // Classify the tag by its first significant character.
    switch (substr($body, $pos, 1)) {
        case '/':
            $tagtype = 2;
            $pos++;
            break;
        case '!':
            if (substr($body, $pos + 1, 2) == '--') {
                // Comment: ends at the '>' of the next '-->', or at end of
                // input when the comment is never closed.
                $gt = strpos($body, '-->', $pos);
                if ($gt === false) {
                    $gt = strlen($body);
                } else {
                    $gt += 2;
                }
                return array(false, false, false, $lt, $gt);
            }
            // Any other '<!...' construct ends at the next '>'.
            $gt = tln_findnxstr($body, $pos, '>');
            return array(false, false, false, $lt, $gt);
        default:
            $tagtype = 1;
            break;
    }

    // The tag name runs up to the first character that is not a word
    // character, '-' or '_'.
    $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
    if ($regary === false) {
        return array(false, false, false, $lt, strlen($body));
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = strtolower($tagname);

    // Decide what to do based on the character that ended the tag name.
    switch ($match) {
        case '/':
            if (substr($body, $pos, 2) == '/>') {
                $pos++;
                $tagtype = 3;
            } else {
                // A '/' not followed by '>' makes the tag invalid.
                $gt = tln_findnxstr($body, $pos, '>');
                return array(false, false, false, $lt, $gt);
            }
            // intentional fall-through
        case '>':
            return array($tagname, false, $tagtype, $lt, $pos);
        default:
            // Only whitespace may separate the tag name from attributes.
            if (!preg_match('/\s/', $match)) {
                $gt = tln_findnxstr($body, $lt, '>');
                return array(false, false, false, $lt, $gt);
            }
            break;
    }

    // Attribute parsing. Values are stored together with their quotes.
    $attary = array();

    while ($pos <= strlen($body)) {
        $pos = tln_skipspace($body, $pos);
        if ($pos == strlen($body)) {
            // Input ended inside the tag.
            return array(false, false, false, $lt, $pos);
        }

        // End of tag reached?
        $matches = array();
        if (preg_match('%^(\s*)(>|/>)%s', substr($body, $pos), $matches)) {
            $pos += strlen($matches[1]);
            if ($matches[2] == '/>') {
                $tagtype = 3;
                $pos++;
            }
            return array($tagname, $attary, $tagtype, $lt, $pos);
        }

        // Attribute name: same character rule as the tag name.
        $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
        if ($regary === false) {
            return array(false, false, false, $lt, strlen($body));
        }
        list($pos, $attname, $match) = $regary;
        $attname = strtolower($attname);

        switch ($match) {
            case '/':
                if (substr($body, $pos, 2) == '/>') {
                    $pos++;
                    $tagtype = 3;
                } else {
                    $gt = tln_findnxstr($body, $pos, '>');
                    return array(false, false, false, $lt, $gt);
                }
                // intentional fall-through
            case '>':
                // Value-less attribute directly before the end of the tag.
                // Square brackets: curly-brace offsets are a parse error
                // on PHP 8.
                $attary[$attname] = '"yes"';
                return array($tagname, $attary, $tagtype, $lt, $pos);
            default:
                $pos = tln_skipspace($body, $pos);
                $char = substr($body, $pos, 1);
                if ($char == '=') {
                    $pos++;
                    $pos = tln_skipspace($body, $pos);
                    $quot = substr($body, $pos, 1);
                    if ($quot == '\'') {
                        // Single-quoted value: read up to the closing quote.
                        $regary = tln_findnxreg($body, $pos + 1, '\'');
                        if ($regary === false) {
                            return array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        $pos++;
                        $attary[$attname] = '\'' . $attval . '\'';
                    } elseif ($quot == '"') {
                        // Double-quoted value: read up to the closing quote.
                        $regary = tln_findnxreg($body, $pos + 1, '\"');
                        if ($regary === false) {
                            return array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        $pos++;
                        $attary[$attname] = '"' . $attval . '"';
                    } else {
                        // Unquoted value: runs up to whitespace or '>'. It is
                        // stored double-quoted with embedded '"' escaped.
                        $regary = tln_findnxreg($body, $pos, '[\s>]');
                        if ($regary === false) {
                            return array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        $attval = preg_replace('/\"/s', '&quot;', $attval);
                        $attary[$attname] = '"' . $attval . '"';
                    }
                } elseif (preg_match('|[\w/>]|', $char)) {
                    // Value-less attribute followed by another attribute or
                    // by the end of the tag.
                    $attary[$attname] = '"yes"';
                } else {
                    // Anything else makes the tag invalid.
                    $gt = tln_findnxstr($body, $pos, '>');
                    return array(false, false, false, $lt, $gt);
                }
                break;
        }
    }

    // Ran off the end of the input without closing the tag.
    return array(false, false, false, $lt, strlen($body));
}
430 
/**
 * Replace numeric character references in a value with the literal byte they
 * encode, so later checks see the decoded text.
 *
 * @param string $attvalue Value to decode; modified in place.
 * @param string $regex    Complete PCRE pattern whose first capture group
 *                         holds the digits of the numeric reference.
 * @param bool   $hex      When true the captured digits are read as
 *                         hexadecimal, otherwise as decimal.
 * @return bool true when at least one reference was found and replaced,
 *              false otherwise.
 */
function tln_deent(&$attvalue, $regex, $hex = false)
{
    $matches = array();
    preg_match_all($regex, $attvalue, $matches);
    if (!is_array($matches) || !isset($matches[0]) || count($matches[0]) === 0) {
        return false;
    }

    $repl = array();
    foreach ($matches[0] as $i => $entity) {
        $digits = $matches[1][$i];
        $code = $hex ? hexdec($digits) : (int) $digits;
        // chr() only uses the value modulo 256. Reducing it here keeps an
        // absurdly long digit run (untrusted input) from reaching chr() as
        // an out-of-range number, which PHP 8 rejects with a TypeError.
        $repl[$entity] = chr((int) fmod($code, 256));
    }
    $attvalue = strtr($attvalue, $repl);
    return true;
}
457 
/**
 * Decode obfuscated characters in an attribute value, in place.
 *
 * Values containing neither '&' nor a backslash are left untouched.
 * Otherwise decimal references (&#NN;), hexadecimal references (&#xNN;) and
 * backslash-digit escapes (digits read as hexadecimal) are decoded
 * repeatedly until a pass changes nothing, and finally stripslashes() is
 * applied.
 *
 * @param string $attvalue Attribute value; modified in place.
 * @return void
 */
function tln_defang(&$attvalue)
{
    $hasAmpersand = strpos($attvalue, '&') !== false;
    $hasBackslash = strpos($attvalue, '\\') !== false;
    if (!$hasAmpersand && !$hasBackslash) {
        return;
    }

    do {
        // Short-circuit on purpose: at most one decoder succeeds per pass,
        // tried in this order.
        $decoded = tln_deent($attvalue, '/\&#0*(\d+);*/s')
            || tln_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true)
            || tln_deent($attvalue, '/\\\\(\d+)/s', true);
    } while ($decoded);

    $attvalue = stripslashes($attvalue);
}
483 
/**
 * Remove every tab, carriage return, newline, NUL byte and space from a
 * value, in place.
 *
 * @param string $attvalue Value to strip; modified only when it contains at
 *                         least one of those characters.
 * @return void
 */
function tln_unspace(&$attvalue)
{
    // Nothing to do when the whole string is free of the unwanted characters.
    if (strcspn($attvalue, "\t\r\n\0 ") == strlen($attvalue)) {
        return;
    }
    $unwanted = array("\t", "\r", "\n", "\0", " ");
    $attvalue = str_replace($unwanted, '', $attvalue);
}
501 
/**
 * Run the attribute checks for one tag and return the cleaned attribute map.
 *
 * For each attribute: drop it if its name matches a removal rule for this
 * tag; otherwise decode and de-space a copy of the value, apply the
 * bad-value replacement rules, and for "style" attributes additionally
 * reject control/8-bit characters and rewrite url(...) targets. Finally the
 * attributes configured for this tag in $add_attr_to_tag are merged in.
 *
 * @param string $tagname               Name of the tag the attributes belong to.
 * @param array  $attary                Map of attribute name => quoted value.
 * @param array  $rm_attnames           Map of tag-name regex => list of
 *                                      attribute-name regexes to remove.
 * @param array  $bad_attvals           Map of tag-name regex => map of
 *                                      attribute-name regex =>
 *                                      array(patterns, replacements) as
 *                                      accepted by preg_replace().
 * @param array  $add_attr_to_tag       Map of tag-name regex => attributes to add.
 * @param string $trans_image_path      Replacement path for rejected URLs.
 * @param bool   $block_external_images Passed through to tln_fixurl().
 * @return array The filtered attribute map.
 */
function tln_fixatts(
    $tagname,
    $attary,
    $rm_attnames,
    $bad_attvals,
    $add_attr_to_tag,
    $trans_image_path,
    $block_external_images
) {
    // foreach rather than each(): each() is removed in PHP 8.0.
    foreach ($attary as $attname => $attvalue) {
        // Attributes whose name is on the removal list are dropped outright.
        foreach ($rm_attnames as $matchtag => $matchattrs) {
            if (preg_match($matchtag, $tagname)) {
                foreach ($matchattrs as $matchattr) {
                    if (preg_match($matchattr, $attname)) {
                        unset($attary[$attname]);
                        // Move on to the NEXT ATTRIBUTE. A bare "continue"
                        // only advanced the innermost loop, so the removed
                        // attribute was still processed below and could be
                        // re-added by the value-replacement rules.
                        continue 3;
                    }
                }
            }
        }

        // Work on a decoded copy so obfuscated values cannot slip past the
        // pattern checks below.
        $oldattvalue = $attvalue;
        tln_defang($attvalue);
        if ($attname === 'style' && $attvalue !== $oldattvalue) {
            // A style value that needed decoding is replaced wholesale.
            $attvalue = "idiocy";
            $attary[$attname] = $attvalue;
        }
        tln_unspace($attvalue);

        // Replace values that match a known-bad pattern for this tag/attribute.
        foreach ($bad_attvals as $matchtag => $matchattrs) {
            if (preg_match($matchtag, $tagname)) {
                foreach ($matchattrs as $matchattr => $valary) {
                    if (preg_match($matchattr, $attname)) {
                        list($valmatch, $valrepl) = $valary;
                        $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                        if ($newvalue != $attvalue) {
                            $attary[$attname] = $newvalue;
                            $attvalue = $newvalue;
                        }
                    }
                }
            }
        }

        if ($attname === 'style') {
            // Control characters and 8-bit bytes are not allowed in styles.
            if (preg_match('/[\0-\37\200-\377]+/', $attvalue)) {
                $attary[$attname] = '"disallowed character"';
            }
            // Sanitize every url(...) target found in the style value.
            $aMatch = array();
            preg_match_all("/url\s*\((.+)\)/si", $attvalue, $aMatch);
            if (isset($aMatch[1])) {
                foreach ($aMatch[1] as $sMatch) {
                    $urlvalue = $sMatch;
                    tln_fixurl($attname, $urlvalue, $trans_image_path, $block_external_images);
                    $attary[$attname] = str_replace($sMatch, $urlvalue, $attvalue);
                }
            }
        }
    }

    // Add the attributes configured for this tag.
    foreach ($add_attr_to_tag as $matchtag => $addattary) {
        if (preg_match($matchtag, $tagname)) {
            $attary = array_merge($attary, $addattary);
        }
    }
    return $attary;
}
597 
/**
 * Validate a URL value and replace it when it is not acceptable.
 *
 * Surrounding quotes are stripped for inspection; every replacement value is
 * wrapped in the quote character that was found (double quotes by default).
 * Note that a value which is accepted without being rewritten is left in its
 * stripped, unquoted form.
 *
 * @param string $attname               Attribute the URL came from ('href'
 *                                      is treated differently from all
 *                                      other names).
 * @param string $attvalue              URL to check; modified in place.
 * @param string $trans_image_path      Replacement used for rejected URLs.
 * @param bool   $block_external_images When true, mailto/http/https/ftp URLs
 *                                      outside href are replaced.
 * @return void
 */
function tln_fixurl($attname, &$attvalue, $trans_image_path, $block_external_images)
{
    $sQuote = '"';
    $attvalue = trim($attvalue);
    if ($attvalue && ($attvalue[0] == '"' || $attvalue[0] == "'")) {
        // Remember the quote style and look at the bare value.
        $sQuote = $attvalue[0];
        $attvalue = trim((string) substr($attvalue, 1, -1));
    }
    $sReplacement = $sQuote . $trans_image_path . $sQuote;

    if ($attvalue == '') {
        $attvalue = $sReplacement;
        return;
    }

    // First, disallow 8 bit characters and control characters.
    if (preg_match('/[\0-\37\200-\377]+/', $attvalue)) {
        if ($attname == 'href') {
            $attvalue = $sQuote . 'http://invalid-stuff-detected.example.com' . $sQuote;
        } else {
            $attvalue = $sReplacement;
        }
        return;
    }

    $aUrl = parse_url($attvalue);
    if (!isset($aUrl['scheme'])) {
        // Scheme-less URL: only the replacement path itself is accepted.
        if (!isset($aUrl['path']) || $aUrl['path'] != $trans_image_path) {
            // This assignment used to target "$$attvalue" (a variable
            // variable), which silently left the URL unreplaced.
            $attvalue = $sReplacement;
        }
        return;
    }

    switch (strtolower($aUrl['scheme'])) {
        case 'mailto':
        case 'http':
        case 'https':
        case 'ftp':
            if ($attname != 'href') {
                if ($block_external_images == true) {
                    $attvalue = $sReplacement;
                } elseif (!isset($aUrl['path'])) {
                    $attvalue = $sReplacement;
                }
            } else {
                $attvalue = $sQuote . $attvalue . $sQuote;
            }
            break;
        case 'outbind':
        case 'cid':
            $attvalue = $sQuote . $attvalue . $sQuote;
            break;
        default:
            // Any other scheme is rejected.
            $attvalue = $sReplacement;
            break;
    }
}
665 
/**
 * Extract and sanitize the content of a <style> block.
 *
 * Scans $body from $pos up to the matching </style> end tag, skipping HTML
 * comments so a "</style>" inside a comment does not end the block, then
 * cleans the collected CSS.
 *
 * @param string $body                  Full HTML text.
 * @param int    $pos                   Position just after the opening <style ...> tag.
 * @param string $trans_image_path      Replacement used for rejected URLs.
 * @param bool   $block_external_images Passed through to tln_fixurl().
 * @return array array(false, strlen($body)) when no closing </style> is
 *               found; otherwise array(sanitized CSS, position just after
 *               the closing tag).
 */
function tln_fixstyle($body, $pos, $trans_image_path, $block_external_images)
{
    // workaround for </style> in between comments
    $content = '';
    $sToken = '';
    $bSucces = false;
    $bEndTag = false;
    for ($i = $pos, $iCount = strlen($body); $i < $iCount; ++$i) {
        // Square brackets: curly-brace string offsets are a parse error on
        // PHP 8.
        $char = $body[$i];
        switch ($char) {
            case '<':
                $sToken = $char;
                break;
            case '/':
                if ($sToken == '<') {
                    $sToken .= $char;
                    $bEndTag = true;
                } else {
                    $content .= $char;
                }
                break;
            case '>':
                if ($bEndTag) {
                    $sToken .= $char;
                    if (preg_match('/<\/\s*style\s*>/i', $sToken, $aMatch)) {
                        $newpos = $i + 1;
                        $bSucces = true;
                        break 2;
                    } else {
                        // Some other end tag: keep it as style content.
                        $content .= $sToken;
                    }
                    $bEndTag = false;
                } else {
                    $content .= $char;
                }
                break;
            case '!':
                if ($sToken == '<') {
                    // possible comment
                    if (isset($body[$i + 2]) && substr($body, $i, 3) == '!--') {
                        $i = strpos($body, '-->', $i + 3);
                        if ($i === false) { // no end comment
                            $i = strlen($body);
                        }
                        $sToken = '';
                    }
                } else {
                    $content .= $char;
                }
                break;
            default:
                if ($bEndTag) {
                    $sToken .= $char;
                } else {
                    $content .= $char;
                }
                break;
        }
    }
    if ($bSucces == false) {
        return array(false, strlen($body));
    }

    // Rules written for "body" are retargeted to the ".bodyclass" class.
    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);

    // $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si",
    // "url(\\1$trans_image_path\\2)", $content);

    // first check for 8bit sequences and disallowed control characters
    if (preg_match('/[\16-\37\200-\377]+/', $content)) {
        $content = '<!-- style block removed by html filter due to presence of 8bit characters -->';
        return array($content, $newpos);
    }

    // remove @import line
    $content = preg_replace("/^\s*(@import.*)$/mi", "\n<!-- @import rules forbidden -->\n", $content);

    // Collapse backslash-obfuscated spellings of "url", then sanitize every
    // url(...) target.
    $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);
    $aMatch = array();
    preg_match_all("/url\s*\((.+)\)/si", $content, $aMatch);
    if (count($aMatch)) {
        $aValue = $aReplace = array();
        foreach ($aMatch[1] as $sMatch) {
            // url value
            $urlvalue = $sMatch;
            tln_fixurl('style', $urlvalue, $trans_image_path, $block_external_images);
            $aValue[] = $sMatch;
            $aReplace[] = $urlvalue;
        }
        $content = str_replace($aValue, $aReplace, $content);
    }

    // Run the keyword checks against a decoded, de-spaced copy; the cleaned
    // copy replaces the content only when one of the checks changed it.
    $contentTemp = $content;
    tln_defang($contentTemp);
    tln_unspace($contentTemp);

    $match = array('/\/\*.*\*\//',
        '/expression/i',
        '/behaviou*r/i',
        '/binding/i',
        '/include-source/i',
        '/javascript/i',
        '/script/i',
        '/position/i');
    $replace = array('', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', '');
    $contentNew = preg_replace($match, $replace, $contentTemp);
    if ($contentNew !== $contentTemp) {
        $content = $contentNew;
    }
    return array($content, $newpos);
}
790 
/**
 * Translate the attributes of a <body> tag into attributes for the <div>
 * that replaces it.
 *
 * The div always gets class 'bodyclass'. "background", "bgcolor" and "text"
 * are converted into an inline style; all other attributes are discarded.
 *
 * @param mixed  $attary           Map of body attribute name => quoted
 *                                 value; ignored unless a non-empty array.
 * @param string $trans_image_path Image path used for any "background" attribute.
 * @return array Attribute map for the div.
 */
function tln_body2div($attary, $trans_image_path)
{
    $divattary = array('class' => "'bodyclass'");
    $text = '#000000';
    $has_bgc_stl = $has_txt_stl = false;
    $styledef = '';
    if (is_array($attary) && count($attary) > 0) {
        foreach ($attary as $attname => $attvalue) {
            // Strip BOTH quote characters. Removing only the value's own
            // leading quote let a single-quoted value containing '"' close
            // the double-quoted style attribute built below and inject
            // further attributes.
            $attvalue = str_replace(array('"', "'"), '', $attvalue);
            switch ((string) $attname) {
                case 'background':
                    $styledef .= "background-image: url('$trans_image_path'); ";
                    break;
                case 'bgcolor':
                    $has_bgc_stl = true;
                    $styledef .= "background-color: $attvalue; ";
                    break;
                case 'text':
                    $has_txt_stl = true;
                    $styledef .= "color: $attvalue; ";
                    break;
            }
        }
        // Outlook defines a white bgcolor and no text color. This can lead to
        // white text on a white bg with certain themes.
        if ($has_bgc_stl && !$has_txt_stl) {
            $styledef .= "color: $text; ";
        }
        if (strlen($styledef) > 0) {
            // Square brackets: curly-brace offsets are a parse error on PHP 8.
            $divattary['style'] = "\"$styledef\"";
        }
    }
    return $divattary;
}
826 
/**
 * Sanitize an HTML document according to the given rules.
 *
 * Walks through $body tag by tag, copying the text between tags and
 * re-printing only the tags (with filtered attributes) that the rules allow.
 * <body> is turned into a <div>, <style> blocks are cleaned separately, and
 * the result is wrapped in begin/end marker comments.
 *
 * @param string $body                  HTML to sanitize.
 * @param array  $tag_list              First element is a flag, the rest are
 *                                      tag names: with false the listed tags
 *                                      are removed, with true only the
 *                                      listed tags are kept.
 * @param array  $rm_tags_with_content  Tags removed together with everything
 *                                      up to their closing tag.
 * @param array  $self_closing_tags     Tags always printed as self-closing.
 * @param bool   $force_tag_closing     When true, closing tags are appended
 *                                      for tags still open at the end.
 * @param array  $rm_attnames           See tln_fixatts().
 * @param array  $bad_attvals           See tln_fixatts().
 * @param array  $add_attr_to_tag       See tln_fixatts().
 * @param string $trans_image_path      Replacement path for rejected URLs.
 * @param bool   $block_external_images Whether external images are replaced.
 * @return string The sanitized HTML.
 */
function tln_sanitize(
    $body,
    $tag_list,
    $rm_tags_with_content,
    $self_closing_tags,
    $force_tag_closing,
    $rm_attnames,
    $bad_attvals,
    $add_attr_to_tag,
    $trans_image_path,
    $block_external_images
) {
    // Normalize the rule arrays; the first element of $tag_list is the
    // "keep only these" flag rather than a tag name.
    $rm_tags = array_shift($tag_list);
    @array_walk($tag_list, 'tln_casenormalize');
    @array_walk($rm_tags_with_content, 'tln_casenormalize');
    @array_walk($self_closing_tags, 'tln_casenormalize');

    $curpos = 0;
    $open_tags = array();
    $trusted = "<!-- begin tln_sanitized html -->\n";
    // false, or the name of the tag whose content is currently being dropped.
    $skip_content = false;

    // Neutralize "&{...};" constructs by escaping the ampersand.
    $body = preg_replace('/&(\{.*?\};)/si', '&amp;\\1', $body);

    while (($curtag = tln_getnxtag($body, $curpos)) != false) {
        list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
        $free_content = substr($body, $curpos, $lt - $curpos);

        // <style> blocks are handled as a unit by tln_fixstyle().
        // NOTE(review): $free_content (the text preceding the <style> tag)
        // is overwritten here before it is appended to $trusted, so that
        // text appears to be dropped - confirm whether this is intended.
        if ($tagname == "style" && $tagtype == 1) {
            list($free_content, $curpos) =
                tln_fixstyle($body, $gt + 1, $trans_image_path, $block_external_images);
            if ($free_content != false) {
                if (!empty($attary)) {
                    $attary = tln_fixatts(
                        $tagname,
                        $attary,
                        $rm_attnames,
                        $bad_attvals,
                        $add_attr_to_tag,
                        $trans_image_path,
                        $block_external_images
                    );
                }
                $trusted .= tln_tagprint($tagname, $attary, $tagtype);
                $trusted .= $free_content;
                $trusted .= tln_tagprint($tagname, null, 2);
            }
            continue;
        }

        if ($skip_content == false) {
            $trusted .= $free_content;
        }

        if ($tagname != false) {
            if ($tagtype == 2) {
                if ($skip_content == $tagname) {
                    // Closing tag of the block being skipped: stop skipping
                    // and do not print the tag itself.
                    $tagname = false;
                    $skip_content = false;
                } else {
                    if ($skip_content == false) {
                        if ($tagname == "body") {
                            $tagname = "div";
                        }
                        // Only print closing tags that match an open tag.
                        // Square brackets: curly-brace offsets are a parse
                        // error on PHP 8.
                        if (isset($open_tags[$tagname]) &&
                            $open_tags[$tagname] > 0
                        ) {
                            $open_tags[$tagname]--;
                        } else {
                            $tagname = false;
                        }
                    }
                }
            } else {
                // Opening or self-closing tag.
                if ($skip_content == false) {
                    // Force configured tags into self-closing form.
                    if ($tagtype == 1
                        && in_array($tagname, $self_closing_tags)
                    ) {
                        $tagtype = 3;
                    }
                    // Start dropping content for tags removed with content.
                    if ($tagtype == 1
                        && in_array($tagname, $rm_tags_with_content)
                    ) {
                        $skip_content = $tagname;
                    } else {
                        if (($rm_tags == false
                                && in_array($tagname, $tag_list)) ||
                            ($rm_tags == true
                                && !in_array($tagname, $tag_list))
                        ) {
                            // Tag is not allowed by the tag list.
                            $tagname = false;
                        } else {
                            // <body> becomes a <div> carrying the body styling.
                            if ($tagname == "body") {
                                $tagname = "div";
                                $attary = tln_body2div($attary, $trans_image_path);
                            }
                            if ($tagtype == 1) {
                                if (isset($open_tags[$tagname])) {
                                    $open_tags[$tagname]++;
                                } else {
                                    $open_tags[$tagname] = 1;
                                }
                            }
                            // Filter the attributes of the surviving tag.
                            if (is_array($attary) && count($attary) > 0) {
                                $attary = tln_fixatts(
                                    $tagname,
                                    $attary,
                                    $rm_attnames,
                                    $bad_attvals,
                                    $add_attr_to_tag,
                                    $trans_image_path,
                                    $block_external_images
                                );
                            }
                        }
                    }
                }
            }
            if ($tagname != false && $skip_content == false) {
                $trusted .= tln_tagprint($tagname, $attary, $tagtype);
            }
        }
        $curpos = $gt + 1;
    }

    // Text after the last tag.
    $trusted .= substr($body, $curpos, strlen($body) - $curpos);

    if ($force_tag_closing == true) {
        foreach ($open_tags as $tagname => $opentimes) {
            while ($opentimes > 0) {
                $trusted .= '</' . $tagname . '>';
                $opentimes--;
            }
        }
        $trusted .= "\n";
    }
    $trusted .= "<!-- end tln_sanitized html -->\n";
    return $trusted;
}
1007 
1008 //
1009 // Use the nifty htmlfilter library
1010 //
1011 
1012 
/**
 * Sanitize HTML with the rule set defined in this function.
 *
 * Builds the tag, attribute and value rules and hands them to tln_sanitize().
 *
 * @param string $body                  HTML to sanitize.
 * @param string $trans_image_path      Path substituted for rejected or
 *                                      blocked image/URL values.
 * @param bool   $block_external_images When true, http(s) URLs in src,
 *                                      background and style url() values
 *                                      are replaced as well.
 * @return string The sanitized HTML.
 */
function HTMLFilter($body, $trans_image_path, $block_external_images = false)
{
    // Leading false: the tags listed after it are removed.
    $tag_list = array(
        false,
        "object",
        "meta",
        "html",
        "head",
        "base",
        "link",
        "frame",
        "iframe",
        "plaintext",
        "marquee"
    );

    $rm_tags_with_content = array(
        "script",
        "applet",
        "embed",
        "title",
        "frameset",
        "xmp",
        "xml"
    );

    $self_closing_tags = array(
        "img",
        "br",
        "hr",
        "input",
        "outbind"
    );

    $force_tag_closing = true;

    $rm_attnames = array(
        "/.*/" =>
            array(
                // "/target/i",
                "/^on.*/i",
                "/^dynsrc/i",
                "/^data.*/i",
                "/^lowsrc.*/i"
            )
    );

    $bad_attvals = array(
        "/.*/" =>
            array(
                "/^src|background/i" =>
                    array(
                        array(
                            '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
                            '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
                            '/^([\'"])\s*about\s*:.*([\'"])/si'
                        ),
                        array(
                            "\\1$trans_image_path\\2",
                            "\\1$trans_image_path\\2",
                            "\\1$trans_image_path\\2"
                        )
                    ),
                "/^href|action/i" =>
                    array(
                        array(
                            '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
                            '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
                            '/^([\'"])\s*about\s*:.*([\'"])/si'
                        ),
                        array(
                            "\\1#\\1",
                            "\\1#\\1",
                            "\\1#\\1"
                        )
                    ),
                "/^style/i" =>
                    array(
                        array(
                            "/\/\*.*\*\//",
                            "/expression/i",
                            "/binding/i",
                            "/behaviou*r/i",
                            "/include-source/i",
                            '/position\s*:/i',
                            '/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i',
                            '/url\s*\(\s*([\'"])\s*\S+script\s*:.*([\'"])\s*\)/si',
                            '/url\s*\(\s*([\'"])\s*mocha\s*:.*([\'"])\s*\)/si',
                            '/url\s*\(\s*([\'"])\s*about\s*:.*([\'"])\s*\)/si',
                            '/(.*)\s*:\s*url\s*\(\s*([\'"]*)\s*\S+script\s*:.*([\'"]*)\s*\)/si'
                        ),
                        array(
                            "",
                            "idiocy",
                            "idiocy",
                            "idiocy",
                            "idiocy",
                            "idiocy",
                            "url",
                            "url(\\1#\\1)",
                            "url(\\1#\\1)",
                            "url(\\1#\\1)",
                            "\\1:url(\\2#\\3)"
                        )
                    )
            )
    );

    if ($block_external_images) {
        // Add rules that replace http(s) image sources and style URLs.
        // Square brackets: curly-brace offsets are a parse error on PHP 8.
        array_push(
            $bad_attvals['/.*/']['/^src|background/i'][0],
            '/^([\'\"])\s*https*:.*([\'\"])/si'
        );
        array_push(
            $bad_attvals['/.*/']['/^src|background/i'][1],
            "\\1$trans_image_path\\1"
        );
        array_push(
            $bad_attvals['/.*/']['/^style/i'][0],
            '/url\(([\'\"])\s*https*:.*([\'\"])\)/si'
        );
        array_push(
            $bad_attvals['/.*/']['/^style/i'][1],
            "url(\\1$trans_image_path\\1)"
        );
    }

    // Links are forced to open in a new window.
    $add_attr_to_tag = array(
        "/^a$/i" =>
            array('target' => '"_blank"')
    );

    $trusted = tln_sanitize(
        $body,
        $tag_list,
        $rm_tags_with_content,
        $self_closing_tags,
        $force_tag_closing,
        $rm_attnames,
        $bad_attvals,
        $add_attr_to_tag,
        $trans_image_path,
        $block_external_images
    );
    return $trusted;
}
tln_fixatts( $tagname, $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag, $trans_image_path, $block_external_images)
This function runs various checks against the attributes.
Definition: htmlfilter.php:514
tln_deent(&$attvalue, $regex, $hex=false)
Translates entities into literal values so they can be checked.
Definition: htmlfilter.php:439
tln_getnxtag($body, $offset)
This function looks for the next tag.
Definition: htmlfilter.php:157
tln_skipspace($body, $offset)
This function skips any whitespace from the current position within a string up to the next non-whitespace character.
Definition: htmlfilter.php:84
tln_fixstyle($body, $pos, $trans_image_path, $block_external_images)
Definition: htmlfilter.php:666
tln_sanitize( $body, $tag_list, $rm_tags_with_content, $self_closing_tags, $force_tag_closing, $rm_attnames, $bad_attvals, $add_attr_to_tag, $trans_image_path, $block_external_images)
Definition: htmlfilter.php:842
tln_defang(&$attvalue)
This function checks attribute values for entity-encoded values and returns them translated into 8-bi...
Definition: htmlfilter.php:465
tln_findnxstr($body, $offset, $needle)
This function looks for the next character within a string.
Definition: htmlfilter.php:105
Create styles array
The data for the language used.
tln_findnxreg($body, $offset, $reg)
This function takes a PCRE-style regexp and tries to match it within the string.
Definition: htmlfilter.php:127
tln_tagprint($tagname, $attary, $tagtype)
htmlfilter.inc This set of functions allows you to filter html in order to remove any malicious tags ...
Definition: htmlfilter.php:41
$text
HTMLFilter($body, $trans_image_path, $block_external_images=false)
tln_unspace(&$attvalue)
Kill any tabs, newlines, or carriage returns.
Definition: htmlfilter.php:491
tln_fixurl($attname, &$attvalue, $trans_image_path, $block_external_images)
Definition: htmlfilter.php:598
tln_casenormalize(&$val)
A small helper function to use with array_walk.
Definition: htmlfilter.php:69
tln_body2div($attary, $trans_image_path)
Definition: htmlfilter.php:791