ILIAS  Release_4_0_x_branch Revision 61816
 All Data Structures Namespaces Files Functions Variables Groups Pages
PEARSax3.php
Go to the documentation of this file.
1 <?php
2 
23 {
24 
28  protected $tokens = array();
29 
/**
 * Lexes an HTML string into a list of tokens by driving PEAR's
 * XML_HTMLSax3 parser over it.
 *
 * The token buffer is emptied, the input is run through normalize(), and
 * this object is registered as the receiver of the parser's element,
 * data and escape callbacks, each of which appends to the buffer.
 *
 * @param string $string  markup to tokenize
 * @param mixed  $config  forwarded unchanged to normalize()
 * @param mixed  $context forwarded unchanged to normalize()
 * @return array the tokens appended by the handler callbacks, in the
 *               order the callbacks fired
 */
public function tokenizeHTML($string, $config, $context) {

    // Every run starts from an empty buffer.
    $this->tokens = array();

    $html = $this->normalize($string, $config, $context);

    // Route all SAX events back to the handler methods of this object.
    $sax = new XML_HTMLSax3();
    $sax->set_object($this);
    $sax->set_element_handler('openHandler', 'closeHandler');
    $sax->set_data_handler('dataHandler');
    $sax->set_escape_handler('escapeHandler');

    // Entity parsing does not appear to cover attribute values, which is
    // why openHandler() decodes those itself.
    $sax->set_option('XML_OPTION_ENTITIES_PARSED', 1);

    $sax->parse($html);

    return $this->tokens;

}
50 
/**
 * SAX callback for an opening tag.
 *
 * Appends an empty-element token when $closed is truthy, otherwise a
 * start token. Attribute values are first passed through parseData().
 *
 * @param object $parser the SAX parser (unused here)
 * @param string $name   tag name
 * @param array  $attrs  attribute name => raw attribute value
 * @param bool   $closed whether the tag was reported as self-closed
 * @return bool always true
 */
public function openHandler(&$parser, $name, $attrs, $closed) {
    // The parser leaves entities in attribute values unresolved, so each
    // value is decoded here, in place.
    foreach ($attrs as $attrName => $rawValue) {
        $attrs[$attrName] = $this->parseData($rawValue);
    }

    $this->tokens[] = $closed
        ? new HTMLPurifier_Token_Empty($name, $attrs)
        : new HTMLPurifier_Token_Start($name, $attrs);

    return true;
}
66 
/**
 * SAX callback for a closing tag.
 *
 * Appends an end token for $name, unless the most recently recorded
 * token is an empty-element token, in which case the close event is
 * treated as the redundant one HTMLSax3 emits after an empty tag and is
 * dropped.
 *
 * @param object $parser the SAX parser (unused here)
 * @param string $name   tag name being closed
 * @return bool always true
 */
public function closeHandler(&$parser, $name) {
    // HTMLSax3 seems to always send empty tags an extra close tag;
    // detect it and ignore it.
    // [TESTME] to make sure it doesn't overreach
    // NOTE(review): the check does not look at $name, so ANY close tag
    // arriving directly after an empty token is dropped - confirm this is
    // acceptable for input such as "<b><br /></b>".
    //
    // Guard on a non-empty list first: a close tag can be the very first
    // event (e.g. input starting with "</b>"), and indexing offset -1 of
    // an empty array would raise an undefined-offset notice/warning.
    $count = count($this->tokens);
    if ($count > 0 && $this->tokens[$count - 1] instanceof HTMLPurifier_Token_Empty) {
        return true;
    }
    $this->tokens[] = new HTMLPurifier_Token_End($name);
    return true;
}
80 
/**
 * SAX callback for character data: wraps the data in a text token and
 * appends it to the token list.
 *
 * @param object $parser the SAX parser (unused here)
 * @param string $data   character data reported by the parser
 * @return bool always true
 */
public function dataHandler(&$parser, $data) {
    $textToken = new HTMLPurifier_Token_Text($data);
    $this->tokens[] = $textToken;
    return true;
}
88 
/**
 * SAX callback for escape sequences.
 *
 * Data beginning with "--" is recorded as a comment token (the data is
 * passed through as received). Anything else is ignored.
 *
 * @param object $parser the SAX parser (unused here)
 * @param string $data   escape content reported by the parser
 * @return bool always true
 */
public function escapeHandler(&$parser, $data) {
    $isComment = strncmp($data, '--', 2) === 0;
    if ($isComment) {
        $this->tokens[] = new HTMLPurifier_Token_Comment($data);
    }
    // CDATA is handled elsewhere. If it were handled here, data starting
    // with '[CDATA[' would become a text token holding
    // substr($data, 7, strlen($data) - 9).
    return true;
}
103 
104 }
105 
106 // vim: et sw=4 sts=4