ILIAS  Release_4_1_x_branch Revision 61804
 All Data Structures Namespaces Files Functions Variables Groups Pages
PEARSax3.php
Go to the documentation of this file.
1 <?php
2 
23 {
24 
// Accumulator for the token objects appended by the SAX handlers below;
// cleared at the start of tokenizeHTML() and returned by it.
28  protected $tokens = array();
30 
// Value returned by set_error_handler() in tokenizeHTML(); the muting
// handler forwards every non-E_STRICT error to it.
31  private $parent_handler;
// Element names pushed by openHandler() and popped by closeHandler();
// escapeHandler() inspects the top entry to detect a surrounding "style".
32  private $stack = array();
// NOTE(review): $this->last_token_was_empty is assigned throughout this class
// but no declaration is visible in this listing -- confirm it is declared.
33 
/**
 * Tokenizes an HTML string by driving an XML_HTMLSax3 parser and collecting
 * the tokens produced by this object's SAX callbacks.
 *
 * While parsing, a temporary error handler is installed that mutes E_STRICT
 * notices; the previous handler is reinstated before this method
 * returns or propagates an exception.
 *
 * @param string $string  HTML to tokenize; it is passed through normalize() first.
 * @param mixed  $config  Forwarded unchanged to normalize().
 * @param mixed  $context Forwarded unchanged to normalize().
 * @return array Tokens in the order the parser callbacks produced them.
 */
public function tokenizeHTML($string, $config, $context) {

    // Reset all per-document state. The element stack must be cleared too:
    // an unclosed element left over from a previous call would otherwise
    // influence how escapeHandler() classifies comments in this document.
    $this->tokens = array();
    $this->stack = array();
    $this->last_token_was_empty = false;

    $string = $this->normalize($string, $config, $context);

    $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));

    // try/catch-rethrow is used instead of "finally" so the code still
    // parses on runtimes older than PHP 5.5. The Throwable clause covers
    // PHP 7+ Error instances and is simply never matched on PHP 5.
    try {
        $parser = new XML_HTMLSax3();
        $parser->set_object($this);
        $parser->set_element_handler('openHandler','closeHandler');
        $parser->set_data_handler('dataHandler');
        $parser->set_escape_handler('escapeHandler');

        // doesn't seem to work correctly for attributes
        $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);

        $parser->parse($string);
    } catch (Exception $e) {
        restore_error_handler();
        throw $e;
    } catch (Throwable $e) {
        restore_error_handler();
        throw $e;
    }

    restore_error_handler();

    return $this->tokens;

}
59 
/**
 * SAX callback for an opening tag: appends either an Empty token
 * (self-closing tag) or a Start token to the token list.
 *
 * @param object $parser Parser that fired the callback (unused).
 * @param string $name   Element name as reported by the parser.
 * @param array  $attrs  Attribute values keyed by attribute name; each value
 *                       is run through parseData() before use.
 * @param bool   $closed Truthy when the tag was self-closing.
 * @return bool Always true.
 */
public function openHandler(&$parser, $name, $attrs, $closed) {
    // entities are not resolved in attrs
    foreach ($attrs as $key => $attr) {
        $attrs[$key] = $this->parseData($attr);
    }

    if ($closed) {
        $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
        $this->last_token_was_empty = true;
        // A self-closing element is deliberately NOT pushed onto the stack:
        // closeHandler() swallows its synthetic close tag without popping,
        // so pushing here would leave a stale entry on top of the stack.
        return true;
    }

    $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
    // A real start tag ends any pending "last token was empty" state, so a
    // following genuine end tag cannot be mistaken for the synthetic one.
    $this->last_token_was_empty = false;
    $this->stack[] = $name;
    return true;
}
77 
/**
 * SAX callback for a closing tag.
 *
 * HTMLSax3 seems to always send an extra close tag after an empty tag.
 * When the previous token was an empty tag, that close is swallowed and the
 * flag is cleared; otherwise an End token is appended and the innermost
 * entry is removed from the element stack.
 * [TESTME] to make sure the swallowing doesn't overreach.
 *
 * @param object $parser Parser that fired the callback (unused).
 * @param string $name   Element name as reported by the parser.
 * @return bool Always true.
 */
public function closeHandler(&$parser, $name) {
    if ($this->last_token_was_empty) {
        // Synthetic close belonging to the preceding empty tag: ignore it.
        $this->last_token_was_empty = false;
    } else {
        $this->tokens[] = new HTMLPurifier_Token_End($name);
        if (count($this->stack) > 0) {
            array_pop($this->stack);
        }
    }
    return true;
}
93 
/**
 * SAX callback for character data: appends the data as a Text token and
 * clears the "last token was empty" flag.
 *
 * @param object $parser Parser that fired the callback (unused).
 * @param string $data   Character data as reported by the parser.
 * @return bool Always true.
 */
public function dataHandler(&$parser, $data) {
    $textToken = new HTMLPurifier_Token_Text($data);
    $this->tokens[] = $textToken;
    $this->last_token_was_empty = false;
    return true;
}
102 
/**
 * SAX callback for escape sequences. Only payloads that start with "--"
 * (comments) produce a token; every other escape is ignored.
 * CDATA is handled elsewhere.
 *
 * The leading "--" and, if present, the trailing "--" are stripped. When the
 * innermost entry of the element stack is "style" the remainder is emitted
 * as a Text token, otherwise as a Comment token.
 *
 * @param object $parser Parser that fired the callback (unused).
 * @param string $data   Escape payload as reported by the parser.
 * @return bool Always true.
 */
public function escapeHandler(&$parser, $data) {
    // Anything that is not a comment is dropped without touching state.
    if (strpos($data, '--') !== 0) {
        return true;
    }

    // remove leading and trailing double-dashes
    $body = substr($data, 2);
    if (strlen($body) >= 2 && substr($body, -2) == "--") {
        $body = substr($body, 0, -2);
    }

    $top = count($this->stack) - 1;
    $insideStyle = isset($this->stack[$top]) && $this->stack[$top] == "style";

    if ($insideStyle) {
        $this->tokens[] = new HTMLPurifier_Token_Text($body);
    } else {
        $this->tokens[] = new HTMLPurifier_Token_Comment($body);
    }

    $this->last_token_was_empty = false;
    return true;
}
128 
/**
 * Error handler installed while parsing: silences E_STRICT and forwards
 * every other error to the handler that was active before.
 *
 * @param int         $errno      Error level.
 * @param string      $errstr     Error message.
 * @param string|null $errfile    File in which the error was raised.
 * @param int|null    $errline    Line on which the error was raised.
 * @param array|null  $errcontext Symbol table at the point of the error, if supplied.
 * @return mixed Null for muted E_STRICT errors; false when there is no
 *               previous handler to delegate to; otherwise whatever the
 *               previous handler returns.
 */
public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) {
    if ($errno == E_STRICT) return;
    // set_error_handler() yields null when no user-defined handler was
    // installed before ours; there is nothing to call in that case, so
    // return false to let PHP's built-in error handling take over.
    if ($this->parent_handler === null) {
        return false;
    }
    return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext);
}
136 
137 }
138 
139 // vim: et sw=4 sts=4