ILIAS  Release_4_0_x_branch Revision 61816
 All Data Structures Namespaces Files Functions Variables Groups Pages
AutoParagraph.php
Go to the documentation of this file.
1 <?php
2 
10 {
11 
12  public $name = 'AutoParagraph';
13  public $needed = array('p');
14 
/**
 * Builds a fresh <p> start token for insertion into the token stream.
 *
 * The token's armor entry 'MakeWellFormed_TagClosedError' is switched on;
 * presumably this stops MakeWellFormed from reporting an error when it
 * auto-closes a paragraph that this injector created (confirm against
 * the MakeWellFormed strategy).
 *
 * @return HTMLPurifier_Token_Start
 */
private function _pStart() {
    $paragraphStart = new HTMLPurifier_Token_Start('p');
    $paragraphStart->armor['MakeWellFormed_TagClosedError'] = true;
    return $paragraphStart;
}
20 
/**
 * Decides whether a text token has to be wrapped in, or split into,
 * paragraphs, and rewrites $token (passed by reference) accordingly.
 *
 * $token is either left untouched or replaced by an array of tokens.
 *
 * @param HTMLPurifier_Token_Text $token text token currently being handled
 */
public function handleText(&$token) {
    $text = $token->data;

    if (!$this->allowsElement('p')) {
        // The current parent does not accept <p> children; the only case
        // we still care about is text sitting directly inside a <p>.
        if (empty($this->currentNesting)) {
            return;
        }
        $parent = $this->currentNesting[count($this->currentNesting) - 1];
        if ($parent->name == 'p') {
            // State 3.1: ...<p>PAR1
            // State 3.2: ...<p>PAR1\n\nPAR2
            // We are already in a paragraph, so no start tag is seeded.
            $token = array();
            $this->_splitText($text, $token);
        }
        // State 4.1: ...<b>PAR1
        // State 4.2: ...<b>PAR1\n\nPAR2
        // Any other parent: nothing to do.
        return;
    }

    // Behavior differs between text in the anonymous root node and text
    // inside an element. If the text has a double newline the treatment
    // is the same; otherwise elements go through the look-ahead below.
    $atRoot = empty($this->currentNesting);
    if (!$atRoot && strpos($text, "\n\n") === false) {
        // State 2: <div>PAR1... (similar to 1.4)
        // The parent allows paragraphs, but we do not know yet whether
        // they will be needed.
        if ($this->_pLookAhead()) {
            // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
            // This text is always the first child here: any earlier
            // inline element would have run this same routine and found
            // the double newline (a comment is a possible exception).
            $token = array($this->_pStart(), $token);
        }
        // State 2.2.1: <div>PAR1<div>
        // State 2.2.2: <div>PAR1<b>PAR1</b></div>
        return;
    }

    $i = $nesting = null;
    $hasFollowingToken = $this->forwardUntilEndToken($i, $current, $nesting);
    if (!$hasFollowingToken && $token->is_whitespace) {
        // State 1.1: ... ^ (whitespace, then document end)
        // Degenerate case: leave the token alone.
        return;
    }

    // State 1.2: PAR1
    // State 1.3: PAR1\n\nPAR2
    // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
    $token = array($this->_pStart());
    $this->_splitText($text, $token);
}
91 
/**
 * Decides whether a <p> start token (and/or a separating double newline)
 * has to be inserted in front of an element token, and rewrites $token
 * (passed by reference) into an array of tokens when it does.
 *
 * Block tokens are not checked for already being inside a <p>: per the
 * original author's note, MakeWellFormed would have auto-closed that tag.
 *
 * @param HTMLPurifier_Token $token element token currently being handled
 */
public function handleElement(&$token) {
    if (!$this->allowsElement('p')) {
        // State 2.2: <ul><li>
        // State 2.4: <p><b>
        return;
    }

    if (!empty($this->currentNesting)) {
        if (!$this->_isInline($token)) {
            // State 2.3: ...<div>
            return;
        }

        // State 1: <div>...<b>
        // Find out whether this token directly follows the parent's start
        // tag (the original notes that backward() seeks past whitespace).
        $i = null;
        $this->backward($i, $prev);

        if ($prev instanceof HTMLPurifier_Token_Start) {
            // State 1.2.1: <div><b>
            // Look ahead to see whether a <p> is needed:
            //   State 1.3.1: <div><b>PAR1\n\nPAR2            -> yes
            //   State 1.3.2: <div><b>PAR1</b></div>          -> no
            //   State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div> -> no
            $needsParagraph = $this->_pLookAhead();
        } else {
            // The token was not adjacent to the parent's start tag.
            //   State 1.1.4: <div><p>PAR1</p>\n\n<b>  -> yes
            //                (arguably _splitText's job)
            //   State 1.1.1: <div><p>PAR1</p><b>      -> no
            //   State 1.1.2: <div><br /><b>           -> no
            //   State 1.1.3: <div>PAR<b>              -> no
            $needsParagraph = ($prev instanceof HTMLPurifier_Token_Text)
                && substr($prev->data, -2) === "\n\n";
        }

        if ($needsParagraph) {
            $token = array($this->_pStart(), $token);
        }
        return;
    }

    // We are in the anonymous root node.
    if ($this->_isInline($token)) {
        // State 3.1: <b>
        // The {p} tag is inserted here; it is not yet reflected in
        // inputTokens.
        $token = array($this->_pStart(), $token);
    }
    // else State 3.2: <div>

    $i = null;
    if ($this->backward($i, $prev) && !($prev instanceof HTMLPurifier_Token_Text)) {
        // State 3.1.1: ...</p>{p}<b>
        // State 3.2.1: ...</p><div>
        // No text separates us from the previous token, so put a double
        // newline in front of whatever we emit.
        if (!is_array($token)) {
            $token = array($token);
        }
        array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
    }
    // Otherwise:
    // State 3.1.2: ...</p>\n\n{p}<b>
    // State 3.2.2: ...</p>\n\n<div>
    // PAR<ELEM> cannot occur, because PAR would already have been wrapped
    // in <p> tags.
}
195 
/**
 * Splits text on double newlines into paragraphs and appends the
 * resulting text, </p>, "\n\n" and <p> tokens to $result.
 *
 * An empty $result on entry means the caller did not seed a <p> start
 * token, i.e. the text already sits inside a paragraph.
 *
 * Fix: when $data contains no double newline, the text is now appended
 * to $result as a single text token. Previously the function returned
 * without adding anything, so callers that had already replaced their
 * token with array() or array(<p>) lost the text entirely.
 *
 * @param string $data   text to split
 * @param array  $result token list to extend, modified by reference
 */
private function _splitText($data, &$result) {
    $raw_paragraphs = explode("\n\n", $data);
    $paragraphs  = array(); // without empty paragraphs
    $needs_start = false;
    $needs_end   = false;

    $c = count($raw_paragraphs);
    if ($c == 1) {
        // There were no double newlines, so there is nothing to split.
        // Hand the text back unchanged so that it is not dropped.
        $result[] = new HTMLPurifier_Token_Text($data);
        return;
    }

    for ($i = 0; $i < $c; $i++) {
        $par = $raw_paragraphs[$i];
        if (trim($par) !== '') {
            $paragraphs[] = $par;
            continue;
        }

        // Blank segment: only its position (front or end) matters.
        if ($i == 0) {
            // Double newline at the front
            if (empty($result)) {
                // No start paragraph token was added by the caller, so we
                // have been inside a paragraph for a while and the newline
                // means that paragraph ends here.
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
                // The matching start token is only added further down if
                // there are real paragraphs in this text; if there are
                // none, the next call to the injector handles it.
                $needs_start = true;
            } else {
                // We just started a new paragraph. Reinstate the double
                // newline for presentation's sake, since it was in the
                // source code.
                array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
            }
        } elseif ($i + 1 == $c) {
            // Double newline at the end: there should be a trailing </p>
            // when we are finally done.
            $needs_end = true;
        }
    }

    // The text was nothing but whitespace.
    if (empty($paragraphs)) {
        return;
    }

    // Add the start tag indicated by \n\n at the beginning of $data
    if ($needs_start) {
        $result[] = $this->_pStart();
    }

    // Append the paragraphs onto the result
    foreach ($paragraphs as $par) {
        $result[] = new HTMLPurifier_Token_Text($par);
        $result[] = new HTMLPurifier_Token_End('p');
        $result[] = new HTMLPurifier_Token_Text("\n\n");
        $result[] = $this->_pStart();
    }

    // Remove the trailing start token; the injector adds it later if it
    // turns out to be needed. This avoids a lookahead here at the cost of
    // a lookbehind later.
    array_pop($result);

    // If there is no need for an end tag, remove it together with its
    // separator and let MakeWellFormed close the paragraph later.
    if (!$needs_end) {
        array_pop($result); // removes \n\n
        array_pop($result); // removes </p>
    }
}
285 
/**
 * Reports whether the token's element is listed among the allowed
 * children of <p> in the HTML definition; the class treats such
 * elements as inline.
 *
 * @param HTMLPurifier_Token $token token carrying a tag name
 * @return bool
 */
private function _isInline($token) {
    $allowedInParagraph = isset(
        $this->htmlDefinition->info['p']->child->elements[$token->name]
    );
    return $allowedInParagraph;
}
293 
/**
 * Scans forward from the current token, asking _checkNeedsP() about each
 * token that forwardUntilEndToken() yields, and returns the first
 * definite answer.
 *
 * @return bool true when a <p> is needed; false when a scanned token
 *              rules it out or the scan ends without a verdict
 */
private function _pLookAhead() {
    $this->current($index, $lookahead);

    // If we start on a start tag, we are already one level deep.
    $depth = ($lookahead instanceof HTMLPurifier_Token_Start) ? 1 : 0;

    while ($this->forwardUntilEndToken($index, $lookahead, $depth)) {
        $verdict = $this->_checkNeedsP($lookahead);
        if ($verdict !== null) {
            return $verdict;
        }
    }

    return false;
}
312 
/**
 * Inspects a single look-ahead token and says whether it settles the
 * question of needing a <p>.
 *
 * @param HTMLPurifier_Token $current token under inspection
 * @return bool|null false for a non-inline start tag, true for text
 *                   containing a double newline, null when undecided
 */
private function _checkNeedsP($current) {
    if ($current instanceof HTMLPurifier_Token_Start) {
        // <div>PAR1<div>
        // A block element ends the search early; an inline one tells us
        // nothing yet.
        return $this->_isInline($current) ? null : false;
    }

    if (
        $current instanceof HTMLPurifier_Token_Text &&
        strpos($current->data, "\n\n") !== false
    ) {
        // <div>PAR1<b>PAR1\n\nPAR2
        return true;
    }

    // <div>PAR1<b>PAR1... (text without a double newline) or any other
    // token: undecided.
    return null;
}
337 
338 }
339 
340 // vim: et sw=4 sts=4