ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
Parser.php
Go to the documentation of this file.
1<?php
// Version string of this parser implementation.
define('MW_PARSER_VERSION', '1.6.1');

define('RLH_FOR_UPDATE', 1);

// Output types: the values $mOutputType may take.
define('OT_HTML', 1);
define('OT_WIKI', 2);
define('OT_MSG', 3);
define('OT_PREPROCESS', 4);

// Flag accepted by setFunctionHook().
define('SFH_NO_HASH', 1);

// Passing this string to extractTags makes it strip HTML comments in
// addition to regular <XML>-style tags. It should never be something
// that could legitimately appear in wikisyntax.
define('STRIP_COMMENTS', 'HTMLCommentStrip');

// Building blocks for external link processing.
define('HTTP_PROTOCOLS', 'http:\/\/|https:\/\/');
// Everything except bracket, space, or control characters
define('EXT_LINK_URL_CLASS', '[^][<>"\\x00-\\x20\\x7F]');
// Including space, but excluding newlines
define('EXT_LINK_TEXT_CLASS', '[^\]\\x0a\\x0d]');
define('EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]');
define('EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg');

// Bracketed external link: "[" URL, optional spaces, link text, "]".
define(
    'EXT_LINK_BRACKETED',
    sprintf(
        '/\[(\b(%s)%s+) *(%s*?)\]/S',
        wfUrlProtocols(),
        EXT_LINK_URL_CLASS,
        EXT_LINK_TEXT_CLASS
    )
);

// External image URL: protocol, hostname and path, file name, extension.
define(
    'EXT_IMAGE_REGEX',
    sprintf(
        '/^(%s)(%s+)\\/(%s+)\\.((?i)%s)$/S',
        HTTP_PROTOCOLS,
        EXT_LINK_URL_CLASS,
        EXT_IMAGE_FNAME_CLASS,
        EXT_IMAGE_EXTENSIONS
    )
);

// State constants for the definition list colon extraction
define('MW_COLON_STATE_TEXT', 0);
define('MW_COLON_STATE_TAG', 1);
define('MW_COLON_STATE_TAGSTART', 2);
define('MW_COLON_STATE_CLOSETAG', 3);
define('MW_COLON_STATE_TAGSLASH', 4);
define('MW_COLON_STATE_COMMENT', 5);
define('MW_COLON_STATE_COMMENTDASH', 6);
define('MW_COLON_STATE_COMMENTDASHDASH', 7);
58
92class Parser
93{
98 # Persistent:
99 public $mTagHooks;
101 public $mFunctionSynonyms;
102 public $mVariables;
104 # Cleared with clearState():
105 public $mOutput;
107 public $mDTopen;
112 public $mInPre;
117 public $mDefaultSort;
118 public $mTemplates;
119 // cache of already loaded templates, avoids
120 // multiple SQL queries for the same string
121 public $mTemplatePath; // stores an unsorted hash of all the templates already loaded
122 // in this path. Used for loop detection.
123
124 # Temporary
125 # These are variables reset at least once per parse regardless of $clearState
126 public $mOptions;
127 // ParserOptions object
128 public $mTitle;
129 // Title context, used for self-link rendering and similar things
131 // Output type, one of the OT_xxx constants
132 public $ot;
133 // Shortcut alias, see setOutputType()
135 // ID to display in {{REVISIONID}} tags
137 // The timestamp of the specified revision ID
138 public $mRevIdForTs; // The revision ID which was used to fetch the timestamp
139
/**
 * Create the parser with empty hook registries.
 *
 * Only the hook tables and the first-call flag are set here; the
 * remaining set-up happens in firstCallInit().
 */
public function __construct()
{
    $this->mFirstCall = true;
    $this->mTagHooks = array();
    $this->mFunctionHooks = array();
    $this->mFunctionSynonyms = array(
        0 => array(),
        1 => array(),
    );
}
154
/**
 * One-time initialisation, performed the first time it is needed.
 *
 * Registers the <pre> tag hook and the core parser functions, then calls
 * initialiseVariables(). Does nothing once $this->mFirstCall is false.
 */
public function firstCallInit()
{
    if (!$this->mFirstCall) {
        return;
    }

    wfProfileIn(__METHOD__);
    global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions;

    $this->setHook('pre', array( $this, 'renderPreTag' ));

    // Function name => CoreParserFunctions method. All of these are
    // registered with SFH_NO_HASH, in exactly this order.
    $hashlessFunctions = array(
        'int' => 'intFunction',
        'ns' => 'ns',
        'urlencode' => 'urlencode',
        'lcfirst' => 'lcfirst',
        'ucfirst' => 'ucfirst',
        'lc' => 'lc',
        'uc' => 'uc',
        'localurl' => 'localurl',
        'localurle' => 'localurle',
        'fullurl' => 'fullurl',
        'fullurle' => 'fullurle',
        'formatnum' => 'formatnum',
        'grammar' => 'grammar',
        'plural' => 'plural',
        'numberofpages' => 'numberofpages',
        'numberofusers' => 'numberofusers',
        'numberofarticles' => 'numberofarticles',
        'numberoffiles' => 'numberoffiles',
        'numberofadmins' => 'numberofadmins',
        'numberofedits' => 'numberofedits',
        'language' => 'language',
        'padleft' => 'padleft',
        'padright' => 'padright',
        'anchorencode' => 'anchorencode',
    );
    foreach ($hashlessFunctions as $name => $method) {
        $this->setFunctionHook($name, array( 'CoreParserFunctions', $method ), SFH_NO_HASH);
    }

    // 'special' is the only function registered with the default flags
    $this->setFunctionHook('special', array( 'CoreParserFunctions', 'special' ));
    $this->setFunctionHook('defaultsort', array( 'CoreParserFunctions', 'defaultsort' ), SFH_NO_HASH);

    // Optional functions, enabled through configuration globals
    if ($wgAllowDisplayTitle) {
        $this->setFunctionHook('displaytitle', array( 'CoreParserFunctions', 'displaytitle' ), SFH_NO_HASH);
    }
    if ($wgAllowSlowParserFunctions) {
        $this->setFunctionHook('pagesinnamespace', array( 'CoreParserFunctions', 'pagesinnamespace' ), SFH_NO_HASH);
    }

    $this->initialiseVariables();
    $this->mFirstCall = false;
    wfProfileOut(__METHOD__);
}
207
/**
 * Reset all per-parse state.
 *
 * Runs firstCallInit() if it has not run yet, replaces the output and
 * strip-state objects, empties the link holder and template caches,
 * generates a new unique marker prefix and finally fires the
 * 'ParserClearState' hook.
 */
public function clearState()
{
    wfProfileIn(__METHOD__);

    if ($this->mFirstCall) {
        $this->firstCallInit();
    }

    $this->mOutput = new ParserOutput;
    $this->mAutonumber = 0;
    $this->mLastSection = '';
    $this->mDTopen = false;
    $this->mIncludeCount = array();
    $this->mStripState = new StripState;
    $this->mArgStack = array();
    $this->mInPre = false;

    // Placeholder tables for interwiki and internal links
    $this->mInterwikiLinkHolders = array(
        'texts' => array(),
        'titles' => array(),
    );
    $this->mLinkHolders = array(
        'namespaces' => array(),
        'dbkeys' => array(),
        'queries' => array(),
        'texts' => array(),
        'titles' => array(),
    );

    $this->mRevisionId = null;
    $this->mRevisionTimestamp = null;

    // Fresh random prefix for the strip markers of this parse
    $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();

    # Clear these on every parse, bug 4549
    $this->mTemplates = array();
    $this->mTemplatePath = array();

    $this->mShowToc = true;
    $this->mForceTocPosition = false;
    $this->mIncludeSizes = array(
        'pre-expand' => 0,
        'post-expand' => 0,
        'arg' => 0,
    );
    $this->mDefaultSort = false;

    wfRunHooks('ParserClearState', array( &$this ));
    wfProfileOut(__METHOD__);
}
265
/**
 * Store the output type and rebuild the $this->ot shortcut table.
 *
 * $this->ot maps 'html', 'wiki', 'msg' and 'pre' to booleans telling
 * whether $ot equals the corresponding OT_xxx constant.
 *
 * @param int $ot One of the OT_xxx constants
 */
public function setOutputType($ot)
{
    $this->mOutputType = $ot;

    $typeByKey = array(
        'html' => OT_HTML,
        'wiki' => OT_WIKI,
        'msg' => OT_MSG,
        'pre' => OT_PREPROCESS,
    );

    // Shortcut alias
    $flags = array();
    foreach ($typeByKey as $key => $type) {
        $flags[$key] = ($ot == $type);
    }
    $this->ot = $flags;
}
277
/**
 * Accessor for the unique marker prefix held in $this->mUniqPrefix
 * (assigned by clearState()).
 *
 * @return string
 */
public function uniqPrefix()
{
    $prefix = $this->mUniqPrefix;
    return $prefix;
}
287
/**
 * Run a complete parse of $text and return the output object.
 *
 * The text is stripped, sent through internalParse(), unstripped, passed
 * to doBlockLevels() and the content language converter, and finally
 * cleaned up either by tidy() or by a set of fallback regexes.
 *
 * @param string $text Text to parse
 * @param object $title Title context; stored by reference in $this->mTitle
 * @param object $options Parser options; stored in $this->mOptions
 * @param bool $linestart Passed through unchanged to doBlockLevels()
 * @param bool $clearState When true, clearState() is called first
 * @param int|null $revid Revision ID to use during this parse; the previous
 *        revision ID and timestamp are restored before returning
 * @return object $this->mOutput, with its text set to the parse result
 */
public function parse($text, &$title, $options, $linestart = true, $clearState = true, $revid = null)
{
    global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang;
    $fname = 'Parser::parse-' . wfGetCaller();
    wfProfileIn(__METHOD__);
    wfProfileIn($fname);

    if ($clearState) {
        $this->clearState();
    }

    $this->mOptions = $options;
    $this->mTitle = &$title;

    // Remember the current revision context so it can be restored at the end
    $oldRevisionId = $this->mRevisionId;
    $oldRevisionTimestamp = $this->mRevisionTimestamp;
    if ($revid !== null) {
        $this->mRevisionId = $revid;
        $this->mRevisionTimestamp = null;
    }

    $this->setOutputType(OT_HTML);
    wfRunHooks('ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ));
    $text = $this->strip($text, $this->mStripState);
    wfRunHooks('ParserAfterStrip', array( &$this, &$text, &$this->mStripState ));
    $text = $this->internalParse($text);
    $text = $this->mStripState->unstripGeneral($text);

    # Clean up special characters, only run once, next-to-last before doBlockLevels
    $fixtags = array(
        # french spaces, last one Guillemet-left
        # only if there is something before the space
        # (the pattern has a single capture group, so \2 expands to nothing)
        '/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1&nbsp;\\2',
        # french spaces, Guillemet-right
        '/(\\302\\253) /' => '\\1&nbsp;',
    );
    $text = preg_replace(array_keys($fixtags), array_values($fixtags), $text);

    # only once and last
    $text = $this->doBlockLevels($text, $linestart);

    # the position of the parserConvert() call should not be changed. it
    # assumes that the links are all replaced and the only thing left
    # is the <nowiki> mark.
    # Side-effects: this calls $this->mOutput->setTitleText()
    $text = $wgContLang->parserConvert($text, $this);

    $text = $this->mStripState->unstripNoWiki($text);

    wfRunHooks('ParserBeforeTidy', array( &$this, &$text ));

    if (($wgUseTidy && $this->mOptions->mTidy) || $wgAlwaysUseTidy) {
        // Tidy is enabled: let it repair the markup. Without this call the
        // branch would leave the text completely uncleaned.
        $text = Parser::tidy($text);
    } else {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $tidyregs = array(
            # ''Something [http://www.cool.com cool''] -->
            # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
            '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            # fix up an anchor inside another anchor, only
            # at least for a single single nested link (bug 3695)
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
            '\\1\\2</a>\\3</a>\\1\\4</a>',
            # fix div inside inline elements- doBlockLevels won't wrap a line which
            # contains a div, so fix it up here; replace
            # div with escaped text
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
            '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
            # remove empty italic or bold tag pairs, some
            # introduced by rules above
            '/<([bi])><\/\\1>/' => '',
        );

        $text = preg_replace(
            array_keys($tidyregs),
            array_values($tidyregs),
            $text
        );
    }

    wfRunHooks('ParserAfterTidy', array( &$this, &$text ));

    # Information on include size limits, for the benefit of users who try to skirt them
    if (max($this->mIncludeSizes) > 1000) {
        $max = $this->mOptions->getMaxIncludeSize();
        $text .= "<!-- \n" .
            "Pre-expand include size: {$this->mIncludeSizes['pre-expand']} bytes\n" .
            "Post-expand include size: {$this->mIncludeSizes['post-expand']} bytes\n" .
            "Template argument size: {$this->mIncludeSizes['arg']} bytes\n" .
            "Maximum: $max bytes\n" .
            "-->\n";
    }
    $this->mOutput->setText($text);
    $this->mRevisionId = $oldRevisionId;
    $this->mRevisionTimestamp = $oldRevisionTimestamp;
    wfProfileOut($fname);
    wfProfileOut(__METHOD__);

    return $this->mOutput;
}
409
/**
 * Strip $text and run it through internalParse(), firing the
 * 'ParserBeforeStrip' and 'ParserAfterStrip' hooks around the strip step.
 * The parser state is not cleared and the result is not unstripped.
 *
 * @param string $text
 * @return string
 */
public function recursiveTagParse($text)
{
    wfProfileIn(__METHOD__);

    wfRunHooks('ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ));
    $text = $this->strip($text, $this->mStripState);
    wfRunHooks('ParserAfterStrip', array( &$this, &$text, &$this->mStripState ));

    $parsed = $this->internalParse($text);

    wfProfileOut(__METHOD__);
    return $parsed;
}
424
/**
 * Preprocess $text: clear the state, strip, optionally drop HTML
 * comments, replace variables and put the stripped items back.
 *
 * @param string $text
 * @param object $title Stored in $this->mTitle
 * @param object $options Stored in $this->mOptions; getRemoveComments()
 *        decides whether HTML comments are removed
 * @return string
 */
public function preprocess($text, $title, $options)
{
    wfProfileIn(__METHOD__);

    $this->clearState();
    $this->setOutputType(OT_PREPROCESS);
    $this->mOptions = $options;
    $this->mTitle = $title;

    wfRunHooks('ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ));
    $text = $this->strip($text, $this->mStripState);
    wfRunHooks('ParserAfterStrip', array( &$this, &$text, &$this->mStripState ));

    $dropComments = $this->mOptions->getRemoveComments();
    if ($dropComments) {
        $text = Sanitizer::removeHTMLcomments($text);
    }

    $text = $this->replaceVariables($text);
    $result = $this->mStripState->unstripBoth($text);

    wfProfileOut(__METHOD__);
    return $result;
}
447
/**
 * Build a random hexadecimal string from two mt_rand() draws in the
 * range 0..0x7fffffff.
 *
 * @return string
 */
public function getRandomString()
{
    $chunks = array();
    for ($i = 0; $i < 2; $i++) {
        $chunks[] = dechex(mt_rand(0, 0x7fffffff));
    }
    return implode('', $chunks);
}
458
/**
 * Accessor for $this->mTitle; the value is returned by reference.
 */
public function &getTitle()
{
    return $this->mTitle;
}
/**
 * Accessor for the options object stored in $this->mOptions.
 */
public function getOptions()
{
    $options = $this->mOptions;
    return $options;
}
467
/**
 * Pick the language object for parser functions: $wgLang when the
 * options report an interface message, $wgContLang otherwise.
 */
public function getFunctionLang()
{
    global $wgLang, $wgContLang;

    if ($this->mOptions->getInterfaceMessage()) {
        return $wgLang;
    }
    return $wgContLang;
}
473
/**
 * Replace the given XML-style tags, and HTML comments, in $text with
 * unique markers and collect what was removed.
 *
 * Each entry of $matches is keyed by its marker and holds
 * array( element name, content, decoded attributes, original source ).
 * For a self-closing tag the content is null. A tag without an end tag
 * swallows the rest of the text.
 *
 * @param array $elements Tag names to extract
 * @param string $text Text to scan
 * @param array $matches Output: extracted items keyed by marker
 * @param string $uniq_prefix Prefix used when building the markers
 * @return string The text with every extracted item replaced by its marker
 */
public function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = '')
{
    // Marker counter, shared by all calls during the request
    static $n = 1;

    $matches = array();
    $stripped = '';
    $startPattern = '/<(' . implode('|', $elements) . ")(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

    while ('' != $text) {
        $parts = preg_split($startPattern, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
        $stripped .= $parts[0];

        $partCount = count($parts);
        if ($partCount < 5) {
            // No further opening tag or comment
            break;
        }

        // Six pieces mean the comment alternative matched, five a tag
        $isComment = ($partCount > 5);
        if ($isComment) {
            $element = $parts[4];
            $attributes = '';
            $close = '';
            $inside = $parts[5];
        } else {
            $element = $parts[1];
            $attributes = $parts[2];
            $close = $parts[3];
            $inside = $parts[4];
        }

        $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . '-QINU';
        $stripped .= $marker;

        $content = null;
        $tail = null;
        if ($close === '/>') {
            // Empty element tag, <tag />
            $text = $inside;
        } else {
            $endPattern = ($element == '!--') ? '/(-->)/' : "/(<\\/$element\\s*>)/i";
            $pieces = preg_split($endPattern, $inside, 2, PREG_SPLIT_DELIM_CAPTURE);
            $content = $pieces[0];
            if (count($pieces) >= 3) {
                $tail = $pieces[1];
                $text = $pieces[2];
            } else {
                # No end tag -- let it run out to the end of the text.
                $tail = '';
                $text = '';
            }
        }

        $matches[$marker] = array(
            $element,
            $content,
            Sanitizer::decodeTagAttributes($attributes),
            "<$element$attributes$close$content$tail",
        );
    }

    return $stripped;
}
554
/**
 * Replace extension tags, <nowiki>, <gallery>, optionally <html> and
 * <math>, and HTML comments with unique markers, and record the
 * replacement text for each marker in $state.
 *
 * When the output type is OT_HTML the tags are rendered; otherwise their
 * source text is kept.
 *
 * @param string $text Text to strip
 * @param object $state Strip state that receives the new items
 * @param bool $stripcomments When false, comments are put back into the
 *        returned text
 * @param array $dontstrip Tag names that must not be stripped
 * @return string Text with markers in place of the stripped items
 */
public function strip($text, $state, $stripcomments = false, $dontstrip = array())
{
    global $wgContLang, $wgRawHtml;
    wfProfileIn(__METHOD__);

    $render = ($this->mOutputType == OT_HTML);
    $uniq_prefix = $this->mUniqPrefix;
    $commentState = new ReplacementArray;
    $nowikiItems = array();
    $generalItems = array();

    $elements = array_merge(
        array( 'nowiki', 'gallery' ),
        array_keys($this->mTagHooks)
    );
    if ($wgRawHtml) {
        $elements[] = 'html';
    }
    if ($this->mOptions->getUseTeX()) {
        $elements[] = 'math';
    }

    # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
    foreach ($elements as $index => $name) {
        if (in_array($name, $dontstrip)) {
            unset($elements[$index]);
        }
    }

    $matches = array();
    $text = Parser::extractTagsAndParams($elements, $text, $matches, $uniq_prefix);

    foreach ($matches as $marker => $data) {
        list($element, $content, $params, $tag) = $data;

        if (!$render) {
            // Just stripping tags; keep the source
            $output = $tag;
        } else {
            $tagName = strtolower($element);
            $profileKey = __METHOD__ . "-render-$tagName";
            wfProfileIn($profileKey);

            if ($tagName == '!--') {
                // Comment. An unclosed one is closed here so that later
                // stripping can remove it.
                $output = (substr($tag, -3) == '-->') ? $tag : "$tag-->";
            } elseif ($tagName == 'html' && $wgRawHtml) {
                $output = $content;
            } elseif ($tagName == 'html' || $tagName == 'nowiki') {
                // <html> without $wgRawHtml is treated like <nowiki>
                $output = Xml::escapeTagsOnly($content);
            } elseif ($tagName == 'math') {
                $output = $wgContLang->armourMath(MathRenderer::renderMath($content));
            } elseif ($tagName == 'gallery') {
                $output = $this->renderImageGallery($content, $params);
            } elseif (isset($this->mTagHooks[$tagName])) {
                $output = call_user_func_array(
                    $this->mTagHooks[$tagName],
                    array( $content, $params, $this )
                );
            } else {
                throw new MWException("Invalid call hook $element");
            }

            wfProfileOut($profileKey);
        }

        // Unstrip the output, to support recursive strip() calls
        $output = $state->unstripBoth($output);

        if (!$stripcomments && $element == '!--') {
            $commentState->setPair($marker, $output);
        } elseif ($element == 'html' || $element == 'nowiki') {
            $nowikiItems[$marker] = $output;
        } else {
            $generalItems[$marker] = $output;
        }
    }

    # Add the new items to the state
    # We do this after the loop instead of during it to avoid slowing
    # down the recursive unstrip
    $state->nowiki->mergeArray($nowikiItems);
    $state->general->mergeArray($generalItems);

    # Unstrip comments unless explicitly told otherwise.
    # (The comments are always stripped prior to this point, so as to
    # not invoke any extension tags / parser hooks contained within
    # a comment.)
    if (!$stripcomments) {
        // Put them all back and forget them
        $text = $commentState->replace($text);
    }

    wfProfileOut(__METHOD__);
    return $text;
}
683
/**
 * Thin wrapper: delegates to $state->unstripGeneral().
 *
 * @param string $text
 * @param object $state Strip state to read from
 * @return string
 */
public function unstrip($text, $state)
{
    $restored = $state->unstripGeneral($text);
    return $restored;
}
695
/**
 * Thin wrapper: delegates to $state->unstripNoWiki().
 *
 * @param string $text
 * @param object $state Strip state to read from
 * @return string
 */
public function unstripNoWiki($text, $state)
{
    $restored = $state->unstripNoWiki($text);
    return $restored;
}
706
/**
 * Restore both kinds of stripped items in $text, using the parser's own
 * strip state ($this->mStripState).
 *
 * @param string $text
 * @return string
 */
public function unstripForHTML($text)
{
    $state = $this->mStripState;
    return $state->unstripBoth($text);
}
714
/**
 * Register $text in the general part of $state under a freshly generated
 * random marker and return that marker.
 *
 * @param string $text Text to store
 * @param object $state Strip state that receives the item
 * @return string The generated marker
 */
public function insertStripItem($text, &$state)
{
    $marker = $this->mUniqPrefix . '-item' . Parser::getRandomString();
    $state->general->setPair($marker, $text);

    return $marker;
}
728
/**
 * Wrap $text in a minimal XHTML document and hand it to tidy.
 *
 * $wgTidyInternal selects internalTidy() over externalTidy(). When the
 * chosen implementation returns null, the original text is returned with
 * a warning comment appended.
 *
 * @param string $text Markup to clean up
 * @return string
 */
public function tidy($text)
{
    global $wgTidyInternal;

    $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        . ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'
        . '<head><title>test</title></head><body>' . $text . '</body></html>';

    $correctedtext = $wgTidyInternal
        ? Parser::internalTidy($wrappedtext)
        : Parser::externalTidy($wrappedtext);

    if (!is_null($correctedtext)) {
        return $correctedtext;
    }

    wfDebug("Tidy error detected!\n");
    return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
}
760
/**
 * Clean up $text by piping it through the external tidy binary.
 *
 * The command line is built from $wgTidyBin, $wgTidyConf and $wgTidyOpts.
 * The text is written to the process' stdin and the result is read from
 * its stdout; stderr is discarded.
 *
 * @param string $text Complete document to clean up
 * @return string|null The tidied text, or null when nothing came back
 *         for a non-empty input
 */
public function externalTidy($text)
{
    global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
    $fname = 'Parser::externalTidy';
    wfProfileIn($fname);

    $cleansource = '';
    $opts = ' -utf8';

    // The null device has a different name on Windows; opening
    // '/dev/null' there raises a warning.
    $nullDevice = (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') ? 'NUL' : '/dev/null';

    $descriptorspec = array(
        0 => array('pipe', 'r'),
        1 => array('pipe', 'w'),
        2 => array('file', $nullDevice, 'a')
    );
    $pipes = array();
    $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
    if (is_resource($process)) {
        // Theoretically, this style of communication could cause a deadlock
        // here. If the stdout buffer fills up, then writes to stdin could
        // block. This doesn't appear to happen with tidy, because tidy only
        // writes to stdout after it's finished reading from stdin. Search
        // for tidyParseStdin and tidySaveStdout in console/tidy.c
        fwrite($pipes[0], $text);
        fclose($pipes[0]);
        while (!feof($pipes[1])) {
            $cleansource .= fgets($pipes[1], 1024);
        }
        fclose($pipes[1]);
        proc_close($process);
    }

    wfProfileOut($fname);

    if ($cleansource == '' && $text != '') {
        // Some kind of error happened, so we couldn't get the corrected text.
        // Just give up; we'll use the source text and append a warning.
        return null;
    } else {
        return $cleansource;
    }
}
808
/**
 * Clean up $text with PHP's tidy extension.
 *
 * Uses the tidy object API: the configuration file named by $wgTidyConf
 * and the 'utf8' encoding are passed to parseString(). The former
 * tidy_load_config() / tidy_set_encoding() functions belong to Tidy 1.0
 * and are not available in PHP 5 and later.
 *
 * @param string $text Complete document to clean up
 * @return string|null The tidied text, or null when tidy reports status 2
 */
public function internalTidy($text)
{
    global $wgTidyConf;
    $fname = 'Parser::internalTidy';
    wfProfileIn($fname);

    $tidy = new tidy();
    $tidy->parseString($text, $wgTidyConf, 'utf8');
    $tidy->cleanRepair();
    if ($tidy->getStatus() == 2) {
        // 2 is magic number for fatal error
        // http://www.php.net/manual/en/function.tidy-get-status.php
        $cleansource = null;
    } else {
        $cleansource = tidy_get_output($tidy);
    }
    wfProfileOut($fname);
    return $cleansource;
}
839
/**
 * Convert wiki table syntax ({| ... |}) in $text to HTML tables.
 *
 * The text is processed line by line. One stack per piece of state (open
 * cell, last cell tag, open row, row attributes, "a row was opened") is
 * kept, with one entry per table nesting level. Lines outside any table
 * are left untouched. Tables still open at the end of the text are closed.
 *
 * @param string $text
 * @return string
 */
public function doTableStuff($text)
{
    $fname = 'Parser::doTableStuff';
    wfProfileIn($fname);

    $lines = explode("\n", $text);
    $td_history = array(); // Is currently a td tag open?
    $last_tag_history = array(); // Save history of last tag activated (td, th or caption)
    $tr_history = array(); // Is currently a tr tag open?
    $tr_attributes = array(); // history of tr attributes
    $has_opened_tr = array(); // Did this table open a <tr> element?
    $indent_level = 0; // indent level of the table
    foreach ($lines as $key => $line) {
        $line = trim($line);

        if ($line == '') { // empty line, go to next line
            continue;
        }
        $first_character = $line[0];
        $matches = array();

        if (preg_match('/^(:*)\{\|(.*)$/', $line, $matches)) {
            // First check if we are starting a new table
            $indent_level = strlen($matches[1]);

            $attributes = $this->mStripState->unstripBoth($matches[2]);

            $lines[$key] = str_repeat('<dl><dd>', $indent_level) . "<table{$attributes}>";
            array_push($td_history, false);
            array_push($last_tag_history, '');
            array_push($tr_history, false);
            array_push($tr_attributes, '');
            array_push($has_opened_tr, false);
        } elseif (count($td_history) == 0) {
            // Not inside a table: don't do any of the following
            continue;
        } elseif (substr($line, 0, 2) == '|}') {
            // We are ending a table
            $line = '</table>' . substr($line, 2);
            $last_tag = array_pop($last_tag_history);

            if (!array_pop($has_opened_tr)) {
                $line = "<tr><td></td></tr>{$line}";
            }

            if (array_pop($tr_history)) {
                $line = "</tr>{$line}";
            }

            if (array_pop($td_history)) {
                $line = "</{$last_tag}>{$line}";
            }
            array_pop($tr_attributes);
            $lines[$key] = $line . str_repeat('</dd></dl>', $indent_level);
        } elseif (substr($line, 0, 2) == '|-') {
            // Now we have a table row
            $line = preg_replace('#^\|-+#', '', $line);

            // Whats after the tag is now only attributes
            $attributes = $this->mStripState->unstripBoth($line);
            array_pop($tr_attributes);
            array_push($tr_attributes, $attributes);

            $line = '';
            $last_tag = array_pop($last_tag_history);
            array_pop($has_opened_tr);
            array_push($has_opened_tr, true);

            if (array_pop($tr_history)) {
                $line = '</tr>';
            }

            if (array_pop($td_history)) {
                $line = "</{$last_tag}>{$line}";
            }

            $lines[$key] = $line;
            array_push($tr_history, false);
            array_push($td_history, false);
            array_push($last_tag_history, '');
        } elseif ($first_character == '|' || $first_character == '!' || substr($line, 0, 2) == '|+') {
            // This might be cell elements, td, th or captions
            if (substr($line, 0, 2) == '|+') {
                $first_character = '+';
                $line = substr($line, 1);
            }

            $line = substr($line, 1);

            if ($first_character == '!') {
                $line = str_replace('!!', '||', $line);
            }

            // Split up multiple cells on the same line.
            // FIXME : This can result in improper nesting of tags processed
            // by earlier parser steps, but should avoid splitting up eg
            // attribute values containing literal "||".
            $cells = StringUtils::explodeMarkup('||', $line);

            $lines[$key] = '';

            // Loop through each table cell
            foreach ($cells as $cell) {
                $previous = '';
                if ($first_character != '+') {
                    // Cells (not captions) need an open row
                    $tr_after = array_pop($tr_attributes);
                    if (!array_pop($tr_history)) {
                        $previous = "<tr{$tr_after}>\n";
                    }
                    array_push($tr_history, true);
                    array_push($tr_attributes, '');
                    array_pop($has_opened_tr);
                    array_push($has_opened_tr, true);
                }

                $last_tag = array_pop($last_tag_history);

                if (array_pop($td_history)) {
                    $previous = "</{$last_tag}>{$previous}";
                }

                if ($first_character == '|') {
                    $last_tag = 'td';
                } elseif ($first_character == '!') {
                    $last_tag = 'th';
                } elseif ($first_character == '+') {
                    $last_tag = 'caption';
                } else {
                    $last_tag = '';
                }

                array_push($last_tag_history, $last_tag);

                // A cell could contain both parameters and data
                $cell_data = explode('|', $cell, 2);

                // Bug 553: Note that a '|' inside an invalid link should not
                // be mistaken as delimiting cell parameters
                if (strpos($cell_data[0], '[[') !== false) {
                    $cell = "{$previous}<{$last_tag}>{$cell}";
                } elseif (count($cell_data) == 1) {
                    $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
                } else {
                    $attributes = $this->mStripState->unstripBoth($cell_data[0]);
                    $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
                }

                $lines[$key] .= $cell;
                array_push($td_history, true);
            }
        }
    }

    // Closing open td, tr && table
    while (count($td_history) > 0) {
        // $last_tag_history is pushed and popped in step with $td_history,
        // so its top entry names the cell element that is still open.
        $last_tag = array_pop($last_tag_history);
        if (array_pop($td_history)) {
            // Close the element that was actually opened (td, th or caption)
            $lines[] = "</{$last_tag}>" ;
        }
        if (array_pop($tr_history)) {
            $lines[] = '</tr>' ;
        }
        if (!array_pop($has_opened_tr)) {
            $lines[] = "<tr><td></td></tr>" ;
        }

        $lines[] = '</table>' ;
    }

    $output = implode("\n", $lines) ;

    // special case: don't return empty table
    if ($output == "<table>\n<tr><td></td></tr>\n</table>") {
        $output = '';
    }

    wfProfileOut($fname);

    return $output;
}
1027
/**
 * Run the main sequence of transformations on already stripped text:
 * inclusion tags, HTML sanitising, variables, tables, horizontal rules,
 * TOC / gallery switches, headings, dates, quotes, links, magic links and
 * heading formatting.
 *
 * A 'ParserBeforeInternalParse' hook returning false aborts the whole
 * sequence and returns the text as it stands at that point.
 *
 * @param string $text
 * @return string
 */
public function internalParse($text)
{
    $fname = 'Parser::internalParse';
    $args = array();
    $isMain = true;
    wfProfileIn($fname);

    # Hook to suspend the parser in this state
    $proceed = wfRunHooks('ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ));
    if (!$proceed) {
        wfProfileOut($fname);
        return $text ;
    }

    # Remove <noinclude> tags and <includeonly> sections
    $text = strtr($text, array( '<onlyinclude>' => '' , '</onlyinclude>' => '' ));
    $text = strtr($text, array( '<noinclude>' => '', '</noinclude>' => ''));
    $text = StringUtils::delimiterReplace('<includeonly>', '</includeonly>', '', $text);

    $text = Sanitizer::removeHTMLtags($text, array( &$this, 'attributeStripCallback' ));

    $text = $this->replaceVariables($text, $args);
    wfRunHooks('InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ));

    // Tables need to come after variable replacement for things to work
    // properly; putting them before other transformations should keep
    // exciting things like link expansions from showing up in surprising
    // places.
    $text = $this->doTableStuff($text);

    // A line starting with four or more dashes becomes a horizontal rule
    $text = preg_replace('/(^|\n)-----*/', '\\1<hr />', $text);

    $text = $this->stripToc($text);
    $this->stripNoGallery($text);
    $text = $this->doHeadings($text);

    $useDynamicDates = $this->mOptions->getUseDynamicDates();
    if ($useDynamicDates) {
        $df = &DateFormatter::getInstance();
        $text = $df->reformat($this->mOptions->getDateFormat(), $text);
    }

    $text = $this->doAllQuotes($text);
    $text = $this->replaceInternalLinks($text);
    $text = $this->replaceExternalLinks($text);

    # replaceInternalLinks may sometimes leave behind
    # absolute URLs, which have to be masked to hide them from replaceExternalLinks
    $noParseMarker = $this->mUniqPrefix . "NOPARSE";
    $text = str_replace($noParseMarker, "", $text);

    $text = $this->doMagicLinks($text);
    $text = $this->formatHeadings($text, $isMain);

    wfProfileOut($fname);
    return $text;
}
1086
/**
 * Turn "RFC n", "PMID n" and "ISBN ..." occurrences into links by
 * running magicLinkCallback() over every match. Existing links and the
 * inside of HTML elements are matched too, so that the callback can
 * return them unchanged.
 *
 * $text is taken and returned by reference and is modified in place.
 *
 * @param string $text
 * @return string
 */
public function &doMagicLinks(&$text)
{
    wfProfileIn(__METHOD__);

    $pattern = '!(?:                        # Start cases
        <a.*?</a> |                 # Skip link text
        <.*?> |                     # Skip stuff inside HTML elements
        (?:RFC|PMID)\s+([0-9]+) |   # RFC or PMID, capture number as m[1]
        ISBN\s+(\b                  # ISBN, capture number as m[2]
            (?: 97[89] [\ \-]? )?   # optional 13-digit ISBN prefix
            (?: [0-9] [\ \-]? ){9}  # 9 digits with opt. delimiters
            [0-9Xx]                 # check digit
            \b)
        )!x';
    $callback = array( &$this, 'magicLinkCallback' );

    $text = preg_replace_callback($pattern, $callback, $text);

    wfProfileOut(__METHOD__);
    return $text;
}
1113
/**
 * Callback for doMagicLinks(): build the replacement for one match.
 *
 * Matches starting with '<' are returned unchanged. ISBN matches become
 * a link to the 'Booksources' special page; RFC and PMID matches become
 * external links whose URL comes from the 'rfcurl' / 'pubmedurl' message.
 *
 * @param array $m Match array; $m[1] is the RFC/PMID number, $m[2] the ISBN
 * @return string
 * @throws MWException When the match is none of the known types
 */
public function magicLinkCallback($m)
{
    $whole = $m[0];

    if (substr($whole, 0, 1) == '<') {
        # Skip HTML element
        return $whole;
    }

    if (substr($whole, 0, 4) == 'ISBN') {
        $isbn = $m[2];
        // Normalised number: no dashes or spaces, upper-case check digit
        $num = strtr($isbn, array(
            '-' => '',
            ' ' => '',
            'x' => 'X',
        ));
        $titleObj = SpecialPage::getTitleFor('Booksources');
        return '<a href="' .
            $titleObj->escapeLocalUrl("isbn=$num") .
            "\" class=\"internal\">ISBN $isbn</a>";
    }

    if (substr($whole, 0, 3) == 'RFC') {
        $keyword = 'RFC';
        $urlmsg = 'rfcurl';
    } elseif (substr($whole, 0, 4) == 'PMID') {
        $keyword = 'PMID';
        $urlmsg = 'pubmedurl';
    } else {
        throw new MWException(__METHOD__ . ': unrecognised match type "' .
            substr($whole, 0, 20) . '"');
    }
    $id = $m[1];

    $url = wfMsg($urlmsg, $id);
    $sk = $this->mOptions->getSkin();
    $la = $sk->getExternalLinkAttributes($url, $keyword . $id);

    return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
}
1151
/**
 * Convert "== heading ==" lines to <hN> elements.
 *
 * Levels are processed from 6 down to 1 so that longer runs of '=' are
 * matched before shorter ones.
 *
 * @param string $text
 * @return string
 */
public function doHeadings($text)
{
    $fname = 'Parser::doHeadings';
    wfProfileIn($fname);
    for ($level = 6; $level >= 1; --$level) {
        $marks = str_repeat('=', $level);
        // The pattern has one capture group only; the replacement used to
        // end in a reference to a second group, which always expanded to
        // an empty string.
        $text = preg_replace(
            "/^{$marks}(.+){$marks}\\s*$/m",
            "<h{$level}>\\1</h{$level}>",
            $text
        );
    }
    wfProfileOut($fname);
    return $text;
}
1172
/**
 * Apply doQuotes() to every line of $text and join the lines again.
 *
 * @param string $text
 * @return string
 */
public function doAllQuotes($text)
{
    $fname = 'Parser::doAllQuotes';
    wfProfileIn($fname);

    $converted = array();
    foreach (explode("\n", $text) as $line) {
        $converted[] = $this->doQuotes($line);
    }
    $outtext = implode("\n", $converted);

    wfProfileOut($fname);
    return $outtext;
}
1191
/**
 * Convert apostrophe markup ('' italics, ''' bold, ''''' both) in a single
 * line of text into <i> and <b> tags.
 *
 * The line is split on runs of two or more apostrophes; odd-numbered pieces
 * of the split are the apostrophe runs, even-numbered pieces are plain text.
 *
 * @param string $text One line of wikitext
 * @return string The line with apostrophe markup replaced by HTML tags
 */
public function doQuotes($text)
{
    $arr = preg_split("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($arr === false || count($arr) == 1) {
        # No apostrophe runs (or the split failed): nothing to convert.
        return $text;
    }
    $pieces = count($arr);

    # First, do some preliminary work. This may shift some apostrophes from
    # being mark-up to being text. It also counts the number of occurrences
    # of bold and italics mark-ups.
    $numbold = 0;
    $numitalics = 0;
    for ($i = 1; $i < $pieces; $i += 2) {
        $len = strlen($arr[$i]);
        if ($len == 4) {
            # If there are ever four apostrophes, assume the first is supposed to
            # be text, and the remaining three constitute mark-up for bold text.
            $arr[$i - 1] .= "'";
            $arr[$i] = "'''";
            $len = 3;
        } elseif ($len > 5) {
            # If there are more than 5 apostrophes in a row, assume they're all
            # text except for the last 5.
            $arr[$i - 1] .= str_repeat("'", $len - 5);
            $arr[$i] = "'''''";
            $len = 5;
        }
        # Count the number of occurrences of bold and italics mark-ups.
        # A run of five counts once for each.
        if ($len == 2) {
            $numitalics++;
        } elseif ($len == 3) {
            $numbold++;
        } elseif ($len == 5) {
            $numitalics++;
            $numbold++;
        }
    }

    # If there is an odd number of both bold and italics, it is likely
    # that one of the bold ones was meant to be an apostrophe followed
    # by italics. Which one we cannot know for certain, but it is more
    # likely to be one that has a single-letter word before it.
    if (($numbold % 2 == 1) && ($numitalics % 2 == 1)) {
        $firstsingleletterword = -1;
        $firstmultiletterword = -1;
        $firstspace = -1;
        for ($i = 1; $i < $pieces; $i += 2) {
            if (strlen($arr[$i]) != 3) {
                continue;
            }
            # Last and second-to-last character of the text before the run.
            $x1 = substr($arr[$i - 1], -1);
            $x2 = substr($arr[$i - 1], -2, 1);
            if ($x1 == ' ') {
                if ($firstspace == -1) {
                    $firstspace = $i;
                }
            } elseif ($x2 == ' ') {
                if ($firstsingleletterword == -1) {
                    $firstsingleletterword = $i;
                }
            } else {
                if ($firstmultiletterword == -1) {
                    $firstmultiletterword = $i;
                }
            }
        }

        # Preference order: single-letter word, then multi-letter word, then a
        # run preceded by a space. All three can be -1, for example when the
        # line contains only one run of five apostrophes.
        $demote = -1;
        if ($firstsingleletterword > -1) {
            $demote = $firstsingleletterword;
        } elseif ($firstmultiletterword > -1) {
            $demote = $firstmultiletterword;
        } elseif ($firstspace > -1) {
            $demote = $firstspace;
        }
        if ($demote > -1) {
            # Turn the bold run into a literal apostrophe followed by italics.
            $arr[$demote] = "''";
            $arr[$demote - 1] .= "'";
        }
    }

    # Now let's actually convert our apostrophic mush to HTML!
    # $state records which tags are open and in which order: '', 'i', 'b',
    # 'bi' (b opened first), 'ib' (i opened first), or 'both' when a run of
    # five was seen and the tag order is not yet decided; in that state text
    # is collected in $buffer instead of being written out.
    $output = '';
    $buffer = '';
    $state = '';
    foreach ($arr as $i => $r) {
        if (($i % 2) == 0) {
            if ($state == 'both') {
                $buffer .= $r;
            } else {
                $output .= $r;
            }
            continue;
        }

        $len = strlen($r);
        if ($len == 2) {
            if ($state == 'i') {
                $output .= '</i>';
                $state = '';
            } elseif ($state == 'bi') {
                $output .= '</i>';
                $state = 'b';
            } elseif ($state == 'ib') {
                $output .= '</b></i><b>';
                $state = 'b';
            } elseif ($state == 'both') {
                $output .= '<b><i>' . $buffer . '</i>';
                $state = 'b';
            } else { # $state can be 'b' or ''
                $output .= '<i>';
                $state .= 'i';
            }
        } elseif ($len == 3) {
            if ($state == 'b') {
                $output .= '</b>';
                $state = '';
            } elseif ($state == 'bi') {
                $output .= '</i></b><i>';
                $state = 'i';
            } elseif ($state == 'ib') {
                $output .= '</b>';
                $state = 'i';
            } elseif ($state == 'both') {
                $output .= '<i><b>' . $buffer . '</b>';
                $state = 'i';
            } else { # $state can be 'i' or ''
                $output .= '<b>';
                $state .= 'b';
            }
        } elseif ($len == 5) {
            if ($state == 'b') {
                $output .= '</b><i>';
                $state = 'i';
            } elseif ($state == 'i') {
                $output .= '</i><b>';
                $state = 'b';
            } elseif ($state == 'bi') {
                $output .= '</i></b>';
                $state = '';
            } elseif ($state == 'ib') {
                $output .= '</b></i>';
                $state = '';
            } elseif ($state == 'both') {
                $output .= '<i><b>' . $buffer . '</b></i>';
                $state = '';
            } else { # ($state == '')
                $buffer = '';
                $state = 'both';
            }
        }
    }

    # Now close all remaining tags. Notice that the order is important.
    if ($state == 'b' || $state == 'ib') {
        $output .= '</b>';
    }
    if ($state == 'i' || $state == 'bi' || $state == 'ib') {
        $output .= '</i>';
    }
    if ($state == 'bi') {
        $output .= '</b>';
    }
    # There might be a lonely ''''' with text still held in the buffer.
    # Compare against '' explicitly: a buffer of "0" is falsy in PHP and
    # would otherwise be dropped from the output.
    if ($state == 'both' && $buffer !== '') {
        $output .= '<b><i>' . $buffer . '</i></b>';
    }
    return $output;
}
1373
/**
 * Replace bracketed external links (as matched by EXT_LINK_BRACKETED) with
 * the skin's external-link HTML, and register each URL on the output object.
 *
 * Text outside the bracketed links is passed through
 * replaceFreeExternalLinks() so bare URLs are handled as well.
 *
 * @param string $text Text to process
 * @return string Text with external links replaced
 */
public function replaceExternalLinks($text)
{
    global $wgContLang;
    $fname = 'Parser::replaceExternalLinks';
    wfProfileIn($fname);

    $skin = $this->mOptions->getSkin();

    $pieces = preg_split(EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    # Everything before the first bracketed link.
    $html = $this->replaceFreeExternalLinks(array_shift($pieces));

    # The remaining pieces are consumed four at a time:
    # URL, protocol, link text, and the text following the link.
    for ($pos = 0; $pos < count($pieces); $pos += 4) {
        $url = $pieces[$pos];
        $protocol = $pieces[$pos + 1];
        $label = $pieces[$pos + 2];
        $trail = $pieces[$pos + 3];

        # The characters '<' and '>' (which were escaped by
        # removeHTMLtags()) should not be included in
        # URLs, per RFC 2396. Cut the URL at the first escaped bracket
        # and move the remainder into the link text.
        $hit = array();
        if (preg_match('/&(lt|gt);/', $url, $hit, PREG_OFFSET_CAPTURE)) {
            $cut = $hit[0][1];
            $label = substr($url, $cut) . ' ' . $label;
            $url = substr($url, 0, $cut);
        }

        # If the link text is an image URL, replace it with an <img> tag
        # This happened by accident in the original parser, but some people used it extensively
        $image = $this->maybeMakeExternalImage($label);
        if ($image !== false) {
            $label = $image;
        }

        $dtrail = '';

        # Set linktype for CSS - if URL==text, link is essentially free
        $linktype = ($label == $url) ? 'free' : 'text';

        if ($label == '') {
            # No link text, e.g. [http://domain.tld/some.link]
            # Autonumber if allowed. See bug #5918
            $scheme = substr($protocol, 0, strpos($protocol, ':'));
            if (strpos(wfUrlProtocols(), $scheme) !== false) {
                $label = '[' . ++$this->mAutonumber . ']';
                $linktype = 'autonumber';
            } else {
                # Otherwise just use the URL
                $label = htmlspecialchars($url);
                $linktype = 'free';
            }
        } else {
            # Have link text, e.g. [http://domain.tld/some.link text]s
            # Check for trail
            list($dtrail, $trail) = Linker::splitTrail($trail);
        }

        $label = $wgContLang->markNoConversion($label);

        # Process the trail (i.e. everything after this link up until start of the next link),
        # replacing any non-bracketed links
        $trail = $this->replaceFreeExternalLinks($trail);

        # Use the encoded URL
        # This means that users can paste URLs directly into the text
        # Funny characters like &ouml; aren't valid in URLs anyway
        # This was changed in August 2004
        $link = $skin->makeExternalLink($url, $label, false, $linktype, $this->mTitle->getNamespace());
        $html .= $link . $dtrail . $trail;

        # Register link in the output object.
        # Replace unnecessary URL escape codes with the referenced character
        # This prevents spammers from hiding links from the filters
        $this->mOutput->addExternalLink(Parser::replaceUnusualEscapes($url));
    }

    wfProfileOut($fname);
    return $html;
}
1463
/**
 * Replace bare (non-bracketed) URLs in the text with external links, or with
 * external images where maybeMakeExternalImage() accepts the URL.
 *
 * The text is split on every occurrence of a known URL protocol; each
 * protocol is then paired with the text following it to see whether it
 * forms a usable URL.
 *
 * @param string $text Text to process
 * @return string Text with free external links replaced
 */
public function replaceFreeExternalLinks($text)
{
    global $wgContLang;

    $fname = 'Parser::replaceFreeExternalLinks';
    wfProfileIn($fname);

    $bits = preg_split('/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    $s = array_shift($bits);
    $i = 0;

    $sk = $this->mOptions->getSkin();

    while ($i < count($bits)) {
        $protocol = $bits[$i++];
        $remainder = $bits[$i++];

        $m = array();
        if (preg_match('/^(' . EXT_LINK_URL_CLASS . '+)(.*)$/s', $remainder, $m)) {
            # Found some characters after the protocol that look promising
            $url = $protocol . $m[1];
            $trail = $m[2];

            # special case: handle urls as url args:
            # http://www.example.com/foo?=http://www.example.com/bar
            # The protocol alternation is wrapped in a group so that ^ and $
            # anchor every alternative, not just the first and the last one.
            if (strlen($trail) == 0 &&
                isset($bits[$i]) &&
                isset($bits[$i + 1]) &&
                preg_match('/^(?:' . wfUrlProtocols() . ')$/S', $bits[$i]) &&
                preg_match('/^(' . EXT_LINK_URL_CLASS . '+)(.*)$/s', $bits[$i + 1], $m)) {
                # add protocol, arg
                $url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link
                $i += 2;
                $trail = $m[2];
            }

            # The characters '<' and '>' (which were escaped by
            # removeHTMLtags()) should not be included in
            # URLs, per RFC 2396.
            $m2 = array();
            if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
                $trail = substr($url, $m2[0][1]) . $trail;
                $url = substr($url, 0, $m2[0][1]);
            }

            # Move trailing punctuation to $trail
            $sep = ',;\.:!?';
            # If there is no left bracket, then consider right brackets fair game too
            if (strpos($url, '(') === false) {
                $sep .= ')';
            }

            $numSepChars = strspn(strrev($url), $sep);
            if ($numSepChars) {
                $trail = substr($url, -$numSepChars) . $trail;
                $url = substr($url, 0, -$numSepChars);
            }

            # Is this an external image?
            $text = $this->maybeMakeExternalImage($url);
            if ($text === false) {
                # Not an image, make a link
                $text = $sk->makeExternalLink($url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace());
                # Register it in the output object...
                # Replace unnecessary URL escape codes with their equivalent characters
                $pasteurized = Parser::replaceUnusualEscapes($url);

                $this->mOutput->addExternalLink($pasteurized);
            }
            $s .= $text . $trail;
        } else {
            # Nothing URL-like follows the protocol: emit both pieces unchanged.
            $s .= $protocol . $remainder;
        }
    }
    wfProfileOut($fname);
    return $s;
}
1545
/**
 * Run every %XX escape sequence in a URL through
 * replaceUnusualEscapesCallback(), which decides whether the sequence is
 * replaced by the character it encodes or left as is.
 *
 * @param string $url URL to normalise
 * @return string URL with the selected escapes decoded
 */
public static function replaceUnusualEscapes($url)
{
    $escapePattern = '/%[0-9A-Fa-f]{2}/';
    $handler = array( 'Parser', 'replaceUnusualEscapesCallback' );
    return preg_replace_callback($escapePattern, $handler, $url);
}
1564
/**
 * Callback for replaceUnusualEscapes(): decide whether a single %XX escape
 * sequence should be decoded.
 *
 * @param array $matches preg match array; $matches[0] is the %XX sequence
 * @return string The decoded character, or the original escape sequence
 */
private static function replaceUnusualEscapesCallback($matches)
{
    $char = urldecode($matches[0]);
    $ord = ord($char);

    // Is it an unsafe or HTTP reserved character according to RFC 1738?
    // '%' is part of the list: decoding "%25" would leave a bare '%' that can
    // combine with the following characters into a different escape
    // sequence (e.g. "%2541" would become "%41").
    if ($ord > 32 && $ord < 127 && strpos('<>"#%{}|\^~[]`;/?', $char) === false) {
        // No, shouldn't be escaped
        return $char;
    } else {
        // Yes, leave it escaped
        return $matches[0];
    }
}
1585
/**
 * Turn a URL into an external image tag if external images are permitted for
 * it and the URL matches EXT_IMAGE_REGEX.
 *
 * External images are permitted either globally (getAllowExternalImages())
 * or when the URL starts with the configured getAllowExternalImagesFrom()
 * prefix.
 *
 * @param string $url URL to examine
 * @return string|false Image HTML from the skin, or false if no image was made
 */
public function maybeMakeExternalImage($url)
{
    $skin = $this->mOptions->getSkin();
    $allowedPrefix = $this->mOptions->getAllowExternalImagesFrom();
    $hasAllowedPrefix = !empty($allowedPrefix);

    $permitted = $this->mOptions->getAllowExternalImages()
        || ($hasAllowedPrefix && strpos($url, $allowedPrefix) === 0);
    if (!$permitted) {
        return false;
    }
    if (!preg_match(EXT_IMAGE_REGEX, $url)) {
        return false;
    }

    # Image found
    return $skin->makeExternalImage(htmlspecialchars($url));
}
1606
/**
 * Process [[ ]] wikilinks.
 *
 * The text is split on "[[" and each piece is examined in turn. Depending on
 * the target's namespace and form, a piece is turned into an image, a
 * category or language link registered on the output object, a self link, a
 * media/special link, or a link placeholder produced by makeLinkHolder().
 * Pieces that do not form a valid link are written back unchanged.
 *
 * @param string $s Text to process
 * @return string Text with internal links replaced
 * @throws MWException if $this->mTitle is null
 */
public function replaceInternalLinks($s)
{
    global $wgContLang;
    static $fname = 'Parser::replaceInternalLinks' ;

    wfProfileIn($fname);

    wfProfileIn($fname . '-setup');
    static $tc = false;
    # the % is needed to support urlencoded titles as well
    if (!$tc) {
        $tc = Title::legalChars() . '#%';
    }

    $sk = $this->mOptions->getSkin();

    # split the entire text string on occurrences of [[
    $a = explode('[[', ' ' . $s);
    # get the first element (all text up to first [[), and remove the space we added
    $s = array_shift($a);
    $s = substr($s, 1);

    # Match a link having the form [[namespace:link|alternate]]trail
    static $e1 = false;
    if (!$e1) {
        $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
    }
    # Match cases where there is no "]]", which might still be images
    static $e1_img = false;
    if (!$e1_img) {
        $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
    }
    # Match the end of a line for a word that's not followed by whitespace,
    # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
    $e2 = wfMsgForContent('linkprefix');

    $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
    if (is_null($this->mTitle)) {
        throw new MWException(__METHOD__ . ": \$this->mTitle is null\n");
    }
    $nottalk = !$this->mTitle->isTalkPage();

    if ($useLinkPrefixExtension) {
        $m = array();
        if (preg_match($e2, $s, $m)) {
            $first_prefix = $m[2];
        } else {
            $first_prefix = false;
        }
    } else {
        $prefix = '';
    }

    if ($wgContLang->hasVariants()) {
        $selflink = $wgContLang->convertLinkToAllVariants($this->mTitle->getPrefixedText());
    } else {
        $selflink = array($this->mTitle->getPrefixedText());
    }
    $useSubpages = $this->areSubpagesAllowed();
    wfProfileOut($fname . '-setup');

    # Loop for each link
    for ($k = 0; isset($a[$k]); $k++) {
        $line = $a[$k];
        if ($useLinkPrefixExtension) {
            wfProfileIn($fname . '-prefixhandling');
            if (preg_match($e2, $s, $m)) {
                $prefix = $m[2];
                $s = $m[1];
            } else {
                $prefix = '';
            }
            # first link
            if ($first_prefix) {
                $prefix = $first_prefix;
                $first_prefix = false;
            }
            wfProfileOut($fname . '-prefixhandling');
        }

        $might_be_img = false;

        wfProfileIn("$fname-e1");
        if (preg_match($e1, $line, $m)) { # page with normal text or alt
            $text = $m[2];
            # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
            # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
            # the real problem is with the $e1 regex
            # See bug 1300.
            #
            # Still some problems for cases where the ] is meant to be outside punctuation,
            # and no image is in sight. See bug 2095.
            #
            if ($text !== '' &&
                substr($m[3], 0, 1) === ']' &&
                strpos($text, '[') !== false
            ) {
                $text .= ']'; # so that replaceExternalLinks($text) works later
                $m[3] = substr($m[3], 1);
            }
            # fix up urlencoded title texts
            if (strpos($m[1], '%') !== false) {
                # Should anchors '#' also be rejected?
                $m[1] = str_replace(array('<', '>'), array('&lt;', '&gt;'), urldecode($m[1]));
            }
            $trail = $m[3];
        } elseif (preg_match($e1_img, $line, $m)) { # Invalid, but might be an image with a link in its caption
            $might_be_img = true;
            $text = $m[2];
            if (strpos($m[1], '%') !== false) {
                $m[1] = urldecode($m[1]);
            }
            $trail = "";
        } else { # Invalid form; output directly
            $s .= $prefix . '[[' . $line ;
            wfProfileOut("$fname-e1");
            continue;
        }
        wfProfileOut("$fname-e1");
        wfProfileIn("$fname-misc");

        # Don't allow internal links to pages containing
        # PROTO: where PROTO is a valid URL protocol; these
        # should be external links.
        if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) {
            $s .= $prefix . '[[' . $line ;
            # Close the profiling section opened above before leaving the iteration.
            wfProfileOut("$fname-misc");
            continue;
        }

        # Make subpage if necessary
        if ($useSubpages) {
            $link = $this->maybeDoSubpageLink($m[1], $text);
        } else {
            $link = $m[1];
        }

        # A leading ':' forces a plain link instead of the special handling
        # for images, categories and language links below.
        $noforce = (substr($m[1], 0, 1) != ':');
        if (!$noforce) {
            # Strip off leading ':'
            $link = substr($link, 1);
        }

        wfProfileOut("$fname-misc");
        wfProfileIn("$fname-title");
        $nt = Title::newFromText($this->mStripState->unstripNoWiki($link));
        if (!$nt) {
            $s .= $prefix . '[[' . $line;
            wfProfileOut("$fname-title");
            continue;
        }

        $ns = $nt->getNamespace();
        $iw = $nt->getInterWiki();
        wfProfileOut("$fname-title");

        if ($might_be_img) { # if this is actually an invalid link
            wfProfileIn("$fname-might_be_img");
            if ($ns == NS_IMAGE && $noforce) { # but might be an image
                $found = false;
                while (isset($a[$k + 1])) {
                    # look at the next 'line' to see if we can close it there
                    $spliced = array_splice($a, $k + 1, 1);
                    $next_line = array_shift($spliced);
                    $m = explode(']]', $next_line, 3);
                    if (count($m) == 3) {
                        # the first ]] closes the inner link, the second the image
                        $found = true;
                        $text .= "[[{$m[0]}]]{$m[1]}";
                        $trail = $m[2];
                        break;
                    } elseif (count($m) == 2) {
                        # if there's exactly one ]] that's fine, we'll keep looking
                        $text .= "[[{$m[0]}]]{$m[1]}";
                    } else {
                        # if $next_line is invalid too, we need look no further
                        $text .= '[[' . $next_line;
                        break;
                    }
                }
                if (!$found) {
                    # we couldn't find the end of this imageLink, so output it raw
                    # but don't ignore what might be perfectly normal links in the text we've examined
                    $text = $this->replaceInternalLinks($text);
                    $s .= "{$prefix}[[$link|$text";
                    # note: no $trail, because without an end, there *is* no trail
                    wfProfileOut("$fname-might_be_img");
                    continue;
                }
            } else { # it's not an image, so output it raw
                $s .= "{$prefix}[[$link|$text";
                # note: no $trail, because without an end, there *is* no trail
                wfProfileOut("$fname-might_be_img");
                continue;
            }
            wfProfileOut("$fname-might_be_img");
        }

        $wasblank = ('' == $text);
        if ($wasblank) {
            $text = $link;
        }

        # Link not escaped by : , create the various objects
        if ($noforce) {

            # Interwikis
            wfProfileIn("$fname-interwiki");
            if ($iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName($iw)) {
                $this->mOutput->addLanguageLink($nt->getFullText());
                $s = rtrim($s . $prefix);
                $s .= trim($trail, "\n") == '' ? '': $prefix . $trail;
                wfProfileOut("$fname-interwiki");
                continue;
            }
            wfProfileOut("$fname-interwiki");

            if ($ns == NS_IMAGE) {
                wfProfileIn("$fname-image");
                if (!wfIsBadImage($nt->getDBkey(), $this->mTitle)) {
                    # recursively parse links inside the image caption
                    # actually, this will parse them in any other parameters, too,
                    # but it might be hard to fix that, and it doesn't matter ATM
                    $text = $this->replaceExternalLinks($text);
                    $text = $this->replaceInternalLinks($text);

                    # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
                    $s .= $prefix . $this->armorLinks($this->makeImage($nt, $text)) . $trail;
                    $this->mOutput->addImage($nt->getDBkey());

                    wfProfileOut("$fname-image");
                    continue;
                } else {
                    # We still need to record the image's presence on the page
                    $this->mOutput->addImage($nt->getDBkey());
                }
                wfProfileOut("$fname-image");
            }

            if ($ns == NS_CATEGORY) {
                wfProfileIn("$fname-category");
                $s = rtrim($s . "\n"); # bug 87

                if ($wasblank) {
                    $sortkey = $this->getDefaultSort();
                } else {
                    $sortkey = $text;
                }
                $sortkey = Sanitizer::decodeCharReferences($sortkey);
                $sortkey = str_replace("\n", '', $sortkey);
                $sortkey = $wgContLang->convertCategoryKey($sortkey);
                $this->mOutput->addCategory($nt->getDBkey(), $sortkey);

                $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;

                wfProfileOut("$fname-category");
                continue;
            }
        }

        # Self-link checking
        if ($nt->getFragment() === '') {
            if (in_array($nt->getPrefixedText(), $selflink, true)) {
                $s .= $prefix . $sk->makeSelfLinkObj($nt, $text, '', $trail);
                continue;
            }
        }

        # Special and Media are pseudo-namespaces; no pages actually exist in them
        if ($ns == NS_MEDIA) {
            $link = $sk->makeMediaLinkObj($nt, $text);
            # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
            $s .= $prefix . $this->armorLinks($link) . $trail;
            $this->mOutput->addImage($nt->getDBkey());
            continue;
        } elseif ($ns == NS_SPECIAL) {
            $s .= $this->makeKnownLinkHolder($nt, $text, '', $trail, $prefix);
            continue;
        } elseif ($ns == NS_IMAGE) {
            $img = new Image($nt);
            if ($img->exists()) {
                // Force a blue link if the file exists; may be a remote
                // upload on the shared repository, and we want to see its
                // auto-generated page.
                $s .= $this->makeKnownLinkHolder($nt, $text, '', $trail, $prefix);
                $this->mOutput->addLink($nt);
                continue;
            }
        }
        $s .= $this->makeLinkHolder($nt, $text, '', $trail, $prefix);
    }
    wfProfileOut($fname);
    return $s;
}
1909
/**
 * Record a link in the link-holder arrays and return a placeholder comment
 * that carries the index of the recorded entry.
 *
 * Titles for which isExternal() is true are stored in mInterwikiLinkHolders
 * and produce an "IWLINK" placeholder; all others are stored in mLinkHolders
 * and produce a "LINK" placeholder. The part of $trail that
 * Linker::splitTrail() assigns to the link is stored with the link text; the
 * rest is appended after the placeholder.
 *
 * @param object $nt Title of the link target (taken by reference)
 * @param string $text Link text
 * @param string $query Query string stored with the link
 * @param string $trail Text following the link
 * @param string $prefix Text preceding the link that belongs to it
 * @return string Placeholder followed by the remaining trail
 */
public function makeLinkHolder(&$nt, $text = '', $query = '', $trail = '', $prefix = '')
{
    wfProfileIn(__METHOD__);
    if (is_object($nt)) {
        # Separate the link trail from the rest of the link
        list($inside, $trail) = Linker::splitTrail($trail);

        if ($nt->isExternal()) {
            $count = array_push($this->mInterwikiLinkHolders['texts'], $prefix . $text . $inside);
            $this->mInterwikiLinkHolders['titles'][] = $nt;
            $marker = '<!--IWLINK ' . ($count - 1) . '-->';
        } else {
            $count = array_push($this->mLinkHolders['namespaces'], $nt->getNamespace());
            $this->mLinkHolders['dbkeys'][] = $nt->getDBkey();
            $this->mLinkHolders['queries'][] = $query;
            $this->mLinkHolders['texts'][] = $prefix . $text . $inside;
            $this->mLinkHolders['titles'][] = $nt;
            $marker = '<!--LINK ' . ($count - 1) . '-->';
        }
        $retVal = $marker . $trail;
    } else {
        # Fail gracefully
        $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
    }
    wfProfileOut(__METHOD__);
    return $retVal;
}
1944
/**
 * Build a link through the skin's makeKnownLinkObj() and pass the result
 * through armorLinks().
 *
 * @param object $nt Title of the link target
 * @param string $text Link text
 * @param string $query Query string for the link
 * @param string $trail Text following the link
 * @param string $prefix Text preceding the link that belongs to it
 * @return string Armored link HTML followed by the remaining trail
 */
public function makeKnownLinkHolder($nt, $text = '', $query = '', $trail = '', $prefix = '')
{
    # Separate the part of the trail that belongs inside the link.
    $split = Linker::splitTrail($trail);
    $inside = $split[0];
    $remainder = $split[1];

    $skin = $this->mOptions->getSkin();
    $html = $skin->makeKnownLinkObj($nt, $text, $query, $inside, $prefix);

    return $this->armorLinks($html) . $remainder;
}
1966
/**
 * Prefix every URL protocol found in the text with the parser's unique
 * prefix followed by "NOPARSE".
 *
 * @param string $text Text to armor
 * @return string Armored text
 */
public function armorLinks($text)
{
    $protocolPattern = '/\b(' . wfUrlProtocols() . ')/';
    $marked = "{$this->mUniqPrefix}NOPARSE$1";
    return preg_replace($protocolPattern, $marked, $text);
}
1987
/**
 * Tell whether $wgNamespacesWithSubpages has a non-empty entry for the
 * namespace of the current title.
 *
 * @return bool
 */
public function areSubpagesAllowed()
{
    # Some namespaces don't allow subpages
    global $wgNamespacesWithSubpages;
    $namespace = $this->mTitle->getNamespace();
    return !empty($wgNamespacesWithSubpages[$namespace]);
}
1998
/**
 * Resolve subpage-style link targets relative to the current title.
 *
 * Valid link forms:
 *   Foobar     -- normal
 *   :Foobar    -- override special treatment of prefix (images, language links)
 *   /Foobar    -- convert to CurrentPage/Foobar
 *   /Foobar/   -- convert to CurrentPage/Foobar, strip the initial / from text
 *   ../        -- convert to CurrentPage, from CurrentPage/CurrentSubPage
 *   ../Foobar  -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage
 *
 * @param string $target Link target as written
 * @param string $text Link text, by reference; filled in when it is empty
 *                     and a subpage form was recognised
 * @return string The resolved target, or $target unchanged if no subpage
 *                form applies
 */
public function maybeDoSubpageLink($target, &$text)
{
    $fname = 'Parser::maybeDoSubpageLink';
    wfProfileIn($fname);
    $ret = $target; # default return value is no change

    # bug 7425
    $target = trim($target);

    # Some namespaces don't allow subpages,
    # so only perform processing if subpages are allowed
    if ($this->areSubpagesAllowed()) {
        # Look at the first character
        if ($target != '' && $target[0] == '/') {
            # / at end means we don't want the slash to be shown
            $m = array();
            if (preg_match('%/+$%', $target, $m)) {
                # Drop the leading slash and the whole run of trailing slashes.
                # The cast keeps the result a string when nothing is left.
                $noslash = $target = (string) substr($target, 1, -strlen($m[0]));
            } else {
                $noslash = substr($target, 1);
            }

            $ret = $this->mTitle->getPrefixedText() . '/' . trim($noslash);
            if ('' === $text) {
                $text = $target;
            } # this might be changed for ugliness reasons
        } else {
            # check for .. subpage backlinks
            $dotdotcount = 0;
            $nodotdot = $target;
            while (strncmp($nodotdot, "../", 3) == 0) {
                ++$dotdotcount;
                $nodotdot = substr($nodotdot, 3);
            }
            if ($dotdotcount > 0) {
                $exploded = explode('/', $this->mTitle->getPrefixedText());
                if (count($exploded) > $dotdotcount) { # not allowed to go below top level page
                    $ret = implode('/', array_slice($exploded, 0, -$dotdotcount));
                    # / at the end means don't show full path
                    if (substr($nodotdot, -1, 1) == '/') {
                        $nodotdot = substr($nodotdot, 0, -1);
                        if ('' === $text) {
                            $text = $nodotdot;
                        }
                    }
                    $nodotdot = trim($nodotdot);
                    if ($nodotdot != '') {
                        $ret .= '/' . $nodotdot;
                    }
                }
            }
        }
    }

    wfProfileOut($fname);
    return $ret;
}
2071
/**
 * Produce the closing tag for the section recorded in mLastSection, if any,
 * and reset the mLastSection and mInPre state.
 *
 * @return string Closing tag followed by a newline, or '' if no section was open
 */
/* private */ public function closeParagraph()
{
    $closing = ('' != $this->mLastSection)
        ? '</' . $this->mLastSection . ">\n"
        : '';
    $this->mInPre = false;
    $this->mLastSection = '';
    return $closing;
}
# getCommon() returns the length of the common prefix of both arguments,
# i.e. the number of leading characters in which they agree.
#
/* private */ public function getCommon($st1, $st2)
{
    # Only positions present in both strings can match.
    $limit = min(strlen($st1), strlen($st2));

    $i = 0;
    while ($i < $limit && $st1[$i] == $st2[$i]) {
        ++$i;
    }
    return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.
#
/**
 * Open a list and its first item for the given prefix character.
 *
 * Any open paragraph is closed first. ';' additionally records in mDTopen
 * that a <dt> is open. For an unknown character the error marker replaces
 * the whole result, including the output of closeParagraph().
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML that opens the list
 */
/* private */ public function openList($char)
{
    $result = $this->closeParagraph();

    if ('*' == $char) {
        $result .= '<ul><li>';
    } elseif ('#' == $char) {
        $result .= '<ol><li>';
    } elseif (':' == $char) {
        $result .= '<dl><dd>';
    } elseif (';' == $char) {
        $result .= '<dl><dt>';
        $this->mDTopen = true;
    } else {
        $result = '<!-- ERR 1 -->';
    }

    return $result;
}
2126
/**
 * Close the current list item and open the next one for the given prefix
 * character.
 *
 * For definition lists the closing tag depends on mDTopen (</dt> if a term
 * is open, </dd> otherwise), and mDTopen is updated to reflect whether the
 * newly opened item is a term.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML that moves on to the next item
 */
/* private */ public function nextItem($char)
{
    if ('*' == $char || '#' == $char) {
        return '</li><li>';
    } elseif (':' == $char || ';' == $char) {
        $close = '</dd>';
        if ($this->mDTopen) {
            $close = '</dt>';
        }
        if (';' == $char) {
            $this->mDTopen = true;
            return $close . '<dt>';
        } else {
            $this->mDTopen = false;
            return $close . '<dd>';
        }
    }
    return '<!-- ERR 2 -->';
}
2146
/**
 * Close the last item and the list for the given prefix character.
 *
 * For ':' the item tag that is closed depends on mDTopen: an open term is
 * closed with </dt> and mDTopen is cleared, otherwise </dd> is used.
 *
 * @param string $char One of '*', '#' or ':'
 * @return string Closing HTML followed by a newline, or an error marker
 *                (without newline) for any other character
 */
/* private */ public function closeList($char)
{
    if ('*' == $char) {
        return '</li></ul>' . "\n";
    }
    if ('#' == $char) {
        return '</li></ol>' . "\n";
    }
    if (':' != $char) {
        return '<!-- ERR 3 -->';
    }
    if (!$this->mDTopen) {
        return '</dd></dl>' . "\n";
    }
    $this->mDTopen = false;
    return '</dt></dl>' . "\n";
}
/**
 * Make lists from lines starting with ':', '*', '#' or ';', and wrap the
 * remaining text in <p> / <pre> blocks.
 *
 * The text is processed line by line. mInPre, mLastSection and mDTopen carry
 * the block state from one line to the next.
 *
 * @param string $text Text to process
 * @param bool $linestart Whether $text begins at the start of a line; if
 *                        not, the first line is copied to the output untouched
 * @return string Text with block-level markup applied
 */
public function doBlockLevels($text, $linestart)
{
    $fname = 'Parser::doBlockLevels';
    wfProfileIn($fname);

    # Parsing through the text line by line. The main thing
    # happening here is handling of block-level elements p, pre,
    # and making lists from lines starting with * # : etc.
    #
    $textLines = explode("\n", $text);

    $lastPrefix = $output = '';
    $this->mDTopen = $inBlockElem = false;
    $prefixLength = 0;
    # Either false, or a pending paragraph tag that is only written out once
    # the following line shows whether it is needed.
    $paragraphStack = false;

    if (!$linestart) {
        $output .= array_shift($textLines);
    }
    foreach ($textLines as $oLine) {
        $lastPrefixLength = strlen($lastPrefix);
        $preCloseMatch = preg_match('/<\\/pre/i', $oLine);
        $preOpenMatch = preg_match('/<pre/i', $oLine);
        if (!$this->mInPre) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn($oLine, '*#:;');
            $pref = substr($oLine, 0, $prefixLength);

            # $pref2 is the prefix with ';' mapped to ':', so that term and
            # definition lines compare as the same list.
            $pref2 = str_replace(';', ':', $pref);
            $t = substr($oLine, $prefixLength);
            $this->mInPre = !empty($preOpenMatch);
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if ($prefixLength && 0 == strcmp($lastPrefix, $pref2)) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem(substr($pref, -1));
            $paragraphStack = false;

            if (substr($pref, -1) == ';') {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                $term = $t2 = '';
                if ($this->findColonNoLinks($t, $term, $t2) !== false) {
                    $t = $t2;
                    $output .= $term . $this->nextItem(':');
                }
            }
        } elseif ($prefixLength || $lastPrefixLength) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon($pref, $lastPrefix);
            $paragraphStack = false;

            while ($commonPrefixLength < $lastPrefixLength) {
                $output .= $this->closeList($lastPrefix[$lastPrefixLength - 1]);
                --$lastPrefixLength;
            }
            if ($prefixLength <= $commonPrefixLength && $commonPrefixLength > 0) {
                $output .= $this->nextItem($pref[$commonPrefixLength - 1]);
            }
            while ($prefixLength > $commonPrefixLength) {
                $char = substr($pref, $commonPrefixLength, 1);
                $output .= $this->openList($char);

                if (';' == $char) {
                    # FIXME: This is dupe of code above
                    if ($this->findColonNoLinks($t, $term, $t2) !== false) {
                        $t = $t2;
                        $output .= $term . $this->nextItem(':');
                    }
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }
        if (0 == $prefixLength) {
            wfProfileIn("$fname-paragraph");
            # No prefix (not in list)--go to paragraph mode
            // XXX: use a stack for nestable elements like span, table and div
            $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t);
            $closematch = preg_match(
                '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' .
                '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|' . $this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS',
                $t
            );
            if ($openmatch or $closematch) {
                $paragraphStack = false;
                # TODO bug 5718: paragraph closed
                $output .= $this->closeParagraph();
                if ($preOpenMatch and !$preCloseMatch) {
                    $this->mInPre = true;
                }
                if ($closematch) {
                    $inBlockElem = false;
                } else {
                    $inBlockElem = true;
                }
            } elseif (!$inBlockElem && !$this->mInPre) {
                # Check the length before reading $t[0]: blank lines reach this
                # point with an empty $t, which has no offset 0.
                $startsWithSpace = (strlen($t) > 0 && ' ' == $t[0]);
                if ($startsWithSpace && ($this->mLastSection == 'pre' || trim($t) != '')) {
                    // pre
                    if ($this->mLastSection != 'pre') {
                        $paragraphStack = false;
                        $output .= $this->closeParagraph() . '<pre>';
                        $this->mLastSection = 'pre';
                    }
                    $t = substr($t, 1);
                } else {
                    // paragraph
                    if ('' == trim($t)) {
                        if ($paragraphStack) {
                            $output .= $paragraphStack . '<br />';
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else {
                            if ($this->mLastSection != 'p') {
                                $output .= $this->closeParagraph();
                                $this->mLastSection = '';
                                $paragraphStack = '<p>';
                            } else {
                                $paragraphStack = '</p><p>';
                            }
                        }
                    } else {
                        if ($paragraphStack) {
                            $output .= $paragraphStack;
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } elseif ($this->mLastSection != 'p') {
                            $output .= $this->closeParagraph() . '<p>';
                            $this->mLastSection = 'p';
                        }
                    }
                }
            }
            wfProfileOut("$fname-paragraph");
        }
        // somewhere above we forget to get out of pre block (bug 785)
        if ($preCloseMatch && $this->mInPre) {
            $this->mInPre = false;
        }
        # While a paragraph tag is pending, the (blank) line itself is not output.
        if ($paragraphStack === false) {
            $output .= $t . "\n";
        }
    }
    # Close whatever list levels the last line left open.
    while ($prefixLength) {
        $output .= $this->closeList($pref2[$prefixLength - 1]);
        --$prefixLength;
    }
    if ('' != $this->mLastSection) {
        $output .= '</' . $this->mLastSection . '>';
        $this->mLastSection = '';
    }

    wfProfileOut($fname);
    return $output;
}
2337
/**
 * Split $str at the first ':' that is not nested inside an HTML-style tag.
 *
 * On success $before receives the text preceding the colon and $after the
 * text following it.
 *
 * NOTE(review): this listing is incomplete. Its own line numbering skips
 * entries, no assignment to $state is visible anywhere, and the case labels
 * for states 4, 6 and 7 are absent. The state transitions were presumably
 * lost when the listing was produced; restore them from the upstream source
 * before relying on this text.
 *
 * @param string $str     Text to search
 * @param string &$before Set to the part before the colon when one is found
 * @param string &$after  Set to the part after the colon when one is found
 * @return int|false Offset reported for the match, or false when no usable
 *                   colon exists or the tags are unbalanced
 */
2346 public function findColonNoLinks($str, &$before, &$after)
2347 {
2348 $fname = 'Parser::findColonNoLinks';
2349 wfProfileIn($fname);
2350
2351 $pos = strpos($str, ':');
2352 if ($pos === false) {
2353 // Nothing to find!
2354 wfProfileOut($fname);
2355 return false;
2356 }
2357
// Fast path: no '<' occurs before the first colon, so the colon cannot be inside a tag.
2358 $lt = strpos($str, '<');
2359 if ($lt === false || $lt > $pos) {
2360 // Easy; no tag nesting to worry about
2361 $before = substr($str, 0, $pos);
2362 $after = substr($str, $pos + 1);
2363 wfProfileOut($fname);
2364 return $pos;
2365 }
2366
2367 // Ugly state machine to walk through avoiding tags.
// NOTE(review): the initialisation of $state is not visible here (listing line 2368 is missing).
// $stack counts currently open tags: incremented on '>' in state 1, decremented on '>' in state 3.
2369 $stack = 0;
2370 $len = strlen($str);
2371 for ($i = 0; $i < $len; $i++) {
2372 $c = $str[$i];
2373
2374 switch ($state) {
2375 // (Using the number is a performance hack for common cases)
2376 case 0: // MW_COLON_STATE_TEXT:
2377 switch ($c) {
2378 case "<":
2379 // Could be either a <start> tag or an </end> tag
2381 break;
2382 case ":":
2383 if ($stack == 0) {
2384 // We found it!
2385 $before = substr($str, 0, $i);
2386 $after = substr($str, $i + 1);
2387 wfProfileOut($fname);
2388 return $i;
2389 }
2390 // Embedded in a tag; don't break it.
2391 break;
2392 default:
2393 // Skip ahead looking for something interesting
2394 $colon = strpos($str, ':', $i);
2395 if ($colon === false) {
2396 // Nothing else interesting
2397 wfProfileOut($fname);
2398 return false;
2399 }
2400 $lt = strpos($str, '<', $i);
2401 if ($stack === 0) {
2402 if ($lt === false || $colon < $lt) {
2403 // We found it!
2404 $before = substr($str, 0, $colon);
2405 $after = substr($str, $colon + 1);
2406 wfProfileOut($fname);
// NOTE(review): the split above is made at $colon, but $i (the position where the
// skip-ahead started) is returned; the other success paths return the colon's offset.
// Confirm whether any caller uses the numeric value.
2407 return $i;
2408 }
2409 }
2410 if ($lt === false) {
2411 // Nothing else interesting to find; abort!
2412 // We're nested, but there's no close tags left. Abort!
2413 break 2;
2414 }
2415 // Skip ahead to next tag start
2416 $i = $lt;
2418 }
2419 break;
2420 case 1: // MW_COLON_STATE_TAG:
2421 // In a <tag>
2422 switch ($c) {
2423 case ">":
2424 $stack++;
2426 break;
2427 case "/":
2428 // Slash may be followed by >?
2430 break;
2431 default:
2432 // ignore
2433 }
2434 break;
2435 case 2: // MW_COLON_STATE_TAGSTART:
2436 switch ($c) {
2437 case "/":
2439 break;
2440 case "!":
2442 break;
2443 case ">":
2444 // Illegal early close? This shouldn't happen D:
2446 break;
2447 default:
2449 }
2450 break;
2451 case 3: // MW_COLON_STATE_CLOSETAG:
2452 // In a </tag>
2453 if ($c == ">") {
2454 $stack--;
2455 if ($stack < 0) {
2456 wfDebug("Invalid input in $fname; too many close tags\n");
2457 wfProfileOut($fname);
2458 return false;
2459 }
2461 }
2462 break;
// NOTE(review): a case label is missing here (listing line 2463); as shown, the
// statements below are unreachable after the preceding break.
2464 if ($c == ">") {
2465 // Yes, a self-closed tag <blah/>
2467 } else {
2468 // Probably we're jumping the gun, and this is an attribute
2470 }
2471 break;
2472 case 5: // MW_COLON_STATE_COMMENT:
2473 if ($c == "-") {
2475 }
2476 break;
// NOTE(review): case label missing (listing line 2477).
2478 if ($c == "-") {
2480 } else {
2482 }
2483 break;
// NOTE(review): case label missing (listing line 2484).
2485 if ($c == ">") {
2487 } else {
2489 }
2490 break;
2491 default:
2492 throw new MWException("State machine error in $fname");
2493 }
2494 }
// Reached when the loop ends or "break 2" fires without a colon being found at nesting depth 0.
2495 if ($stack > 0) {
2496 wfDebug("Invalid input in $fname; not enough close tags (stack $stack, state $state)\n");
// NOTE(review): this exit does not call wfProfileOut($fname), unlike every other return in the method.
2497 return false;
2498 }
2499 wfProfileOut($fname);
2500 return false;
2501 }
2502
/**
 * Return the value of the magic variable identified by $index.
 *
 * Date/time, site-statistics and version values are memoised in a
 * function-static cache; page-, revision- and configuration-derived values
 * are computed on every call. Hooks may inspect or alter the cache
 * (ParserGetVariableValueVarCache), the reference timestamp
 * (ParserGetVariableValueTs) and the result for unknown IDs
 * (ParserGetVariableValueSwitch).
 *
 * The "local*" variables are evaluated in $wgLocaltimezone when that global
 * is set. The zone is switched with date_default_timezone_set() because
 * date() has not honoured the TZ environment variable since PHP 5.4, so the
 * former putenv('TZ=...') approach silently had no effect.
 *
 * @param string $index Magic variable ID, e.g. 'currentmonth' or 'pagename'
 * @return mixed Value of the variable, or null when $index is unknown and
 *               the ParserGetVariableValueSwitch hook supplies nothing
 */
public function getVariableValue($index)
{
    global $wgContLang, $wgSitename, $wgServer, $wgServerName, $wgScriptPath;

    static $varCache = array();
    if (wfRunHooks('ParserGetVariableValueVarCache', array( &$this, &$varCache ))) {
        if (isset($varCache[$index])) {
            return $varCache[$index];
        }
    }

    $ts = time();
    wfRunHooks('ParserGetVariableValueTs', array( &$this, &$ts ));

    # Evaluate the local-time components in the configured time zone,
    # then restore whatever zone was active before.
    global $wgLocaltimezone;
    $previousTimezone = null;
    if (isset($wgLocaltimezone)) {
        $previousTimezone = date_default_timezone_get();
        date_default_timezone_set($wgLocaltimezone);
    }
    $localTimestamp = date('YmdHis', $ts);
    $localMonth = date('m', $ts);
    $localMonthNumber = date('n', $ts);
    $localDay = date('j', $ts);
    $localDay2 = date('d', $ts);
    $localDayOfWeek = date('w', $ts);
    $localWeek = date('W', $ts);
    $localYear = date('Y', $ts);
    $localHour = date('H', $ts);
    if ($previousTimezone !== null) {
        date_default_timezone_set($previousTimezone);
    }

    switch ($index) {
        # --- Current date (default time zone) ---
        case 'currentmonth':
            return $varCache[$index] = $wgContLang->formatNum(date('m', $ts));
        case 'currentmonthname':
            return $varCache[$index] = $wgContLang->getMonthName(date('n', $ts));
        case 'currentmonthnamegen':
            return $varCache[$index] = $wgContLang->getMonthNameGen(date('n', $ts));
        case 'currentmonthabbrev':
            return $varCache[$index] = $wgContLang->getMonthAbbreviation(date('n', $ts));
        case 'currentday':
            return $varCache[$index] = $wgContLang->formatNum(date('j', $ts));
        case 'currentday2':
            return $varCache[$index] = $wgContLang->formatNum(date('d', $ts));

        # --- Local date ($wgLocaltimezone) ---
        case 'localmonth':
            return $varCache[$index] = $wgContLang->formatNum($localMonth);
        case 'localmonthname':
            return $varCache[$index] = $wgContLang->getMonthName($localMonthNumber);
        case 'localmonthnamegen':
            return $varCache[$index] = $wgContLang->getMonthNameGen($localMonthNumber);
        case 'localmonthabbrev':
            return $varCache[$index] = $wgContLang->getMonthAbbreviation($localMonthNumber);
        case 'localday':
            return $varCache[$index] = $wgContLang->formatNum($localDay);
        case 'localday2':
            return $varCache[$index] = $wgContLang->formatNum($localDay2);

        # --- Page names (never cached: they depend on the current title) ---
        case 'pagename':
            return $this->mTitle->getText();
        case 'pagenamee':
            return $this->mTitle->getPartialURL();
        case 'fullpagename':
            return $this->mTitle->getPrefixedText();
        case 'fullpagenamee':
            return $this->mTitle->getPrefixedURL();
        case 'subpagename':
            return $this->mTitle->getSubpageText();
        case 'subpagenamee':
            return $this->mTitle->getSubpageUrlForm();
        case 'basepagename':
            return $this->mTitle->getBaseText();
        case 'basepagenamee':
            return wfUrlencode(str_replace(' ', '_', $this->mTitle->getBaseText()));
        case 'talkpagename':
            if (!$this->mTitle->canTalk()) {
                return '';
            }
            $talkPage = $this->mTitle->getTalkPage();
            return $talkPage->getPrefixedText();
        case 'talkpagenamee':
            if (!$this->mTitle->canTalk()) {
                return '';
            }
            $talkPage = $this->mTitle->getTalkPage();
            return $talkPage->getPrefixedUrl();
        case 'subjectpagename':
            $subjPage = $this->mTitle->getSubjectPage();
            return $subjPage->getPrefixedText();
        case 'subjectpagenamee':
            $subjPage = $this->mTitle->getSubjectPage();
            return $subjPage->getPrefixedUrl();

        # --- Revision data ---
        case 'revisionid':
            return $this->mRevisionId;
        case 'revisionday':
            return intval(substr($this->getRevisionTimestamp(), 6, 2));
        case 'revisionday2':
            return substr($this->getRevisionTimestamp(), 6, 2);
        case 'revisionmonth':
            return intval(substr($this->getRevisionTimestamp(), 4, 2));
        case 'revisionyear':
            return substr($this->getRevisionTimestamp(), 0, 4);
        case 'revisiontimestamp':
            return $this->getRevisionTimestamp();

        # --- Namespaces ---
        case 'namespace':
            return str_replace('_', ' ', $wgContLang->getNsText($this->mTitle->getNamespace()));
        case 'namespacee':
            return wfUrlencode($wgContLang->getNsText($this->mTitle->getNamespace()));
        case 'talkspace':
            return $this->mTitle->canTalk() ? str_replace('_', ' ', $this->mTitle->getTalkNsText()) : '';
        case 'talkspacee':
            return $this->mTitle->canTalk() ? wfUrlencode($this->mTitle->getTalkNsText()) : '';
        case 'subjectspace':
            return $this->mTitle->getSubjectNsText();
        case 'subjectspacee':
            return wfUrlencode($this->mTitle->getSubjectNsText());

        # --- Current time (default time zone) ---
        case 'currentdayname':
            return $varCache[$index] = $wgContLang->getWeekdayName(date('w', $ts) + 1);
        case 'currentyear':
            return $varCache[$index] = $wgContLang->formatNum(date('Y', $ts), true);
        case 'currenttime':
            return $varCache[$index] = $wgContLang->time(wfTimestamp(TS_MW, $ts), false, false);
        case 'currenthour':
            return $varCache[$index] = $wgContLang->formatNum(date('H', $ts), true);
        case 'currentweek':
            // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
            // int to remove the padding
            return $varCache[$index] = $wgContLang->formatNum((int) date('W', $ts));
        case 'currentdow':
            return $varCache[$index] = $wgContLang->formatNum(date('w', $ts));

        # --- Local time ($wgLocaltimezone) ---
        case 'localdayname':
            return $varCache[$index] = $wgContLang->getWeekdayName($localDayOfWeek + 1);
        case 'localyear':
            return $varCache[$index] = $wgContLang->formatNum($localYear, true);
        case 'localtime':
            return $varCache[$index] = $wgContLang->time($localTimestamp, false, false);
        case 'localhour':
            return $varCache[$index] = $wgContLang->formatNum($localHour, true);
        case 'localweek':
            // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
            // int to remove the padding
            return $varCache[$index] = $wgContLang->formatNum((int) $localWeek);
        case 'localdow':
            return $varCache[$index] = $wgContLang->formatNum($localDayOfWeek);

        # --- Site statistics ---
        case 'numberofarticles':
            return $varCache[$index] = $wgContLang->formatNum(SiteStats::articles());
        case 'numberoffiles':
            return $varCache[$index] = $wgContLang->formatNum(SiteStats::images());
        case 'numberofusers':
            return $varCache[$index] = $wgContLang->formatNum(SiteStats::users());
        case 'numberofpages':
            return $varCache[$index] = $wgContLang->formatNum(SiteStats::pages());
        case 'numberofadmins':
            return $varCache[$index] = $wgContLang->formatNum(SiteStats::admins());
        case 'numberofedits':
            return $varCache[$index] = $wgContLang->formatNum(SiteStats::edits());

        # --- Timestamps, version and site configuration ---
        case 'currenttimestamp':
            return $varCache[$index] = wfTimestampNow();
        case 'localtimestamp':
            return $varCache[$index] = $localTimestamp;
        case 'currentversion':
            return $varCache[$index] = SpecialVersion::getVersion();
        case 'sitename':
            return $wgSitename;
        case 'server':
            return $wgServer;
        case 'servername':
            return $wgServerName;
        case 'scriptpath':
            return $wgScriptPath;
        case 'directionmark':
            return $wgContLang->getDirMark();
        case 'contentlanguage':
            global $wgContLanguageCode;
            return $wgContLanguageCode;

        default:
            # Unknown ID: let extensions provide a value through $ret.
            $ret = null;
            if (wfRunHooks('ParserGetVariableValueSwitch', array( &$this, &$varCache, &$index, &$ret ))) {
                return $ret;
            } else {
                return null;
            }
    }
}
2700
/**
 * Rebuild $this->mVariables from scratch.
 *
 * Every ID reported by MagicWord::getVariableIDs() is resolved to its
 * MagicWord object, which then registers itself in $this->mVariables
 * through addToArray().
 */
public function initialiseVariables()
{
    $profileSection = 'Parser::initialiseVariables';
    wfProfileIn($profileSection);

    $idList = MagicWord::getVariableIDs();
    $this->mVariables = array();

    foreach ($idList as $variableId) {
        $magicWord = &MagicWord::get($variableId);
        $magicWord->addToArray($this->mVariables, $variableId);
    }

    wfProfileOut($profileSection);
}
2719
/**
 * Scan $text for nested brace constructs and replace them via callbacks.
 *
 * $callbacks maps an opening character to a rule array with the keys
 *   'end' - the matching closing character,
 *   'cb'  - array( braceCount => callable|null ),
 *   'min' - smallest run of opening characters worth tracking,
 *   'max' - largest run of closing characters that may be consumed at once.
 *
 * A callable receives array( 'text', 'title', 'parts', 'lineStart' ) and its
 * return value replaces the matched piece; scanning then resumes after the
 * inserted text. A null callback means the construct is recognised (so its
 * pipes and braces nest properly) but left untouched.
 *
 * @param string $text      Text to process
 * @param array  $callbacks Rules keyed by opening character
 * @return string Text with all matched pieces replaced
 */
public function replace_callback($text, $callbacks)
{
    wfProfileIn(__METHOD__);

    $stack = array();  # runs of opening braces that are still unclosed, innermost last
    $top = -1;         # index of the innermost entry in $stack, -1 when empty
    $openers = implode('', array_keys($callbacks));

    $pos = 0;
    while ($pos < strlen($text)) {
        # Decide which characters can end the current plain-text stretch
        if ($top == -1) {
            $closer = '';
            $stopChars = $openers;
        } else {
            $closer = $stack[$top]['braceEnd'];
            $stopChars = $openers . '|' . $closer;
        }

        $pos += strcspn($text, $stopChars, $pos);
        if ($pos >= strlen($text)) {
            # All done
            break;
        }

        $char = $text[$pos];
        $rule = null;
        if ($char == '|') {
            $event = 'pipe';
        } elseif ($char == $closer) {
            $event = 'close';
        } elseif (isset($callbacks[$char])) {
            $event = 'open';
            $rule = $callbacks[$char];
        } else {
            # Some versions of PHP have a strcspn which stops on null characters
            # Ignore and continue
            ++$pos;
            continue;
        }

        if ($event == 'open') {
            # Measure the run of opening characters and step over it
            $runLength = strspn($text, $char, $pos);
            $pos += $runLength;

            $piece = array(
                'brace' => $char,
                'braceEnd' => $rule['end'],
                'title' => '',
                'parts' => null,
                'count' => $runLength,
                'startAt' => $pos,
                'partStart' => $pos,
            );

            # Track the run only if it is long enough for one of the rules
            if ($runLength >= $rule['min']) {
                $stack[++$top] = $piece;
            }
        } elseif ($event == 'close') {
            $frame = $stack[$top];
            $cbType = $callbacks[$frame['brace']];

            # Never look at more closing characters than were opened
            $runLength = strspn($text, $char, $pos, $frame['count']);

            if ($runLength > $cbType['max']) {
                # The specified maximum exists in the callback array, unless the caller
                # has made an error
                $matched = $cbType['max'];
            } else {
                # Skip any gaps in the callback array to find the true largest match.
                # array_key_exists, not isset, because the callback can be null
                for ($matched = $runLength; $matched > 0; --$matched) {
                    if (array_key_exists($matched, $cbType['cb'])) {
                        break;
                    }
                }
            }

            if ($matched <= 0) {
                $pos += $runLength;
                continue;
            }
            $handler = $cbType['cb'][$matched];

            # The text since the last separator is the title, or the final part
            # if at least one '|' was seen
            $segment = substr($text, $frame['partStart'], $pos - $frame['partStart']);
            if (null === $frame['parts']) {
                $frame['title'] = $segment;
            } else {
                $frame['parts'][] = $segment;
            }

            $pieceStart = $frame['startAt'] - $matched;
            $pieceEnd = $pos + $matched;

            if (is_callable($handler)) {
                $cbArgs = array(
                    'text' => substr($text, $pieceStart, $pieceEnd - $pieceStart),
                    'title' => trim($frame['title']),
                    'parts' => $frame['parts'],
                    'lineStart' => (($pieceStart > 0) && ($text[$pieceStart - 1] == "\n")),
                );
                # Splice the callback's result in and resume right after it
                $replacement = call_user_func($handler, $cbArgs);
                $text = substr($text, 0, $pieceStart) . $replacement . substr($text, $pieceEnd);
                $pos = $pieceStart + strlen($replacement);
            } else {
                # null callback: the construct is parsed but not replaced
                $pos += $matched;
            }

            # Drop the entry from the stack ...
            $stack[$top--] = null;

            # ... but re-push what is left of it when fewer braces were consumed
            # than were opened and the remainder still fits some rule
            if ($matched < $frame['count']) {
                $leftover = array(
                    'brace' => $frame['brace'],
                    'braceEnd' => $frame['braceEnd'],
                    'count' => $frame['count'] - $matched,
                    'title' => '',
                    'parts' => null,
                    'startAt' => $frame['startAt'] - $matched,
                );
                $leftover['partStart'] = $leftover['startAt'];

                $candidates = $callbacks[$leftover['brace']]['cb'];
                while ($leftover['count']) {
                    if (array_key_exists($leftover['count'], $candidates)) {
                        $stack[++$top] = $leftover;
                        break;
                    }
                    --$leftover['count'];
                }
            }
        } elseif ($event == 'pipe') {
            # First separator closes the title, later ones close a part
            $segment = substr($text, $stack[$top]['partStart'], $pos - $stack[$top]['partStart']);
            if (null === $stack[$top]['parts']) {
                $stack[$top]['title'] = $segment;
                $stack[$top]['parts'] = array();
            } else {
                $stack[$top]['parts'][] = $segment;
            }
            $stack[$top]['partStart'] = ++$pos;
        }
    }

    wfProfileOut(__METHOD__);
    return $text;
}
2907
/**
 * Expand {{double-brace}} constructs and {{{triple-brace}}} arguments.
 *
 * Text longer than the configured maximum include size is returned
 * unchanged. Otherwise $args is pushed on $this->mArgStack for the duration
 * of the expansion so that argSubstitution() can read it, and popped again
 * before returning. The pop is unconditional: previously it only happened
 * when at least one brace callback was active, which left a stale frame on
 * the stack for $argsOnly calls in OT_MSG mode.
 *
 * @param string $text     Text to transform
 * @param array  $args     Template arguments, keyed by name or position
 * @param bool   $argsOnly Only expand triple-brace arguments
 * @return string Transformed text
 */
public function replaceVariables($text, $args = array(), $argsOnly = false)
{
    # Prevent too big inclusions
    if (strlen($text) > $this->mOptions->getMaxIncludeSize()) {
        return $text;
    }

    $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/;
    wfProfileIn($fname);

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push($this->mArgStack, $args);

    # Brace count => handler
    $braceCallbacks = array();
    if (!$argsOnly) {
        $braceCallbacks[2] = array( &$this, 'braceSubstitution' );
    }
    if ($this->mOutputType != OT_MSG) {
        $braceCallbacks[3] = array( &$this, 'argSubstitution' );
    }

    if ($braceCallbacks) {
        $callbacks = array(
            '{' => array(
                'end' => '}',
                'cb' => $braceCallbacks,
                'min' => $argsOnly ? 3 : 2,
                'max' => isset($braceCallbacks[3]) ? 3 : 2,
            ),
            # Links are tracked (null callback) so that pipes inside [[...]]
            # are not mistaken for template argument separators
            '[' => array(
                'end' => ']',
                'cb' => array(2 => null),
                'min' => 2,
                'max' => 2,
            )
        );
        $text = $this->replace_callback($text, $callbacks);
    }

    # Always balance the push above, even when there was nothing to expand
    array_pop($this->mArgStack);

    wfProfileOut($fname);
    return $text;
}
2965
/**
 * Replace a magic variable with its value.
 *
 * In OT_WIKI mode only variables prefixed with the "subst" magic word are
 * replaced. If nothing is substituted, the original text ($matches[0]) is
 * returned untouched, including any subst prefix, because the construct
 * may be a template rather than a variable.
 *
 * @param array $matches [0] the complete matched text, [1] the variable name
 * @return mixed Variable value, or $matches[0] when no substitution applies
 */
public function variableSubstitution($matches)
{
    global $wgContLang;

    $profileSection = 'Parser::variableSubstitution';
    $lookupName = $wgContLang->lc($matches[1]);
    wfProfileIn($profileSection);

    # Fall back to the untouched source text
    $result = $matches[0];

    $eligible = true;
    if ($this->mOutputType == OT_WIKI) {
        # Do only magic variables prefixed by SUBST
        $substWord = &MagicWord::get('subst');
        $eligible = (bool) $substWord->matchStartAndRemove($lookupName);
    }

    if ($eligible && array_key_exists($lookupName, $this->mVariables)) {
        $variableId = $this->mVariables[$lookupName];
        # Now check if we did really match, case sensitive or not
        $magicWord = &MagicWord::get($variableId);
        if ($magicWord->match($matches[1])) {
            $result = $this->getVariableValue($variableId);
            $this->mOutput->mContainsOldMagic = true;
        }
    }

    wfProfileOut($profileSection);
    return $result;
}
3003
/**
 * Turn a flat list of template arguments into an associative array.
 *
 * Arguments containing '=' become named entries, with name and value
 * trimmed. All others are stored untrimmed under consecutive integer keys
 * starting at 1. A later argument with the same name overwrites an
 * earlier one.
 *
 * The former "=== false" / "!== false" checks on the trimmed name and value
 * were removed: trim() always returns a string, so they could never take
 * effect. The explicit casts cover PHP versions where substr() returns
 * false for an empty tail.
 *
 * @param array $args List of raw argument strings
 * @return array Arguments keyed by name or 1-based position
 */
public static function createAssocArgs($args)
{
    $assocArgs = array();
    $nextPosition = 1;

    foreach ($args as $arg) {
        $eqpos = strpos($arg, '=');
        if ($eqpos === false) {
            # Positional argument, kept verbatim
            $assocArgs[$nextPosition++] = $arg;
            continue;
        }

        $name = trim((string) substr($arg, 0, $eqpos));
        $value = trim((string) substr($arg, $eqpos + 1));
        $assocArgs[$name] = $value;
    }

    return $assocArgs;
}
3028
/**
 * Expand one double-brace construct: magic variable, parser function or
 * template transclusion.
 *
 * The steps run in order and stop at the first that sets $found:
 * variable substitution (only when there are no '|' parts), SUBST handling,
 * MSG/MSGNW/RAW modifiers, parser function hooks, the per-parse template
 * cache, and finally loading the template text. The resulting text is then
 * escaped or recursively expanded, checked against the include-size limits
 * and, for HTML output, has its section headings tagged with their origin.
 *
 * NOTE(review): this listing is incomplete. Its own numbering skips lines
 * 3321 and 3372, so at least two statements are missing (see inline notes).
 *
 * @param array $piece Keys read here: 'text' (the complete construct),
 *                     'title' (text before the first '|'), 'parts'
 *                     (remaining '|'-separated pieces or null) and
 *                     'lineStart'
 * @return string Replacement text, or $piece['text'] when nothing applied
 */
3040 public function braceSubstitution($piece)
3041 {
3042 global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces;
3043 $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/;
3044 wfProfileIn($fname);
3045 wfProfileIn(__METHOD__ . '-setup');
3046
3047 # Flags
3048 $found = false; # $text has been filled
3049 $nowiki = false; # wiki markup in $text should be escaped
3050 $noparse = false; # Unsafe HTML tags should not be stripped, etc.
3051 $noargs = false; # Don't replace triple-brace arguments in $text
3052 $replaceHeadings = false; # Make the edit section links go to the template not the article
3053 $headingOffset = 0; # Skip headings when number, to account for those that weren't transcluded.
3054 $isHTML = false; # $text is HTML, armour it against wikitext transformation
3055 $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered
3056
3057 # Title object, where $text came from
3058 $title = null;
3059
// $linestart is never changed below, so every "$linestart ." prefix adds an empty string.
3060 $linestart = '';
3061
3062
3063 # $part1 is the bit before the first |, and must contain only title characters
3064 # $args is a list of arguments, starting from index 0, not including $part1
3065
3066 $titleText = $part1 = $piece['title'];
3067 # If the third subpattern matched anything, it will start with |
3068
// No '|' parts: try a plain magic variable first. A changed return value means it was substituted.
3069 if (null == $piece['parts']) {
3070 $replaceWith = $this->variableSubstitution(array($piece['text'], $piece['title']));
3071 if ($replaceWith != $piece['text']) {
3072 $text = $replaceWith;
3073 $found = true;
3074 $noparse = true;
3075 $noargs = true;
3076 }
3077 }
3078
3079 $args = (null == $piece['parts']) ? array() : $piece['parts'];
3080 wfProfileOut(__METHOD__ . '-setup');
3081
3082 # SUBST
3083 wfProfileIn(__METHOD__ . '-modifiers');
3084 if (!$found) {
3085 $mwSubst = &MagicWord::get('subst');
3086 if ($mwSubst->matchStartAndRemove($part1) xor $this->ot['wiki']) {
3087 # One of two possibilities is true:
3088 # 1) Found SUBST but not in the PST phase
3089 # 2) Didn't find SUBST and in the PST phase
3090 # In either case, return without further processing
3091 $text = $piece['text'];
3092 $found = true;
3093 $noparse = true;
3094 $noargs = true;
3095 }
3096 }
3097
3098 # MSG, MSGNW and RAW
3099 if (!$found) {
3100 # Check for MSGNW:
3101 $mwMsgnw = &MagicWord::get('msgnw');
3102 if ($mwMsgnw->matchStartAndRemove($part1)) {
3103 $nowiki = true;
3104 } else {
3105 # Remove obsolete MSG:
3106 $mwMsg = &MagicWord::get('msg');
3107 $mwMsg->matchStartAndRemove($part1);
3108 }
3109
3110 # Check for RAW:
3111 $mwRaw = &MagicWord::get('raw');
3112 if ($mwRaw->matchStartAndRemove($part1)) {
3113 $forceRawInterwiki = true;
3114 }
3115 }
3116 wfProfileOut(__METHOD__ . '-modifiers');
3117
3118 //save path level before recursing into functions & templates.
3119 $lastPathLevel = $this->mTemplatePath;
3120
3121 # Parser functions
3122 if (!$found) {
3123 wfProfileIn(__METHOD__ . '-pfunc');
3124
3125 $colonPos = strpos($part1, ':');
3126 if ($colonPos !== false) {
3127 # Case sensitive functions
3128 $function = substr($part1, 0, $colonPos);
3129 if (isset($this->mFunctionSynonyms[1][$function])) {
3130 $function = $this->mFunctionSynonyms[1][$function];
3131 } else {
3132 # Case insensitive functions
3133 $function = strtolower($function);
3134 if (isset($this->mFunctionSynonyms[0][$function])) {
3135 $function = $this->mFunctionSynonyms[0][$function];
3136 } else {
3137 $function = false;
3138 }
3139 }
3140 if ($function) {
// The hook receives the parser, the text after the colon, then the trimmed '|' parts.
3141 $funcArgs = array_map('trim', $args);
3142 $funcArgs = array_merge(array( &$this, trim(substr($part1, $colonPos + 1)) ), $funcArgs);
3143 $result = call_user_func_array($this->mFunctionHooks[$function], $funcArgs);
3144 $found = true;
3145
3146 // The text is usually already parsed, doesn't need triple-brace tags expanded, etc.
3147 //$noargs = true;
3148 //$noparse = true;
3149
3150 if (is_array($result)) {
3151 if (isset($result[0])) {
3152 $text = $linestart . $result[0];
3153 unset($result[0]);
3154 }
3155
3156 // Extract flags into the local scope
3157 // This allows callers to set flags such as nowiki, noparse, found, etc.
// NOTE(review): extract() lets a hook overwrite any local variable in this method
// (including $piece, $args and $title), not only the flags. Hooks must be trusted code.
// If a hook returns an array without index 0 and nothing earlier set $text, $text stays
// undefined while $found is true.
3158 extract($result);
3159 } else {
3160 $text = $linestart . $result;
3161 }
3162 }
3163 }
3164 wfProfileOut(__METHOD__ . '-pfunc');
3165 }
3166
3167 # Template table test
3168
3169 # Did we encounter this template already? If yes, it is in the cache
3170 # and we need to check for loops.
3171 if (!$found && isset($this->mTemplates[$piece['title']])) {
3172 $found = true;
3173
3174 # Infinite loop test
3175 if (isset($this->mTemplatePath[$part1])) {
3176 $noparse = true;
3177 $noargs = true;
3178 $found = true;
3179 $text = $linestart .
3180 "[[$part1]]<!-- WARNING: template loop detected -->";
3181 wfDebug(__METHOD__ . ": template loop broken at '$part1'\n");
3182 } else {
3183 # set $text to cached message.
3184 $text = $linestart . $this->mTemplates[$piece['title']];
3185 #treat title for cached page the same as others
3186 $ns = NS_TEMPLATE;
3187 $subpage = '';
3188 $part1 = $this->maybeDoSubpageLink($part1, $subpage);
3189 if ($subpage !== '') {
3190 $ns = $this->mTitle->getNamespace();
3191 }
3192 $title = Title::newFromText($part1, $ns);
3193 //used by include size checking
// NOTE(review): unlike the database path below, $title is not checked with is_null()
// before this method call. Presumably safe because a cache entry implies the title
// resolved earlier - confirm.
3194 $titleText = $title->getPrefixedText();
3195 //used by edit section links
3196 $replaceHeadings = true;
3197 }
3198 }
3199
3200 # Load from database
3201 if (!$found) {
3202 wfProfileIn(__METHOD__ . '-loadtpl');
3203 $ns = NS_TEMPLATE;
3204 # declaring $subpage directly in the function call
3205 # does not work correctly with references and breaks
3206 # {{/subpage}}-style inclusions
3207 $subpage = '';
3208 $part1 = $this->maybeDoSubpageLink($part1, $subpage);
3209 if ($subpage !== '') {
3210 $ns = $this->mTitle->getNamespace();
3211 }
3212 $title = Title::newFromText($part1, $ns);
3213
3214
3215 if (!is_null($title)) {
3216 $titleText = $title->getPrefixedText();
3217 # Check for language variants if the template is not found
3218 if ($wgContLang->hasVariants() && $title->getArticleID() == 0) {
3219 $wgContLang->findVariantLink($part1, $title);
3220 }
3221
3222 if (!$title->isExternal()) {
3223 if ($title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html']) {
3224 $text = SpecialPage::capturePath($title);
3225 if (is_string($text)) {
3226 $found = true;
3227 $noparse = true;
3228 $noargs = true;
3229 $isHTML = true;
3230 $this->disableCache();
3231 }
3232 } elseif ($wgNonincludableNamespaces && in_array($title->getNamespace(), $wgNonincludableNamespaces)) {
3233 $found = false; //access denied
3234 wfDebug("$fname: template inclusion denied for " . $title->getPrefixedDBkey());
3235 } else {
3236 $articleContent = $this->fetchTemplate($title);
3237 if ($articleContent !== false) {
3238 $found = true;
3239 $text = $articleContent;
3240 $replaceHeadings = true;
3241 }
3242 }
3243
3244 # If the title is valid but undisplayable, make a link to it
3245 if (!$found && ($this->ot['html'] || $this->ot['pre'])) {
3246 $text = "[[:$titleText]]";
3247 $found = true;
3248 }
3249 } elseif ($title->isTrans()) {
3250 // Interwiki transclusion
3251 if ($this->ot['html'] && !$forceRawInterwiki) {
3252 $text = $this->interwikiTransclude($title, 'render');
3253 $isHTML = true;
3254 $noparse = true;
3255 } else {
3256 $text = $this->interwikiTransclude($title, 'raw');
3257 $replaceHeadings = true;
3258 }
3259 $found = true;
3260 }
3261
3262 # Template cache array insertion
3263 # Use the original $piece['title'] not the mangled $part1, so that
3264 # modifiers such as RAW: produce separate cache entries
3265 if ($found) {
3266 if ($isHTML) {
3267 // A special page; don't store it in the template cache.
3268 } else {
3269 $this->mTemplates[$piece['title']] = $text;
3270 }
3271 $text = $linestart . $text;
3272 }
3273 }
3274 wfProfileOut(__METHOD__ . '-loadtpl');
3275 }
3276
// Size accounting before expansion: an oversize inclusion is replaced by a link plus a warning comment.
3277 if ($found && !$this->incrementIncludeSize('pre-expand', strlen($text))) {
3278 # Error, oversize inclusion
3279 $text = $linestart .
3280 "[[$titleText]]<!-- WARNING: template omitted, pre-expand include size too large -->";
3281 $noparse = true;
3282 $noargs = true;
3283 }
3284
3285 # Recursive parsing, escaping and link table handling
3286 # Only for HTML output
3287 if ($nowiki && $found && ($this->ot['html'] || $this->ot['pre'])) {
3288 $text = wfEscapeWikiText($text);
3289 } elseif (!$this->ot['msg'] && $found) {
3290 if ($noargs) {
3291 $assocArgs = array();
3292 } else {
3293 # Clean up argument array
3294 $assocArgs = self::createAssocArgs($args);
3295 # Add a new element to the templace recursion path
3296 $this->mTemplatePath[$part1] = 1;
3297 }
3298
3299 if (!$noparse) {
3300 # If there are any <onlyinclude> tags, only include them
3301 if (in_string('<onlyinclude>', $text) && in_string('</onlyinclude>', $text)) {
3302 $replacer = new OnlyIncludeReplacer;
3303 StringUtils::delimiterReplaceCallback(
3304 '<onlyinclude>',
3305 '</onlyinclude>',
3306 array( &$replacer, 'replace' ),
3307 $text
3308 );
3309 $text = $replacer->output;
3310 }
3311 # Remove <noinclude> sections and <includeonly> tags
3312 $text = StringUtils::delimiterReplace('<noinclude>', '</noinclude>', '', $text);
3313 $text = strtr($text, array( '<includeonly>' => '' , '</includeonly>' => '' ));
3314
3315 if ($this->ot['html'] || $this->ot['pre']) {
3316 # Strip <nowiki>, <pre>, etc.
3317 $text = $this->strip($text, $this->mStripState);
3318 if ($this->ot['html']) {
3319 $text = Sanitizer::removeHTMLtags($text, array( &$this, 'replaceVariables' ), $assocArgs);
3320 } elseif ($this->ot['pre'] && $this->mOptions->getRemoveComments()) {
// NOTE(review): this branch is empty as shown; listing line 3321 is missing. Given the
// getRemoveComments() condition it presumably removed HTML comments from $text - restore
// from upstream.
3322 }
3323 }
3324 $text = $this->replaceVariables($text, $assocArgs);
3325
3326 # If the template begins with a table or block-level
3327 # element, it should be treated as beginning a new line.
3328 if (!$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{
3329 $text = "\n" . $text;
3330 }
3331 } elseif (!$noargs) {
3332 # $noparse and !$noargs
3333 # Just replace the arguments, not any double-brace items
3334 # This is used for rendered interwiki transclusion
3335 $text = $this->replaceVariables($text, $assocArgs, true);
3336 }
3337 }
3338 # Prune lower levels off the recursion check path
3339 $this->mTemplatePath = $lastPathLevel;
3340
3341 if ($found && !$this->incrementIncludeSize('post-expand', strlen($text))) {
3342 # Error, oversize inclusion
3343 $text = $linestart .
3344 "[[$titleText]]<!-- WARNING: template omitted, post-expand include size too large -->";
3345 $noparse = true;
3346 $noargs = true;
3347 }
3348
3349 if (!$found) {
3350 wfProfileOut($fname);
3351 return $piece['text'];
3352 } else {
3353 wfProfileIn(__METHOD__ . '-placeholders');
3354 if ($isHTML) {
3355 # Replace raw HTML by a placeholder
3356 # Add a blank line preceding, to prevent it from mucking up
3357 # immediately preceding headings
3358 $text = "\n\n" . $this->insertStripItem($text, $this->mStripState);
3359 } else {
3360 # replace ==section headers==
3361 # XXX this needs to go away once we have a better parser.
3362 if (!$this->ot['wiki'] && !$this->ot['pre'] && $replaceHeadings) {
3363 if (!is_null($title)) {
3364 $encodedname = base64_encode($title->getPrefixedDBkey());
3365 } else {
3366 $encodedname = base64_encode("");
3367 }
// NOTE(review): listing line 3372 (the fourth preg_split argument) is missing. The loop
// below reads $m[$i + 1] as the heading between two text chunks, which only works when
// delimiters are captured - presumably PREG_SPLIT_DELIM_CAPTURE; restore from upstream.
3368 $m = preg_split(
3369 '/(^={1,6}.*?={1,6}\s*?$)/m',
3370 $text,
3371 -1,
3373 );
3374 $text = '';
3375 $nsec = $headingOffset;
// Even indexes hold plain text, odd indexes the heading lines. Each heading gets a
// marker comment carrying the base64-encoded template name and section number.
3376 for ($i = 0; $i < count($m); $i += 2) {
3377 $text .= $m[$i];
3378 if (!isset($m[$i + 1]) || $m[$i + 1] == "") {
3379 continue;
3380 }
3381 $hl = $m[$i + 1];
3382 if (strstr($hl, "<!--MWTEMPLATESECTION")) {
3383 $text .= $hl;
3384 continue;
3385 }
3386 $m2 = array();
3387 preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
3388 $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
3389 . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];
3390
3391 $nsec++;
3392 }
3393 }
3394 }
3395 wfProfileOut(__METHOD__ . '-placeholders');
3396 }
3397
3398 # Prune lower levels off the recursion check path
// NOTE(review): this repeats the assignment made before the post-expand size check.
3399 $this->mTemplatePath = $lastPathLevel;
3400
// NOTE(review): the !$found case already returned above, so only the else branch can run here.
3401 if (!$found) {
3402 wfProfileOut($fname);
3403 return $piece['text'];
3404 } else {
3405 wfProfileOut($fname);
3406 return $text;
3407 }
3408 }
3409
/**
 * Fetch the raw text of a template page.
 *
 * Each title that is looked at is registered with the parser output through
 * addTemplate(). The text comes from Revision::newFromTitle() when that
 * returns a revision. Otherwise, for titles in NS_MEDIAWIKI, it comes from
 * the interface message named after the title.
 *
 * NOTE(review): the statement after the "Redirect?" comment (listing line
 * 3437) is missing. As shown, $title never changes inside the loop, so a
 * second iteration would just repeat the first; the redirect target
 * resolution was presumably dropped from this listing.
 *
 * @param Title $title Page to load
 * @return string|false Page text, or false when no text could be found
 */
3413 public function fetchTemplate($title)
3414 {
3415 $text = false;
3416 // Loop to fetch the article, with up to 1 redirect
3417 for ($i = 0; $i < 2 && is_object($title); $i++) {
3418 $rev = Revision::newFromTitle($title);
3419 $this->mOutput->addTemplate($title, $title->getArticleID());
3420 if ($rev) {
3421 $text = $rev->getText();
3422 } elseif ($title->getNamespace() == NS_MEDIAWIKI) {
// No revision exists: fall back to the message whose key is the title text with a lower-cased first letter.
3423 global $wgLang;
3424 $message = $wgLang->lcfirst($title->getText());
3425 $text = wfMsgForContentNoTrans($message);
3426 if (wfEmptyMsg($message, $text)) {
3427 $text = false;
3428 break;
3429 }
3430 } else {
3431 break;
3432 }
3433 if ($text === false) {
3434 break;
3435 }
3436 // Redirect?
3438 }
3439 return $text;
3440 }
3441
/**
 * Transclude the result of an action from another wiki.
 *
 * @param Title  $title  Interwiki title to transclude.
 * @param string $action Value placed in the "action=" query parameter of the fetched URL.
 * @return string The fetched (possibly cached) text, or the
 *                'scarytranscludedisabled' / 'scarytranscludetoolong' message.
 */
public function interwikiTransclude($title, $action)
{
    global $wgEnableScaryTranscluding;

    if (!$wgEnableScaryTranscluding) {
        return wfMsg('scarytranscludedisabled');
    }

    $url = $title->getFullUrl("action=$action");

    // URLs longer than 255 bytes are rejected rather than fetched.
    if (strlen($url) > 255) {
        return wfMsg('scarytranscludetoolong');
    }
    return $this->fetchScaryTemplateMaybeFromCache($url);
}
3460
/**
 * Return the content behind a URL, served from the 'transcache' table while
 * the cached row is younger than $wgTranscludeCacheExpiry, otherwise fetched
 * with Http::get() and written back to the table.
 *
 * @param string $url URL to fetch.
 * @return string The content, or the 'scarytranscludefailed' message when the fetch yields nothing.
 */
public function fetchScaryTemplateMaybeFromCache($url)
{
    global $wgTranscludeCacheExpiry;

    $cached = wfGetDB(DB_SLAVE)->selectRow(
        'transcache',
        array('tc_time', 'tc_contents'),
        array('tc_url' => $url)
    );
    if ($cached) {
        $fetchedAt = $cached->tc_time;
        $cachedText = $cached->tc_contents;
        // A row with an empty timestamp is treated as expired.
        if ($fetchedAt && time() < $fetchedAt + $wgTranscludeCacheExpiry) {
            return $cachedText;
        }
    }

    $fresh = Http::get($url);
    if (!$fresh) {
        return wfMsg('scarytranscludefailed', $url);
    }

    // Store (or overwrite) the cache row, keyed on tc_url.
    $row = array(
        'tc_url' => $url,
        'tc_time' => time(),
        'tc_contents' => $fresh);
    wfGetDB(DB_MASTER)->replace('transcache', array('tc_url'), $row);

    return $fresh;
}
3490
3491
/**
 * Resolve a triple-brace argument against the innermost entry of the argument stack.
 *
 * @param array $matches Uses the keys 'title' (argument name), 'text'
 *                       (the original source text) and 'parts' (default values).
 * @return string The argument value; else the first default when rendering
 *                HTML or preprocessing; else the original text. When the
 *                include-size limit would be exceeded, the original text
 *                followed by a warning comment.
 */
public function argSubstitution($matches)
{
    $name = trim($matches['title']);
    $frameArgs = end($this->mArgStack);
    $defaultsAllowed = $this->mOutputType == OT_HTML || $this->mOutputType == OT_PREPROCESS;

    if (array_key_exists($name, $frameArgs)) {
        $text = $frameArgs[$name];
    } elseif ($defaultsAllowed && null != $matches['parts'] && count($matches['parts']) > 0) {
        $text = $matches['parts'][0];
    } else {
        $text = $matches['text'];
    }

    $withinLimit = $this->incrementIncludeSize('arg', strlen($text));
    if (!$withinLimit) {
        $text = $matches['text'] .
            '<!-- WARNING: argument omitted, expansion size too large -->';
    }

    return $text;
}
3515
/**
 * Add to one of the include-size counters unless that would pass the limit.
 *
 * @param string $type Key into $this->mIncludeSizes.
 * @param int    $size Amount to add.
 * @return bool False (counter untouched) when the new total would exceed
 *              the options' maximum include size, true otherwise.
 */
public function incrementIncludeSize($type, $size)
{
    $limit = $this->mOptions->getMaxIncludeSize();
    if ($this->mIncludeSizes[$type] + $size > $limit) {
        return false;
    }

    $this->mIncludeSizes[$type] += $size;
    return true;
}
3532
/**
 * Run the 'nogallery' magic word (__NOGALLERY__) over the text and store
 * the result of matchAndRemove() in the output's mNoGallery flag.
 *
 * @param string $text Text to scan, passed by reference.
 */
public function stripNoGallery(&$text)
{
    $found = MagicWord::get('nogallery')->matchAndRemove($text);
    $this->mOutput->mNoGallery = $found;
}
3543
/**
 * Handle the __NOTOC__ and __TOC__ magic words.
 *
 * A 'notoc' match switches the table of contents off. A 'toc' match switches
 * it back on, forces its position, turns the first occurrence into the
 * <!--MWTOC--> placeholder and blanks any further occurrences.
 *
 * @param string $text Text to process.
 * @return string Text after the magic words have been handled.
 */
public function stripToc($text)
{
    if (MagicWord::get('notoc')->matchAndRemove($text)) {
        $this->mShowToc = false;
    }

    $tocWord = MagicWord::get('toc');
    if (!$tocWord->match($text)) {
        return $text;
    }

    $this->mShowToc = true;
    $this->mForceTocPosition = true;

    // Placeholder for the first occurrence; it is filled in with the TOC later.
    $text = $tocWord->replace('<!--MWTOC-->', $text, 1);

    // Only the first one is kept.
    return $tocWord->replace('', $text);
}
3569
/**
 * Rebuild every <h1>..<h6> heading found in the rendered text and, when
 * requested, assemble the table of contents.
 *
 * For each heading this computes the optional section numbering, an anchor
 * derived from the heading text (duplicates get a "_N" suffix), an optional
 * section edit link, and the final markup via the skin's makeHeadline().
 * The __NOEDITSECTION__, __NEWSECTIONLINK__ and __FORCETOC__ magic words are
 * consumed here.
 *
 * @param string $text   Rendered text containing heading elements.
 * @param bool   $isMain When true (and the TOC position is not forced) the
 *                       TOC is inserted after the first block of text.
 * @return string Text with rebuilt headings and, where applicable, the TOC.
 */
public function formatHeadings($text, $isMain = true)
{
    global $wgMaxTocLevel, $wgContLang;

    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    if (!$this->mTitle->quickUserCan('edit')) {
        $showEditLink = 0;
    } else {
        $showEditLink = $this->mOptions->getEditSection();
    }

    # Inhibit editsection links if requested in the page
    $esw = MagicWord::get('noeditsection');
    if ($esw->matchAndRemove($text)) {
        $showEditLink = 0;
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now
    $matches = array();
    $numMatches = preg_match_all('/<H(?P<level>[1-6])(?P<attrib>.*?' . '>)(?P<header>.*?)<\/H[1-6] *>/i', $text, $matches);

    # if there are fewer than 4 headlines in the article, do not show TOC
    # unless it's been explicitly enabled.
    $enoughToc = $this->mShowToc &&
        (($numMatches >= 4) || $this->mForceTocPosition);

    # Allow user to stipulate that a page should have a "new section"
    # link added via __NEWSECTIONLINK__
    $mw = MagicWord::get('newsectionlink');
    if ($mw->matchAndRemove($text)) {
        $this->mOutput->setNewSection(true);
    }

    # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
    # override above conditions and always show TOC above first header
    $mw = MagicWord::get('forcetoc');
    if ($mw->matchAndRemove($text)) {
        $this->mShowToc = true;
        $enoughToc = true;
    }

    # Never ever show TOC if no headers
    if ($numMatches < 1) {
        $enoughToc = false;
    }

    # We need this to perform operations on the HTML
    $sk = $this->mOptions->getSkin();

    # headline counter
    $headlineCount = 0;
    $sectionCount = 0; # headlineCount excluding template sections

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toc = '';
    $full = '';
    $head = array();
    $sublevelCount = array();
    $levelCount = array();
    # Occurrences per anchor, kept apart from any index-keyed data so that a
    # heading whose escaped id is "0", "1", ... cannot collide with it
    $anchorCounts = array();
    $level = 0;
    $prevlevel = 0;
    $toclevel = 0;
    $prevtoclevel = 0;

    foreach ($matches[3] as $headline) {
        $istemplate = 0;
        $templatetitle = '';
        $templatesection = 0;
        $numbering = '';
        $mat = array();
        if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) {
            # Heading came from a template: decode its title and section index
            $istemplate = 1;
            $templatetitle = base64_decode($mat[1]);
            $templatesection = 1 + (int) base64_decode($mat[2]);
            $headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline);
        }

        if ($toclevel) {
            $prevlevel = $level;
            $prevtoclevel = $toclevel;
        }
        $level = $matches[1][$headlineCount];

        if ($doNumberHeadings || $enoughToc) {
            if ($level > $prevlevel) {
                # Increase TOC level
                $toclevel++;
                $sublevelCount[$toclevel] = 0;
                if ($toclevel < $wgMaxTocLevel) {
                    $toc .= $sk->tocIndent();
                }
            } elseif ($level < $prevlevel && $toclevel > 1) {
                # Decrease TOC level, find level to jump to

                if ($toclevel == 2 && $level <= $levelCount[1]) {
                    # Can only go down to level 1
                    $toclevel = 1;
                } else {
                    for ($i = $toclevel; $i > 0; $i--) {
                        if ($levelCount[$i] == $level) {
                            # Found last matching level
                            $toclevel = $i;
                            break;
                        } elseif ($levelCount[$i] < $level) {
                            # Found first matching level below current level
                            $toclevel = $i + 1;
                            break;
                        }
                    }
                }
                if ($toclevel < $wgMaxTocLevel) {
                    $toc .= $sk->tocUnindent($prevtoclevel - $toclevel);
                }
            } else {
                # No change in level, end TOC line
                if ($toclevel < $wgMaxTocLevel) {
                    $toc .= $sk->tocLineEnd();
                }
            }

            $levelCount[$toclevel] = $level;

            # count number of headlines for each level
            if (!isset($sublevelCount[$toclevel])) {
                $sublevelCount[$toclevel] = 0;
            }
            $sublevelCount[$toclevel]++;
            $dot = 0;
            for ($i = 1; $i <= $toclevel; $i++) {
                if (!empty($sublevelCount[$i])) {
                    if ($dot) {
                        $numbering .= '.';
                    }
                    $numbering .= $wgContLang->formatNum($sublevelCount[$i]);
                    $dot = 1;
                }
            }
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $canonized_headline = $this->mStripState->unstripBoth($headline);

        # Remove link placeholders by the link text.
        #     <!--LINK number-->
        # turns into
        #     link text with suffix
        $canonized_headline = preg_replace_callback(
            '/<!--LINK ([0-9]*)-->/',
            function ($hit) {
                return $this->mLinkHolders['texts'][$hit[1]];
            },
            $canonized_headline
        );
        $canonized_headline = preg_replace_callback(
            '/<!--IWLINK ([0-9]*)-->/',
            function ($hit) {
                return $this->mInterwikiLinkHolders['texts'][$hit[1]];
            },
            $canonized_headline
        );

        # strip out HTML
        $canonized_headline = preg_replace('/<.*?' . '>/', '', $canonized_headline);
        $tocline = trim($canonized_headline);
        # Save headline for section edit hint before it's escaped
        $headline_hint = trim($canonized_headline);
        $canonized_headline = Sanitizer::escapeId($tocline);

        # count how often this anchor occurred so far, to track dupes in anchors
        if (isset($anchorCounts[$canonized_headline])) {
            $anchorCounts[$canonized_headline]++;
        } else {
            $anchorCounts[$canonized_headline] = 1;
        }
        $refcount = $anchorCounts[$canonized_headline];

        # Don't number the heading if it is the only one (looks silly)
        if ($doNumberHeadings && count($matches[3]) > 1) {
            # the two are different if the line contains a link
            $headline = $numbering . ' ' . $headline;
        }

        # Create the anchor for linking from the TOC to the section
        $anchor = $canonized_headline;
        if ($refcount > 1) {
            $anchor .= '_' . $refcount;
        }
        if ($enoughToc && (!isset($wgMaxTocLevel) || $toclevel < $wgMaxTocLevel)) {
            $toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel);
        }
        # give headline the correct <h#> tag
        if ($showEditLink && (!$istemplate || $templatetitle !== "")) {
            if ($istemplate) {
                $editlink = $sk->editSectionLinkForOther($templatetitle, $templatesection);
            } else {
                $editlink = $sk->editSectionLink($this->mTitle, $sectionCount + 1, $headline_hint);
            }
        } else {
            $editlink = '';
        }
        $head[$headlineCount] = $sk->makeHeadline($level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink);

        $headlineCount++;
        if (!$istemplate) {
            $sectionCount++;
        }
    }

    if ($enoughToc) {
        if ($toclevel < $wgMaxTocLevel) {
            $toc .= $sk->tocUnindent($toclevel - 1);
        }
        $toc = $sk->tocList($toc);
    }

    # split up and insert constructed headlines

    $blocks = preg_split('/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text);
    $i = 0;

    foreach ($blocks as $block) {
        # No [edit] link is emitted for the top block of text: it used to be
        # added here but was disabled because it broke block formatting
        # (for example, a bullet point in the top line).
        $full .= $block;
        if ($enoughToc && !$i && $isMain && !$this->mForceTocPosition) {
            # Top anchor now in skin
            $full = $full . $toc;
        }

        if (!empty($head[$i])) {
            $full .= $head[$i];
        }
        $i++;
    }
    if ($this->mForceTocPosition) {
        return str_replace('<!--MWTOC-->', $toc, $full);
    } else {
        return $full;
    }
}
3828
/**
 * Transform wikitext before it is saved.
 *
 * Normalises CRLF line endings to LF, then runs the text through strip(),
 * pstPass2() and a final unstrip, with the output type set to OT_WIKI.
 *
 * @param string        $text       Wikitext to transform.
 * @param Title         $title      Page title, stored by reference as $this->mTitle.
 * @param User          $user       User passed on to pstPass2().
 * @param ParserOptions $options    Options stored as $this->mOptions.
 * @param bool          $clearState Whether to call clearState() first.
 * @return string The transformed wikitext.
 */
public function preSaveTransform($text, &$title, $user, $options, $clearState = true)
{
    $this->mOptions = $options;
    $this->mTitle = &$title;
    $this->setOutputType(OT_WIKI);

    if ($clearState) {
        $this->clearState();
    }

    $stripState = new StripState();
    $text = str_replace("\r\n", "\n", $text);
    $text = $this->strip($text, $stripState, true, array('gallery'));
    $text = $this->pstPass2($text, $stripState, $user);

    return $stripState->unstripBoth($text);
}
3861
/**
 * Second pass of the pre-save transform: variable replacement, signature
 * expansion and the "pipe trick" for context links.
 *
 * @param string     $text       Wikitext that already went through strip().
 * @param StripState $stripState Strip state, handed to strip() by reference.
 * @param User       $user       User whose signature replaces the tilde markers.
 * @return string Transformed wikitext with trailing whitespace removed.
 */
public function pstPass2($text, &$stripState, $user)
{
    global $wgContLang, $wgLocaltimezone, $wgLegalTitleChars;

    /* The timestamp ends up as hardcoded wikitext in the database, so it is
     * formatted with $wgContLang: everyone gets the same default format
     * instead of the one chosen in each user's preferences.
     */
    $overrideTz = isset($wgLocaltimezone);
    if ($overrideTz) {
        $oldtz = getenv('TZ');
        putenv('TZ=' . $wgLocaltimezone);
    }
    $timestamp = $wgContLang->timeanddate(date('YmdHis'), false, false) .
        ' (' . date('T') . ')';
    if ($overrideTz) {
        putenv('TZ=' . $oldtz);
    }

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables($text);

    # Strip out <nowiki> etc. added via replaceVariables
    $text = $this->strip($text, $stripState, false, array('gallery'));

    # Signatures
    $sigText = $this->getUserSig($user);
    $signatureMap = array(
        '~~~~~' => $timestamp,
        '~~~~' => $sigText . ' ' . $timestamp,
        '~~~' => $sigText
    );
    $text = strtr($text, $signatureMap);

    # Context links: [[|name]] and [[name (context)|]]
    $tc = "[$wgLegalTitleChars]";
    $nc = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!

    $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]]
    $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]]
    $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]]

    # $p1 goes first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
    $text = preg_replace($p1, '[[\\1\\2\\3|\\2]]', $text);
    $text = preg_replace($p3, '[[\\1\\2\\3\\4|\\2]]', $text);

    # Does the current title carry a context, i.e. "(context)" or ", context"
    # and/or a namespace prefix? $m keeps the groups of whichever pattern matched.
    $t = $this->mTitle->getText();
    $m = array();
    $hasContext = preg_match("/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m)
        || (preg_match("/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m) && '' != "$m[1]$m[2]");

    if ($hasContext) {
        $text = preg_replace($p2, "[[$m[1]\\1$m[2]|\\1]]", $text);
    } else {
        # if there's no context, don't bother duplicating the title
        $text = preg_replace($p2, '[[\\1]]', $text);
    }

    # Trim trailing whitespace
    return rtrim($text);
}
3930
/**
 * Build the wikitext signature for a user.
 *
 * With the 'fancysig' option the nickname is used as raw signature markup,
 * provided it validates; otherwise the signature is a link to the user page
 * labelled with the escaped nickname.
 *
 * @param User $user User to sign for.
 * @return string Signature wikitext.
 */
public function getUserSig(&$user)
{
    $username = $user->getName();
    $nickname = $user->getOption('nickname');
    if ($nickname === '') {
        $nickname = $username;
    }

    if ($user->getBoolOption('fancysig') !== false) {
        # Sig. might contain markup; validate this
        if ($this->validateSig($nickname) !== false) {
            # Validated; clean up (if needed) and return it
            return $this->cleanSig($nickname, true);
        }

        # Failed to validate; fall back to the default
        $nickname = $username;
        wfDebug("Parser::getUserSig: $username has bad XML tags in signature.\n");
    }

    // Keep the nickname from producing a signature inside the signature
    $nickname = $this->cleanSigInSig($nickname);

    # Still here, so make it a link to the user page
    $userpage = $user->getUserPage();
    $target = $userpage->getPrefixedText();

    return '[[' . $target . '|' . wfEscapeWikiText($nickname) . ']]';
}
3964
/**
 * Check a custom signature with wfIsWellFormedXmlFragment().
 *
 * @param string $text Signature text.
 * @return string|false The unchanged text when the check passes, false otherwise.
 */
public function validateSig($text)
{
    if (wfIsWellFormedXmlFragment($text)) {
        return $text;
    }
    return false;
}
3975
/**
 * Clean up signature text.
 *
 * Every "{{" not already followed by the 'subst' magic word is rewritten to
 * "{{" plus the first 'subst' synonym, nested signature markers are removed,
 * and variables are replaced. The parser state is reset before and after.
 *
 * @param string $text    Signature text.
 * @param bool   $parsing Selects OT_WIKI (true) or OT_MSG (false) as output type.
 * @return string Cleaned signature text.
 */
public function cleanSig($text, $parsing = false)
{
    global $wgTitle;

    $outputType = $parsing ? OT_WIKI : OT_MSG;
    $this->startExternalParse($wgTitle, new ParserOptions(), $outputType);

    $substWord = MagicWord::get('subst');
    $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
    $substText = '{{' . $substWord->getSynonym(0);

    $text = preg_replace($substRegex, $substText, $text);
    $text = $this->replaceVariables($this->cleanSigInSig($text));

    $this->clearState();
    return $text;
}
4002
/**
 * Remove every run of three to five tildes (signature markers) from the text.
 *
 * @param string $text Text to clean.
 * @return string Text without signature markers.
 */
public function cleanSigInSig($text)
{
    return preg_replace('/~{3,5}/', '', $text);
}
4013
/**
 * Prepare the parser for use from outside: set title, options and output
 * type, and optionally reset the state.
 *
 * @param Title         $title      Stored by reference as $this->mTitle.
 * @param ParserOptions $options    Stored as $this->mOptions.
 * @param int           $outputType Passed to setOutputType().
 * @param bool          $clearState Whether to call clearState() afterwards.
 */
public function startExternalParse(&$title, $options, $outputType, $clearState = true)
{
    $this->mTitle = &$title;
    $this->mOptions = $options;
    $this->setOutputType($outputType);

    if (!$clearState) {
        return;
    }
    $this->clearState();
}
4028
/**
 * Replace variables in an interface message (output type OT_MSG).
 *
 * A static flag guards against re-entrant calls: while one transformation
 * is running, nested calls return their input untouched.
 *
 * @param string        $text    Message text.
 * @param ParserOptions $options Options stored as $this->mOptions.
 * @return string The message with variables replaced (or the unchanged
 *                input for a nested call).
 */
public function transformMsg($text, $options)
{
    global $wgTitle;
    static $executing = false;

    $fname = "Parser::transformMsg";

    # Guard against infinite recursion
    if ($executing) {
        return $text;
    }
    $executing = true;

    wfProfileIn($fname);

    try {
        if ($wgTitle && !($wgTitle instanceof FakeTitle)) {
            $this->mTitle = $wgTitle;
        } else {
            $this->mTitle = Title::newFromText('msg');
        }
        $this->mOptions = $options;
        $this->setOutputType(OT_MSG);
        $this->clearState();
        $text = $this->replaceVariables($text);
    } finally {
        # Always release the guard and close the profiling section. If an
        # exception left $executing set, every later call would return its
        # message untransformed.
        $executing = false;
        wfProfileOut($fname);
    }

    return $text;
}
4066
/**
 * Register a callback for a tag, keyed by the lower-cased tag name.
 *
 * @param string $tag      Tag name (case-insensitive).
 * @param mixed  $callback Callback to store.
 * @return mixed The callback previously stored for the tag, or null.
 */
public function setHook($tag, $callback)
{
    $key = strtolower($tag);

    $previous = null;
    if (isset($this->mTagHooks[$key])) {
        $previous = $this->mTagHooks[$key];
    }
    $this->mTagHooks[$key] = $callback;

    return $previous;
}
4090
/**
 * Register a parser-function callback and index all synonyms of its magic word.
 *
 * Synonyms are lower-cased when the magic word is case-insensitive, prefixed
 * with '#' unless SFH_NO_HASH is set in $flags, and stripped of one trailing
 * colon before being stored in $this->mFunctionSynonyms.
 *
 * @param string $id       Magic word identifier.
 * @param mixed  $callback Callback to store.
 * @param int    $flags    Bit field; SFH_NO_HASH suppresses the leading hash.
 * @return mixed The callback previously stored for $id, or null.
 * @throws MWException When MagicWord::get() yields nothing for $id.
 */
public function setFunctionHook($id, $callback, $flags = 0)
{
    $oldVal = null;
    if (isset($this->mFunctionHooks[$id])) {
        $oldVal = $this->mFunctionHooks[$id];
    }
    $this->mFunctionHooks[$id] = $callback;

    # Add to function cache
    $mw = MagicWord::get($id);
    if (!$mw) {
        throw new MWException('Parser::setFunctionHook() expecting a magic word identifier.');
    }

    $synonyms = $mw->getSynonyms();
    $sensitive = intval($mw->isCaseSensitive());
    $withHash = !($flags & SFH_NO_HASH);

    foreach ($synonyms as $synonym) {
        $name = $sensitive ? $synonym : strtolower($synonym);
        if ($withHash) {
            $name = '#' . $name;
        }
        # Remove trailing colon
        if (substr($name, -1, 1) == ':') {
            $name = substr($name, 0, -1);
        }
        $this->mFunctionSynonyms[$sensitive][$name] = $id;
    }

    return $oldVal;
}
4146
/**
 * List the identifiers that have a function hook registered.
 *
 * @return array Keys of $this->mFunctionHooks.
 */
public function getFunctionHooks()
{
    $registered = $this->mFunctionHooks;
    return array_keys($registered);
}
4156
/**
 * Replace <!--LINK n--> and <!--IWLINK n--> placeholders with real links.
 *
 * Internal links are classified as broken, known or stub using the link
 * cache, one batched page query and, for languages with variants, a second
 * query on the variant titles. The HTML for each placeholder comes from the
 * skin.
 *
 * @param string $text    Text containing placeholders; modified in place.
 * @param int    $options Bit field; RLH_FOR_UPDATE appends FOR UPDATE to the queries.
 * @return array Map of prefixed DB key => colour (0 = broken, 1 = known, 2 = stub).
 */
public function replaceLinkHolders(&$text, $options = 0)
{
    global $wgUser;
    global $wgContLang;

    $fname = 'Parser::replaceLinkHolders';
    wfProfileIn($fname);

    $pdbks = array();
    $colours = array();
    $sk = $this->mOptions->getSkin();
    $linkCache = LinkCache::singleton();

    if (!empty($this->mLinkHolders['namespaces'])) {
        wfProfileIn($fname . '-check');
        $dbr = wfGetDB(DB_SLAVE);
        $page = $dbr->tableName('page');
        $threshold = $wgUser->getOption('stubthreshold');

        # Sort by namespace
        asort($this->mLinkHolders['namespaces']);

        # Generate query
        $query = false;
        $current = null;
        foreach ($this->mLinkHolders['namespaces'] as $key => $ns) {
            # Make title object
            $title = $this->mLinkHolders['titles'][$key];

            # Skip invalid entries.
            # Result will be ugly, but prevents crash.
            if (is_null($title)) {
                continue;
            }
            $pdbk = $pdbks[$key] = $title->getPrefixedDBkey();

            # Check if it's a static known link, e.g. interwiki
            if ($title->isAlwaysKnown()) {
                $colours[$pdbk] = 1;
            } elseif (($id = $linkCache->getGoodLinkID($pdbk)) != 0) {
                $colours[$pdbk] = 1;
                $this->mOutput->addLink($title, $id);
            } elseif ($linkCache->isBadLink($pdbk)) {
                $colours[$pdbk] = 0;
            } else {
                # Not in the link cache, add it to the query
                if (!isset($current)) {
                    $current = $ns;
                    $query = "SELECT page_id, page_namespace, page_title";
                    if ($threshold > 0) {
                        $query .= ', page_len, page_is_redirect';
                    }
                    $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN(";
                } elseif ($current != $ns) {
                    $current = $ns;
                    $query .= ")) OR (page_namespace=$ns AND page_title IN(";
                } else {
                    $query .= ', ';
                }

                $query .= $dbr->addQuotes($this->mLinkHolders['dbkeys'][$key]);
            }
        }
        if ($query) {
            $query .= '))';
            if ($options & RLH_FOR_UPDATE) {
                $query .= ' FOR UPDATE';
            }

            $res = $dbr->query($query, $fname);

            # Fetch data and form into an associative array
            # non-existent = broken
            # 1 = known
            # 2 = stub
            while ($s = $dbr->fetchObject($res)) {
                $title = Title::makeTitle($s->page_namespace, $s->page_title);
                $pdbk = $title->getPrefixedDBkey();
                $linkCache->addGoodLinkObj($s->page_id, $title);
                $this->mOutput->addLink($title, $s->page_id);

                if ($threshold > 0) {
                    $size = $s->page_len;
                    if ($s->page_is_redirect || $s->page_namespace != 0 || $size >= $threshold) {
                        $colours[$pdbk] = 1;
                    } else {
                        $colours[$pdbk] = 2;
                    }
                } else {
                    $colours[$pdbk] = 1;
                }
            }
        }
        wfProfileOut($fname . '-check');

        # Do a second query for different language variants of links and categories
        if ($wgContLang->hasVariants()) {
            $linkBatch = new LinkBatch();
            $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
            $categoryMap = array(); // maps $category_variant => $category (dbkeys)
            $varCategories = array(); // category replacements oldDBkey => newDBkey

            $categories = $this->mOutput->getCategoryLinks();

            // Add variants of links to link batch
            foreach ($this->mLinkHolders['namespaces'] as $key => $ns) {
                $title = $this->mLinkHolders['titles'][$key];
                if (is_null($title)) {
                    continue;
                }

                $pdbk = $title->getPrefixedDBkey();
                $titleText = $title->getText();

                // generate all variants of the link title text
                $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText);

                // if link was not found (in first query), add all variants to query
                if (!isset($colours[$pdbk])) {
                    foreach ($allTextVariants as $textVariant) {
                        if ($textVariant != $titleText) {
                            $variantTitle = Title::makeTitle($ns, $textVariant);
                            if (is_null($variantTitle)) {
                                continue;
                            }
                            $linkBatch->addObj($variantTitle);
                            $variantMap[$variantTitle->getPrefixedDBkey()][] = $key;
                        }
                    }
                }
            }

            // process categories, check if a category exists in some variant
            foreach ($categories as $category) {
                $variants = $wgContLang->convertLinkToAllVariants($category);
                foreach ($variants as $variant) {
                    if ($variant != $category) {
                        $variantTitle = Title::newFromDBkey(Title::makeName(NS_CATEGORY, $variant));
                        if (is_null($variantTitle)) {
                            continue;
                        }
                        $linkBatch->addObj($variantTitle);
                        $categoryMap[$variant] = $category;
                    }
                }
            }


            if (!$linkBatch->isEmpty()) {
                // construct query
                $titleClause = $linkBatch->constructSet('page', $dbr);

                $variantQuery = "SELECT page_id, page_namespace, page_title";
                if ($threshold > 0) {
                    $variantQuery .= ', page_len, page_is_redirect';
                }

                $variantQuery .= " FROM $page WHERE $titleClause";
                if ($options & RLH_FOR_UPDATE) {
                    $variantQuery .= ' FOR UPDATE';
                }

                $varRes = $dbr->query($variantQuery, $fname);

                // for each found variants, figure out link holders and replace
                while ($s = $dbr->fetchObject($varRes)) {
                    $variantTitle = Title::makeTitle($s->page_namespace, $s->page_title);
                    $varPdbk = $variantTitle->getPrefixedDBkey();
                    $vardbk = $variantTitle->getDBkey();

                    $holderKeys = array();
                    if (isset($variantMap[$varPdbk])) {
                        $holderKeys = $variantMap[$varPdbk];
                        $linkCache->addGoodLinkObj($s->page_id, $variantTitle);
                        $this->mOutput->addLink($variantTitle, $s->page_id);
                    }

                    // loop over link holders
                    foreach ($holderKeys as $key) {
                        $title = $this->mLinkHolders['titles'][$key];
                        if (is_null($title)) {
                            continue;
                        }

                        $pdbk = $title->getPrefixedDBkey();

                        if (!isset($colours[$pdbk])) {
                            // found link in some of the variants, replace the link holder data
                            $this->mLinkHolders['titles'][$key] = $variantTitle;
                            $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey();

                            // set pdbk and colour
                            $pdbks[$key] = $varPdbk;
                            if ($threshold > 0) {
                                $size = $s->page_len;
                                if ($s->page_is_redirect || $s->page_namespace != 0 || $size >= $threshold) {
                                    $colours[$varPdbk] = 1;
                                } else {
                                    $colours[$varPdbk] = 2;
                                }
                            } else {
                                $colours[$varPdbk] = 1;
                            }
                        }
                    }

                    // check if the object is a variant of a category
                    if (isset($categoryMap[$vardbk])) {
                        $oldkey = $categoryMap[$vardbk];
                        if ($oldkey != $vardbk) {
                            $varCategories[$oldkey] = $vardbk;
                        }
                    }
                }

                // rebuild the categories in original order (if there are replacements)
                if (count($varCategories) > 0) {
                    $newCats = array();
                    $originalCats = $this->mOutput->getCategories();
                    foreach ($originalCats as $cat => $sortkey) {
                        // make the replacement
                        if (array_key_exists($cat, $varCategories)) {
                            $newCats[$varCategories[$cat]] = $sortkey;
                        } else {
                            $newCats[$cat] = $sortkey;
                        }
                    }
                    $this->mOutput->setCategoryLinks($newCats);
                }
            }
        }

        # Construct search and replace arrays
        wfProfileIn($fname . '-construct');
        $replacePairs = array();
        foreach ($this->mLinkHolders['namespaces'] as $key => $ns) {
            $title = $this->mLinkHolders['titles'][$key];

            # Invalid entries were skipped above, so they have no $pdbks entry
            # and no title to hand to the link cache. Skip them here as well;
            # no replacement pair is registered for their placeholder.
            # NOTE(review): what then happens to the placeholder depends on
            # HashtableReplacer's handling of unknown keys - confirm.
            if (is_null($title)) {
                continue;
            }

            $pdbk = $pdbks[$key];
            $searchkey = "<!--LINK $key-->";
            if (empty($colours[$pdbk])) {
                $linkCache->addBadLinkObj($title);
                $colours[$pdbk] = 0;
                $this->mOutput->addLink($title, 0);
                $replacePairs[$searchkey] = $sk->makeBrokenLinkObj(
                    $title,
                    $this->mLinkHolders['texts'][$key],
                    $this->mLinkHolders['queries'][$key]
                );
            } elseif ($colours[$pdbk] == 1) {
                $replacePairs[$searchkey] = $sk->makeKnownLinkObj(
                    $title,
                    $this->mLinkHolders['texts'][$key],
                    $this->mLinkHolders['queries'][$key]
                );
            } elseif ($colours[$pdbk] == 2) {
                $replacePairs[$searchkey] = $sk->makeStubLinkObj(
                    $title,
                    $this->mLinkHolders['texts'][$key],
                    $this->mLinkHolders['queries'][$key]
                );
            }
        }
        $replacer = new HashtableReplacer($replacePairs, 1);
        wfProfileOut($fname . '-construct');

        # Do the thing
        wfProfileIn($fname . '-replace');
        $text = preg_replace_callback(
            '/(<!--LINK .*?-->)/',
            $replacer->cb(),
            $text
        );

        wfProfileOut($fname . '-replace');
    }

    # Now process interwiki link holders
    # This is quite a bit simpler than internal links
    if (!empty($this->mInterwikiLinkHolders['texts'])) {
        wfProfileIn($fname . '-interwiki');
        # Make interwiki link HTML
        $replacePairs = array();
        foreach ($this->mInterwikiLinkHolders['texts'] as $key => $link) {
            $title = $this->mInterwikiLinkHolders['titles'][$key];
            $replacePairs[$key] = $sk->makeLinkObj($title, $link);
        }
        $replacer = new HashtableReplacer($replacePairs, 1);

        $text = preg_replace_callback(
            '/<!--IWLINK (.*?)-->/',
            $replacer->cb(),
            $text
        );
        wfProfileOut($fname . '-interwiki');
    }

    wfProfileOut($fname);
    return $colours;
}
4465
/**
 * Replace <!--LINK n--> and <!--IWLINK n--> placeholders with their plain
 * link text, using replaceLinkHoldersTextCallback() for each match.
 *
 * @param string $text Text containing placeholders.
 * @return string Text with known placeholders replaced by their text.
 */
public function replaceLinkHoldersText($text)
{
    $fname = 'Parser::replaceLinkHoldersText';
    wfProfileIn($fname);

    $callback = array($this, 'replaceLinkHoldersTextCallback');
    $replaced = preg_replace_callback('/<!--(LINK|IWLINK) (.*?)-->/', $callback, $text);

    wfProfileOut($fname);
    return $replaced;
}
4486
/**
 * preg_replace_callback() handler used by replaceLinkHoldersText().
 *
 * @param array $matches [0] the whole placeholder, [1] 'LINK' or 'IWLINK', [2] the holder key.
 * @return string The stored link text, or the untouched placeholder when
 *                no text is stored for that key.
 */
public function replaceLinkHoldersTextCallback($matches)
{
    $type = $matches[1];
    $key = $matches[2];

    switch ($type) {
        case 'LINK':
            if (isset($this->mLinkHolders['texts'][$key])) {
                return $this->mLinkHolders['texts'][$key];
            }
            break;
        case 'IWLINK':
            if (isset($this->mInterwikiLinkHolders['texts'][$key])) {
                return $this->mInterwikiLinkHolders['texts'][$key];
            }
            break;
    }

    return $matches[0];
}
4507
/**
 * Render a <pre> element.
 *
 * <nowiki>...</nowiki> wrappers inside the content are reduced to their
 * inner text (backwards-compatibility hack), the attributes are validated
 * for a 'pre' tag, and the content goes through Xml::escapeTagsOnly().
 *
 * @param string $text    Content of the element.
 * @param array  $attribs Attributes given on the tag.
 * @return string HTML of the <pre> element.
 */
public function renderPreTag($text, $attribs)
{
    // Backwards-compatibility hack
    $inner = StringUtils::delimiterReplace('<nowiki>', '</nowiki>', '$1', $text, 'i');

    $safeAttribs = Sanitizer::validateTagAttributes($attribs, 'pre');
    $openTag = wfOpenElement('pre', $safeAttribs);

    return $openTag . Xml::escapeTagsOnly($inner) . '</pre>';
}
4521
/**
 * Render a gallery from lines of the form "Image:someimage.jpg|This is some image".
 *
 * Supported parameters: 'caption', 'perrow', 'widths' and 'heights'. Each
 * label is parsed with the current title and options without clearing the
 * parser state.
 *
 * @param string $text   Gallery body, one image per line.
 * @param array  $params Parameters of the gallery tag.
 * @return string HTML produced by the ImageGallery object.
 */
public function renderImageGallery($text, $params)
{
    $ig = new ImageGallery();
    $ig->setContextTitle($this->mTitle);
    $ig->setShowBytes(false);
    $ig->setShowFilename(false);
    $ig->setParsing();
    $ig->useSkin($this->mOptions->getSkin());

    if (isset($params['caption'])) {
        $caption = htmlspecialchars($params['caption']);
        $ig->setCaptionHtml($this->replaceInternalLinks($caption));
    }

    // Optional layout parameters and the setter each one feeds.
    $setters = array(
        'perrow' => 'setPerRow',
        'widths' => 'setWidths',
        'heights' => 'setHeights',
    );
    foreach ($setters as $param => $setter) {
        if (isset($params[$param])) {
            $ig->$setter($params[$param]);
        }
    }

    foreach (explode("\n", $text) as $line) {
        # match lines like these:
        # Image:someimage.jpg|This is some image
        $matches = array();
        preg_match("/^([^|]+)(\\|(.*))?$/", $line, $matches);
        # Skip empty lines
        if (count($matches) == 0) {
            continue;
        }

        $nt = Title::newFromText($matches[1]);
        if (is_null($nt)) {
            # Bogus title. Ignore these so we don't bomb out later.
            continue;
        }

        $label = isset($matches[3]) ? $matches[3] : '';

        $pout = $this->parse(
            $label,
            $this->mTitle,
            $this->mOptions,
            false, // Strip whitespace...?
            false // Don't clear state!
        );

        $ig->add(new Image($nt), $pout->getText());

        # Only add real images (bug #5586)
        if ($nt->getNamespace() == NS_IMAGE) {
            $this->mOutput->addImage($nt->getDBkey());
        }
    }

    return $ig->toHTML();
}
4596
/**
 * Parse the option string of an image link and build the image HTML.
 *
 * The options are separated by '|'. Recognised options:
 *  - thumbnail / manual thumbnail: render as thumbnail
 *  - left, right, center, none: horizontal alignment
 *  - baseline, sub, super, top, text-top, middle, bottom, text-bottom:
 *    vertical alignment
 *  - width ("123" or "123x456"): only the first width option is honoured
 *  - framed
 *  - page: page of a multipage document
 * Any part that matches none of these becomes the caption (the last one wins).
 *
 * @todo Let the MediaHandler specify its transform parameters.
 *
 * @param Title  $nt      Title of the image.
 * @param string $options Raw option text.
 * @return string HTML returned by the skin's makeImageLinkObj().
 */
public function makeImage($nt, $options)
{
    $part = array_map('trim', explode('|', $options));

    $horizontal = array('left', 'right', 'center', 'none');
    $alignments = array( 'left', 'right', 'center', 'none', 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom' );

    $mwAlign = array();
    foreach ($alignments as $alignment) {
        $mwAlign[$alignment] = MagicWord::get('img_' . $alignment);
    }
    $mwThumb = MagicWord::get('img_thumbnail');
    $mwManualThumb = MagicWord::get('img_manualthumb');
    $mwWidth = MagicWord::get('img_width');
    $mwFramed = MagicWord::get('img_framed');
    $mwPage = MagicWord::get('img_page');

    $caption = '';
    $params = array();
    $framed = false;
    $thumb = false;
    $manual_thumb = '';
    $align = '';
    $valign = '';
    $sk = $this->mOptions->getSkin();

    foreach ($part as $val) {
        if (!is_null($mwThumb->matchVariableStartToEnd($val))) {
            $thumb = true;
            continue;
        }

        $match = $mwManualThumb->matchVariableStartToEnd($val);
        if (!is_null($match)) {
            # use manually specified thumbnail
            $thumb = true;
            $manual_thumb = $match;
            continue;
        }

        foreach ($alignments as $alignment) {
            if (is_null($mwAlign[$alignment]->matchVariableStartToEnd($val))) {
                continue;
            }
            if (in_array($alignment, $horizontal, true)) {
                $align = $alignment;
            } else {
                $valign = $alignment;
            }
            # Option consumed, go on with the next part
            continue 2;
        }

        $match = $mwPage->matchVariableStartToEnd($val);
        if (!is_null($match)) {
            # Select a page in a multipage document
            $params['page'] = $match;
            continue;
        }

        if (!isset($params['width'])) {
            $match = $mwWidth->matchVariableStartToEnd($val);
            if (!is_null($match)) {
                wfDebug("img_width match: $match\n");
                # $match is the image width in pixels
                $m = array();
                if (preg_match('/^([0-9]*)x([0-9]*)$/', $match, $m)) {
                    $params['width'] = intval($m[1]);
                    $params['height'] = intval($m[2]);
                } else {
                    $params['width'] = intval($match);
                }
                continue;
            }
        }

        if (!is_null($mwFramed->matchVariableStartToEnd($val))) {
            $framed = true;
        } else {
            $caption = $val;
        }
    }

    # Strip bad stuff out of the alt text
    $alt = $this->replaceLinkHoldersText($caption);

    # make sure there are no placeholders in thumbnail attributes
    # that are later expanded to html- so expand them now and
    # remove the tags
    $alt = Sanitizer::stripAllTags($this->mStripState->unstripBoth($alt));

    # Linker does the rest
    return $sk->makeImageLinkObj($nt, $caption, $alt, $align, $params, $framed, $thumb, $manual_thumb, $valign);
}
4695
/**
 * Mark the current parser output as uncacheable.
 *
 * Writes a debug message and sets the output object's mCacheTime to -1.
 */
public function disableCache()
{
    wfDebug("Parser output marked as uncacheable.\n");
    $this->mOutput->mCacheTime = -1;
}
4705
/**
 * Expand variables in the given text and then restore stripped content.
 *
 * The text is taken by reference, so the caller's variable is updated in
 * place; the same value is also returned.
 *
 * @param string $text Text to process (modified in place)
 * @param mixed  $args Passed through to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback(&$text, $args)
{
    $text = $this->replaceVariables($text, $args);
    $text = $this->mStripState->unstripBoth($text);
    return $text;
}
4720
/**
 * Combined accessor/mutator for the parser's title (mTitle).
 *
 * Delegates to wfSetVar(); presumably that assigns $x when it is non-null
 * and returns the previous value -- confirm against wfSetVar().
 *
 * @param mixed $x New value, or null
 * @return mixed Result of wfSetVar()
 */
public function Title($x = null)
{
    return wfSetVar($this->mTitle, $x);
}
/**
 * Combined accessor/mutator for the parser options (mOptions).
 *
 * Delegates to wfSetVar(); presumably that assigns $x when it is non-null
 * and returns the previous value -- confirm against wfSetVar().
 *
 * @param mixed $x New value, or null
 * @return mixed Result of wfSetVar()
 */
public function Options($x = null)
{
    return wfSetVar($this->mOptions, $x);
}
/**
 * Combined accessor/mutator for the output type (mOutputType).
 *
 * Delegates to wfSetVar(); presumably that assigns $x when it is non-null
 * and returns the previous value -- confirm against wfSetVar().
 *
 * @param mixed $x New value, or null
 * @return mixed Result of wfSetVar()
 */
public function OutputType($x = null)
{
    return wfSetVar($this->mOutputType, $x);
}
/**
 * List the tag names that currently have a hook registered.
 *
 * @return array Keys of the mTagHooks array
 */
public function getTags()
{
    return array_keys($this->mTagHooks);
}
/**
 * Break wikitext into sections and either extract or replace one of them.
 *
 * Section 0 is the text before the first heading. Any other number
 * addresses the n-th heading together with the deeper-level headings that
 * directly follow it. Used by getSection() and replaceSection().
 *
 * @param string $text    Page wikitext
 * @param int    $section Section number
 * @param string $mode    Either "get" or "replace"
 * @param string $newtext For "get": value returned when the section is not
 *                        found. For "replace": the replacement text.
 * @return string For "get", the extracted section text; for "replace",
 *                the whole text with the section replaced.
 */
private function extractSections($text, $section, $mode, $newtext = '')
{
    # Strip NOWIKI etc. to avoid confusion (the true parameter causes HTML
    # comments to be stripped as well). Output type and options are
    # swapped temporarily for the strip pass and restored right after.
    $stripState = new StripState;

    $oldOutputType = $this->mOutputType;
    $oldOptions = $this->mOptions;
    $this->mOptions = new ParserOptions();
    $this->setOutputType(OT_WIKI);

    $striptext = $this->strip($text, $stripState, true);

    $this->setOutputType($oldOutputType);
    $this->mOptions = $oldOptions;

    # Now that we can be sure that no pseudo-sections are in the source,
    # split it up by section.
    # PREG_SPLIT_DELIM_CAPTURE is required: the loop below walks the
    # captured header line and level markers that it interleaves with the
    # section contents.
    $uniq = preg_quote($this->uniqPrefix(), '/');
    $comment = "(?:$uniq-!--.*?QINU)";
    $secs = preg_split(
        "/
        (
            ^
            (?:$comment|<\/?noinclude>)* # Initial comments will be stripped
            (=+) # Should this be limited to 6?
            .+? # Section title...
            \\2 # Ending = count must match start
            (?:$comment|<\/?noinclude>|[ \\t]+)* # Trailing whitespace ok
            $
        |
            <h([1-6])\b.*?>
            .*?
            <\/h\\3\s*>
        )
        /mix",
        $striptext,
        -1,
        PREG_SPLIT_DELIM_CAPTURE
    );

    if ($mode == "get") {
        if ($section == 0) {
            // "Section 0" returns the content before any other section.
            $rv = $secs[0];
        } else {
            // Track missing section; replaced below if the section is found.
            $rv = $newtext;
        }
    } elseif ($mode == "replace") {
        if ($section == 0) {
            $rv = $newtext . "\n\n";
            $remainder = true;
        } else {
            $rv = $secs[0];
            $remainder = false;
        }
    }

    $count = 0;
    $sectionLevel = 0;
    $total = count($secs);
    for ($index = 1; $index < $total;) {
        $headerLine = $secs[$index++];
        if ($secs[$index]) {
            // A wiki header: the captured run of '=' gives the level.
            $headerLevel = strlen($secs[$index++]);
        } else {
            // An HTML header: skip the empty '=' capture, read the digit.
            $index++;
            $headerLevel = intval($secs[$index++]);
        }
        $content = $secs[$index++];

        $count++;
        if ($mode == "get") {
            if ($count == $section) {
                $rv = $headerLine . $content;
                $sectionLevel = $headerLevel;
            } elseif ($count > $section) {
                if ($sectionLevel && $headerLevel > $sectionLevel) {
                    // Deeper heading: still part of the requested section.
                    $rv .= $headerLine . $content;
                } else {
                    // Broke out to a higher-level section
                    break;
                }
            }
        } elseif ($mode == "replace") {
            if ($count < $section) {
                $rv .= $headerLine . $content;
            } elseif ($count == $section) {
                $rv .= $newtext . "\n\n";
                $sectionLevel = $headerLevel;
            } elseif ($count > $section) {
                if ($headerLevel <= $sectionLevel) {
                    // Passed the section's sub-parts.
                    $remainder = true;
                }
                if ($remainder) {
                    $rv .= $headerLine . $content;
                }
            }
        }
    }

    if (is_string($rv)) {
        # Reinsert stripped tags
        $rv = trim($stripState->unstripBoth($rv));
    }

    return $rv;
}
4874
/**
 * Extract one section from wikitext.
 *
 * Thin wrapper around extractSections() in "get" mode.
 *
 * @param string $text    Text to look in
 * @param int    $section Section number
 * @param string $deftext Default to return if the section is not found
 * @return string Text of the requested section
 */
public function getSection($text, $section, $deftext = '')
{
    return $this->extractSections($text, $section, "get", $deftext);
}
4891
/**
 * Replace one section of wikitext with new text.
 *
 * Thin wrapper around extractSections() in "replace" mode.
 *
 * @param string $oldtext Existing text
 * @param int    $section Section number
 * @param string $text    Replacement text for that section
 * @return string The text with the section replaced
 */
public function replaceSection($oldtext, $section, $text)
{
    return $this->extractSections($oldtext, $section, "replace", $text);
}
4896
/**
 * Get the timestamp associated with the current revision.
 *
 * The value is looked up once (by mRevisionId in the 'revision' table),
 * adjusted by the content language object, and then cached in
 * mRevisionTimestamp for later calls.
 *
 * @return string The cached, adjusted timestamp
 */
public function getRevisionTimestamp()
{
    if (is_null($this->mRevisionTimestamp)) {
        wfProfileIn(__METHOD__);
        global $wgContLang;
        $dbr = wfGetDB(DB_SLAVE);
        $timestamp = $dbr->selectField(
            'revision',
            'rev_timestamp',
            array( 'rev_id' => $this->mRevisionId ),
            __METHOD__
        );

        // Normalize timestamp to internal MW format for timezone processing.
        // This has the added side-effect of replacing a null value with
        // the current time, which gives us more sensible behavior for
        // previews.
        $timestamp = wfTimestamp(TS_MW, $timestamp);

        // The cryptic '' timezone parameter tells to use the site-default
        // timezone offset instead of the user settings.
        //
        // Since this value will be saved into the parser cache, served
        // to other users, and potentially even used inside links and such,
        // it needs to be consistent for all visitors.
        $this->mRevisionTimestamp = $wgContLang->userAdjust($timestamp, '');

        wfProfileOut(__METHOD__);
    }
    return $this->mRevisionTimestamp;
}
4932
/**
 * Store the default sort key (mDefaultSort).
 *
 * @param mixed $sort New default sort value
 */
public function setDefaultSort($sort)
{
    $this->mDefaultSort = $sort;
}
4942
/**
 * Get the default sort key.
 *
 * Returns mDefaultSort unless it is exactly false. In that case the key is
 * derived from the current title: its plain text when the title is in the
 * category namespace, its prefixed text otherwise.
 *
 * @return mixed The default sort key
 */
public function getDefaultSort()
{
    if ($this->mDefaultSort !== false) {
        return $this->mDefaultSort;
    }

    return $this->mTitle->getNamespace() == NS_CATEGORY
        ? $this->mTitle->getText()
        : $this->mTitle->getPrefixedText();
}
4959}
4960
4966{
4967 public $output = '';
4968
4969 public function replace($matches)
4970 {
4971 if (substr($matches[1], -1) == "\n") {
4972 $this->output .= substr($matches[1], 0, -1);
4973 } else {
4974 $this->output .= $matches[1];
4975 }
4976 }
4977}
4978
/**
 * Holds two replacement tables, "general" and "nowiki", and applies them
 * to text.
 *
 * NOTE(review): the class declaration line was absent from this listing;
 * the name matches the `new StripState` call in Parser::extractSections().
 */
class StripState
{
    /** ReplacementArray applied by unstripGeneral() and unstripBoth(). */
    public $general;
    /** ReplacementArray applied by unstripNoWiki() and unstripBoth(). */
    public $nowiki;

    /**
     * Start with two empty replacement tables.
     */
    public function __construct()
    {
        $this->general = new ReplacementArray;
        $this->nowiki = new ReplacementArray;
    }

    /**
     * Apply only the "general" replacements.
     *
     * @param string $text
     * @return string Result of the general table's replace()
     */
    public function unstripGeneral($text)
    {
        wfProfileIn(__METHOD__);
        $text = $this->general->replace($text);
        wfProfileOut(__METHOD__);
        return $text;
    }

    /**
     * Apply only the "nowiki" replacements.
     *
     * @param string $text
     * @return string Result of the nowiki table's replace()
     */
    public function unstripNoWiki($text)
    {
        wfProfileIn(__METHOD__);
        $text = $this->nowiki->replace($text);
        wfProfileOut(__METHOD__);
        return $text;
    }

    /**
     * Apply the "general" replacements, then the "nowiki" replacements.
     *
     * @param string $text
     * @return string Text after both replacement passes
     */
    public function unstripBoth($text)
    {
        wfProfileIn(__METHOD__);
        $text = $this->general->replace($text);
        $text = $this->nowiki->replace($text);
        wfProfileOut(__METHOD__);
        return $text;
    }
}
$result
html()
const NS_SPECIAL
Definition: Title.php:16
wfMsg($x)
Definition: RandomTest.php:63
$size
Definition: RandomTest.php:84
const OT_WIKI
Definition: Parser.php:19
const SFH_NO_HASH
Definition: Parser.php:24
const EXT_LINK_BRACKETED
Definition: Parser.php:40
const MW_COLON_STATE_TAGSTART
Definition: Parser.php:50
const MW_COLON_STATE_COMMENTDASHDASH
Definition: Parser.php:55
const MW_COLON_STATE_TAG
Definition: Parser.php:49
const HTTP_PROTOCOLS
Definition: Parser.php:33
const MW_COLON_STATE_CLOSETAG
Definition: Parser.php:51
const EXT_IMAGE_FNAME_CLASS
Definition: Parser.php:38
const EXT_IMAGE_EXTENSIONS
Definition: Parser.php:39
const OT_PREPROCESS
Definition: Parser.php:21
const MW_COLON_STATE_TEXT
Definition: Parser.php:48
const RLH_FOR_UPDATE
Definition: Parser.php:15
const EXT_LINK_TEXT_CLASS
Definition: Parser.php:37
const MW_COLON_STATE_COMMENTDASH
Definition: Parser.php:54
const EXT_LINK_URL_CLASS
Definition: Parser.php:35
const MW_COLON_STATE_TAGSLASH
Definition: Parser.php:52
const EXT_IMAGE_REGEX
Definition: Parser.php:42
const OT_HTML
Definition: Parser.php:18
const MW_PARSER_VERSION
Update this version number when the ParserOutput format changes in an incompatible way,...
Definition: Parser.php:13
const MW_COLON_STATE_COMMENT
Definition: Parser.php:53
const OT_MSG
Definition: Parser.php:20
$section
Definition: Utf8Test.php:83
if(!array_key_exists('stateid', $_REQUEST)) $state
Handle linkback() response from LinkedIn.
Definition: linkback.php:10
$comment
Definition: buildRTE.php:83
foreach($mandatory_scripts as $file) $timestamp
Definition: buildRTE.php:81
An exception for terminatinating execution or to throw for unit testing.
replace($matches)
Definition: Parser.php:4967
$mLastSection
Definition: Parser.php:109
externalTidy($text)
Spawn an external HTML tidy process and get corrected markup back from it.
Definition: Parser.php:765
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:281
$mStripState
Definition: Parser.php:106
$mInterwikiLinkHolders
Definition: Parser.php:111
$mInPre
Definition: Parser.php:110
$mRevisionId
Definition: Parser.php:132
$mTemplatePath
Definition: Parser.php:119
$mDefaultSort
Definition: Parser.php:115
$mTagHooks
#+
Definition: Parser.php:97
$mFunctionHooks
Definition: Parser.php:98
setFunctionHook($id, $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:4113
$mOutput
Definition: Parser.php:103
$mArgStack
Definition: Parser.php:108
const VERSION
Definition: Parser.php:92
$mVariables
Definition: Parser.php:100
$mTitle
Definition: Parser.php:126
$mOutputType
Definition: Parser.php:128
$mAutonumber
Definition: Parser.php:104
$mOptions
Definition: Parser.php:124
static replaceUnusualEscapes($url)
Replace unusual URL escape codes with their equivalent characters.
Definition: Parser.php:1554
$mDTopen
Definition: Parser.php:105
setHook($tag, $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4080
$mFunctionSynonyms
Definition: Parser.php:99
$mRevisionTimestamp
Definition: Parser.php:134
setOutputType($ot)
Definition: Parser.php:264
strip($text, $state, $stripcomments=false, $dontstrip=array())
Strips and renders nowiki, pre, math, hiero If $render is set, performs necessary rendering operation...
Definition: Parser.php:570
internalParse($text)
Helper function for parse() that transforms wiki markup into HTML.
Definition: Parser.php:1032
getRandomString()
Get a random string.
Definition: Parser.php:452
$mLinkHolders
Definition: Parser.php:112
internalTidy($text)
Use the HTML tidy PECL extension to use the tidy library in-process, saving the overhead of spawning ...
Definition: Parser.php:817
$mIncludeCount
Definition: Parser.php:107
$mTemplates
Definition: Parser.php:116
$mIncludeSizes
Definition: Parser.php:114
tidy($text)
Interface with html tidy, used if $wgUseTidy = true.
Definition: Parser.php:741
doBlockLevels($text, $linestart)
#-
Definition: Parser.php:2171
$mRevIdForTs
Definition: Parser.php:136
replaceLinkHolders(&$text, $options=0)
Replace link placeholders with actual links, in the buffer Placeholders created in Skin::makeLinkObj(...
Definition: Parser.php:4164
$mUniqPrefix
Definition: Parser.php:113
renderImageGallery($text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:4529
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:156
clearState()
Clear Parser state.
Definition: Parser.php:211
__construct()
#-
Definition: Parser.php:145
parse($text, &$title, $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML Do not call this function recursively.
Definition: Parser.php:298
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME)
Definition: Parser.php:2704
static removeHTMLtags($text, $processCallback=null, $args=array())
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments.
Definition: Sanitizer.php:343
static escapeId($id)
Given a value escape it so that it can be used in an id attribute and return it, this does not valida...
Definition: Sanitizer.php:760
static removeHTMLcomments($text)
Remove '', and everything between.
Definition: Sanitizer.php:533
static cleanUrl($url, $hostname=true)
Definition: Sanitizer.php:1288
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string.
Definition: Sanitizer.php:1018
static normalizeCharReferences($text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:921
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed,...
Definition: Sanitizer.php:1255
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:583
static fixTagAttributes($text, $element)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML,...
Definition: Sanitizer.php:669
unstripNoWiki($text)
Definition: Parser.php:5000
__construct()
Definition: Parser.php:4986
unstripGeneral($text)
Definition: Parser.php:4992
unstripBoth($text)
Definition: Parser.php:5008
Title class.
Definition: Title.php:37
static newFromRedirect($text)
Create a new Title for a redirect.
Definition: Title.php:301
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:124
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:355
static makeName($ns, $title)
Definition: Title.php:400
static & makeTitle($ns, $title)
Create a new Title from a namespace index and a DB key.
Definition: Title.php:253
static newFromDBkey($key)
Create a new Title from a prefixed DB key.
Definition: Title.php:102
wfUrlProtocols()
Returns a regular expression of url protocols.
$x
Definition: complexTest.php:9
$action
$key
Definition: croninfo.php:18
for( $i=6;$i< 13;$i++) for($i=1; $i< 13; $i++) $d
Definition: date.php:296
$i
Definition: disco.tpl.php:19
$html
Definition: example_001.php:87
$h
$r
Definition: example_031.php:79
$alignments
Definition: example_063.php:81
expand()
Definition: expand.php:2
if(!array_key_exists('StateId', $_REQUEST)) $id
if(array_key_exists('yes', $_REQUEST)) $attributes
Definition: getconsent.php:85
margin left
Definition: langcheck.php:164
if(function_exists( 'posix_getuid') &&posix_getuid()===0) if(!array_key_exists('t', $options)) $tag
Definition: cron.php:35
$time
Definition: cron.php:21
$index
Definition: metadata.php:60
catch(Exception $e) $message
$target
Definition: test.php:19
$user
Definition: migrateto20.php:57
static http()
Fetches the global http state from ILIAS.
get(string $class_name)
parse($uri)
Parses a URI and returns its individual components.
Definition: functions.php:181
$ret
Definition: parser.php:6
$query
$type
$url
$s
Definition: pwgen.php:45
foreach($_POST as $key=> $value) $res
echo;exit;}function LogoutNotification($SessionID){ global $ilDB;$q="SELECT session_id, data FROM usr_session WHERE expires > (\w+)\|/" PREG_SPLIT_NO_EMPTY PREG_SPLIT_DELIM_CAPTURE
$rule
Definition: showstats.php:43
$errors general
Definition: imgupload.php:50
up()
Definition: up.php:2
$text
Definition: errorreport.php:18