ILIAS  release_4-3 Revision
 All Data Structures Namespaces Files Functions Variables Groups Pages
Parser.php
Go to the documentation of this file.
1 <?php
/**
 * Global constants used by the wikitext parser.
 */

# Parser version string; also exposed as Parser::VERSION.
define( 'MW_PARSER_VERSION', '1.6.1' );

define( 'RLH_FOR_UPDATE', 1 );

# Output types understood by Parser::setOutputType() / stored in $mOutputType.
define( 'OT_HTML',       1 );
define( 'OT_WIKI',       2 );
define( 'OT_MSG',        3 );
define( 'OT_PREPROCESS', 4 );

# Flag value passed to setFunctionHook().
define( 'SFH_NO_HASH', 1 );

# Pseudo tag name: when handed to the tag extractor it asks for HTML
# comments to be stripped alongside the regular <XML>-style tags.
# Must never collide with anything usable in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Building blocks for the external link regular expressions.
define( 'HTTP_PROTOCOLS', 'http:\/\/|https:\/\/' );
# URL characters: anything that is not a bracket, quote, space or control character.
define( 'EXT_LINK_URL_CLASS', '[^][<>"\\x00-\\x20\\x7F]' );
# Link text characters: spaces are allowed, closing bracket and newlines are not.
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x0a\\x0d]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );

# Bracketed external link: [protocol+url optional-text]
define(
    'EXT_LINK_BRACKETED',
    '/\[(\b(' . wfUrlProtocols() . ')' . EXT_LINK_URL_CLASS . '+) *(' . EXT_LINK_TEXT_CLASS . '*?)\]/S'
);

# Bare URL that points at an image file.
define(
    'EXT_IMAGE_REGEX',
    '/^(' . HTTP_PROTOCOLS . ')' .                                        # Protocol
    '(' . EXT_LINK_URL_CLASS . '+)\\/' .                                  # Hostname and path
    '(' . EXT_IMAGE_FNAME_CLASS . '+)\\.((?i)' . EXT_IMAGE_EXTENSIONS . ')$/S' # Filename
);

# States of the small state machine that extracts the colon in definition lists.
define( 'MW_COLON_STATE_TEXT',            0 );
define( 'MW_COLON_STATE_TAG',             1 );
define( 'MW_COLON_STATE_TAGSTART',        2 );
define( 'MW_COLON_STATE_CLOSETAG',        3 );
define( 'MW_COLON_STATE_TAGSLASH',        4 );
define( 'MW_COLON_STATE_COMMENT',         5 );
define( 'MW_COLON_STATE_COMMENTDASH',     6 );
define( 'MW_COLON_STATE_COMMENTDASHDASH', 7 );
57 
91 class Parser
92 {
93  const VERSION = MW_PARSER_VERSION;
97  # Persistent:
99 
100  # Cleared with clearState():
105  var $mTemplates, // cache of already loaded templates, avoids
106  // multiple SQL queries for the same string
107  $mTemplatePath; // stores an unsorted hash of all the templates already loaded
108  // in this path. Used for loop detection.
109 
110  # Temporary
111  # These are variables reset at least once per parse regardless of $clearState
112  var $mOptions, // ParserOptions object
113  $mTitle, // Title context, used for self-link rendering and similar things
114  $mOutputType, // Output type, one of the OT_xxx constants
115  $ot, // Shortcut alias, see setOutputType()
116  $mRevisionId, // ID to display in {{REVISIONID}} tags
117  $mRevisionTimestamp, // The timestamp of the specified revision ID
118  $mRevIdForTs; // The revision ID which was used to fetch the timestamp
119 
/**
 * Constructor.
 *
 * Starts with empty tag-hook and function-hook registries and flags the
 * instance so that firstCallInit() still has work to do.
 */
function Parser() {
    $this->mFirstCall = true;

    $this->mTagHooks = array();
    $this->mFunctionHooks = array();
    # Two initially empty synonym tables, indexed 0 and 1
    $this->mFunctionSynonyms = array(
        0 => array(),
        1 => array(),
    );
}
133 
/**
 * One-time initialisation, executed lazily on first use.
 *
 * Registers the <pre> tag hook and the core parser functions, then calls
 * initialiseVariables() and clears the first-call flag. Subsequent calls
 * return immediately.
 */
function firstCallInit() {
    if ( !$this->mFirstCall ) {
        return;
    }

    wfProfileIn( __METHOD__ );
    global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions;

    $this->setHook( 'pre', array( $this, 'renderPreTag' ) );

    $handlerClass = 'CoreParserFunctions';

    # function id => static method on CoreParserFunctions; all registered with SFH_NO_HASH.
    # The order of this table is the registration order.
    $noHashFunctions = array(
        'int'              => 'intFunction',
        'ns'               => 'ns',
        'urlencode'        => 'urlencode',
        'lcfirst'          => 'lcfirst',
        'ucfirst'          => 'ucfirst',
        'lc'               => 'lc',
        'uc'               => 'uc',
        'localurl'         => 'localurl',
        'localurle'        => 'localurle',
        'fullurl'          => 'fullurl',
        'fullurle'         => 'fullurle',
        'formatnum'        => 'formatnum',
        'grammar'          => 'grammar',
        'plural'           => 'plural',
        'numberofpages'    => 'numberofpages',
        'numberofusers'    => 'numberofusers',
        'numberofarticles' => 'numberofarticles',
        'numberoffiles'    => 'numberoffiles',
        'numberofadmins'   => 'numberofadmins',
        'numberofedits'    => 'numberofedits',
        'language'         => 'language',
        'padleft'          => 'padleft',
        'padright'         => 'padright',
        'anchorencode'     => 'anchorencode',
    );
    foreach ( $noHashFunctions as $functionId => $method ) {
        $this->setFunctionHook( $functionId, array( $handlerClass, $method ), SFH_NO_HASH );
    }

    # 'special' is the only core function registered without an explicit flag
    $this->setFunctionHook( 'special', array( $handlerClass, 'special' ) );
    $this->setFunctionHook( 'defaultsort', array( $handlerClass, 'defaultsort' ), SFH_NO_HASH );

    # Functions that are only available when enabled in the configuration
    if ( $wgAllowDisplayTitle ) {
        $this->setFunctionHook( 'displaytitle', array( $handlerClass, 'displaytitle' ), SFH_NO_HASH );
    }
    if ( $wgAllowSlowParserFunctions ) {
        $this->setFunctionHook( 'pagesinnamespace', array( $handlerClass, 'pagesinnamespace' ), SFH_NO_HASH );
    }

    $this->initialiseVariables();
    $this->mFirstCall = false;
    wfProfileOut( __METHOD__ );
}
185 
/**
 * Reset all per-parse state so the object can be used for a fresh parse.
 *
 * Runs firstCallInit() if it has not run yet, creates a new ParserOutput
 * and StripState, empties the link holder and template caches, generates a
 * new unique marker prefix and finally fires the 'ParserClearState' hook.
 */
function clearState() {
    wfProfileIn( __METHOD__ );
    if ( $this->mFirstCall ) {
        $this->firstCallInit();
    }
    $this->mOutput = new ParserOutput;
    $this->mAutonumber = 0;
    $this->mLastSection = '';
    $this->mDTopen = false;
    $this->mIncludeCount = array();
    $this->mStripState = new StripState;
    $this->mArgStack = array();
    $this->mInPre = false;
    $this->mInterwikiLinkHolders = array(
        'texts' => array(),
        'titles' => array()
    );
    $this->mLinkHolders = array(
        'namespaces' => array(),
        'dbkeys' => array(),
        'queries' => array(),
        'texts' => array(),
        'titles' => array()
    );
    $this->mRevisionTimestamp = $this->mRevisionId = null;

    # Prefix for strip markers: a control character plus random hex digits
    $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();

    # Clear these on every parse, bug 4549
    $this->mTemplates = array();
    $this->mTemplatePath = array();

    $this->mShowToc = true;
    $this->mForceTocPosition = false;
    $this->mIncludeSizes = array(
        'pre-expand' => 0,
        'post-expand' => 0,
        'arg' => 0
    );
    $this->mDefaultSort = false;

    # Hook handlers take the parser by reference. Taking a reference to $this
    # itself is not supported on PHP 7.1+, so hand over a local alias instead.
    $parser = $this;
    wfRunHooks( 'ParserClearState', array( &$parser ) );
    wfProfileOut( __METHOD__ );
}
/**
 * Select the output type for the current operation.
 *
 * Besides storing the raw value in $mOutputType this fills $this->ot, a map
 * of booleans keyed 'html', 'wiki', 'msg' and 'pre' telling which of the
 * OT_xxx constants the given value equals.
 *
 * @param int $ot One of the OT_xxx constants
 */
function setOutputType( $ot ) {
    $this->mOutputType = $ot;

    // Shortcut alias
    $shortcuts = array();
    $shortcuts['html'] = ( $ot == OT_HTML );
    $shortcuts['wiki'] = ( $ot == OT_WIKI );
    $shortcuts['msg']  = ( $ot == OT_MSG );
    $shortcuts['pre']  = ( $ot == OT_PREPROCESS );
    $this->ot = $shortcuts;
}
253 
/**
 * Accessor for the unique strip-marker prefix generated by clearState().
 *
 * @return string
 */
function uniqPrefix() {
    $prefix = $this->mUniqPrefix;
    return $prefix;
}
262 
/**
 * Convert wikitext to HTML.
 *
 * Pipeline: strip extension tags, run internalParse(), put general strip
 * items back, apply typographic fixes, build block-level markup, resolve
 * link holders, run the language converter, restore nowiki items and
 * finally clean the markup (via Tidy or a few regular expressions).
 *
 * @param string        $text       Wikitext to convert
 * @param Title         &$title     Title used as context; stored by reference in $mTitle
 * @param ParserOptions $options    Options for this parse
 * @param bool          $linestart  Passed on to doBlockLevels()
 * @param bool          $clearState Call clearState() before parsing
 * @param int|null      $revid      Revision ID to use for the duration of this parse
 * @return ParserOutput The parser output object ($mOutput) with its text set
 */
public function parse( $text, &$title, $options, $linestart = true, $clearState = true, $revid = null ) {
    global $wgUseTidy, $wgAlwaysUseTidy, $wgContLang;
    $fname = 'Parser::parse-' . wfGetCaller();
    wfProfileIn( __METHOD__ );
    wfProfileIn( $fname );

    if ( $clearState ) {
        $this->clearState();
    }

    $this->mOptions = $options;
    $this->mTitle =& $title;
    # Remember the revision context so it can be restored before returning
    $oldRevisionId = $this->mRevisionId;
    $oldRevisionTimestamp = $this->mRevisionTimestamp;
    if( $revid !== null ) {
        $this->mRevisionId = $revid;
        $this->mRevisionTimestamp = null;
    }
    $this->setOutputType( OT_HTML );

    # Hook handlers take the parser by reference. Taking a reference to $this
    # itself is not supported on PHP 7.1+, so hand over a local alias instead.
    $parser = $this;

    wfRunHooks( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
    $text = $this->strip( $text, $this->mStripState );
    wfRunHooks( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
    $text = $this->internalParse( $text );
    $text = $this->mStripState->unstripGeneral( $text );

    # Clean up special characters, only run once, next-to-last before doBlockLevels
    $fixtags = array(
        # french spaces, last one Guillemet-left
        # only if there is something before the space.
        # The punctuation is matched by a lookahead and therefore not consumed,
        # so only the first capture group has to be put back.
        '/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1&nbsp;',
        # french spaces, Guillemet-right
        '/(\\302\\253) /' => '\\1&nbsp;',
    );
    $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

    # only once and last
    $text = $this->doBlockLevels( $text, $linestart );

    $this->replaceLinkHolders( $text );

    # the position of the parserConvert() call should not be changed. it
    # assumes that the links are all replaced and the only thing left
    # is the <nowiki> mark.
    # Side-effects: this calls $this->mOutput->setTitleText()
    $text = $wgContLang->parserConvert( $text, $this );

    $text = $this->mStripState->unstripNoWiki( $text );

    wfRunHooks( 'ParserBeforeTidy', array( &$parser, &$text ) );

    $text = Sanitizer::normalizeCharReferences( $text );

    if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) {
        $text = Parser::tidy($text);
    } else {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $tidyregs = array(
            # ''Something [http://www.cool.com cool''] -->
            # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
            '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            # fix up an anchor inside another anchor, only
            # at least for a single single nested link (bug 3695)
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
            '\\1\\2</a>\\3</a>\\1\\4</a>',
            # fix div inside inline elements- doBlockLevels won't wrap a line which
            # contains a div, so fix it up here; replace
            # div with escaped text
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
            '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
            # remove empty italic or bold tag pairs, some
            # introduced by rules above
            '/<([bi])><\/\\1>/' => '',
        );

        $text = preg_replace(
            array_keys( $tidyregs ),
            array_values( $tidyregs ),
            $text );
    }

    wfRunHooks( 'ParserAfterTidy', array( &$parser, &$text ) );

    # Information on include size limits, for the benefit of users who try to skirt them
    if ( max( $this->mIncludeSizes ) > 1000 ) {
        $max = $this->mOptions->getMaxIncludeSize();
        $text .= "<!-- \n" .
            "Pre-expand include size: {$this->mIncludeSizes['pre-expand']} bytes\n" .
            "Post-expand include size: {$this->mIncludeSizes['post-expand']} bytes\n" .
            "Template argument size: {$this->mIncludeSizes['arg']} bytes\n" .
            "Maximum: $max bytes\n" .
            "-->\n";
    }
    $this->mOutput->setText( $text );
    # Restore the revision context saved above
    $this->mRevisionId = $oldRevisionId;
    $this->mRevisionTimestamp = $oldRevisionTimestamp;
    wfProfileOut( $fname );
    wfProfileOut( __METHOD__ );

    return $this->mOutput;
}
382 
/**
 * Parse a fragment of wikitext from inside a running parse.
 *
 * Strips extension tags and runs internalParse() on the result, firing the
 * strip hooks around the strip step. Unlike parse() it does not reset state
 * and does not unstrip or post-process the result.
 *
 * @param string $text Wikitext fragment
 * @return string Text as returned by internalParse(), strip markers included
 */
function recursiveTagParse( $text ) {
    wfProfileIn( __METHOD__ );

    # Hook handlers take the parser by reference. Taking a reference to $this
    # itself is not supported on PHP 7.1+, so hand over a local alias instead.
    $parser = $this;

    wfRunHooks( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
    $text = $this->strip( $text, $this->mStripState );
    wfRunHooks( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
    $text = $this->internalParse( $text );
    wfProfileOut( __METHOD__ );
    return $text;
}
396 
/**
 * Expand variables/templates in wikitext without producing HTML.
 *
 * Resets the parser state, switches to OT_PREPROCESS output, strips
 * extension tags, optionally removes HTML comments (when the options ask
 * for it), runs replaceVariables() and puts all stripped items back.
 *
 * @param string        $text    Wikitext to preprocess
 * @param Title         $title   Title used as context
 * @param ParserOptions $options Options for this run
 * @return string Preprocessed wikitext
 */
function preprocess( $text, $title, $options ) {
    wfProfileIn( __METHOD__ );
    $this->clearState();
    $this->setOutputType( OT_PREPROCESS );
    $this->mOptions = $options;
    $this->mTitle = $title;

    # Hook handlers take the parser by reference. Taking a reference to $this
    # itself is not supported on PHP 7.1+, so hand over a local alias instead.
    $parser = $this;

    wfRunHooks( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
    $text = $this->strip( $text, $this->mStripState );
    wfRunHooks( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
    if ( $this->mOptions->getRemoveComments() ) {
        $text = Sanitizer::removeHTMLcomments( $text );
    }
    $text = $this->replaceVariables( $text );
    $text = $this->mStripState->unstripBoth( $text );
    wfProfileOut( __METHOD__ );
    return $text;
}
418 
/**
 * Produce a random string of lower-case hexadecimal digits.
 *
 * Two independent mt_rand() draws in the range 0..0x7fffffff are converted
 * to hex and concatenated. Not suitable for anything security related.
 *
 * @return string
 */
function getRandomString() {
    $upperBound = 0x7fffffff;
    $firstHalf = dechex( mt_rand( 0, $upperBound ) );
    $secondHalf = dechex( mt_rand( 0, $upperBound ) );
    return $firstHalf . $secondHalf;
}
/**
 * Accessor for the title the parser is currently working on.
 *
 * @return Title Returned by reference
 */
function &getTitle() {
    return $this->mTitle;
}
/**
 * Accessor for the options object of the current operation.
 *
 * @return ParserOptions
 */
function getOptions() {
    return $this->mOptions;
}
/**
 * Pick the language object for parser functions.
 *
 * @return Language $wgLang when the options report an interface message,
 *                  $wgContLang otherwise
 */
function getFunctionLang() {
    global $wgLang, $wgContLang;

    if ( $this->mOptions->getInterfaceMessage() ) {
        return $wgLang;
    }
    return $wgContLang;
}
436 
/**
 * Replace the listed XML-style tags (and HTML comments) in $text with unique
 * markers and report what was removed.
 *
 * For every tag or comment found, $matches receives an entry keyed by the
 * marker:
 *   array( element name as written,
 *          inner content (null for <tag />),
 *          decoded attributes,
 *          full original source of the tag )
 * A tag without an end tag runs to the end of the text.
 *
 * Tag names are escaped before being placed in the regular expressions, so
 * names containing regex metacharacters are matched literally.
 *
 * @param array  $elements    Tag names to extract
 * @param string $text        Source text
 * @param array  &$matches    Output: marker => details, see above
 * @param string $uniq_prefix Prefix for the generated markers
 * @return string $text with every extracted element replaced by its marker
 */
function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
    # Counter shared by all calls so that markers never repeat within a request
    static $n = 1;
    $stripped = '';
    $matches = array();

    $quotedElements = array();
    foreach ( $elements as $name ) {
        $quotedElements[] = preg_quote( $name, '/' );
    }
    $taglist = implode( '|', $quotedElements );
    $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

    while ( '' != $text ) {
        $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
        $stripped .= $p[0];
        if( count( $p ) < 5 ) {
            # Nothing (more) to extract
            break;
        }
        if( count( $p ) > 5 ) {
            // comment
            $element    = $p[4];
            $attributes = '';
            $close      = '';
            $inside     = $p[5];
        } else {
            // tag
            $element    = $p[1];
            $attributes = $p[2];
            $close      = $p[3];
            $inside     = $p[4];
        }

        $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . '-QINU';
        $stripped .= $marker;

        if ( $close === '/>' ) {
            // Empty element tag, <tag />
            $content = null;
            $text = $inside;
            $tail = null;
        } else {
            if( $element == '!--' ) {
                $end = '/(-->)/';
            } else {
                $end = "/(<\\/" . preg_quote( $element, '/' ) . "\\s*>)/i";
            }
            $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
            $content = $q[0];
            if( count( $q ) < 3 ) {
                # No end tag -- let it run out to the end of the text.
                $tail = '';
                $text = '';
            } else {
                $tail = $q[1];
                $text = $q[2];
            }
        }

        $matches[$marker] = array( $element,
            $content,
            Sanitizer::decodeTagAttributes( $attributes ),
            "<$element$attributes$close$content$tail" );
    }
    return $stripped;
}
516 
/**
 * Strip <nowiki>, <gallery>, <html>, <math>, extension tags and HTML comments
 * out of the text and replace them with unique markers.
 *
 * In HTML output mode each stripped element is rendered right away;
 * otherwise its source is kept. Results are stored in $state: nowiki/html
 * items in $state->nowiki, everything else in $state->general. Comments are
 * put straight back into the text unless $stripcomments is set.
 *
 * Tag names are compared case-insensitively throughout, so <NOWIKI> is
 * classified the same way as <nowiki>.
 *
 * @param string     $text          Text to process
 * @param StripState $state         Receives the stripped items
 * @param bool       $stripcomments Leave comments stripped (store them in $state)
 * @param array      $dontstrip     Tag names that must not be stripped
 * @return string Text with markers in place of the stripped elements
 */
function strip( $text, $state, $stripcomments = false , $dontstrip = array () ) {
    global $wgContLang, $wgRawHtml;
    wfProfileIn( __METHOD__ );
    $render = ($this->mOutputType == OT_HTML);

    $uniq_prefix = $this->mUniqPrefix;
    $commentState = new ReplacementArray;
    $nowikiItems = array();
    $generalItems = array();

    $elements = array_merge(
        array( 'nowiki', 'gallery' ),
        array_keys( $this->mTagHooks ) );
    if( $wgRawHtml ) {
        $elements[] = 'html';
    }
    if( $this->mOptions->getUseTeX() ) {
        $elements[] = 'math';
    }

    # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
    foreach ( $elements AS $k => $v ) {
        if ( !in_array ( $v , $dontstrip ) ) continue;
        unset ( $elements[$k] );
    }

    $matches = array();
    $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );

    foreach( $matches as $marker => $data ) {
        list( $element, $content, $params, $tag ) = $data;
        # Tags are matched case-insensitively, so normalise the name once
        $tagName = strtolower( $element );
        if( $render ) {
            wfProfileIn( __METHOD__."-render-$tagName" );
            switch( $tagName ) {
            case '!--':
                // Comment
                if( substr( $tag, -3 ) == '-->' ) {
                    $output = $tag;
                } else {
                    // Unclosed comment in input.
                    // Close it so later stripping can remove it
                    $output = "$tag-->";
                }
                break;
            case 'html':
                if( $wgRawHtml ) {
                    $output = $content;
                    break;
                }
                // Shouldn't happen otherwise. :)
                // Deliberate fall-through: treat like <nowiki>
            case 'nowiki':
                $output = Xml::escapeTagsOnly( $content );
                break;
            case 'math':
                $output = $wgContLang->armourMath( MathRenderer::renderMath( $content ) );
                break;
            case 'gallery':
                $output = $this->renderImageGallery( $content, $params );
                break;
            default:
                if( isset( $this->mTagHooks[$tagName] ) ) {
                    $output = call_user_func_array( $this->mTagHooks[$tagName],
                        array( $content, $params, $this ) );
                } else {
                    throw new MWException( "Invalid call hook $element" );
                }
            }
            wfProfileOut( __METHOD__."-render-$tagName" );
        } else {
            // Just stripping tags; keep the source
            $output = $tag;
        }

        // Unstrip the output, to support recursive strip() calls
        $output = $state->unstripBoth( $output );

        if( !$stripcomments && $tagName == '!--' ) {
            $commentState->setPair( $marker, $output );
        } elseif ( $tagName == 'html' || $tagName == 'nowiki' ) {
            $nowikiItems[$marker] = $output;
        } else {
            $generalItems[$marker] = $output;
        }
    }
    # Add the new items to the state
    # We do this after the loop instead of during it to avoid slowing
    # down the recursive unstrip
    $state->nowiki->mergeArray( $nowikiItems );
    $state->general->mergeArray( $generalItems );

    # Unstrip comments unless explicitly told otherwise.
    # (The comments are always stripped prior to this point, so as to
    # not invoke any extension tags / parser hooks contained within
    # a comment.)
    if ( !$stripcomments ) {
        // Put them all back and forget them
        $text = $commentState->replace( $text );
    }

    wfProfileOut( __METHOD__ );
    return $text;
}
638 
/**
 * Put general strip items back into the text.
 * Thin wrapper around $state->unstripGeneral().
 *
 * @param string     $text
 * @param StripState $state
 * @return string
 */
function unstrip( $text, $state ) {
    $restored = $state->unstripGeneral( $text );
    return $restored;
}
649 
/**
 * Put nowiki strip items back into the text.
 * Thin wrapper around $state->unstripNoWiki().
 *
 * @param string     $text
 * @param StripState $state
 * @return string
 */
function unstripNoWiki( $text, $state ) {
    $restored = $state->unstripNoWiki( $text );
    return $restored;
}
659 
/**
 * Put both general and nowiki strip items back, using the parser's own
 * strip state.
 *
 * @param string $text
 * @return string
 */
function unstripForHTML( $text ) {
    $stripState = $this->mStripState;
    return $stripState->unstripBoth( $text );
}
666 
/**
 * Store a piece of text in the general strip state and hand back the marker
 * that stands for it.
 *
 * The marker is the unique prefix, the literal '-item' and a random string.
 *
 * @param string     $text   Text to store
 * @param StripState &$state State whose general table receives the text
 * @return string The generated marker
 */
function insertStripItem( $text, &$state ) {
    $marker = $this->mUniqPrefix . '-item' . Parser::getRandomString();
    $state->general->setPair( $marker, $text );
    return $marker;
}
679 
/**
 * Run HTML Tidy over a fragment of HTML.
 *
 * The fragment is wrapped into a minimal XHTML document first. Depending on
 * $wgTidyInternal either internalTidy() or externalTidy() does the work.
 * When the worker reports failure (null), the original text is returned
 * with a warning comment appended.
 *
 * @param string $text HTML fragment
 * @return string Output of the tidy worker, or the input plus a warning comment
 */
function tidy( $text ) {
    global $wgTidyInternal;

    $documentHead = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
        '<head><title>test</title></head><body>';
    $documentTail = '</body></html>';
    $wrappedtext = $documentHead . $text . $documentTail;

    $correctedtext = $wgTidyInternal
        ? Parser::internalTidy( $wrappedtext )
        : Parser::externalTidy( $wrappedtext );

    if( $correctedtext === null ) {
        wfDebug( "Tidy error detected!\n" );
        return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
    }

    return $correctedtext;
}
710 
/**
 * Run the external tidy binary over the given document.
 *
 * The command line is built from $wgTidyBin, $wgTidyConf and $wgTidyOpts
 * plus ' -utf8'. The document is written to the process' stdin, the cleaned
 * result is read from its stdout and stderr is discarded.
 *
 * @param string $text Complete (wrapped) document to clean
 * @return string|null Cleaned document, or null if nothing could be read
 *                     although the input was not empty
 */
function externalTidy( $text ) {
    global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
    $fname = 'Parser::externalTidy';
    wfProfileIn( $fname );

    $cleansource = '';
    $opts = ' -utf8';

    # /dev/null does not exist on Windows; use the NUL device there
    $nullDevice = ( strtoupper( substr( PHP_OS, 0, 3 ) ) === 'WIN' ) ? 'NUL' : '/dev/null';

    $descriptorspec = array(
        0 => array('pipe', 'r'),
        1 => array('pipe', 'w'),
        2 => array('file', $nullDevice, 'a')
    );
    $pipes = array();
    $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
    if (is_resource($process)) {
        // Theoretically, this style of communication could cause a deadlock
        // here. If the stdout buffer fills up, then writes to stdin could
        // block. This doesn't appear to happen with tidy, because tidy only
        // writes to stdout after it's finished reading from stdin. Search
        // for tidyParseStdin and tidySaveStdout in console/tidy.c
        fwrite($pipes[0], $text);
        fclose($pipes[0]);

        // Read everything in one go; unlike a feof()/fgets() loop this
        // cannot spin forever when the read fails.
        $output = stream_get_contents( $pipes[1] );
        if ( $output !== false ) {
            $cleansource = $output;
        }
        fclose($pipes[1]);
        proc_close($process);
    }

    wfProfileOut( $fname );

    if( $cleansource == '' && $text != '') {
        // Some kind of error happened, so we couldn't get the corrected text.
        // Just give up; we'll use the source text and append a warning.
        return null;
    } else {
        return $cleansource;
    }
}
757 
/**
 * Clean a document with PHP's tidy extension.
 *
 * Uses the object API of the tidy extension. The Tidy 1.0 procedural
 * functions tidy_load_config() and tidy_set_encoding() are not available
 * from PHP 5 on; configuration file and encoding are passed to
 * parseString() instead.
 *
 * @param string $text Complete (wrapped) document to clean
 * @return string|null Cleaned document, or null when tidy reports status 2
 */
function internalTidy( $text ) {
    global $wgTidyConf;
    $fname = 'Parser::internalTidy';
    wfProfileIn( $fname );

    $tidy = new tidy;
    $tidy->parseString( $text, $wgTidyConf, 'utf8' );
    $tidy->cleanRepair();
    if( $tidy->getStatus() == 2 ) {
        // 2 is magic number for fatal error
        // http://www.php.net/manual/en/function.tidy-get-status.php
        $cleansource = null;
    } else {
        $cleansource = tidy_get_output( $tidy );
    }
    wfProfileOut( $fname );
    return $cleansource;
}
787 
/**
 * Convert wiki table markup ({| ... |}) into HTML tables.
 *
 * The text is processed line by line. Because tables can be nested, the
 * state of every open table is kept on parallel stacks:
 *   $td_history       - is a cell (td/th/caption) currently open?
 *   $last_tag_history - name of the cell tag opened last
 *   $tr_history       - is a row currently open?
 *   $tr_attributes    - attributes waiting for the next <tr>
 *   $has_opened_tr    - did the table open any row at all?
 * Lines outside of any table are left exactly as they were. Tables still
 * open at the end of the text are closed automatically.
 *
 * @param string $text Wikitext
 * @return string Text with table markup replaced by HTML
 */
function doTableStuff ( $text ) {
    $fname = 'Parser::doTableStuff';
    wfProfileIn( $fname );

    $lines = explode ( "\n" , $text );
    $td_history = array (); // Is currently a td tag open?
    $last_tag_history = array (); // Save history of last tag activated (td, th or caption)
    $tr_history = array (); // Is currently a tr tag open?
    $tr_attributes = array (); // history of tr attributes
    $has_opened_tr = array(); // Did this table open a <tr> element?
    $indent_level = 0; // indent level of the table
    foreach ( $lines as $key => $line )
    {
        $line = trim ( $line );

        if( $line == '' ) { // empty line, go to next line
            continue;
        }
        // Square brackets: the curly-brace offset syntax is gone in PHP 8
        $first_character = $line[0];
        $matches = array();

        if ( preg_match( '/^(:*)\{\|(.*)$/' , $line , $matches ) ) {
            // First check if we are starting a new table
            $indent_level = strlen( $matches[1] );

            $attributes = $this->mStripState->unstripBoth( $matches[2] );
            $attributes = Sanitizer::fixTagAttributes ( $attributes , 'table' );

            $lines[$key] = str_repeat( '<dl><dd>' , $indent_level ) . "<table{$attributes}>";
            array_push ( $td_history , false );
            array_push ( $last_tag_history , '' );
            array_push ( $tr_history , false );
            array_push ( $tr_attributes , '' );
            array_push ( $has_opened_tr , false );
        } else if ( count ( $td_history ) == 0 ) {
            // Not inside a table: don't do any of the following
            continue;
        } else if ( substr ( $line , 0 , 2 ) == '|}' ) {
            // We are ending a table
            $line = '</table>' . substr ( $line , 2 );
            $last_tag = array_pop ( $last_tag_history );

            // A table without any row gets one empty row
            if ( !array_pop ( $has_opened_tr ) ) {
                $line = "<tr><td></td></tr>{$line}";
            }

            if ( array_pop ( $tr_history ) ) {
                $line = "</tr>{$line}";
            }

            if ( array_pop ( $td_history ) ) {
                $line = "</{$last_tag}>{$line}";
            }
            array_pop ( $tr_attributes );
            $lines[$key] = $line . str_repeat( '</dd></dl>' , $indent_level );
        } else if ( substr ( $line , 0 , 2 ) == '|-' ) {
            // Now we have a table row
            $line = preg_replace( '#^\|-+#', '', $line );

            // Whats after the tag is now only attributes
            $attributes = $this->mStripState->unstripBoth( $line );
            $attributes = Sanitizer::fixTagAttributes ( $attributes , 'tr' );
            array_pop ( $tr_attributes );
            array_push ( $tr_attributes , $attributes );

            $line = '';
            $last_tag = array_pop ( $last_tag_history );
            array_pop ( $has_opened_tr );
            array_push ( $has_opened_tr , true );

            if ( array_pop ( $tr_history ) ) {
                $line = '</tr>';
            }

            if ( array_pop ( $td_history ) ) {
                $line = "</{$last_tag}>{$line}";
            }

            // The <tr> itself is emitted lazily by the first cell of the row
            $lines[$key] = $line;
            array_push ( $tr_history , false );
            array_push ( $td_history , false );
            array_push ( $last_tag_history , '' );
        }
        else if ( $first_character == '|' || $first_character == '!' || substr ( $line , 0 , 2 ) == '|+' ) {
            // This might be cell elements, td, th or captions
            if ( substr ( $line , 0 , 2 ) == '|+' ) {
                $first_character = '+';
                $line = substr ( $line , 1 );
            }

            $line = substr ( $line , 1 );

            if ( $first_character == '!' ) {
                $line = str_replace ( '!!' , '||' , $line );
            }

            // Split up multiple cells on the same line.
            // FIXME : This can result in improper nesting of tags processed
            // by earlier parser steps, but should avoid splitting up eg
            // attribute values containing literal "||".
            $cells = StringUtils::explodeMarkup( '||' , $line );

            $lines[$key] = '';

            // Loop through each table cell
            foreach ( $cells as $cell )
            {
                $previous = '';
                if ( $first_character != '+' )
                {
                    // Cells (but not captions) need an open row
                    $tr_after = array_pop ( $tr_attributes );
                    if ( !array_pop ( $tr_history ) ) {
                        $previous = "<tr{$tr_after}>\n";
                    }
                    array_push ( $tr_history , true );
                    array_push ( $tr_attributes , '' );
                    array_pop ( $has_opened_tr );
                    array_push ( $has_opened_tr , true );
                }

                $last_tag = array_pop ( $last_tag_history );

                // Close the previous cell, if any
                if ( array_pop ( $td_history ) ) {
                    $previous = "</{$last_tag}>{$previous}";
                }

                if ( $first_character == '|' ) {
                    $last_tag = 'td';
                } else if ( $first_character == '!' ) {
                    $last_tag = 'th';
                } else if ( $first_character == '+' ) {
                    $last_tag = 'caption';
                } else {
                    $last_tag = '';
                }

                array_push ( $last_tag_history , $last_tag );

                // A cell could contain both parameters and data
                $cell_data = explode ( '|' , $cell , 2 );

                // Bug 553: Note that a '|' inside an invalid link should not
                // be mistaken as delimiting cell parameters
                if ( strpos( $cell_data[0], '[[' ) !== false ) {
                    $cell = "{$previous}<{$last_tag}>{$cell}";
                } else if ( count ( $cell_data ) == 1 ) {
                    $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
                } else {
                    $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
                    $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
                    $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
                }

                $lines[$key] .= $cell;
                array_push ( $td_history , true );
            }
        }
    }

    // Closing open td, tr && table
    while ( count ( $td_history ) > 0 )
    {
        // All stacks have the same depth, so this is the tag of the cell
        // that is still open in this table (td, th or caption).
        $last_tag = array_pop ( $last_tag_history );
        if ( array_pop ( $td_history ) ) {
            $lines[] = "</{$last_tag}>" ;
        }
        if ( array_pop ( $tr_history ) ) {
            $lines[] = '</tr>' ;
        }
        if ( !array_pop ( $has_opened_tr ) ) {
            $lines[] = "<tr><td></td></tr>" ;
        }

        $lines[] = '</table>' ;
    }

    $output = implode ( "\n" , $lines ) ;

    // special case: don't return empty table
    if( $output == "<table>\n<tr><td></td></tr>\n</table>" ) {
        $output = '';
    }

    wfProfileOut( $fname );

    return $output;
}
979 
/**
 * Core of the wikitext to HTML conversion, used by parse() and
 * recursiveTagParse().
 *
 * Order of the steps: inclusion-control tags, HTML sanitizing, variable
 * replacement, tables, horizontal rules, TOC/gallery switches, headings,
 * optional date reformatting, quotes, internal and external links, magic
 * links and finally heading formatting. The 'ParserBeforeInternalParse'
 * hook can abort the whole step by returning false, in which case the text
 * is returned as the hook left it.
 *
 * @param string $text Wikitext with extension tags already stripped
 * @return string
 */
function internalParse( $text ) {
    $args = array();
    $isMain = true;
    $fname = 'Parser::internalParse';
    wfProfileIn( $fname );

    # Hook handlers take the parser by reference. Taking a reference to $this
    # itself is not supported on PHP 7.1+, so hand over a local alias instead.
    $parser = $this;

    # Hook to suspend the parser in this state
    if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$parser, &$text, &$this->mStripState ) ) ) {
        wfProfileOut( $fname );
        return $text ;
    }

    # Remove <noinclude> tags and <includeonly> sections
    $text = strtr( $text, array( '<onlyinclude>' => '' , '</onlyinclude>' => '' ) );
    $text = strtr( $text, array( '<noinclude>' => '', '</noinclude>' => '') );
    $text = StringUtils::delimiterReplace( '<includeonly>', '</includeonly>', '', $text );

    # A callback does not need the object by reference
    $text = Sanitizer::removeHTMLtags( $text, array( $this, 'attributeStripCallback' ) );

    $text = $this->replaceVariables( $text, $args );
    wfRunHooks( 'InternalParseBeforeLinks', array( &$parser, &$text, &$this->mStripState ) );

    // Tables need to come after variable replacement for things to work
    // properly; putting them before other transformations should keep
    // exciting things like link expansions from showing up in surprising
    // places.
    $text = $this->doTableStuff( $text );

    # Four or more dashes at the start of a line become a horizontal rule
    $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

    $text = $this->stripToc( $text );
    $this->stripNoGallery( $text );
    $text = $this->doHeadings( $text );
    if($this->mOptions->getUseDynamicDates()) {
        $df =& DateFormatter::getInstance();
        $text = $df->reformat( $this->mOptions->getDateFormat(), $text );
    }
    $text = $this->doAllQuotes( $text );
    $text = $this->replaceInternalLinks( $text );
    $text = $this->replaceExternalLinks( $text );

    # replaceInternalLinks may sometimes leave behind
    # absolute URLs, which have to be masked to hide them from replaceExternalLinks
    $text = str_replace($this->mUniqPrefix."NOPARSE", "", $text);

    $text = $this->doMagicLinks( $text );
    $text = $this->formatHeadings( $text, $isMain );

    wfProfileOut( $fname );
    return $text;
}
1037 
/**
 * Turn "RFC n", "PMID n" and "ISBN n" in plain text into links.
 *
 * Existing links and the inside of HTML tags are matched as well so that
 * they are skipped; magicLinkCallback() returns them unchanged. The text is
 * modified in place and also returned.
 *
 * @param string &$text Text to process (modified in place)
 * @return string The processed text, returned by reference
 */
function &doMagicLinks( &$text ) {
    wfProfileIn( __METHOD__ );
    # The callback array holds the object itself; a reference to $this is
    # neither needed here nor supported on PHP 7.1+.
    $text = preg_replace_callback(
        '!(?:                           # Start cases
            <a.*?</a> |                 # Skip link text
            <.*?> |                     # Skip stuff inside HTML elements
            (?:RFC|PMID)\s+([0-9]+) |   # RFC or PMID, capture number as m[1]
            ISBN\s+(\b                  # ISBN, capture number as m[2]
                (?: 97[89] [\ \-]? )?   # optional 13-digit ISBN prefix
                (?: [0-9] [\ \-]? ){9}  # 9 digits with opt. delimiters
                [0-9Xx]                 # check digit
                \b)
            )!x', array( $this, 'magicLinkCallback' ), $text );
    wfProfileOut( __METHOD__ );
    return $text;
}
/**
 * Callback for doMagicLinks(): build the replacement for a single match.
 *
 * - Matches starting with '<' (tags, existing links) are returned as they are.
 * - ISBN matches link to the Booksources special page.
 * - RFC and PMID matches become external links whose URL comes from the
 *   'rfcurl' / 'pubmedurl' message.
 *
 * @param array $m Match array; $m[1] is the RFC/PMID number, $m[2] the ISBN
 * @return string Replacement text
 * @throws MWException If the match is none of the known kinds
 */
function magicLinkCallback( $m ) {
    $matched = $m[0];

    # Skip HTML element
    if ( substr( $matched, 0, 1 ) == '<' ) {
        return $matched;
    }

    if ( substr( $matched, 0, 4 ) == 'ISBN' ) {
        $isbn = $m[2];
        # Normalised number for the query string: no separators, upper-case X
        $num = strtr( $isbn, array(
            '-' => '',
            ' ' => '',
            'x' => 'X',
        ));
        $titleObj = SpecialPage::getTitleFor( 'Booksources' );
        $href = $titleObj->escapeLocalUrl( "isbn=$num" );
        return '<a href="' . $href . "\" class=\"internal\">ISBN $isbn</a>";
    }

    if ( substr( $matched, 0, 3 ) == 'RFC' ) {
        $keyword = 'RFC';
        $urlmsg = 'rfcurl';
    } elseif ( substr( $matched, 0, 4 ) == 'PMID' ) {
        $keyword = 'PMID';
        $urlmsg = 'pubmedurl';
    } else {
        throw new MWException( __METHOD__.': unrecognised match type "' .
            substr($matched, 0, 20 ) . '"' );
    }
    $id = $m[1];

    $url = wfMsg( $urlmsg, $id);
    $sk = $this->mOptions->getSkin();
    $la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
    return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
}
1097 
/**
 * Convert wiki headings (== Heading ==) into HTML heading elements.
 *
 * Works from six equals signs down to one, so that the longest delimiter
 * wins. A heading has to occupy a whole line; trailing whitespace after the
 * closing equals signs is dropped.
 *
 * @param string $text
 * @return string
 */
function doHeadings( $text ) {
    $fname = 'Parser::doHeadings';
    wfProfileIn( $fname );
    for ( $i = 6; $i >= 1; --$i ) {
        $h = str_repeat( '=', $i );
        # The pattern has a single capture group: the heading text
        $text = preg_replace( "/^{$h}(.+){$h}\\s*$/m",
            "<h{$i}>\\1</h{$i}>", $text );
    }
    wfProfileOut( $fname );
    return $text;
}
1114 
/**
 * Apply doQuotes() to every line of the text.
 *
 * Quote markup never spans lines, so each line is converted on its own and
 * the lines are joined with newlines again afterwards.
 *
 * @param string $text
 * @return string
 */
function doAllQuotes( $text ) {
    $fname = 'Parser::doAllQuotes';
    wfProfileIn( $fname );

    $converted = '';
    foreach ( explode( "\n", $text ) as $singleLine ) {
        $converted .= $this->doQuotes ( $singleLine ) . "\n";
    }
    # Drop the newline added after the last line
    $converted = substr($converted, 0,-1);

    wfProfileOut( $fname );
    return $converted;
}
1132 
/**
 * Convert runs of apostrophes on a single line into <i>/<b> markup.
 *
 * The line is split on runs of two or more apostrophes; odd indexes of the
 * resulting array hold the apostrophe runs, even indexes the text between
 * them.
 *
 * @param string $text One line of text
 * @return string The line with '' / ''' / ''''' converted to HTML tags
 */
function doQuotes( $text ) {
    $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
    if ( count( $arr ) == 1 ) {
        # No apostrophe runs at all
        return $text;
    }
    $total = count( $arr );

    # First, do some preliminary work. This may shift some apostrophes from
    # being mark-up to being text. It also counts the number of occurrences
    # of bold and italics mark-ups.
    $numbold = 0;
    $numitalics = 0;
    for ( $i = 1; $i < $total; $i += 2 ) {
        $len = strlen( $arr[$i] );
        if ( $len == 4 ) {
            # If there are ever four apostrophes, assume the first is supposed to
            # be text, and the remaining three constitute mark-up for bold text.
            $arr[$i-1] .= "'";
            $arr[$i] = "'''";
        } elseif ( $len > 5 ) {
            # If there are more than 5 apostrophes in a row, assume they're all
            # text except for the last 5.
            $arr[$i-1] .= str_repeat( "'", $len - 5 );
            $arr[$i] = "'''''";
        }
        # Count the number of occurrences of bold and italics mark-ups.
        # We are not counting sequences of five apostrophes.
        $len = strlen( $arr[$i] );
        if ( $len == 2 ) {
            $numitalics++;
        } elseif ( $len == 3 ) {
            $numbold++;
        } elseif ( $len == 5 ) {
            $numitalics++;
            $numbold++;
        }
    }

    # If there is an odd number of both bold and italics, it is likely
    # that one of the bold ones was meant to be an apostrophe followed
    # by italics. Which one we cannot know for certain, but it is more
    # likely to be one that has a single-letter word before it.
    if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
        $firstsingleletterword = -1;
        $firstmultiletterword = -1;
        $firstspace = -1;
        for ( $i = 1; $i < $total; $i += 2 ) {
            if ( strlen( $arr[$i] ) != 3 ) {
                continue;
            }
            $x1 = substr( $arr[$i-1], -1 );
            $x2 = substr( $arr[$i-1], -2, 1 );
            if ( $x1 == ' ' ) {
                if ( $firstspace == -1 ) $firstspace = $i;
            } elseif ( $x2 == ' ' ) {
                if ( $firstsingleletterword == -1 ) $firstsingleletterword = $i;
            } else {
                if ( $firstmultiletterword == -1 ) $firstmultiletterword = $i;
            }
        }

        # Prefer a single-letter word, then a multi-letter word, then the
        # first candidate preceded by a space.
        # (notice that it is possible for all three to be -1 if, for example,
        # there is only one pentuple-apostrophe in the line)
        $demote = -1;
        if ( $firstsingleletterword > -1 ) {
            $demote = $firstsingleletterword;
        } elseif ( $firstmultiletterword > -1 ) {
            $demote = $firstmultiletterword;
        } elseif ( $firstspace > -1 ) {
            $demote = $firstspace;
        }
        if ( $demote > -1 ) {
            $arr[$demote] = "''";
            $arr[$demote-1] .= "'";
        }
    }

    # Now let's actually convert our apostrophic mush to HTML!
    # $state records which tags are open and in which order ('bi' means
    # <b> was opened before <i>); 'both' means a ''''' was seen and the
    # text is buffered until we know which tag has to be the outer one.
    $output = '';
    $buffer = '';
    $state = '';
    foreach ( $arr as $i => $r ) {
        if ( ( $i % 2 ) == 0 ) {
            if ( $state == 'both' ) {
                $buffer .= $r;
            } else {
                $output .= $r;
            }
            continue;
        }

        $len = strlen( $r );
        if ( $len == 2 ) {
            if ( $state == 'i' ) {
                $output .= '</i>'; $state = '';
            } elseif ( $state == 'bi' ) {
                $output .= '</i>'; $state = 'b';
            } elseif ( $state == 'ib' ) {
                $output .= '</b></i><b>'; $state = 'b';
            } elseif ( $state == 'both' ) {
                $output .= '<b><i>'.$buffer.'</i>'; $state = 'b';
            } else { # $state can be 'b' or ''
                $output .= '<i>'; $state .= 'i';
            }
        } elseif ( $len == 3 ) {
            if ( $state == 'b' ) {
                $output .= '</b>'; $state = '';
            } elseif ( $state == 'bi' ) {
                $output .= '</i></b><i>'; $state = 'i';
            } elseif ( $state == 'ib' ) {
                $output .= '</b>'; $state = 'i';
            } elseif ( $state == 'both' ) {
                $output .= '<i><b>'.$buffer.'</b>'; $state = 'i';
            } else { # $state can be 'i' or ''
                $output .= '<b>'; $state .= 'b';
            }
        } elseif ( $len == 5 ) {
            if ( $state == 'b' ) {
                $output .= '</b><i>'; $state = 'i';
            } elseif ( $state == 'i' ) {
                $output .= '</i><b>'; $state = 'b';
            } elseif ( $state == 'bi' ) {
                $output .= '</i></b>'; $state = '';
            } elseif ( $state == 'ib' ) {
                $output .= '</b></i>'; $state = '';
            } elseif ( $state == 'both' ) {
                $output .= '<i><b>'.$buffer.'</b></i>'; $state = '';
            } else { # ($state == '')
                $buffer = ''; $state = 'both';
            }
        }
    }

    # Now close all remaining tags. Notice that the order is important.
    if ( $state == 'b' || $state == 'ib' ) {
        $output .= '</b>';
    }
    if ( $state == 'i' || $state == 'bi' || $state == 'ib' ) {
        $output .= '</i>';
    }
    if ( $state == 'bi' ) {
        $output .= '</b>';
    }
    # There might be lonely ''''', so make sure we have a buffer.
    # Compare against '' explicitly: a buffer of "0" is falsy in PHP and
    # would otherwise be dropped from the output.
    if ( $state == 'both' && $buffer !== '' ) {
        $output .= '<b><i>'.$buffer.'</i></b>';
    }
    return $output;
}
1298 
/**
 * Replace bracketed external links ([url] and [url text]) with HTML links.
 * Text outside the brackets is handed to replaceFreeExternalLinks().
 * Every link URL is also registered with the output object.
 *
 * @param string $text Text to process
 * @return string Text with external links replaced
 */
function replaceExternalLinks( $text ) {
    global $wgContLang;
    $fname = 'Parser::replaceExternalLinks';
    wfProfileIn( $fname );

    $sk = $this->mOptions->getSkin();

    # The split yields the leading text followed by groups of four
    # entries: URL, protocol, link text, and the text after the link.
    $pieces = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

    $s = $this->replaceFreeExternalLinks( array_shift( $pieces ) );

    for ( $pos = 0; $pos < count( $pieces ); $pos += 4 ) {
        $url = $pieces[$pos];
        $protocol = $pieces[$pos + 1];
        $text = $pieces[$pos + 2];
        $trail = $pieces[$pos + 3];

        # The characters '<' and '>' (which were escaped by
        # removeHTMLtags()) should not be included in
        # URLs, per RFC 2396.
        $m2 = array();
        if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
            $cut = $m2[0][1];
            $text = substr( $url, $cut ) . ' ' . $text;
            $url = substr( $url, 0, $cut );
        }

        # If the link text is an image URL, replace it with an <img> tag
        # This happened by accident in the original parser, but some people used it extensively
        $img = $this->maybeMakeExternalImage( $text );
        if ( $img !== false ) {
            $text = $img;
        }

        $dtrail = '';

        # Set linktype for CSS - if URL==text, link is essentially free
        if ( $text == $url ) {
            $linktype = 'free';
        } else {
            $linktype = 'text';
        }

        if ( $text == '' ) {
            # No link text, e.g. [http://domain.tld/some.link]
            # Autonumber if allowed. See bug #5918
            $scheme = substr( $protocol, 0, strpos( $protocol, ':' ) );
            if ( strpos( wfUrlProtocols(), $scheme ) !== false ) {
                $text = '[' . ++$this->mAutonumber . ']';
                $linktype = 'autonumber';
            } else {
                # Otherwise just use the URL
                $text = htmlspecialchars( $url );
                $linktype = 'free';
            }
        } else {
            # Have link text, e.g. [http://domain.tld/some.link text]s
            # Check for trail
            list( $dtrail, $trail ) = Linker::splitTrail( $trail );
        }

        $text = $wgContLang->markNoConversion( $text );

        $url = Sanitizer::cleanUrl( $url );

        # Process the trail (i.e. everything after this link up until start of the next link),
        # replacing any non-bracketed links
        $trail = $this->replaceFreeExternalLinks( $trail );

        # Use the encoded URL
        # This means that users can paste URLs directly into the text
        # Funny characters like &ouml; aren't valid in URLs anyway
        # This was changed in August 2004
        $link = $sk->makeExternalLink( $url, $text, false, $linktype, $this->mTitle->getNamespace() );
        $s .= $link . $dtrail . $trail;

        # Register link in the output object.
        # Replace unnecessary URL escape codes with the referenced character
        # This prevents spammers from hiding links from the filters
        $this->mOutput->addExternalLink( Parser::replaceUnusualEscapes( $url ) );
    }

    wfProfileOut( $fname );
    return $s;
}
1387 
/**
 * Replace free-standing (non-bracketed) URLs with HTML links or, where
 * maybeMakeExternalImage() accepts them, with inline images.
 * Linked URLs are registered with the output object.
 *
 * @param string $text Text to process
 * @return string Text with free external links replaced
 */
function replaceFreeExternalLinks( $text ) {
    global $wgContLang;

    $fname = 'Parser::replaceFreeExternalLinks';
    wfProfileIn( $fname );

    # After the leading text, $bits alternates between a protocol and the
    # text following it.
    $bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
    $s = array_shift( $bits );
    $i = 0;

    $sk = $this->mOptions->getSkin();

    while ( $i < count( $bits ) ){
        $protocol = $bits[$i++];
        $remainder = $bits[$i++];

        $m = array();
        if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
            # Found some characters after the protocol that look promising
            $url = $protocol . $m[1];
            $trail = $m[2];

            # special case: handle urls as url args:
            # http://www.example.com/foo?=http://www.example.com/bar
            # The protocol list is a bare alternation, so it has to be
            # grouped for ^ and $ to anchor every alternative.
            if(strlen($trail) == 0 &&
                isset($bits[$i]) &&
                preg_match('/^(?:'. wfUrlProtocols() . ')$/S', $bits[$i]) &&
                preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m ))
            {
                # add protocol, arg
                $url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link
                $i += 2;
                $trail = $m[2];
            }

            # The characters '<' and '>' (which were escaped by
            # removeHTMLtags()) should not be included in
            # URLs, per RFC 2396.
            $m2 = array();
            if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
                $trail = substr($url, $m2[0][1]) . $trail;
                $url = substr($url, 0, $m2[0][1]);
            }

            # Move trailing punctuation to $trail
            $sep = ',;\.:!?';
            # If there is no left bracket, then consider right brackets fair game too
            if ( strpos( $url, '(' ) === false ) {
                $sep .= ')';
            }

            $numSepChars = strspn( strrev( $url ), $sep );
            if ( $numSepChars ) {
                $trail = substr( $url, -$numSepChars ) . $trail;
                $url = substr( $url, 0, -$numSepChars );
            }

            $url = Sanitizer::cleanUrl( $url );
            # Is this an external image?
            $text = $this->maybeMakeExternalImage( $url );
            if ( $text === false ) {
                # Not an image, make a link
                $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );
                # Register it in the output object...
                # Replace unnecessary URL escape codes with their equivalent characters
                $pasteurized = Parser::replaceUnusualEscapes( $url );

                $this->mOutput->addExternalLink( $pasteurized );
            }
            $s .= $text . $trail;
        } else {
            # Nothing URL-like follows the protocol; emit it unchanged
            $s .= $protocol . $remainder;
        }
    }
    wfProfileOut( $fname );
    return $s;
}
1469 
/**
 * Pass every %XX escape sequence in a URL through
 * replaceUnusualEscapesCallback().
 *
 * @param string $url URL to process
 * @return string URL with the callback's replacements applied
 */
static function replaceUnusualEscapes( $url ) {
    $callback = array( 'Parser', 'replaceUnusualEscapesCallback' );
    return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $url );
}
1484 
/**
 * Callback for replaceUnusualEscapes(): decode a single %XX sequence
 * unless the decoded character has to stay escaped.
 *
 * @param array $matches Regex match; $matches[0] is the %XX sequence
 * @return string The decoded character, or the original escape sequence
 */
private static function replaceUnusualEscapesCallback( $matches ) {
    $escaped = $matches[0];
    $char = urldecode( $escaped );
    $ord = ord( $char );

    // Is it an unsafe or HTTP reserved character according to RFC 1738?
    if ( !( $ord > 32 && $ord < 127 ) ) {
        // Outside the range checked above: leave it escaped
        return $escaped;
    }
    if ( strpos( '<>"#{}|\^~[]`;/?', $char ) !== false ) {
        // Listed character: leave it escaped
        return $escaped;
    }
    // No, shouldn't be escaped
    return $char;
}
1504 
/**
 * Build an external image tag for a URL if external images are allowed
 * (globally, or for URLs starting with the configured prefix) and the URL
 * matches EXT_IMAGE_REGEX.
 *
 * @param string $url Candidate image URL
 * @return string|false Image HTML from the skin, or false if no image was made
 */
function maybeMakeExternalImage( $url ) {
    $sk = $this->mOptions->getSkin();
    $imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
    $imagesexception = !empty($imagesfrom);

    $allowed = $this->mOptions->getAllowExternalImages()
        || ( $imagesexception && strpos( $url, $imagesfrom ) === 0 );
    if ( !$allowed ) {
        return false;
    }
    if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
        return false;
    }
    # Image found
    return $sk->makeExternalImage( htmlspecialchars( $url ) );
}
1524 
/**
 * Process [[ ]] wikilinks.
 *
 * The text is split on '[[' and each piece is examined in turn. Depending
 * on the link target, a piece is turned into a language link, an image, a
 * category registration, a self link, a media link, a known link or a
 * link placeholder; pieces that do not form a valid link are emitted
 * unchanged.
 *
 * @param string $s Text to process
 * @return string Processed text
 * @throws MWException If $this->mTitle is null
 */
function replaceInternalLinks( $s ) {
    global $wgContLang;
    static $fname = 'Parser::replaceInternalLinks' ;

    wfProfileIn( $fname );

    wfProfileIn( $fname.'-setup' );
    static $tc = FALSE;
    # the % is needed to support urlencoded titles as well
    if ( !$tc ) { $tc = Title::legalChars() . '#%'; }

    $sk = $this->mOptions->getSkin();

    #split the entire text string on occurrences of [[
    $a = explode( '[[', ' ' . $s );
    #get the first element (all text up to first [[), and remove the space we added
    $s = array_shift( $a );
    $s = substr( $s, 1 );

    # Match a link having the form [[namespace:link|alternate]]trail
    static $e1 = FALSE;
    if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; }
    # Match cases where there is no "]]", which might still be images
    static $e1_img = FALSE;
    if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; }
    # Match the end of a line for a word that's not followed by whitespace,
    # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
    $e2 = wfMsgForContent( 'linkprefix' );

    $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
    if( is_null( $this->mTitle ) ) {
        throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
    }
    $nottalk = !$this->mTitle->isTalkPage();

    if ( $useLinkPrefixExtension ) {
        $m = array();
        if ( preg_match( $e2, $s, $m ) ) {
            $first_prefix = $m[2];
        } else {
            $first_prefix = false;
        }
    } else {
        $prefix = '';
    }

    if($wgContLang->hasVariants()) {
        $selflink = $wgContLang->convertLinkToAllVariants($this->mTitle->getPrefixedText());
    } else {
        $selflink = array($this->mTitle->getPrefixedText());
    }
    $useSubpages = $this->areSubpagesAllowed();
    wfProfileOut( $fname.'-setup' );

    # Loop for each link
    for ($k = 0; isset( $a[$k] ); $k++) {
        $line = $a[$k];
        if ( $useLinkPrefixExtension ) {
            wfProfileIn( $fname.'-prefixhandling' );
            if ( preg_match( $e2, $s, $m ) ) {
                $prefix = $m[2];
                $s = $m[1];
            } else {
                $prefix='';
            }
            # first link
            if($first_prefix) {
                $prefix = $first_prefix;
                $first_prefix = false;
            }
            wfProfileOut( $fname.'-prefixhandling' );
        }

        $might_be_img = false;

        wfProfileIn( "$fname-e1" );
        if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
            $text = $m[2];
            # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
            # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
            # the real problem is with the $e1 regex
            # See bug 1300.
            #
            # Still some problems for cases where the ] is meant to be outside punctuation,
            # and no image is in sight. See bug 2095.
            #
            if( $text !== '' &&
                substr( $m[3], 0, 1 ) === ']' &&
                strpos($text, '[') !== false
            )
            {
                $text .= ']'; # so that replaceExternalLinks($text) works later
                $m[3] = substr( $m[3], 1 );
            }
            # fix up urlencoded title texts
            if( strpos( $m[1], '%' ) !== false ) {
                # Should anchors '#' also be rejected?
                $m[1] = str_replace( array('<', '>'), array('&lt;', '&gt;'), urldecode($m[1]) );
            }
            $trail = $m[3];
        } elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption
            $might_be_img = true;
            $text = $m[2];
            if ( strpos( $m[1], '%' ) !== false ) {
                $m[1] = urldecode($m[1]);
            }
            $trail = "";
        } else { # Invalid form; output directly
            $s .= $prefix . '[[' . $line ;
            wfProfileOut( "$fname-e1" );
            continue;
        }
        wfProfileOut( "$fname-e1" );
        wfProfileIn( "$fname-misc" );

        # Don't allow internal links to pages containing
        # PROTO: where PROTO is a valid URL protocol; these
        # should be external links.
        if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) {
            $s .= $prefix . '[[' . $line ;
            # Close the '-misc' profiling section before moving on to the
            # next piece, so profiling in/out calls stay balanced.
            wfProfileOut( "$fname-misc" );
            continue;
        }

        # Make subpage if necessary
        if( $useSubpages ) {
            $link = $this->maybeDoSubpageLink( $m[1], $text );
        } else {
            $link = $m[1];
        }

        $noforce = (substr($m[1], 0, 1) != ':');
        if (!$noforce) {
            # Strip off leading ':'
            $link = substr($link, 1);
        }

        wfProfileOut( "$fname-misc" );
        wfProfileIn( "$fname-title" );
        $nt = Title::newFromText( $this->mStripState->unstripNoWiki($link) );
        if( !$nt ) {
            $s .= $prefix . '[[' . $line;
            wfProfileOut( "$fname-title" );
            continue;
        }

        $ns = $nt->getNamespace();
        $iw = $nt->getInterWiki();
        wfProfileOut( "$fname-title" );

        if ($might_be_img) { # if this is actually an invalid link
            wfProfileIn( "$fname-might_be_img" );
            if ($ns == NS_IMAGE && $noforce) { #but might be an image
                $found = false;
                while (isset ($a[$k+1]) ) {
                    #look at the next 'line' to see if we can close it there
                    $spliced = array_splice( $a, $k + 1, 1 );
                    $next_line = array_shift( $spliced );
                    $m = explode( ']]', $next_line, 3 );
                    if ( count( $m ) == 3 ) {
                        # the first ]] closes the inner link, the second the image
                        $found = true;
                        $text .= "[[{$m[0]}]]{$m[1]}";
                        $trail = $m[2];
                        break;
                    } elseif ( count( $m ) == 2 ) {
                        #if there's exactly one ]] that's fine, we'll keep looking
                        $text .= "[[{$m[0]}]]{$m[1]}";
                    } else {
                        #if $next_line is invalid too, we need look no further
                        $text .= '[[' . $next_line;
                        break;
                    }
                }
                if ( !$found ) {
                    # we couldn't find the end of this imageLink, so output it raw
                    #but don't ignore what might be perfectly normal links in the text we've examined
                    $text = $this->replaceInternalLinks($text);
                    $s .= "{$prefix}[[$link|$text";
                    # note: no $trail, because without an end, there *is* no trail
                    wfProfileOut( "$fname-might_be_img" );
                    continue;
                }
            } else { #it's not an image, so output it raw
                $s .= "{$prefix}[[$link|$text";
                # note: no $trail, because without an end, there *is* no trail
                wfProfileOut( "$fname-might_be_img" );
                continue;
            }
            wfProfileOut( "$fname-might_be_img" );
        }

        $wasblank = ( '' == $text );
        if( $wasblank ) $text = $link;

        # Link not escaped by : , create the various objects
        if( $noforce ) {

            # Interwikis
            wfProfileIn( "$fname-interwiki" );
            if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
                $this->mOutput->addLanguageLink( $nt->getFullText() );
                $s = rtrim($s . $prefix);
                $s .= trim($trail, "\n") == '' ? '': $prefix . $trail;
                wfProfileOut( "$fname-interwiki" );
                continue;
            }
            wfProfileOut( "$fname-interwiki" );

            if ( $ns == NS_IMAGE ) {
                wfProfileIn( "$fname-image" );
                if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
                    # recursively parse links inside the image caption
                    # actually, this will parse them in any other parameters, too,
                    # but it might be hard to fix that, and it doesn't matter ATM
                    $text = $this->replaceExternalLinks($text);
                    $text = $this->replaceInternalLinks($text);

                    # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
                    $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . $trail;
                    $this->mOutput->addImage( $nt->getDBkey() );

                    wfProfileOut( "$fname-image" );
                    continue;
                } else {
                    # We still need to record the image's presence on the page
                    $this->mOutput->addImage( $nt->getDBkey() );
                }
                wfProfileOut( "$fname-image" );

            }

            if ( $ns == NS_CATEGORY ) {
                wfProfileIn( "$fname-category" );
                $s = rtrim($s . "\n"); # bug 87

                if ( $wasblank ) {
                    $sortkey = $this->getDefaultSort();
                } else {
                    $sortkey = $text;
                }
                $sortkey = Sanitizer::decodeCharReferences( $sortkey );
                $sortkey = str_replace( "\n", '', $sortkey );
                $sortkey = $wgContLang->convertCategoryKey( $sortkey );
                $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

                $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;

                wfProfileOut( "$fname-category" );
                continue;
            }
        }

        # Self-link checking
        if( $nt->getFragment() === '' ) {
            if( in_array( $nt->getPrefixedText(), $selflink, true ) ) {
                $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
                continue;
            }
        }

        # Special and Media are pseudo-namespaces; no pages actually exist in them
        if( $ns == NS_MEDIA ) {
            $link = $sk->makeMediaLinkObj( $nt, $text );
            # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
            $s .= $prefix . $this->armorLinks( $link ) . $trail;
            $this->mOutput->addImage( $nt->getDBkey() );
            continue;
        } elseif( $ns == NS_SPECIAL ) {
            $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );
            continue;
        } elseif( $ns == NS_IMAGE ) {
            $img = new Image( $nt );
            if( $img->exists() ) {
                // Force a blue link if the file exists; may be a remote
                // upload on the shared repository, and we want to see its
                // auto-generated page.
                $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix );
                $this->mOutput->addLink( $nt );
                continue;
            }
        }
        $s .= $this->makeLinkHolder( $nt, $text, '', $trail, $prefix );
    }
    wfProfileOut( $fname );
    return $s;
}
1820 
/**
 * Record a link in the holder arrays and return a placeholder comment
 * (<!--LINK n--> or <!--IWLINK n-->) for it.
 *
 * @param mixed  $nt     Link target; must be an object, otherwise an
 *                       error marker is returned
 * @param string $text   Link text
 * @param string $query  Query string stored with the link
 * @param string $trail  Text following the link
 * @param string $prefix Text prepended to the link text
 * @return string Placeholder followed by the remaining trail
 */
function makeLinkHolder( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
    wfProfileIn( __METHOD__ );
    if ( is_object($nt) ) {
        # Separate the link trail from the rest of the link
        list( $inside, $trail ) = Linker::splitTrail( $trail );
        $label = $prefix.$text.$inside;

        if ( $nt->isExternal() ) {
            # array_push() returns the new element count, so the index
            # of the entry just added is one less
            $count = array_push( $this->mInterwikiLinkHolders['texts'], $label );
            $this->mInterwikiLinkHolders['titles'][] = $nt;
            $retVal = '<!--IWLINK '. ($count-1) ."-->{$trail}";
        } else {
            $count = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() );
            $this->mLinkHolders['dbkeys'][] = $nt->getDBkey();
            $this->mLinkHolders['queries'][] = $query;
            $this->mLinkHolders['texts'][] = $label;
            $this->mLinkHolders['titles'][] = $nt;
            $retVal = '<!--LINK '. ($count-1) ."-->{$trail}";
        }
    } else {
        # Fail gracefully
        $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
    }
    wfProfileOut( __METHOD__ );
    return $retVal;
}
1854 
/**
 * Render a link through the skin's makeKnownLinkObj() right away and
 * armor the result with armorLinks().
 *
 * @param mixed  $nt     Link target passed to the skin
 * @param string $text   Link text
 * @param string $query  Query string
 * @param string $trail  Text following the link
 * @param string $prefix Text preceding the link
 * @return string Armored link HTML followed by the remaining trail
 */
function makeKnownLinkHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
    $parts = Linker::splitTrail( $trail );
    $inside = $parts[0];
    $rest = $parts[1];
    $sk = $this->mOptions->getSkin();
    $html = $sk->makeKnownLinkObj( $nt, $text, $query, $inside, $prefix );
    return $this->armorLinks( $html ) . $rest;
}
1875 
/**
 * Prefix every URL protocol in the text with the unique prefix followed
 * by "NOPARSE".
 *
 * @param string $text Text to armor
 * @return string Armored text
 */
function armorLinks( $text ) {
    $pattern = '/\b(' . wfUrlProtocols() . ')/';
    $replacement = $this->mUniqPrefix . 'NOPARSE$1';
    return preg_replace( $pattern, $replacement, $text );
}
1892 
/**
 * Tell whether $wgNamespacesWithSubpages has a non-empty entry for the
 * namespace of the current title.
 *
 * @return bool
 */
function areSubpagesAllowed() {
    # Some namespaces don't allow subpages
    global $wgNamespacesWithSubpages;
    $namespace = $this->mTitle->getNamespace();
    return !empty( $wgNamespacesWithSubpages[$namespace] );
}
1902 
/**
 * Resolve subpage link syntax relative to the current title.
 *
 * Valid link forms:
 *   Foobar     -- normal
 *   :Foobar    -- override special treatment of prefix (images, language links)
 *   /Foobar    -- convert to CurrentPage/Foobar
 *   /Foobar/   -- convert to CurrentPage/Foobar, strip the initial / from text
 *   ../        -- convert to CurrentPage, from CurrentPage/CurrentSubPage
 *   ../Foobar  -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage
 *
 * @param string $target Link target as written
 * @param string &$text  Link text; filled in when it is empty and the
 *                       target ends in a slash
 * @return string The resolved target, or $target unchanged when no
 *                subpage form applies
 */
function maybeDoSubpageLink($target, &$text) {
    $fname = 'Parser::maybeDoSubpageLink';
    wfProfileIn( $fname );
    $ret = $target; # default return value is no change

    # bug 7425
    $target = trim( $target );

    # Some namespaces don't allow subpages,
    # so only perform processing if subpages are allowed
    if( $this->areSubpagesAllowed() ) {
        # Look at the first character.
        # Bracket offset syntax: the {} form was removed in PHP 8.0.
        if( $target != '' && $target[0] == '/' ) {
            # / at end means we don't want the slash to be shown
            $trailingSlashes = preg_match_all( '%(/+)$%', $target, $m );
            if( $trailingSlashes ) {
                $noslash = $target = substr( $target, 1, -strlen($m[0][0]) );
            } else {
                $noslash = substr( $target, 1 );
            }

            $ret = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
            if( '' === $text ) {
                $text = $target;
            } # this might be changed for ugliness reasons
        } else {
            # check for .. subpage backlinks
            $dotdotcount = 0;
            $nodotdot = $target;
            while( strncmp( $nodotdot, "../", 3 ) == 0 ) {
                ++$dotdotcount;
                $nodotdot = substr( $nodotdot, 3 );
            }
            if($dotdotcount > 0) {
                $exploded = explode( '/', $this->mTitle->getPrefixedText() );
                if( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page
                    $ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) );
                    # / at the end means don't show full path
                    if( substr( $nodotdot, -1, 1 ) == '/' ) {
                        $nodotdot = substr( $nodotdot, 0, -1 );
                        if( '' === $text ) {
                            $text = $nodotdot;
                        }
                    }
                    $nodotdot = trim( $nodotdot );
                    if( $nodotdot != '' ) {
                        $ret .= '/' . $nodotdot;
                    }
                }
            }
        }
    }

    wfProfileOut( $fname );
    return $ret;
}
1974 
/**
 * Build the closing tag for the currently open section, if any, and
 * reset the section and preformatted-text state.
 *
 * @return string Closing tag plus newline, or '' when no section is open
 */
/* private */ function closeParagraph() {
    $section = $this->mLastSection;
    $this->mInPre = false;
    $this->mLastSection = '';
    if ( '' != $section ) {
        return '</' . $section . ">\n";
    }
    return '';
}
/**
 * getCommon() returns the length of the longest common prefix of both
 * arguments, i.e. the number of leading bytes in which they agree.
 *
 * @param string $st1
 * @param string $st2
 * @return int Length of the common prefix (0 if either string is empty)
 */
/* private */ function getCommon( $st1, $st2 ) {
    $shorter = min( strlen( $st1 ), strlen( $st2 ) );

    for ( $i = 0; $i < $shorter; ++$i ) {
        # Bracket offset syntax: the {} form was removed in PHP 8.0
        if ( $st1[$i] != $st2[$i] ) { break; }
    }
    return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.
#
/**
 * Close the current paragraph and open the list and first item that
 * belong to the given prefix character.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML to emit; for any other character only an error
 *                comment is returned
 */
/* private */ function openList( $char ) {
    $result = $this->closeParagraph();

    switch ( $char ) {
        case '*':
            $result .= '<ul><li>';
            break;
        case '#':
            $result .= '<ol><li>';
            break;
        case ':':
            $result .= '<dl><dd>';
            break;
        case ';':
            $result .= '<dl><dt>';
            $this->mDTopen = true;
            break;
        default:
            # Replaces (not appends to) the paragraph-closing markup
            $result = '<!-- ERR 1 -->';
    }

    return $result;
}
/**
 * Close the current list item and open the next one for the given
 * prefix character.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML to emit, or an error comment for any other character
 */
/* private */ function nextItem( $char ) {
    if ( '*' == $char || '#' == $char ) {
        return '</li><li>';
    }
    if ( ':' != $char && ';' != $char ) {
        return '<!-- ERR 2 -->';
    }

    # Close whichever definition-list element is currently open
    $close = $this->mDTopen ? '</dt>' : '</dd>';
    $isTerm = ( ';' == $char );
    $this->mDTopen = $isTerm;
    return $close . ( $isTerm ? '<dt>' : '<dd>' );
}
/**
 * Close the last item and the list that belong to the given prefix
 * character.
 *
 * @param string $char One of '*', '#' or ':'
 * @return string Closing HTML followed by a newline, or an error comment
 *                (without newline) for any other character
 */
/* private */ function closeList( $char ) {
    switch ( $char ) {
        case '*':
            $text = '</li></ul>';
            break;
        case '#':
            $text = '</li></ol>';
            break;
        case ':':
            if ( $this->mDTopen ) {
                $this->mDTopen = false;
                $text = '</dt></dl>';
            } else {
                $text = '</dd></dl>';
            }
            break;
        default:
            return '<!-- ERR 3 -->';
    }
    return $text."\n";
}
/**
 * Make lists from lines starting with ':', '*', '#' or ';' and wrap the
 * remaining text in <p> / <pre> blocks, working line by line.
 *
 * @param string $text      Text to process
 * @param bool   $linestart Whether $text starts at the beginning of a
 *                          line; if false, the first line is copied to
 *                          the output without any block processing
 * @return string Text with block-level markup added
 */
function doBlockLevels( $text, $linestart ) {
    $fname = 'Parser::doBlockLevels';
    wfProfileIn( $fname );

    # Parsing through the text line by line. The main thing
    # happening here is handling of block-level elements p, pre,
    # and making lists from lines starting with * # : etc.
    #
    $textLines = explode( "\n", $text );

    $lastPrefix = $output = '';
    $this->mDTopen = $inBlockElem = false;
    $prefixLength = 0;
    $paragraphStack = false;

    if ( !$linestart ) {
        $output .= array_shift( $textLines );
    }
    foreach ( $textLines as $oLine ) {
        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
        $preOpenMatch = preg_match('/<pre/i', $oLine );
        if ( !$this->mInPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $oLine, '*#:;' );
            $pref = substr( $oLine, 0, $prefixLength );

            # $pref2 is the prefix with ';' mapped to ':', so that term and
            # definition lines compare as the same list level
            $pref2 = str_replace( ';', ':', $pref );
            $t = substr( $oLine, $prefixLength );
            $this->mInPre = !empty($preOpenMatch);
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $pref, -1 ) );
            $paragraphStack = false;

            if ( substr( $pref, -1 ) == ';') {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                $term = $t2 = '';
                if ($this->findColonNoLinks($t, $term, $t2) !== false) {
                    $t = $t2;
                    $output .= $term . $this->nextItem( ':' );
                }
            }
        } elseif( $prefixLength || $lastPrefixLength ) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
            $paragraphStack = false;

            while( $commonPrefixLength < $lastPrefixLength ) {
                $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                --$lastPrefixLength;
            }
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
            }
            while ( $prefixLength > $commonPrefixLength ) {
                $char = substr( $pref, $commonPrefixLength, 1 );
                $output .= $this->openList( $char );

                if ( ';' == $char ) {
                    # FIXME: This is dupe of code above
                    if ($this->findColonNoLinks($t, $term, $t2) !== false) {
                        $t = $t2;
                        $output .= $term . $this->nextItem( ':' );
                    }
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }
        if( 0 == $prefixLength ) {
            wfProfileIn( "$fname-paragraph" );
            # No prefix (not in list)--go to paragraph mode
            // XXX: use a stack for nestable elements like span, table and div
            $openmatch = preg_match('/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
            $closematch = preg_match(
                '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
            if ( $openmatch or $closematch ) {
                $paragraphStack = false;
                # TODO bug 5718: paragraph closed
                $output .= $this->closeParagraph();
                if ( $preOpenMatch and !$preCloseMatch ) {
                    $this->mInPre = true;
                }
                if ( $closematch ) {
                    $inBlockElem = false;
                } else {
                    $inBlockElem = true;
                }
            } else if ( !$inBlockElem && !$this->mInPre ) {
                # substr() instead of a direct offset: $t may be an empty
                # string here, and reading offset 0 of it raises a notice
                if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
                    // pre
                    if ($this->mLastSection != 'pre') {
                        $paragraphStack = false;
                        $output .= $this->closeParagraph().'<pre>';
                        $this->mLastSection = 'pre';
                    }
                    $t = substr( $t, 1 );
                } else {
                    // paragraph
                    if ( '' == trim($t) ) {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack.'<br />';
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else {
                            if ($this->mLastSection != 'p' ) {
                                $output .= $this->closeParagraph();
                                $this->mLastSection = '';
                                $paragraphStack = '<p>';
                            } else {
                                $paragraphStack = '</p><p>';
                            }
                        }
                    } else {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack;
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else if ($this->mLastSection != 'p') {
                            $output .= $this->closeParagraph().'<p>';
                            $this->mLastSection = 'p';
                        }
                    }
                }
            }
            wfProfileOut( "$fname-paragraph" );
        }
        // somewhere above we forget to get out of pre block (bug 785)
        if($preCloseMatch && $this->mInPre) {
            $this->mInPre = false;
        }
        # While a paragraph opener is pending in $paragraphStack the
        # (blank) line itself is not emitted
        if ($paragraphStack === false) {
            $output .= $t."\n";
        }
    }
    # Close whatever list levels are still open after the last line
    while ( $prefixLength ) {
        $output .= $this->closeList( $pref2[$prefixLength-1] );
        --$prefixLength;
    }
    if ( '' != $this->mLastSection ) {
        $output .= '</' . $this->mLastSection . '>';
        $this->mLastSection = '';
    }

    wfProfileOut( $fname );
    return $output;
}
2218 
/**
 * Split a definition-list line at the first ':' that is not nested inside
 * an HTML-style tag pair, so that markup containing colons is left intact.
 *
 * On success $before receives the text preceding the colon and $after the
 * text following it. On failure both are left untouched.
 *
 * @param string $str     Line to examine
 * @param string &$before Output: text before the splitting colon
 * @param string &$after  Output: text after the splitting colon
 * @return int|false Offset of the splitting ':' in $str, or false when no
 *                   usable colon exists (callers compare with !== false)
 * @throws MWException if the state machine reaches an unknown state
 */
function findColonNoLinks($str, &$before, &$after) {
	$fname = 'Parser::findColonNoLinks';
	wfProfileIn( $fname );

	$pos = strpos( $str, ':' );
	if( $pos === false ) {
		// Nothing to find!
		wfProfileOut( $fname );
		return false;
	}

	$lt = strpos( $str, '<' );
	if( $lt === false || $lt > $pos ) {
		// Easy; no tag nesting to worry about
		$before = substr( $str, 0, $pos );
		$after = substr( $str, $pos+1 );
		wfProfileOut( $fname );
		return $pos;
	}

	// Ugly state machine to walk through avoiding tags.
	$state = MW_COLON_STATE_TEXT;
	$stack = 0; // number of currently open (unclosed) tags
	$len = strlen( $str );
	for( $i = 0; $i < $len; $i++ ) {
		$c = $str[$i];

		switch( $state ) {
		// (Using the number is a performance hack for common cases)
		case 0: // MW_COLON_STATE_TEXT:
			switch( $c ) {
			case "<":
				// Could be either a <start> tag or an </end> tag
				$state = MW_COLON_STATE_TAGSTART;
				break;
			case ":":
				if( $stack == 0 ) {
					// We found it!
					$before = substr( $str, 0, $i );
					$after = substr( $str, $i + 1 );
					wfProfileOut( $fname );
					return $i;
				}
				// Embedded in a tag; don't break it.
				break;
			default:
				// Skip ahead looking for something interesting
				$colon = strpos( $str, ':', $i );
				if( $colon === false ) {
					// Nothing else interesting
					wfProfileOut( $fname );
					return false;
				}
				$lt = strpos( $str, '<', $i );
				if( $stack === 0 ) {
					if( $lt === false || $colon < $lt ) {
						// We found it! Report the colon's offset, not the
						// offset the skip-ahead started from.
						$before = substr( $str, 0, $colon );
						$after = substr( $str, $colon + 1 );
						wfProfileOut( $fname );
						return $colon;
					}
				}
				if( $lt === false ) {
					// We're nested, but there are no close tags left.
					// Leave both switches AND the for loop; the
					// unbalanced-stack check below reports the failure.
					break 3;
				}
				// Skip ahead to next tag start
				$i = $lt;
				$state = MW_COLON_STATE_TAGSTART;
			}
			break;
		case 1: // MW_COLON_STATE_TAG:
			// In a <tag>
			switch( $c ) {
			case ">":
				$stack++;
				$state = MW_COLON_STATE_TEXT;
				break;
			case "/":
				// Slash may be followed by >?
				$state = MW_COLON_STATE_TAGSLASH;
				break;
			default:
				// ignore
			}
			break;
		case 2: // MW_COLON_STATE_TAGSTART:
			switch( $c ) {
			case "/":
				$state = MW_COLON_STATE_CLOSETAG;
				break;
			case "!":
				$state = MW_COLON_STATE_COMMENT;
				break;
			case ">":
				// Illegal early close? This shouldn't happen D:
				$state = MW_COLON_STATE_TEXT;
				break;
			default:
				$state = MW_COLON_STATE_TAG;
			}
			break;
		case 3: // MW_COLON_STATE_CLOSETAG:
			// In a </tag>
			if( $c == ">" ) {
				$stack--;
				if( $stack < 0 ) {
					wfDebug( "Invalid input in $fname; too many close tags\n" );
					wfProfileOut( $fname );
					return false;
				}
				$state = MW_COLON_STATE_TEXT;
			}
			break;
		case 4: // MW_COLON_STATE_TAGSLASH:
			if( $c == ">" ) {
				// Yes, a self-closed tag <blah/>
				$state = MW_COLON_STATE_TEXT;
			} else {
				// Probably we're jumping the gun, and this is an attribute
				$state = MW_COLON_STATE_TAG;
			}
			break;
		case 5: // MW_COLON_STATE_COMMENT:
			if( $c == "-" ) {
				$state = MW_COLON_STATE_COMMENTDASH;
			}
			break;
		case 6: // MW_COLON_STATE_COMMENTDASH:
			if( $c == "-" ) {
				$state = MW_COLON_STATE_COMMENTDASHDASH;
			} else {
				$state = MW_COLON_STATE_COMMENT;
			}
			break;
		case 7: // MW_COLON_STATE_COMMENTDASHDASH:
			if( $c == ">" ) {
				$state = MW_COLON_STATE_TEXT;
			} else {
				$state = MW_COLON_STATE_COMMENT;
			}
			break;
		default:
			throw new MWException( "State machine error in $fname" );
		}
	}
	if( $stack > 0 ) {
		wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" );
		// Keep profiling calls balanced on this exit path too.
		wfProfileOut( $fname );
		return false;
	}
	wfProfileOut( $fname );
	return false;
}
2382 
/**
 * Return the value of the magic variable identified by $index.
 *
 * Time-, statistics- and version-based values are memoised in a
 * function-static cache; title-, revision- and configuration-based values
 * are computed on every call. Unknown identifiers are handed to the
 * 'ParserGetVariableValueSwitch' hook.
 *
 * @param string $index Magic variable identifier, e.g. 'currentmonth'
 * @return mixed The variable's value, or null when it is unknown and the
 *               hook does not supply one
 */
function getVariableValue( $index ) {
	global $wgContLang, $wgSitename, $wgServer, $wgServerName, $wgScriptPath;
	global $wgLocaltimezone, $wgContLanguageCode;

	// Memoised values; hooks may inspect or alter the cache before it is consulted.
	static $varCache = array();
	if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$varCache ) )
		&& isset( $varCache[$index] ) )
	{
		return $varCache[$index];
	}

	// Reference timestamp; a hook may replace it.
	$now = time();
	wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$now ) );

	// Compute the "local" date parts with TZ temporarily switched to the
	// configured time zone, then put the previous TZ value back.
	$switchTz = isset( $wgLocaltimezone );
	if ( $switchTz ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ='.$wgLocaltimezone );
	}
	$localTimestamp = date( 'YmdHis', $now );
	$localMonth = date( 'm', $now );
	$localMonthName = date( 'n', $now );
	$localDay = date( 'j', $now );
	$localDay2 = date( 'd', $now );
	$localDayOfWeek = date( 'w', $now );
	$localWeek = date( 'W', $now );
	$localYear = date( 'Y', $now );
	$localHour = date( 'H', $now );
	if ( $switchTz ) {
		putenv( 'TZ='.$oldtz );
	}

	// Cases that 'break' leave their result in $value, which is cached and
	// returned after the switch. Cases that 'return' bypass the cache.
	switch ( $index ) {
		case 'currentmonth':
			$value = $wgContLang->formatNum( date( 'm', $now ) );
			break;
		case 'currentmonthname':
			$value = $wgContLang->getMonthName( date( 'n', $now ) );
			break;
		case 'currentmonthnamegen':
			$value = $wgContLang->getMonthNameGen( date( 'n', $now ) );
			break;
		case 'currentmonthabbrev':
			$value = $wgContLang->getMonthAbbreviation( date( 'n', $now ) );
			break;
		case 'currentday':
			$value = $wgContLang->formatNum( date( 'j', $now ) );
			break;
		case 'currentday2':
			$value = $wgContLang->formatNum( date( 'd', $now ) );
			break;
		case 'localmonth':
			$value = $wgContLang->formatNum( $localMonth );
			break;
		case 'localmonthname':
			$value = $wgContLang->getMonthName( $localMonthName );
			break;
		case 'localmonthnamegen':
			$value = $wgContLang->getMonthNameGen( $localMonthName );
			break;
		case 'localmonthabbrev':
			$value = $wgContLang->getMonthAbbreviation( $localMonthName );
			break;
		case 'localday':
			$value = $wgContLang->formatNum( $localDay );
			break;
		case 'localday2':
			$value = $wgContLang->formatNum( $localDay2 );
			break;
		case 'pagename':
			return $this->mTitle->getText();
		case 'pagenamee':
			return $this->mTitle->getPartialURL();
		case 'fullpagename':
			return $this->mTitle->getPrefixedText();
		case 'fullpagenamee':
			return $this->mTitle->getPrefixedURL();
		case 'subpagename':
			return $this->mTitle->getSubpageText();
		case 'subpagenamee':
			return $this->mTitle->getSubpageUrlForm();
		case 'basepagename':
			return $this->mTitle->getBaseText();
		case 'basepagenamee':
			return wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) );
		case 'talkpagename':
			if( !$this->mTitle->canTalk() ) {
				return '';
			}
			$talkPage = $this->mTitle->getTalkPage();
			return $talkPage->getPrefixedText();
		case 'talkpagenamee':
			if( !$this->mTitle->canTalk() ) {
				return '';
			}
			$talkPage = $this->mTitle->getTalkPage();
			return $talkPage->getPrefixedUrl();
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			return $subjPage->getPrefixedText();
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			return $subjPage->getPrefixedUrl();
		case 'revisionid':
			return $this->mRevisionId;
		case 'revisionday':
			return intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
		case 'revisionday2':
			return substr( $this->getRevisionTimestamp(), 6, 2 );
		case 'revisionmonth':
			return intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
		case 'revisionyear':
			return substr( $this->getRevisionTimestamp(), 0, 4 );
		case 'revisiontimestamp':
			return $this->getRevisionTimestamp();
		case 'namespace':
			return str_replace('_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) );
		case 'namespacee':
			return wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
		case 'talkspace':
			return $this->mTitle->canTalk() ? str_replace('_',' ',$this->mTitle->getTalkNsText()) : '';
		case 'talkspacee':
			return $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
		case 'subjectspace':
			return $this->mTitle->getSubjectNsText();
		case 'subjectspacee':
			return( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
		case 'currentdayname':
			$value = $wgContLang->getWeekdayName( date( 'w', $now ) + 1 );
			break;
		case 'currentyear':
			$value = $wgContLang->formatNum( date( 'Y', $now ), true );
			break;
		case 'currenttime':
			$value = $wgContLang->time( wfTimestamp( TS_MW, $now ), false, false );
			break;
		case 'currenthour':
			$value = $wgContLang->formatNum( date( 'H', $now ), true );
			break;
		case 'currentweek':
			// @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			// int to remove the padding
			$value = $wgContLang->formatNum( (int)date( 'W', $now ) );
			break;
		case 'currentdow':
			$value = $wgContLang->formatNum( date( 'w', $now ) );
			break;
		case 'localdayname':
			$value = $wgContLang->getWeekdayName( $localDayOfWeek + 1 );
			break;
		case 'localyear':
			$value = $wgContLang->formatNum( $localYear, true );
			break;
		case 'localtime':
			$value = $wgContLang->time( $localTimestamp, false, false );
			break;
		case 'localhour':
			$value = $wgContLang->formatNum( $localHour, true );
			break;
		case 'localweek':
			// @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			// int to remove the padding
			$value = $wgContLang->formatNum( (int)$localWeek );
			break;
		case 'localdow':
			$value = $wgContLang->formatNum( $localDayOfWeek );
			break;
		case 'numberofarticles':
			$value = $wgContLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $wgContLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $wgContLang->formatNum( SiteStats::users() );
			break;
		case 'numberofpages':
			$value = $wgContLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $wgContLang->formatNum( SiteStats::admins() );
			break;
		case 'numberofedits':
			$value = $wgContLang->formatNum( SiteStats::edits() );
			break;
		case 'currenttimestamp':
			$value = wfTimestampNow();
			break;
		case 'localtimestamp':
			$value = $localTimestamp;
			break;
		case 'currentversion':
			$value = SpecialVersion::getVersion();
			break;
		case 'sitename':
			return $wgSitename;
		case 'server':
			return $wgServer;
		case 'servername':
			return $wgServerName;
		case 'scriptpath':
			return $wgScriptPath;
		case 'directionmark':
			return $wgContLang->getDirMark();
		case 'contentlanguage':
			return $wgContLanguageCode;
		default:
			// Unknown identifier: let extensions provide a value via $ret.
			$ret = null;
			if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$varCache, &$index, &$ret ) ) ) {
				return $ret;
			}
			return null;
	}

	return $varCache[$index] = $value;
}
2576 
/**
 * Rebuild $this->mVariables from scratch: every variable ID reported by
 * MagicWord::getVariableIDs() has its MagicWord object add itself to the
 * array via addToArray().
 */
function initialiseVariables() {
	wfProfileIn( 'Parser::initialiseVariables' );

	$ids = MagicWord::getVariableIDs();
	$this->mVariables = array();

	foreach ( $ids as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$magicWord->addToArray( $this->mVariables, $variableId );
	}

	wfProfileOut( 'Parser::initialiseVariables' );
}
2594 
/**
 * Scan $text for nested brace constructs and replace each complete
 * construct with the result of a callback.
 *
 * $callbacks is keyed by opening character. Each entry is an array with:
 *   'end' => the closing character,
 *   'cb'  => array mapping a brace count to a callback (or null, meaning
 *            "parse this construct but leave it in place"),
 *   'min' => smallest run of opening characters worth tracking,
 *   'max' => largest brace count that has an entry in 'cb'.
 *
 * A callback receives an array with the keys 'text' (the whole construct),
 * 'title' (trimmed text before the first '|'), 'parts' (the '|'-separated
 * remainder, or null if there was no '|') and 'lineStart' (true when the
 * construct directly follows a newline).
 *
 * @param string $text      Text to process
 * @param array  $callbacks Rules as described above
 * @return string The text with all matched constructs replaced
 */
function replace_callback ($text, $callbacks) {
	wfProfileIn( __METHOD__ );
	$stack = array();   # constructs that have been opened but not yet closed
	$top = -1;          # index of the innermost open construct, -1 if none

	$openers = implode( '', array_keys( $callbacks ) );

	$i = 0;
	while ( $i < strlen( $text ) ) {
		# Jump to the next opening brace, or - inside a construct - the next
		# pipe or matching closing brace
		if ( $top == -1 ) {
			$closer = '';
			$search = $openers;
		} else {
			$closer = $stack[$top]['braceEnd'];
			$search = $openers . '|' . $closer;
		}
		$rule = null;
		$i += strcspn( $text, $search, $i );
		if ( $i >= strlen( $text ) ) {
			# All done
			break;
		}

		$char = $text[$i];
		if ( $char == '|' ) {
			$found = 'pipe';
		} elseif ( $char == $closer ) {
			$found = 'close';
		} elseif ( isset( $callbacks[$char] ) ) {
			$found = 'open';
			$rule = $callbacks[$char];
		} else {
			# Some versions of PHP have a strcspn which stops on null characters
			# Ignore and continue
			++$i;
			continue;
		}

		if ( $found == 'pipe' ) {
			# The first separator ends the title; later ones end a part
			$segment = substr( $text, $stack[$top]['partStart'],
				$i - $stack[$top]['partStart'] );
			if ( null === $stack[$top]['parts'] ) {
				$stack[$top]['title'] = $segment;
				$stack[$top]['parts'] = array();
			} else {
				$stack[$top]['parts'][] = $segment;
			}
			$stack[$top]['partStart'] = ++$i;
		} elseif ( $found == 'open' ) {
			# Measure the run of opening characters and step over it
			$run = strspn( $text, $char, $i );
			$i += $run;
			$piece = array(
				'brace' => $char,
				'braceEnd' => $rule['end'],
				'title' => '',
				'parts' => null,
				'count' => $run,
				'startAt' => $i,
				'partStart' => $i,
			);

			# Only track the construct if the run is long enough for some rule
			if ( $run >= $rule['min'] ) {
				$top++;
				$stack[$top] = $piece;
			}
		} elseif ( $found == 'close' ) {
			# Work on a by-value snapshot of the innermost construct; the
			# stack slot itself is cleared further down.
			$frame = $stack[$top];

			# Never count more closing characters than were opened
			$count = strspn( $text, $char, $i, $frame['count'] );

			# Pick the largest brace count that has a callback entry (if there
			# are 5 closing characters, we will probably need only 3)
			$cbType = $callbacks[$frame['brace']];
			if ( $count > $cbType['max'] ) {
				# The specified maximum exists in the callback array, unless the caller
				# has made an error
				$matchingCount = $cbType['max'];
			} else {
				# Skip gaps in the callback array; array_key_exists rather than
				# isset because a callback may legitimately be null
				$matchingCount = $count;
				while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $cbType['cb'] ) ) {
					--$matchingCount;
				}
			}

			if ( $matchingCount <= 0 ) {
				$i += $count;
				continue;
			}
			$matchingCallback = $cbType['cb'][$matchingCount];

			# The text since the last separator is the title, or the last part
			$segment = substr( $text, $frame['partStart'], $i - $frame['partStart'] );
			if ( null === $frame['parts'] ) {
				$frame['title'] = $segment;
			} else {
				$frame['parts'][] = $segment;
			}

			$pieceStart = $frame['startAt'] - $matchingCount;
			$pieceEnd = $i + $matchingCount;

			if ( is_callable( $matchingCallback ) ) {
				$cbArgs = array (
					'text' => substr($text, $pieceStart, $pieceEnd - $pieceStart),
					'title' => trim($frame['title']),
					'parts' => $frame['parts'],
					'lineStart' => (($pieceStart > 0) && ($text[$pieceStart-1] == "\n")),
				);
				# Splice the callback's result in and resume right after it
				$replaceWith = call_user_func( $matchingCallback, $cbArgs );
				$text = substr($text, 0, $pieceStart) . $replaceWith . substr($text, $pieceEnd);
				$i = $pieceStart + strlen($replaceWith);
			} else {
				# null value for callback means that parentheses should be parsed, but not replaced
				$i += $matchingCount;
			}

			# Pop the construct, but remember it in case some of its opening
			# characters are still unused
			$piece = array(
				'brace' => $frame['brace'],
				'braceEnd' => $frame['braceEnd'],
				'count' => $frame['count'],
				'title' => '',
				'parts' => null,
				'startAt' => $frame['startAt'],
			);
			$stack[$top] = null;
			$top--;

			if ( $matchingCount < $piece['count'] ) {
				$piece['count'] -= $matchingCount;
				$piece['startAt'] -= $matchingCount;
				$piece['partStart'] = $piece['startAt'];
				# Re-push it with the largest remaining count that still has a rule
				$currentCbList = $callbacks[$piece['brace']]['cb'];
				while ( $piece['count'] ) {
					if ( array_key_exists( $piece['count'], $currentCbList ) ) {
						$top++;
						$stack[$top] = $piece;
						break;
					}
					--$piece['count'];
				}
			}
		}
	}

	wfProfileOut( __METHOD__ );
	return $text;
}
2769 
/**
 * Replace {{...}} and {{{...}}} constructs in $text by running
 * replace_callback() with braceSubstitution() and argSubstitution().
 *
 * Text longer than the configured maximum include size is returned
 * unchanged. [[...]] spans are parsed (so that braces inside them nest
 * correctly) but never replaced.
 *
 * @param string $text     Text to transform
 * @param array  $args     Arguments of the current expansion; pushed onto
 *                         $this->mArgStack for the duration of the call
 * @param bool   $argsOnly When true, only triple-brace arguments are
 *                         replaced, not double-brace items
 * @return string The transformed text
 */
function replaceVariables( $text, $args = array(), $argsOnly = false ) {
	# Prevent too big inclusions
	if( strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	$fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/;
	wfProfileIn( $fname );

	$braceCallbacks = array();
	if ( !$argsOnly ) {
		$braceCallbacks[2] = array( &$this, 'braceSubstitution' );
	}
	if ( $this->mOutputType != OT_MSG ) {
		$braceCallbacks[3] = array( &$this, 'argSubstitution' );
	}
	if ( $braceCallbacks ) {
		$callbacks = array(
			'{' => array(
				'end' => '}',
				'cb' => $braceCallbacks,
				'min' => $argsOnly ? 3 : 2,
				'max' => isset( $braceCallbacks[3] ) ? 3 : 2,
			),
			'[' => array(
				'end' => ']',
				'cb' => array(2=>null),
				'min' => 2,
				'max' => 2,
			)
		);

		# This function is called recursively. To keep track of arguments we
		# need a stack. Push and pop are kept together so the stack stays
		# balanced even when there is nothing to replace.
		array_push( $this->mArgStack, $args );
		$text = $this->replace_callback ($text, $callbacks);
		array_pop( $this->mArgStack );
	}
	wfProfileOut( $fname );
	return $text;
}
2826 
/**
 * Replace a magic variable with its value.
 *
 * When the output type is OT_WIKI only variables carrying the 'subst'
 * magic word prefix are substituted. If no substitution takes place the
 * full original match is returned unchanged.
 *
 * @param array $matches $matches[0] is the full matched text, $matches[1]
 *                       the candidate variable name
 * @return mixed The variable's value, or $matches[0] when nothing was
 *               substituted
 */
function variableSubstitution( $matches ) {
	global $wgContLang;
	$fname = 'Parser::variableSubstitution';
	$varname = $wgContLang->lc($matches[1]);
	wfProfileIn( $fname );

	$eligible = true;
	if ( $this->mOutputType == OT_WIKI ) {
		# Do only magic variables prefixed by SUBST.
		# Note that if we don't substitute the variable below,
		# we don't remove the {{subst:}} magic word, in case
		# it is a template rather than a magic variable.
		$mwSubst =& MagicWord::get( 'subst' );
		$eligible = $mwSubst->matchStartAndRemove( $varname ) ? true : false;
	}

	$substituted = false;
	if ( $eligible && array_key_exists( $varname, $this->mVariables ) ) {
		$id = $this->mVariables[$varname];
		# Now check if we did really match, case sensitive or not
		$mw =& MagicWord::get( $id );
		if ( $mw->match( $matches[1] ) ) {
			$text = $this->getVariableValue( $id );
			$this->mOutput->mContainsOldMagic = true;
			$substituted = true;
		}
	}
	if ( !$substituted ) {
		# Not a (substitutable) variable: hand back the original text
		$text = $matches[0];
	}

	wfProfileOut( $fname );
	return $text;
}
2862 
2863 
/**
 * Turn a list of raw template arguments into an associative array.
 *
 * An argument containing '=' is split at the first '=' into a trimmed
 * name and a trimmed value. Any other argument is stored untrimmed under
 * the next free positional index, counting from 1.
 *
 * @param array $args List of argument strings
 * @return array Map of argument name or position to value
 */
static function createAssocArgs( $args ) {
	$assocArgs = array();
	$index = 1;
	foreach( $args as $arg ) {
		$eqpos = strpos( $arg, '=' );
		if ( $eqpos === false ) {
			# Positional argument
			$assocArgs[$index++] = $arg;
		} else {
			# Named argument. The cast guards against substr() returning
			# false (older PHP versions do so when '=' is the last character).
			$name = trim( (string)substr( $arg, 0, $eqpos ) );
			$value = trim( (string)substr( $arg, $eqpos + 1 ) );
			$assocArgs[$name] = $value;
		}
	}

	return $assocArgs;
}
2886 
2898  function braceSubstitution( $piece ) {
2899  global $wgContLang, $wgLang, $wgAllowDisplayTitle, $wgNonincludableNamespaces;
2900  $fname = __METHOD__ /*. '-L' . count( $this->mArgStack )*/;
2901  wfProfileIn( $fname );
2902  wfProfileIn( __METHOD__.'-setup' );
2903 
2904  # Flags
2905  $found = false; # $text has been filled
2906  $nowiki = false; # wiki markup in $text should be escaped
2907  $noparse = false; # Unsafe HTML tags should not be stripped, etc.
2908  $noargs = false; # Don't replace triple-brace arguments in $text
2909  $replaceHeadings = false; # Make the edit section links go to the template not the article
2910  $headingOffset = 0; # Skip headings when number, to account for those that weren't transcluded.
2911  $isHTML = false; # $text is HTML, armour it against wikitext transformation
2912  $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered
2913 
2914  # Title object, where $text came from
2915  $title = NULL;
2916 
2917  $linestart = '';
2918 
2919 
2920  # $part1 is the bit before the first |, and must contain only title characters
2921  # $args is a list of arguments, starting from index 0, not including $part1
2922 
2923  $titleText = $part1 = $piece['title'];
2924  # If the third subpattern matched anything, it will start with |
2925 
2926  if (null == $piece['parts']) {
2927  $replaceWith = $this->variableSubstitution (array ($piece['text'], $piece['title']));
2928  if ($replaceWith != $piece['text']) {
2929  $text = $replaceWith;
2930  $found = true;
2931  $noparse = true;
2932  $noargs = true;
2933  }
2934  }
2935 
2936  $args = (null == $piece['parts']) ? array() : $piece['parts'];
2937  wfProfileOut( __METHOD__.'-setup' );
2938 
2939  # SUBST
2940  wfProfileIn( __METHOD__.'-modifiers' );
2941  if ( !$found ) {
2942  $mwSubst =& MagicWord::get( 'subst' );
2943  if ( $mwSubst->matchStartAndRemove( $part1 ) xor $this->ot['wiki'] ) {
2944  # One of two possibilities is true:
2945  # 1) Found SUBST but not in the PST phase
2946  # 2) Didn't find SUBST and in the PST phase
2947  # In either case, return without further processing
2948  $text = $piece['text'];
2949  $found = true;
2950  $noparse = true;
2951  $noargs = true;
2952  }
2953  }
2954 
2955  # MSG, MSGNW and RAW
2956  if ( !$found ) {
2957  # Check for MSGNW:
2958  $mwMsgnw =& MagicWord::get( 'msgnw' );
2959  if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
2960  $nowiki = true;
2961  } else {
2962  # Remove obsolete MSG:
2963  $mwMsg =& MagicWord::get( 'msg' );
2964  $mwMsg->matchStartAndRemove( $part1 );
2965  }
2966 
2967  # Check for RAW:
2968  $mwRaw =& MagicWord::get( 'raw' );
2969  if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
2970  $forceRawInterwiki = true;
2971  }
2972  }
2973  wfProfileOut( __METHOD__.'-modifiers' );
2974 
2975  //save path level before recursing into functions & templates.
2976  $lastPathLevel = $this->mTemplatePath;
2977 
2978  # Parser functions
2979  if ( !$found ) {
2980  wfProfileIn( __METHOD__ . '-pfunc' );
2981 
2982  $colonPos = strpos( $part1, ':' );
2983  if ( $colonPos !== false ) {
2984  # Case sensitive functions
2985  $function = substr( $part1, 0, $colonPos );
2986  if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
2987  $function = $this->mFunctionSynonyms[1][$function];
2988  } else {
2989  # Case insensitive functions
2990  $function = strtolower( $function );
2991  if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
2992  $function = $this->mFunctionSynonyms[0][$function];
2993  } else {
2994  $function = false;
2995  }
2996  }
2997  if ( $function ) {
2998  $funcArgs = array_map( 'trim', $args );
2999  $funcArgs = array_merge( array( &$this, trim( substr( $part1, $colonPos + 1 ) ) ), $funcArgs );
3000  $result = call_user_func_array( $this->mFunctionHooks[$function], $funcArgs );
3001  $found = true;
3002 
3003  // The text is usually already parsed, doesn't need triple-brace tags expanded, etc.
3004  //$noargs = true;
3005  //$noparse = true;
3006 
3007  if ( is_array( $result ) ) {
3008  if ( isset( $result[0] ) ) {
3009  $text = $linestart . $result[0];
3010  unset( $result[0] );
3011  }
3012 
3013  // Extract flags into the local scope
3014  // This allows callers to set flags such as nowiki, noparse, found, etc.
3015  extract( $result );
3016  } else {
3017  $text = $linestart . $result;
3018  }
3019  }
3020  }
3021  wfProfileOut( __METHOD__ . '-pfunc' );
3022  }
3023 
3024  # Template table test
3025 
3026  # Did we encounter this template already? If yes, it is in the cache
3027  # and we need to check for loops.
3028  if ( !$found && isset( $this->mTemplates[$piece['title']] ) ) {
3029  $found = true;
3030 
3031  # Infinite loop test
3032  if ( isset( $this->mTemplatePath[$part1] ) ) {
3033  $noparse = true;
3034  $noargs = true;
3035  $found = true;
3036  $text = $linestart .
3037  "[[$part1]]<!-- WARNING: template loop detected -->";
3038  wfDebug( __METHOD__.": template loop broken at '$part1'\n" );
3039  } else {
3040  # set $text to cached message.
3041  $text = $linestart . $this->mTemplates[$piece['title']];
3042  #treat title for cached page the same as others
3043  $ns = NS_TEMPLATE;
3044  $subpage = '';
3045  $part1 = $this->maybeDoSubpageLink( $part1, $subpage );
3046  if ($subpage !== '') {
3047  $ns = $this->mTitle->getNamespace();
3048  }
3049  $title = Title::newFromText( $part1, $ns );
3050  //used by include size checking
3051  $titleText = $title->getPrefixedText();
3052  //used by edit section links
3053  $replaceHeadings = true;
3054 
3055  }
3056  }
3057 
3058  # Load from database
3059  if ( !$found ) {
3060  wfProfileIn( __METHOD__ . '-loadtpl' );
3061  $ns = NS_TEMPLATE;
3062  # declaring $subpage directly in the function call
3063  # does not work correctly with references and breaks
3064  # {{/subpage}}-style inclusions
3065  $subpage = '';
3066  $part1 = $this->maybeDoSubpageLink( $part1, $subpage );
3067  if ($subpage !== '') {
3068  $ns = $this->mTitle->getNamespace();
3069  }
3070  $title = Title::newFromText( $part1, $ns );
3071 
3072 
3073  if ( !is_null( $title ) ) {
3074  $titleText = $title->getPrefixedText();
3075  # Check for language variants if the template is not found
3076  if($wgContLang->hasVariants() && $title->getArticleID() == 0){
3077  $wgContLang->findVariantLink($part1, $title);
3078  }
3079 
3080  if ( !$title->isExternal() ) {
3081  if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) {
3082  $text = SpecialPage::capturePath( $title );
3083  if ( is_string( $text ) ) {
3084  $found = true;
3085  $noparse = true;
3086  $noargs = true;
3087  $isHTML = true;
3088  $this->disableCache();
3089  }
3090  } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) {
3091  $found = false; //access denied
3092  wfDebug( "$fname: template inclusion denied for " . $title->getPrefixedDBkey() );
3093  } else {
3094  $articleContent = $this->fetchTemplate( $title );
3095  if ( $articleContent !== false ) {
3096  $found = true;
3097  $text = $articleContent;
3098  $replaceHeadings = true;
3099  }
3100  }
3101 
3102  # If the title is valid but undisplayable, make a link to it
3103  if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3104  $text = "[[:$titleText]]";
3105  $found = true;
3106  }
3107  } elseif ( $title->isTrans() ) {
3108  // Interwiki transclusion
3109  if ( $this->ot['html'] && !$forceRawInterwiki ) {
3110  $text = $this->interwikiTransclude( $title, 'render' );
3111  $isHTML = true;
3112  $noparse = true;
3113  } else {
3114  $text = $this->interwikiTransclude( $title, 'raw' );
3115  $replaceHeadings = true;
3116  }
3117  $found = true;
3118  }
3119 
3120  # Template cache array insertion
3121  # Use the original $piece['title'] not the mangled $part1, so that
3122  # modifiers such as RAW: produce separate cache entries
3123  if( $found ) {
3124  if( $isHTML ) {
3125  // A special page; don't store it in the template cache.
3126  } else {
3127  $this->mTemplates[$piece['title']] = $text;
3128  }
3129  $text = $linestart . $text;
3130  }
3131  }
3132  wfProfileOut( __METHOD__ . '-loadtpl' );
3133  }
3134 
3135  if ( $found && !$this->incrementIncludeSize( 'pre-expand', strlen( $text ) ) ) {
3136  # Error, oversize inclusion
3137  $text = $linestart .
3138  "[[$titleText]]<!-- WARNING: template omitted, pre-expand include size too large -->";
3139  $noparse = true;
3140  $noargs = true;
3141  }
3142 
3143  # Recursive parsing, escaping and link table handling
3144  # Only for HTML output
3145  if ( $nowiki && $found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3146  $text = wfEscapeWikiText( $text );
3147  } elseif ( !$this->ot['msg'] && $found ) {
3148  if ( $noargs ) {
3149  $assocArgs = array();
3150  } else {
3151  # Clean up argument array
3152  $assocArgs = self::createAssocArgs($args);
3153  # Add a new element to the templace recursion path
3154  $this->mTemplatePath[$part1] = 1;
3155  }
3156 
3157  if ( !$noparse ) {
3158  # If there are any <onlyinclude> tags, only include them
3159  if ( in_string( '<onlyinclude>', $text ) && in_string( '</onlyinclude>', $text ) ) {
3160  $replacer = new OnlyIncludeReplacer;
3161  StringUtils::delimiterReplaceCallback( '<onlyinclude>', '</onlyinclude>',
3162  array( &$replacer, 'replace' ), $text );
3163  $text = $replacer->output;
3164  }
3165  # Remove <noinclude> sections and <includeonly> tags
3166  $text = StringUtils::delimiterReplace( '<noinclude>', '</noinclude>', '', $text );
3167  $text = strtr( $text, array( '<includeonly>' => '' , '</includeonly>' => '' ) );
3168 
3169  if( $this->ot['html'] || $this->ot['pre'] ) {
3170  # Strip <nowiki>, <pre>, etc.
3171  $text = $this->strip( $text, $this->mStripState );
3172  if ( $this->ot['html'] ) {
3173  $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'replaceVariables' ), $assocArgs );
3174  } elseif ( $this->ot['pre'] && $this->mOptions->getRemoveComments() ) {
3175  $text = Sanitizer::removeHTMLcomments( $text );
3176  }
3177  }
3178  $text = $this->replaceVariables( $text, $assocArgs );
3179 
3180  # If the template begins with a table or block-level
3181  # element, it should be treated as beginning a new line.
3182  if (!$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{
3183  $text = "\n" . $text;
3184  }
3185  } elseif ( !$noargs ) {
3186  # $noparse and !$noargs
3187  # Just replace the arguments, not any double-brace items
3188  # This is used for rendered interwiki transclusion
3189  $text = $this->replaceVariables( $text, $assocArgs, true );
3190  }
3191  }
3192  # Prune lower levels off the recursion check path
3193  $this->mTemplatePath = $lastPathLevel;
3194 
3195  if ( $found && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3196  # Error, oversize inclusion
3197  $text = $linestart .
3198  "[[$titleText]]<!-- WARNING: template omitted, post-expand include size too large -->";
3199  $noparse = true;
3200  $noargs = true;
3201  }
3202 
3203  if ( !$found ) {
3204  wfProfileOut( $fname );
3205  return $piece['text'];
3206  } else {
3207  wfProfileIn( __METHOD__ . '-placeholders' );
3208  if ( $isHTML ) {
3209  # Replace raw HTML by a placeholder
3210  # Add a blank line preceding, to prevent it from mucking up
3211  # immediately preceding headings
3212  $text = "\n\n" . $this->insertStripItem( $text, $this->mStripState );
3213  } else {
3214  # replace ==section headers==
3215  # XXX this needs to go away once we have a better parser.
3216  if ( !$this->ot['wiki'] && !$this->ot['pre'] && $replaceHeadings ) {
3217  if( !is_null( $title ) )
3218  $encodedname = base64_encode($title->getPrefixedDBkey());
3219  else
3220  $encodedname = base64_encode("");
3221  $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
3222  PREG_SPLIT_DELIM_CAPTURE);
3223  $text = '';
3224  $nsec = $headingOffset;
3225  for( $i = 0; $i < count($m); $i += 2 ) {
3226  $text .= $m[$i];
3227  if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
3228  $hl = $m[$i + 1];
3229  if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
3230  $text .= $hl;
3231  continue;
3232  }
3233  $m2 = array();
3234  preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
3235  $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
3236  . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];
3237 
3238  $nsec++;
3239  }
3240  }
3241  }
3242  wfProfileOut( __METHOD__ . '-placeholders' );
3243  }
3244 
3245  # Prune lower levels off the recursion check path
3246  $this->mTemplatePath = $lastPathLevel;
3247 
3248  if ( !$found ) {
3249  wfProfileOut( $fname );
3250  return $piece['text'];
3251  } else {
3252  wfProfileOut( $fname );
3253  return $text;
3254  }
3255  }
3256 
/**
 * Fetch the wikitext of a template page, following at most one redirect.
 *
 * Every title that is looked up (including a redirect target) is recorded
 * on the parser output via addTemplate(). When the page has no revision
 * and lives in the MediaWiki namespace, the corresponding interface
 * message is used instead.
 *
 * @param Title $title Title of the template to load
 * @return string|false Template text, or false if nothing could be loaded
 */
function fetchTemplate( $title ) {
    global $wgLang;

    $content = false;
    $lookups = 0;

    // At most two lookups: the requested page plus one redirect target
    while ( $lookups < 2 && is_object( $title ) ) {
        $lookups++;

        $revision = Revision::newFromTitle( $title );
        $this->mOutput->addTemplate( $title, $title->getArticleID() );

        if ( !$revision ) {
            if ( $title->getNamespace() != NS_MEDIAWIKI ) {
                // No revision and no message fallback: keep whatever we have
                break;
            }
            $msgKey = $wgLang->lcfirst( $title->getText() );
            $content = wfMsgForContentNoTrans( $msgKey );
            if ( wfEmptyMsg( $msgKey, $content ) ) {
                $content = false;
                break;
            }
        } else {
            $content = $revision->getText();
        }

        if ( $content === false ) {
            break;
        }

        // If the text is a redirect, continue with its target; the loop
        // condition stops us when newFromRedirect() yields a non-object.
        $title = Title::newFromRedirect( $content );
    }

    return $content;
}
3287 
/**
 * Transclude a page from another wiki.
 *
 * Returns an interface message instead of the remote content when
 * $wgEnableScaryTranscluding is off or when the request URL is longer
 * than 255 bytes.
 *
 * @param Title $title Interwiki title to transclude
 * @param string $action Value for the "action" URL parameter
 * @return string Remote text (possibly cached) or an error message
 */
function interwikiTransclude( $title, $action ) {
    global $wgEnableScaryTranscluding;

    if ( $wgEnableScaryTranscluding ) {
        $remoteUrl = $title->getFullUrl( "action=$action" );
        if ( strlen( $remoteUrl ) <= 255 ) {
            return $this->fetchScaryTemplateMaybeFromCache( $remoteUrl );
        }
        return wfMsg('scarytranscludetoolong');
    }

    return wfMsg('scarytranscludedisabled');
}
/**
 * Return the body of a remote URL, using the 'transcache' table as a cache.
 *
 * A cached row is used while it is younger than $wgTranscludeCacheExpiry
 * seconds; otherwise the URL is fetched with Http::get() and the row is
 * (re)written on the master connection.
 *
 * @param string $url URL to fetch
 * @return string Page contents, or the 'scarytranscludefailed' message
 */
function fetchScaryTemplateMaybeFromCache($url) {
    global $wgTranscludeCacheExpiry;

    $reader = wfGetDB(DB_SLAVE);
    $cached = $reader->selectRow(
        'transcache',
        array('tc_time', 'tc_contents'),
        array('tc_url' => $url)
    );

    if ($cached) {
        $storedAt = $cached->tc_time;
        $stillFresh = $storedAt && time() < $storedAt + $wgTranscludeCacheExpiry;
        if ($stillFresh) {
            return $cached->tc_contents;
        }
    }

    // Cache miss or stale entry: go to the network
    $fetched = Http::get($url);
    if (!$fetched) {
        return wfMsg('scarytranscludefailed', $url);
    }

    $freshRow = array(
        'tc_url' => $url,
        'tc_time' => time(),
        'tc_contents' => $fetched);
    $writer = wfGetDB(DB_MASTER);
    $writer->replace('transcache', array('tc_url'), $freshRow);

    return $fetched;
}
3328 
3329 
/**
 * Callback that expands a triple-brace template argument.
 *
 * Looks the trimmed argument name up in the innermost entry of
 * $this->mArgStack. If it is absent and the output type is HTML or
 * preprocess, the first element of $matches['parts'] is used as the
 * default. Otherwise the original text is returned. The expansion is
 * charged against the 'arg' include-size counter.
 *
 * @param array $matches Needs the keys 'title', 'text' and 'parts'
 * @return string Replacement text
 */
function argSubstitution( $matches ) {
    $argName = trim( $matches['title'] );
    $frameArgs = end( $this->mArgStack );

    // Start from the unexpanded source text
    $expansion = $matches['text'];

    if ( array_key_exists( $argName, $frameArgs ) ) {
        $expansion = $frameArgs[$argName];
    } else {
        $wantsDefault = ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_PREPROCESS );
        if ( $wantsDefault && null != $matches['parts'] && count($matches['parts']) > 0 ) {
            $expansion = $matches['parts'][0];
        }
    }

    $withinLimit = $this->incrementIncludeSize( 'arg', strlen( $expansion ) );
    if ( $withinLimit ) {
        return $expansion;
    }

    return $matches['text'] .
        '<!-- WARNING: argument omitted, expansion size too large -->';
}
3352 
/**
 * Add $size to one of the include-size counters unless that would push it
 * past the limit returned by the parser options' getMaxIncludeSize().
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Number of bytes to add
 * @return bool True if the counter was increased, false if the limit
 *   would be exceeded (the counter is then left untouched)
 */
function incrementIncludeSize( $type, $size ) {
    $projected = $this->mIncludeSizes[$type] + $size;

    if ( $projected <= $this->mOptions->getMaxIncludeSize() ) {
        $this->mIncludeSizes[$type] = $projected;
        return true;
    }

    return false;
}
3368 
/**
 * Detect and remove the 'nogallery' magic word from the text and record
 * on the parser output (mNoGallery) whether it was present.
 *
 * @param string $text Text to scan; modified in place
 */
function stripNoGallery( &$text ) {
    $noGalleryWord = MagicWord::get( 'nogallery' );
    $wasPresent = $noGalleryWord->matchAndRemove( $text );
    $this->mOutput->mNoGallery = $wasPresent;
}
3378 
/**
 * Process the 'notoc' and 'toc' magic words.
 *
 * 'notoc' is removed and turns the table of contents off. If 'toc'
 * occurs, the TOC is turned back on, its position is forced, the first
 * occurrence becomes the <!--MWTOC--> placeholder and any further
 * occurrences are removed.
 *
 * @param string $text
 * @return string Text with the magic words processed
 */
function stripToc( $text ) {
    $noTocWord = MagicWord::get( 'notoc' );
    if( $noTocWord->matchAndRemove( $text ) ) {
        $this->mShowToc = false;
    }

    $tocWord = MagicWord::get( 'toc' );
    if( !$tocWord->match( $text ) ) {
        return $text;
    }

    $this->mShowToc = true;
    $this->mForceTocPosition = true;

    // The first occurrence becomes the placeholder that is filled in
    // with the real TOC at the end; every other occurrence is dropped.
    $withPlaceholder = $tocWord->replace( '<!--MWTOC-->', $text, 1 );
    return $tocWord->replace( '', $withPlaceholder );
}
3403 
/**
 * Rebuild all <hN> headings in already-rendered HTML: add anchors,
 * optional numbering and section edit links, and build the table of
 * contents.
 *
 * The TOC is inserted before the first heading, or at the <!--MWTOC-->
 * placeholder when $this->mForceTocPosition is set. The magic words
 * 'noeditsection', 'newsectionlink' and 'forcetoc' are consumed here.
 *
 * @param string $text HTML containing <h1>..<h6> elements
 * @param bool $isMain When false, no TOC is inserted before the first heading
 * @return string The HTML with rebuilt headings (and TOC, if any)
 */
function formatHeadings( $text, $isMain=true ) {
    global $wgMaxTocLevel, $wgContLang;

    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    if( !$this->mTitle->quickUserCan( 'edit' ) ) {
        $showEditLink = 0;
    } else {
        $showEditLink = $this->mOptions->getEditSection();
    }

    # Inhibit editsection links if requested in the page
    $esw = MagicWord::get( 'noeditsection' );
    if( $esw->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now
    $matches = array();
    $numMatches = preg_match_all( '/<H(?P<level>[1-6])(?P<attrib>.*?'.'>)(?P<header>.*?)<\/H[1-6] *>/i', $text, $matches );

    # if there are fewer than 4 headlines in the article, do not show TOC
    # unless it's been explicitly enabled.
    $enoughToc = $this->mShowToc &&
        (($numMatches >= 4) || $this->mForceTocPosition);

    # Allow user to stipulate that a page should have a "new section"
    # link added via the 'newsectionlink' magic word
    $mw = MagicWord::get( 'newsectionlink' );
    if( $mw->matchAndRemove( $text ) ) {
        $this->mOutput->setNewSection( true );
    }

    # if the 'forcetoc' magic word occurs in the HTML,
    # override above conditions and always show TOC above first header
    $mw = MagicWord::get( 'forcetoc' );
    if ($mw->matchAndRemove( $text ) ) {
        $this->mShowToc = true;
        $enoughToc = true;
    }

    # Never ever show TOC if no headers
    if( $numMatches < 1 ) {
        $enoughToc = false;
    }

    # We need this to perform operations on the HTML
    $sk = $this->mOptions->getSkin();

    # headline counter
    $headlineCount = 0;
    $sectionCount = 0; # headlineCount excluding template sections

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toc = '';
    $full = '';
    $head = array();
    $sublevelCount = array();
    $levelCount = array();
    $level = 0;
    $prevlevel = 0;
    $toclevel = 0;
    $prevtoclevel = 0;

    # Number of times each anchor name has been seen so far. This is kept
    # separate from the per-headline $refcount so that a heading whose
    # escaped text is an integer (e.g. "1") cannot collide with a
    # headline index.
    $anchorCounts = array();
    $refcount = array();

    foreach( $matches['header'] as $headline ) {
        $istemplate = 0;
        $templatetitle = '';
        $templatesection = 0;
        $numbering = '';
        $mat = array();
        if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) {
            $istemplate = 1;
            $templatetitle = base64_decode($mat[1]);
            $templatesection = 1 + (int)base64_decode($mat[2]);
            $headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline);
        }

        if( $toclevel ) {
            $prevlevel = $level;
            $prevtoclevel = $toclevel;
        }
        $level = $matches['level'][$headlineCount];

        if( $doNumberHeadings || $enoughToc ) {

            if ( $level > $prevlevel ) {
                # Increase TOC level
                $toclevel++;
                $sublevelCount[$toclevel] = 0;
                if( $toclevel<$wgMaxTocLevel ) {
                    $toc .= $sk->tocIndent();
                }
            }
            elseif ( $level < $prevlevel && $toclevel > 1 ) {
                # Decrease TOC level, find level to jump to

                if ( $toclevel == 2 && $level <= $levelCount[1] ) {
                    # Can only go down to level 1
                    $toclevel = 1;
                } else {
                    for ($i = $toclevel; $i > 0; $i--) {
                        if ( $levelCount[$i] == $level ) {
                            # Found last matching level
                            $toclevel = $i;
                            break;
                        }
                        elseif ( $levelCount[$i] < $level ) {
                            # Found first matching level below current level
                            $toclevel = $i + 1;
                            break;
                        }
                    }
                }
                if( $toclevel<$wgMaxTocLevel ) {
                    $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel );
                }
            }
            else {
                # No change in level, end TOC line
                if( $toclevel<$wgMaxTocLevel ) {
                    $toc .= $sk->tocLineEnd();
                }
            }

            $levelCount[$toclevel] = $level;

            # count number of headlines for each level
            if ( !isset( $sublevelCount[$toclevel] ) ) {
                $sublevelCount[$toclevel] = 0;
            }
            $sublevelCount[$toclevel]++;
            $dot = 0;
            for( $i = 1; $i <= $toclevel; $i++ ) {
                if( !empty( $sublevelCount[$i] ) ) {
                    if( $dot ) {
                        $numbering .= '.';
                    }
                    $numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
                    $dot = 1;
                }
            }
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $canonized_headline = $this->mStripState->unstripBoth( $headline );

        # Replace <!--LINK n--> and <!--IWLINK n--> placeholders by their
        # link text. This goes through the callback-based helper instead of
        # preg_replace() with the /e modifier, which evaluates code, is not
        # available in PHP 7+, and produced invalid code for an empty id.
        # Placeholders with an unknown id are left alone here and removed
        # by the tag stripping just below.
        $canonized_headline = $this->replaceLinkHoldersText( $canonized_headline );

        # strip out HTML
        $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
        $tocline = trim( $canonized_headline );
        # Save headline for section edit hint before it's escaped
        $headline_hint = trim( $canonized_headline );
        $canonized_headline = Sanitizer::escapeId( $tocline );

        # count how many times this anchor occurred so we can track dupes
        if ( isset( $anchorCounts[$canonized_headline] ) ) {
            $anchorCounts[$canonized_headline]++;
        } else {
            $anchorCounts[$canonized_headline] = 1;
        }
        $refcount[$headlineCount] = $anchorCounts[$canonized_headline];

        # Don't number the heading if it is the only one (looks silly)
        if( $doNumberHeadings && count( $matches['header'] ) > 1) {
            # the two are different if the line contains a link
            $headline=$numbering . ' ' . $headline;
        }

        # Create the anchor for linking from the TOC to the section
        $anchor = $canonized_headline;
        if($refcount[$headlineCount] > 1 ) {
            $anchor .= '_' . $refcount[$headlineCount];
        }
        if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
            $toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel);
        }
        # give headline the correct <h#> tag
        if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
            if( $istemplate ) {
                $editlink = $sk->editSectionLinkForOther($templatetitle, $templatesection);
            } else {
                $editlink = $sk->editSectionLink($this->mTitle, $sectionCount+1, $headline_hint);
            }
        } else {
            $editlink = '';
        }
        $head[$headlineCount] = $sk->makeHeadline( $level, $matches['attrib'][$headlineCount], $anchor, $headline, $editlink );

        $headlineCount++;
        if( !$istemplate ) {
            $sectionCount++;
        }
    }

    if( $enoughToc ) {
        if( $toclevel<$wgMaxTocLevel ) {
            $toc .= $sk->tocUnindent( $toclevel - 1 );
        }
        $toc = $sk->tocList( $toc );
    }

    # split up and insert constructed headlines
    # The closing tag pattern must accept exactly what the match pattern
    # above accepts (including "</hN >"), otherwise $blocks and $head
    # would get out of step.
    $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6] *>/i', $text );
    $i = 0;

    foreach( $blocks as $block ) {
        # An [edit] link for the top block of text used to be emitted here
        # when section editing is enabled. It is disabled because it broke
        # block formatting, for example a bullet point in the top line.
        $full .= $block;
        if( $enoughToc && !$i && $isMain && !$this->mForceTocPosition ) {
            # Top anchor now in skin
            $full = $full.$toc;
        }

        if( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }
    if( $this->mForceTocPosition ) {
        return str_replace( '<!--MWTOC-->', $toc, $full );
    } else {
        return $full;
    }
}
3654 
/**
 * Transform wikitext before it is saved: normalise CRLF line endings to
 * LF, strip protected sections (without touching 'gallery'), run the
 * second pre-save pass and restore the stripped sections.
 *
 * @param string $text Wikitext to transform
 * @param Title &$title Title the text belongs to
 * @param User $user User performing the save (passed on to pstPass2())
 * @param ParserOptions $options
 * @param bool $clearState Whether to reset the parser state first
 * @return string The transformed wikitext
 */
function preSaveTransform( $text, &$title, $user, $options, $clearState = true ) {
    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->setOutputType( OT_WIKI );
    if ( $clearState ) {
        $this->clearState();
    }

    $stripState = new StripState;

    $normalized = str_replace( "\r\n", "\n", $text );
    $stripped = $this->strip( $normalized, $stripState, true, array( 'gallery' ) );
    $transformed = $this->pstPass2( $stripped, $stripState, $user );

    return $stripState->unstripBoth( $transformed );
}
3686 
/**
 * Second pass of the pre-save transform: expand variables, replace the
 * tilde signature markers and expand the "pipe trick" link shortcuts.
 *
 * @param string $text Wikitext (already stripped once)
 * @param StripState &$stripState Strip state shared with preSaveTransform()
 * @param User $user User whose signature replaces ~~~ and ~~~~
 * @return string Transformed text with trailing whitespace removed
 */
function pstPass2( $text, &$stripState, $user ) {
    global $wgContLang, $wgLocaltimezone, $wgLegalTitleChars;

    # The timestamp ends up as hardcoded wikitext in the database, so it
    # is formatted with the content language rather than with the
    # preferences of whoever happens to be saving.
    $useLocalTz = isset( $wgLocaltimezone );
    if ( $useLocalTz ) {
        $oldtz = getenv( 'TZ' );
        putenv( 'TZ='.$wgLocaltimezone );
    }
    $timestamp = $wgContLang->timeanddate( date( 'YmdHis' ), false, false) .
        ' (' . date( 'T' ) . ')';
    if ( $useLocalTz ) {
        putenv( 'TZ='.$oldtz );
    }

    # Variable replacement, then strip whatever <nowiki> etc. that added
    $text = $this->replaceVariables( $text );
    $text = $this->strip( $text, $stripState, false, array( 'gallery' ) );

    # Signatures
    $signature = $this->getUserSig( $user );
    $tildeMap = array(
        '~~~~~' => $timestamp,
        '~~~~' => "$signature $timestamp",
        '~~~' => $signature
    );
    $text = strtr( $text, $tildeMap );

    # Context links: [[|name]] and [[name (context)|]]
    $tc = "[$wgLegalTitleChars]";
    $nc = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!

    $parenContext = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]]
    $commaContext = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]]
    $leadingPipe = "/\[\[\\|($tc+)]]/"; # [[|page]]

    # The parenthesised form goes first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
    $text = preg_replace( $parenContext, '[[\\1\\2\\3|\\2]]', $text );
    $text = preg_replace( $commaContext, '[[\\1\\2\\3\\4|\\2]]', $text );

    # [[|page]] borrows namespace and context from the current title
    $t = $this->mTitle->getText();
    $m = array();
    $hasContext =
        preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m )
        || ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && '' != "$m[1]$m[2]" );

    if ( $hasContext ) {
        $text = preg_replace( $leadingPipe, "[[$m[1]\\1$m[2]|\\1]]", $text );
    } else {
        # if there's no context, don't bother duplicating the title
        $text = preg_replace( $leadingPipe, '[[\\1]]', $text );
    }

    # Trim trailing whitespace
    return rtrim( $text );
}
3754 
/**
 * Build the wikitext signature for a user.
 *
 * With the 'fancysig' option the nickname is treated as markup: if it
 * validates it is cleaned and returned as-is. Otherwise the result is a
 * link to the user page labelled with the escaped nickname; the user
 * name is the label when the nickname is empty or failed validation.
 *
 * @param User &$user
 * @return string Signature wikitext
 */
function getUserSig( &$user ) {
    $username = $user->getName();
    $nickname = $user->getOption( 'nickname' );
    if ( $nickname === '' ) {
        $nickname = $username;
    }

    if( $user->getBoolOption( 'fancysig' ) !== false ) {
        # Sig. might contain markup; validate this
        if( $this->validateSig( $nickname ) !== false ) {
            # Validated; clean up (if needed) and return it
            return $this->cleanSig( $nickname, true );
        }
        # Failed to validate; fall back to the default
        $nickname = $username;
        wfDebug( "Parser::getUserSig: $username has bad XML tags in signature.\n" );
    }

    # Make sure the nickname doesn't smuggle a signature into the signature
    $label = $this->cleanSigInSig( $nickname );

    # Plain signature: a link to the user page
    $userpage = $user->getUserPage();
    return '[[' . $userpage->getPrefixedText() . '|' . wfEscapeWikiText( $label ) . ']]';
}
3787 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|false The unchanged text if well-formed, false otherwise
 */
function validateSig( $text ) {
    if ( wfIsWellFormedXmlFragment( $text ) ) {
        return $text;
    }
    return false;
}
3797 
/**
 * Clean up signature text: rewrite every "{{" that is not already
 * followed by the 'subst' magic word into "{{" plus that word's first
 * synonym, remove nested signature markers and expand variables.
 *
 * Starts an external parse on $wgTitle with fresh ParserOptions and
 * clears the parser state again before returning.
 *
 * @param string $text Signature text
 * @param bool $parsing Selects OT_WIKI output when true, OT_MSG otherwise
 * @return string Cleaned signature text
 */
function cleanSig( $text, $parsing = false ) {
    global $wgTitle;

    $outputType = $parsing ? OT_WIKI : OT_MSG;
    $this->startExternalParse( $wgTitle, new ParserOptions(), $outputType );

    $substWord = MagicWord::get( 'subst' );
    $unsubstedBraces = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
    $forcedSubst = '{{' . $substWord->getSynonym( 0 );

    $cleaned = preg_replace( $unsubstedBraces, $forcedSubst, $text );
    $cleaned = $this->cleanSigInSig( $cleaned );
    $cleaned = $this->replaceVariables( $cleaned );

    $this->clearState();
    return $cleaned;
}
3823 
/**
 * Remove every run of three to five tildes from the text, so that a
 * signature cannot itself contain signature markers.
 *
 * @param string $text
 * @return string Text without ~~~, ~~~~ or ~~~~~ sequences
 */
function cleanSigInSig( $text ) {
    return preg_replace( '/~{3,5}/', '', $text );
}
3833 
/**
 * Prepare the parser for use from outside a regular parse: set title,
 * options and output type, and optionally reset the parser state.
 *
 * @param Title &$title Stored by reference in $this->mTitle
 * @param ParserOptions $options
 * @param int $outputType Passed to setOutputType()
 * @param bool $clearState Whether to call clearState() afterwards
 */
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
    $this->mTitle =& $title;
    $this->mOptions = $options;
    $this->setOutputType( $outputType );

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
3847 
/**
 * Expand variables in an interface message using OT_MSG output.
 *
 * The parse runs against $wgTitle, or against a title named 'msg' when
 * $wgTitle is unset or a FakeTitle. A static flag guards against
 * re-entrant calls: a nested call returns its input unchanged.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @return string Message text with variables replaced
 */
function transformMsg( $text, $options ) {
    global $wgTitle;
    static $executing = false;

    $fname = "Parser::transformMsg";

    # Guard against infinite recursion
    if ( $executing ) {
        return $text;
    }
    $executing = true;

    wfProfileIn($fname);

    try {
        if ( $wgTitle && !( $wgTitle instanceof FakeTitle ) ) {
            $this->mTitle = $wgTitle;
        } else {
            $this->mTitle = Title::newFromText('msg');
        }
        $this->mOptions = $options;
        $this->setOutputType( OT_MSG );
        $this->clearState();
        $text = $this->replaceVariables( $text );
    } catch ( Exception $e ) {
        # Release the guard and close the profiling section before
        # propagating; otherwise every later call in this process would
        # take the "already executing" shortcut and return untransformed
        # text.
        $executing = false;
        wfProfileOut($fname);
        throw $e;
    }

    $executing = false;
    wfProfileOut($fname);
    return $text;
}
3884 
/**
 * Register a callback for an extension tag. Tag names are stored in
 * lower case.
 *
 * @param string $tag Tag name
 * @param mixed $callback Callback to store for the tag
 * @return mixed The callback previously stored for this tag, or null
 */
function setHook( $tag, $callback ) {
    $key = strtolower( $tag );

    $previous = null;
    if ( isset( $this->mTagHooks[$key] ) ) {
        $previous = $this->mTagHooks[$key];
    }

    $this->mTagHooks[$key] = $callback;
    return $previous;
}
3907 
/**
 * Register a parser function callback under a magic word id and index
 * all synonyms of that magic word for lookup.
 *
 * Synonyms are lower-cased when the magic word is case-insensitive, get
 * a leading '#' unless SFH_NO_HASH is set in $flags, and lose one
 * trailing colon.
 *
 * @param string $id Magic word id
 * @param mixed $callback Callback to store for the function
 * @param int $flags Bitfield; SFH_NO_HASH suppresses the '#' prefix
 * @return mixed The callback previously stored for $id, or null
 * @throws MWException If $id does not resolve to a magic word
 */
function setFunctionHook( $id, $callback, $flags = 0 ) {
    # Validate the id before touching any state, so that a failed call
    # does not leave a hook registered without any synonym pointing to it.
    $mw = MagicWord::get( $id );
    if( !$mw ) {
        throw new MWException( 'Parser::setFunctionHook() expecting a magic word identifier.' );
    }

    $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id] : null;
    $this->mFunctionHooks[$id] = $callback;

    # Add to function cache
    $synonyms = $mw->getSynonyms();
    $sensitive = intval( $mw->isCaseSensitive() );

    foreach ( $synonyms as $syn ) {
        # Case
        if ( !$sensitive ) {
            $syn = strtolower( $syn );
        }
        # Add leading hash
        if ( !( $flags & SFH_NO_HASH ) ) {
            $syn = '#' . $syn;
        }
        # Remove trailing colon
        if ( substr( $syn, -1, 1 ) == ':' ) {
            $syn = substr( $syn, 0, -1 );
        }
        $this->mFunctionSynonyms[$sensitive][$syn] = $id;
    }
    return $oldVal;
}
3961 
/**
 * List the ids of all registered parser function hooks.
 *
 * @return array Keys of $this->mFunctionHooks
 */
function getFunctionHooks() {
    $hookIds = array_keys( $this->mFunctionHooks );
    return $hookIds;
}
3970 
/**
 * Replace <!--LINK key--> and <!--IWLINK key--> placeholders in $text by
 * real link HTML produced by the skin.
 *
 * For internal links the existence of each target is resolved through
 * the link cache, one batched query on the page table and, when the
 * content language has variants, a second query for variant titles.
 * Targets are recorded on the parser output with addLink(), and the
 * link cache is updated as a side effect.
 *
 * @param string &$text Text containing placeholders; modified in place
 * @param int $options Bitfield; RLH_FOR_UPDATE appends FOR UPDATE to the queries
 * @return array Map of prefixed DB key => 0 (broken), 1 (known) or 2 (stub)
 */
3980  function replaceLinkHolders( &$text, $options = 0 ) {
3981  global $wgUser;
3982  global $wgContLang;
3983 
3984  $fname = 'Parser::replaceLinkHolders';
3985  wfProfileIn( $fname );
3986 
      # $pdbks: holder key => prefixed DB key; $colours: prefixed DB key => link state
3987  $pdbks = array();
3988  $colours = array();
3989  $sk = $this->mOptions->getSkin();
3990  $linkCache =& LinkCache::singleton();
3991 
3992  if ( !empty( $this->mLinkHolders['namespaces'] ) ) {
3993  wfProfileIn( $fname.'-check' );
3994  $dbr = wfGetDB( DB_SLAVE );
3995  $page = $dbr->tableName( 'page' );
3996  $threshold = $wgUser->getOption('stubthreshold');
3997 
3998  # Sort by namespace
      # (the query builder below relies on equal namespaces being adjacent)
3999  asort( $this->mLinkHolders['namespaces'] );
4000 
4001  # Generate query
4002  $query = false;
4003  $current = null;
4004  foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
4005  # Make title object
4006  $title = $this->mLinkHolders['titles'][$key];
4007 
4008  # Skip invalid entries.
4009  # Result will be ugly, but prevents crash.
4010  if ( is_null( $title ) ) {
4011  continue;
4012  }
4013  $pdbk = $pdbks[$key] = $title->getPrefixedDBkey();
4014 
4015  # Check if it's a static known link, e.g. interwiki
4016  if ( $title->isAlwaysKnown() ) {
4017  $colours[$pdbk] = 1;
4018  } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
4019  $colours[$pdbk] = 1;
4020  $this->mOutput->addLink( $title, $id );
4021  } elseif ( $linkCache->isBadLink( $pdbk ) ) {
4022  $colours[$pdbk] = 0;
4023  } else {
4024  # Not in the link cache, add it to the query
      # The query has one "(page_namespace=N AND page_title IN(...))" group per namespace
4025  if ( !isset( $current ) ) {
4026  $current = $ns;
4027  $query = "SELECT page_id, page_namespace, page_title";
4028  if ( $threshold > 0 ) {
4029  $query .= ', page_len, page_is_redirect';
4030  }
4031  $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN(";
4032  } elseif ( $current != $ns ) {
4033  $current = $ns;
4034  $query .= ")) OR (page_namespace=$ns AND page_title IN(";
4035  } else {
4036  $query .= ', ';
4037  }
4038 
4039  $query .= $dbr->addQuotes( $this->mLinkHolders['dbkeys'][$key] );
4040  }
4041  }
4042  if ( $query ) {
4043  $query .= '))';
      # NOTE(review): FOR UPDATE is appended to a query that runs on the DB_SLAVE connection - confirm this is intended
4044  if ( $options & RLH_FOR_UPDATE ) {
4045  $query .= ' FOR UPDATE';
4046  }
4047 
4048  $res = $dbr->query( $query, $fname );
4049 
4050  # Fetch data and form into an associative array
4051  # non-existent = broken
4052  # 1 = known
4053  # 2 = stub
4054  while ( $s = $dbr->fetchObject($res) ) {
4055  $title = Title::makeTitle( $s->page_namespace, $s->page_title );
4056  $pdbk = $title->getPrefixedDBkey();
4057  $linkCache->addGoodLinkObj( $s->page_id, $title );
4058  $this->mOutput->addLink( $title, $s->page_id );
4059 
      # A page counts as a stub only if it is in namespace 0, is not a redirect and is shorter than the threshold
4060  if ( $threshold > 0 ) {
4061  $size = $s->page_len;
4062  if ( $s->page_is_redirect || $s->page_namespace != 0 || $size >= $threshold ) {
4063  $colours[$pdbk] = 1;
4064  } else {
4065  $colours[$pdbk] = 2;
4066  }
4067  } else {
4068  $colours[$pdbk] = 1;
4069  }
4070  }
4071  }
4072  wfProfileOut( $fname.'-check' );
4073 
4074  # Do a second query for different language variants of links and categories
4075  if($wgContLang->hasVariants()){
4076  $linkBatch = new LinkBatch();
4077  $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
4078  $categoryMap = array(); // maps $category_variant => $category (dbkeys)
4079  $varCategories = array(); // category replacements oldDBkey => newDBkey
4080 
4081  $categories = $this->mOutput->getCategoryLinks();
4082 
4083  // Add variants of links to link batch
4084  foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
4085  $title = $this->mLinkHolders['titles'][$key];
4086  if ( is_null( $title ) )
4087  continue;
4088 
4089  $pdbk = $title->getPrefixedDBkey();
4090  $titleText = $title->getText();
4091 
4092  // generate all variants of the link title text
4093  $allTextVariants = $wgContLang->convertLinkToAllVariants($titleText);
4094 
4095  // if link was not found (in first query), add all variants to query
4096  if ( !isset($colours[$pdbk]) ){
4097  foreach($allTextVariants as $textVariant){
4098  if($textVariant != $titleText){
4099  $variantTitle = Title::makeTitle( $ns, $textVariant );
4100  if(is_null($variantTitle)) continue;
4101  $linkBatch->addObj( $variantTitle );
4102  $variantMap[$variantTitle->getPrefixedDBkey()][] = $key;
4103  }
4104  }
4105  }
4106  }
4107 
4108  // process categories, check if a category exists in some variant
4109  foreach( $categories as $category){
4110  $variants = $wgContLang->convertLinkToAllVariants($category);
4111  foreach($variants as $variant){
4112  if($variant != $category){
4113  $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) );
4114  if(is_null($variantTitle)) continue;
4115  $linkBatch->addObj( $variantTitle );
4116  $categoryMap[$variant] = $category;
4117  }
4118  }
4119  }
4120 
4121 
4122  if(!$linkBatch->isEmpty()){
4123  // construct query
4124  $titleClause = $linkBatch->constructSet('page', $dbr);
4125 
4126  $variantQuery = "SELECT page_id, page_namespace, page_title";
4127  if ( $threshold > 0 ) {
4128  $variantQuery .= ', page_len, page_is_redirect';
4129  }
4130 
4131  $variantQuery .= " FROM $page WHERE $titleClause";
4132  if ( $options & RLH_FOR_UPDATE ) {
4133  $variantQuery .= ' FOR UPDATE';
4134  }
4135 
4136  $varRes = $dbr->query( $variantQuery, $fname );
4137 
4138  // for each found variants, figure out link holders and replace
4139  while ( $s = $dbr->fetchObject($varRes) ) {
4140 
4141  $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
4142  $varPdbk = $variantTitle->getPrefixedDBkey();
4143  $vardbk = $variantTitle->getDBkey();
4144 
4145  $holderKeys = array();
4146  if(isset($variantMap[$varPdbk])){
4147  $holderKeys = $variantMap[$varPdbk];
4148  $linkCache->addGoodLinkObj( $s->page_id, $variantTitle );
4149  $this->mOutput->addLink( $variantTitle, $s->page_id );
4150  }
4151 
4152  // loop over link holders
4153  foreach($holderKeys as $key){
4154  $title = $this->mLinkHolders['titles'][$key];
4155  if ( is_null( $title ) ) continue;
4156 
4157  $pdbk = $title->getPrefixedDBkey();
4158 
      // only holders whose original title is still unresolved are redirected to the variant
4159  if(!isset($colours[$pdbk])){
4160  // found link in some of the variants, replace the link holder data
4161  $this->mLinkHolders['titles'][$key] = $variantTitle;
4162  $this->mLinkHolders['dbkeys'][$key] = $variantTitle->getDBkey();
4163 
4164  // set pdbk and colour
4165  $pdbks[$key] = $varPdbk;
4166  if ( $threshold > 0 ) {
4167  $size = $s->page_len;
4168  if ( $s->page_is_redirect || $s->page_namespace != 0 || $size >= $threshold ) {
4169  $colours[$varPdbk] = 1;
4170  } else {
4171  $colours[$varPdbk] = 2;
4172  }
4173  }
4174  else {
4175  $colours[$varPdbk] = 1;
4176  }
4177  }
4178  }
4179 
4180  // check if the object is a variant of a category
4181  if(isset($categoryMap[$vardbk])){
4182  $oldkey = $categoryMap[$vardbk];
4183  if($oldkey != $vardbk)
4184  $varCategories[$oldkey]=$vardbk;
4185  }
4186  }
4187 
4188  // rebuild the categories in original order (if there are replacements)
4189  if(count($varCategories)>0){
4190  $newCats = array();
4191  $originalCats = $this->mOutput->getCategories();
4192  foreach($originalCats as $cat => $sortkey){
4193  // make the replacement
4194  if( array_key_exists($cat,$varCategories) )
4195  $newCats[$varCategories[$cat]] = $sortkey;
4196  else $newCats[$cat] = $sortkey;
4197  }
4198  $this->mOutput->setCategoryLinks($newCats);
4199  }
4200  }
4201  }
4202 
4203  # Construct search and replace arrays
      # NOTE(review): unlike the loops above, this loop does not skip holders whose title is null, so
      # $pdbks[$key] is unset for them and a null title reaches addBadLinkObj() - confirm whether that can occur
4204  wfProfileIn( $fname.'-construct' );
4205  $replacePairs = array();
4206  foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) {
4207  $pdbk = $pdbks[$key];
4208  $searchkey = "<!--LINK $key-->";
4209  $title = $this->mLinkHolders['titles'][$key];
      # empty() covers both "never resolved" and an explicit 0: the link is broken
4210  if ( empty( $colours[$pdbk] ) ) {
4211  $linkCache->addBadLinkObj( $title );
4212  $colours[$pdbk] = 0;
4213  $this->mOutput->addLink( $title, 0 );
4214  $replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title,
4215  $this->mLinkHolders['texts'][$key],
4216  $this->mLinkHolders['queries'][$key] );
4217  } elseif ( $colours[$pdbk] == 1 ) {
4218  $replacePairs[$searchkey] = $sk->makeKnownLinkObj( $title,
4219  $this->mLinkHolders['texts'][$key],
4220  $this->mLinkHolders['queries'][$key] );
4221  } elseif ( $colours[$pdbk] == 2 ) {
4222  $replacePairs[$searchkey] = $sk->makeStubLinkObj( $title,
4223  $this->mLinkHolders['texts'][$key],
4224  $this->mLinkHolders['queries'][$key] );
4225  }
4226  }
4227  $replacer = new HashtableReplacer( $replacePairs, 1 );
4228  wfProfileOut( $fname.'-construct' );
4229 
4230  # Do the thing
4231  wfProfileIn( $fname.'-replace' );
4232  $text = preg_replace_callback(
4233  '/(<!--LINK .*?-->)/',
4234  $replacer->cb(),
4235  $text);
4236 
4237  wfProfileOut( $fname.'-replace' );
4238  }
4239 
4240  # Now process interwiki link holders
4241  # This is quite a bit simpler than internal links
4242  if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) {
4243  wfProfileIn( $fname.'-interwiki' );
4244  # Make interwiki link HTML
      # Here the replacement table is keyed by the bare holder key, which is what the regex below captures
4245  $replacePairs = array();
4246  foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) {
4247  $title = $this->mInterwikiLinkHolders['titles'][$key];
4248  $replacePairs[$key] = $sk->makeLinkObj( $title, $link );
4249  }
4250  $replacer = new HashtableReplacer( $replacePairs, 1 );
4251 
4252  $text = preg_replace_callback(
4253  '/<!--IWLINK (.*?)-->/',
4254  $replacer->cb(),
4255  $text );
4256  wfProfileOut( $fname.'-interwiki' );
4257  }
4258 
4259  wfProfileOut( $fname );
4260  return $colours;
4261  }
4262 
/**
 * Substitute every LINK / IWLINK placeholder comment found in $text using
 * replaceLinkHoldersTextCallback().
 *
 * @param string $text Text that may contain link placeholder comments
 * @return string Whatever preg_replace_callback() produces for $text
 */
function replaceLinkHoldersText( $text ) {
	$profileSection = 'Parser::replaceLinkHoldersText';
	$placeholderPattern = '/<!--(LINK|IWLINK) (.*?)-->/';
	$expander = array( $this, 'replaceLinkHoldersTextCallback' );

	wfProfileIn( $profileSection );
	$expanded = preg_replace_callback( $placeholderPattern, $expander, $text );
	wfProfileOut( $profileSection );

	return $expanded;
}
4281 
/**
 * preg_replace_callback() handler for replaceLinkHoldersText().
 *
 * @param array $matches [0] is the whole placeholder, [1] the placeholder
 *   type ('LINK' or 'IWLINK') and [2] the holder key
 * @return string The text stored for that holder key, or the placeholder
 *   itself when no text is stored under the key
 */
function replaceLinkHoldersTextCallback( $matches ) {
	$kind = $matches[1];
	$holderKey = $matches[2];

	switch ( $kind ) {
		case 'LINK':
			if ( isset( $this->mLinkHolders['texts'][$holderKey] ) ) {
				return $this->mLinkHolders['texts'][$holderKey];
			}
			break;
		case 'IWLINK':
			if ( isset( $this->mInterwikiLinkHolders['texts'][$holderKey] ) ) {
				return $this->mInterwikiLinkHolders['texts'][$holderKey];
			}
			break;
	}

	// Unknown type or unknown key: leave the placeholder untouched
	return $matches[0];
}
4301 
/**
 * Build the HTML for a <pre> tag.
 *
 * @param string $text Raw content found between the pre tags
 * @param array $attribs Attributes given on the tag; they are passed through
 *   Sanitizer::validateTagAttributes() for the 'pre' element
 * @return string The opening tag, the escaped content and a closing </pre>
 */
function renderPreTag( $text, $attribs ) {
	// Backwards-compatibility hack: run the <nowiki>...</nowiki> delimiter
	// replacement (case-insensitive, replacement '$1') before escaping.
	// NOTE(review): presumably this keeps only the inner content - confirm
	// against StringUtils::delimiterReplace().
	$inner = StringUtils::delimiterReplace( '<nowiki>', '</nowiki>', '$1', $text, 'i' );
	$safeAttribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );

	$html = wfOpenElement( 'pre', $safeAttribs );
	$html .= Xml::escapeTagsOnly( $inner );
	$html .= '</pre>';

	return $html;
}
4314 
/**
 * Render the content of a gallery block into HTML through ImageGallery.
 *
 * Each non-empty line of $text has the form "Title" or "Title|label", e.g.
 *   Image:someimage.jpg|This is some image
 * The label is parsed as wikitext (without clearing the parser state) and
 * attached to the gallery entry.
 *
 * @param string $text Newline-separated gallery lines
 * @param array $params Tag attributes; 'caption', 'perrow', 'widths' and
 *   'heights' are honoured when present
 * @return string Result of ImageGallery::toHTML()
 */
function renderImageGallery( $text, $params ) {
	$gallery = new ImageGallery();
	$gallery->setContextTitle( $this->mTitle );
	$gallery->setShowBytes( false );
	$gallery->setShowFilename( false );
	$gallery->setParsing();
	$gallery->useSkin( $this->mOptions->getSkin() );

	if ( isset( $params['caption'] ) ) {
		// Escape first, then let internal links inside the caption be replaced
		$captionHtml = htmlspecialchars( $params['caption'] );
		$captionHtml = $this->replaceInternalLinks( $captionHtml );
		$gallery->setCaptionHtml( $captionHtml );
	}

	// Optional attributes that map one-to-one onto an ImageGallery setter
	$optionalSetters = array(
		'perrow' => 'setPerRow',
		'widths' => 'setWidths',
		'heights' => 'setHeights',
	);
	foreach ( $optionalSetters as $attribute => $setter ) {
		if ( isset( $params[$attribute] ) ) {
			$gallery->$setter( $params[$attribute] );
		}
	}

	foreach ( explode( "\n", $text ) as $line ) {
		$matches = array();
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		if ( empty( $matches ) ) {
			// Empty line (or one starting with a pipe): nothing to add
			continue;
		}

		$title = Title::newFromText( $matches[1] );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		$label = isset( $matches[3] ) ? $matches[3] : '';

		$parsedLabel = $this->parse(
			$label,
			$this->mTitle,
			$this->mOptions,
			false, // Strip whitespace...?
			false // Don't clear state!
		);
		$labelHtml = $parsedLabel->getText();

		$gallery->add( new Image( $title ), $labelHtml );

		# Only register real images with the output (bug #5586)
		if ( $title->getNamespace() == NS_IMAGE ) {
			$this->mOutput->addImage( $title->getDBkey() );
		}
	}

	return $gallery->toHTML();
}
4387 
/**
 * Interpret the pipe-separated option string of an image link and let the
 * skin build the resulting HTML.
 *
 * Every option is trimmed and tested against the img_* magic words in this
 * order; the first match wins:
 *  - thumbnail: thumbnail mode
 *  - manual thumbnail: thumbnail mode with an explicitly named thumbnail
 *  - left, right, center, none: horizontal alignment
 *  - baseline, sub, super, top, text-top, middle, bottom, text-bottom:
 *    vertical alignment (no % or length right now)
 *  - page: page selection in a multipage document
 *  - width: "N" or "WxH" in pixels; only the first width option is used
 *  - framed: framed mode
 * Any option matching none of the above becomes the caption (the last such
 * option wins).
 *
 * @TODO: let the MediaHandler specify its transform parameters
 *
 * @param mixed $nt Image title, handed unchanged to the skin
 *   (presumably a Title object - confirm against callers)
 * @param string $options Option text of the form "options|alt text"
 * @return mixed Whatever the skin's makeImageLinkObj() returns
 */
function makeImage( $nt, $options ) {
	$horizontalAlignments = array( 'left', 'right', 'center', 'none' );
	$verticalAlignments = array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom' );
	$alignments = array_merge( $horizontalAlignments, $verticalAlignments );

	$optionList = array_map( 'trim', explode( '|', $options ) );

	$mwAlign = array();
	foreach ( $alignments as $alignment ) {
		$mwAlign[$alignment] = MagicWord::get( 'img_'.$alignment );
	}
	$mwThumb = MagicWord::get( 'img_thumbnail' );
	$mwManualThumb = MagicWord::get( 'img_manualthumb' );
	$mwWidth = MagicWord::get( 'img_width' );
	$mwFramed = MagicWord::get( 'img_framed' );
	$mwPage = MagicWord::get( 'img_page' );

	$caption = '';
	$params = array();
	$framed = false;
	$thumb = false;
	$manual_thumb = '';
	$align = '';
	$valign = '';
	$sk = $this->mOptions->getSkin();

	foreach ( $optionList as $val ) {
		if ( !is_null( $mwThumb->matchVariableStartToEnd( $val ) ) ) {
			$thumb = true;
			continue;
		}

		$match = $mwManualThumb->matchVariableStartToEnd( $val );
		if ( !is_null( $match ) ) {
			# use manually specified thumbnail
			$thumb = true;
			$manual_thumb = $match;
			continue;
		}

		foreach ( $alignments as $alignment ) {
			if ( is_null( $mwAlign[$alignment]->matchVariableStartToEnd( $val ) ) ) {
				continue;
			}
			if ( in_array( $alignment, $horizontalAlignments, true ) ) {
				$align = $alignment;
			} else {
				$valign = $alignment;
			}
			// Option consumed: go on with the next option
			continue 2;
		}

		$match = $mwPage->matchVariableStartToEnd( $val );
		if ( !is_null( $match ) ) {
			# Select a page in a multipage document
			$params['page'] = $match;
			continue;
		}

		// Only the first width option is honoured; once a width is set the
		// width magic word is not even consulted.
		if ( !isset( $params['width'] ) ) {
			$match = $mwWidth->matchVariableStartToEnd( $val );
			if ( !is_null( $match ) ) {
				wfDebug( "img_width match: $match\n" );
				# $match is the image width in pixels, or "WIDTHxHEIGHT"
				$m = array();
				if ( preg_match( '/^([0-9]*)x([0-9]*)$/', $match, $m ) ) {
					$params['width'] = intval( $m[1] );
					$params['height'] = intval( $m[2] );
				} else {
					$params['width'] = intval( $match );
				}
				continue;
			}
		}

		if ( !is_null( $mwFramed->matchVariableStartToEnd( $val ) ) ) {
			$framed = true;
			continue;
		}

		// Nothing recognised: treat the option as the caption
		$caption = $val;
	}

	# Build the alt text: resolve link placeholders to their text, expand
	# strip markers now (so none are expanded to HTML later inside the
	# attribute) and finally remove all tags.
	$alt = $this->replaceLinkHoldersText( $caption );
	$alt = $this->mStripState->unstripBoth( $alt );
	$alt = Sanitizer::stripAllTags( $alt );

	# Linker does the rest
	return $sk->makeImageLinkObj( $nt, $caption, $alt, $align, $params, $framed, $thumb, $manual_thumb, $valign );
}
4485 
/**
 * Flag the current parser output as uncacheable by setting its mCacheTime
 * to -1; the change is also written to the debug log.
 */
function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	$output = $this->mOutput;
	$output->mCacheTime = -1;
}
4494 
/**
 * Callback that prepares attribute text: variables are replaced first, then
 * both kinds of strip markers are expanded.
 *
 * @param string $text Attribute text; passed by reference and overwritten
 *   with the processed value
 * @param array $args Arguments forwarded to replaceVariables()
 * @return string The processed text (same value as left in $text)
 */
function attributeStripCallback( &$text, $args ) {
	// Step 1: variable replacement
	$text = $this->replaceVariables( $text, $args );

	// Step 2: put stripped content (general and nowiki) back in
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
4508 
/**
 * Accessor for mTitle, implemented with wfSetVar().
 * NOTE(review): presumably returns the previous value and assigns $x when
 * it is not null - confirm against wfSetVar().
 *
 * @param mixed $x Optional new value
 * @return mixed Result of wfSetVar()
 */
function Title( $x = NULL ) {
	return wfSetVar( $this->mTitle, $x );
}
/**
 * Accessor for mOptions, implemented with wfSetVar().
 * NOTE(review): presumably returns the previous value and assigns $x when
 * it is not null - confirm against wfSetVar().
 *
 * @param mixed $x Optional new value
 * @return mixed Result of wfSetVar()
 */
function Options( $x = NULL ) {
	return wfSetVar( $this->mOptions, $x );
}
/**
 * Accessor for mOutputType, implemented with wfSetVar().
 * NOTE(review): presumably returns the previous value and assigns $x when
 * it is not null - confirm against wfSetVar().
 *
 * @param mixed $x Optional new value
 * @return mixed Result of wfSetVar()
 */
function OutputType( $x = NULL ) {
	return wfSetVar( $this->mOutputType, $x );
}
/**
 * List the tag names that currently have a hook registered.
 *
 * @return array Keys of the mTagHooks array
 */
function getTags() {
	$registeredHooks = $this->mTagHooks;
	return array_keys( $registeredHooks );
}
/**
 * Shared worker behind getSection() and replaceSection().
 *
 * The text is stripped (with HTML comments) so that headings hidden inside
 * stripped content cannot be mistaken for section boundaries, then split at
 * wiki headings (== ... ==) and HTML headings (<h1> ... <h6>). Sections are
 * counted from 1 in document order; section 0 is the text before the first
 * heading. A section extends over all following headings of a deeper level.
 *
 * @param string $text Page text to work on
 * @param int $section Section number
 * @param string $mode "get" to return the section, "replace" to return the
 *   whole text with the section swapped for $newtext
 * @param string $newtext Replacement text ("replace"), or the value
 *   returned when the section does not exist ("get")
 * @return string Resulting text, unstripped and trimmed
 */
private function extractSections( $text, $section, $mode, $newtext='' ) {
	# Strip NOWIKI etc. to avoid confusion (the true parameter causes HTML
	# comments to be stripped as well). The strip runs with fresh
	# ParserOptions in OT_WIKI mode; the previous state is restored after.
	$stripState = new StripState;

	$savedOutputType = $this->mOutputType;
	$savedOptions = $this->mOptions;
	$this->mOptions = new ParserOptions();
	$this->setOutputType( OT_WIKI );

	$striptext = $this->strip( $text, $stripState, true );

	$this->setOutputType( $savedOutputType );
	$this->mOptions = $savedOptions;

	# Now that no pseudo-sections can be left in the source, split it up.
	# Every wiki heading contributes (heading line, '=' run) to the result,
	# every HTML heading (heading line, empty string, level digit); each is
	# followed by the text up to the next heading.
	$uniq = preg_quote( $this->uniqPrefix(), '/' );
	$comment = "(?:$uniq-!--.*?QINU)";
	$secs = preg_split(
		"/
 (
 ^
 (?:$comment|<\/?noinclude>)* # Initial comments will be stripped
 (=+) # Should this be limited to 6?
 .+? # Section title...
 \\2 # Ending = count must match start
 (?:$comment|<\/?noinclude>|[ \\t]+)* # Trailing whitespace ok
 $
 |
 <h([1-6])\b.*?>
 .*?
 <\/h\\3\s*>
 )
 /mix",
		$striptext, -1,
		PREG_SPLIT_DELIM_CAPTURE );

	$isGet = ( $mode == "get" );
	$isReplace = !$isGet && ( $mode == "replace" );

	if ( $isGet ) {
		// "Section 0" is the content before any heading; for any other
		// section start with the fallback, which is overwritten if found.
		$rv = ( $section == 0 ) ? $secs[0] : $newtext;
	} elseif ( $isReplace ) {
		// $remainder tells whether headings after the target are copied
		$remainder = ( $section == 0 );
		$rv = $remainder ? $newtext . "\n\n" : $secs[0];
	}

	$count = 0;
	$sectionLevel = 0;
	$index = 1;
	$total = count( $secs );
	while ( $index < $total ) {
		$headerLine = $secs[$index];
		if ( $secs[$index + 1] ) {
			// A wiki header: the level is the length of the '=' run
			$headerLevel = strlen( $secs[$index + 1] );
			$content = $secs[$index + 2];
			$index += 3;
		} else {
			// An HTML header: skip the empty slot, the level is the digit
			$headerLevel = intval( $secs[$index + 2] );
			$content = $secs[$index + 3];
			$index += 4;
		}

		$count++;
		if ( $isGet ) {
			if ( $count == $section ) {
				$rv = $headerLine . $content;
				$sectionLevel = $headerLevel;
			} elseif ( $count > $section ) {
				if ( !$sectionLevel || $headerLevel <= $sectionLevel ) {
					// Left the requested section (or it was never found)
					break;
				}
				// Deeper heading: still part of the requested section
				$rv .= $headerLine . $content;
			}
		} elseif ( $isReplace ) {
			if ( $count < $section ) {
				$rv .= $headerLine . $content;
			} elseif ( $count == $section ) {
				$rv .= $newtext . "\n\n";
				$sectionLevel = $headerLevel;
			} elseif ( $count > $section ) {
				if ( $headerLevel <= $sectionLevel ) {
					// Passed the section's sub-parts.
					$remainder = true;
				}
				if ( $remainder ) {
					$rv .= $headerLine . $content;
				}
			}
		}
	}

	if ( is_string( $rv ) ) {
		# reinsert stripped tags
		$rv = trim( $stripState->unstripBoth( $rv ) );
	}

	return $rv;
}
4646 
/**
 * Return the text of one section of $text, found via extractSections() in
 * "get" mode.
 *
 * @param string $text Text to look in
 * @param int $section Section number; 0 is the text before the first heading
 * @param string $deftext Value to use when the section does not exist
 * @return string The section text
 */
public function getSection( $text, $section, $deftext='' ) {
	$found = $this->extractSections( $text, $section, "get", $deftext );
	return $found;
}
/**
 * Return $oldtext with one section replaced, via extractSections() in
 * "replace" mode.
 *
 * @param string $oldtext Current full text
 * @param int $section Section number; 0 is the text before the first heading
 * @param string $text New text for that section
 * @return string The full text after the replacement
 */
public function replaceSection( $oldtext, $section, $text ) {
	$updated = $this->extractSections( $oldtext, $section, "replace", $text );
	return $updated;
}
4666 
/**
 * Get the timestamp of the revision being parsed, looking it up in the
 * revision table on first use and caching it in mRevisionTimestamp.
 *
 * @return string Timestamp adjusted by $wgContLang->userAdjust()
 */
function getRevisionTimestamp() {
	global $wgContLang;

	if ( !is_null( $this->mRevisionTimestamp ) ) {
		// Already looked up earlier
		return $this->mRevisionTimestamp;
	}

	wfProfileIn( __METHOD__ );

	$rawTimestamp = wfGetDB( DB_SLAVE )->selectField(
		'revision',
		'rev_timestamp',
		array( 'rev_id' => $this->mRevisionId ),
		__METHOD__
	);

	// Normalize to the internal MW format for timezone processing. As a
	// side effect a null value becomes the current time, which gives more
	// sensible behavior for previews.
	$mwTimestamp = wfTimestamp( TS_MW, $rawTimestamp );

	// The '' timezone argument selects the site-default offset rather than
	// the user's setting: the value ends up in the parser cache, is served
	// to other users and may appear inside links, so it has to be the same
	// for every visitor.
	$this->mRevisionTimestamp = $wgContLang->userAdjust( $mwTimestamp, '' );

	wfProfileOut( __METHOD__ );

	return $this->mRevisionTimestamp;
}
4697 
/**
 * Store the default sort key (mDefaultSort) later returned by
 * getDefaultSort().
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;
}
4706 
/**
 * Get the default sort key.
 *
 * @return string The stored mDefaultSort unless it is false; otherwise the
 *   title's plain text for pages in the category namespace and its prefixed
 *   text for all other pages
 */
public function getDefaultSort() {
	if ( $this->mDefaultSort !== false ) {
		return $this->mDefaultSort;
	}

	$title = $this->mTitle;
	if ( $title->getNamespace() == NS_CATEGORY ) {
		return $title->getText();
	}
	return $title->getPrefixedText();
}
4722 
4723 }
4724 
4730  var $output = '';
/**
 * Callback that appends the first captured group to $this->output, minus
 * one trailing newline if there is one. Nothing is returned.
 *
 * @param array $matches Regex matches; only $matches[1] is used
 */
function replace( $matches ) {
	$chunk = $matches[1];
	if ( substr( $chunk, -1 ) == "\n" ) {
		// Drop exactly one trailing newline
		$chunk = substr( $chunk, 0, -1 );
	}
	$this->output .= $chunk;
}
4739 }
4740 
/**
 * Container for two ReplacementArray tables, "general" and "nowiki", with
 * helpers that run their replacements over a text.
 */
class StripState {
	# Replacement table applied by unstripGeneral() and unstripBoth()
	public $general;
	# Replacement table applied by unstripNoWiki() and unstripBoth()
	public $nowiki;

	/**
	 * Start with two empty replacement tables.
	 */
	function __construct() {
		$this->general = new ReplacementArray;
		$this->nowiki = new ReplacementArray;
	}

	/**
	 * Apply only the "general" replacements.
	 *
	 * @param string $text
	 * @return string
	 */
	function unstripGeneral( $text ) {
		wfProfileIn( __METHOD__ );
		$restored = $this->general->replace( $text );
		wfProfileOut( __METHOD__ );
		return $restored;
	}

	/**
	 * Apply only the "nowiki" replacements.
	 *
	 * @param string $text
	 * @return string
	 */
	function unstripNoWiki( $text ) {
		wfProfileIn( __METHOD__ );
		$restored = $this->nowiki->replace( $text );
		wfProfileOut( __METHOD__ );
		return $restored;
	}

	/**
	 * Apply the "general" replacements, then the "nowiki" ones.
	 *
	 * @param string $text
	 * @return string
	 */
	function unstripBoth( $text ) {
		wfProfileIn( __METHOD__ );
		$restored = $this->nowiki->replace( $this->general->replace( $text ) );
		wfProfileOut( __METHOD__ );
		return $restored;
	}
}
4775 
4776 ?>