ILIAS  release_5-3 Revision v5.3.23-19-g915713cf615
geshi.php
Go to the documentation of this file.
1 <?php
35 //
36 // GeSHi Constants
37 // You should use these constant names in your programs instead of
38 // their values - you never know when a value may change in a future
39 // version
40 //
41 
43 define('GESHI_VERSION', '1.0.9.0');
44 
45 // Define the root directory for the GeSHi code tree
46 if (!defined('GESHI_ROOT')) {
48  define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
49 }
52 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
53 
54 // Define if GeSHi should be paranoid about security
55 if (!defined('GESHI_SECURITY_PARANOID')) {
57  define('GESHI_SECURITY_PARANOID', false);
58 }
59 
60 // Line numbers - use with enable_line_numbers()
62 define('GESHI_NO_LINE_NUMBERS', 0);
64 define('GESHI_NORMAL_LINE_NUMBERS', 1);
66 define('GESHI_FANCY_LINE_NUMBERS', 2);
67 
68 // Container HTML type
70 define('GESHI_HEADER_NONE', 0);
72 define('GESHI_HEADER_DIV', 1);
74 define('GESHI_HEADER_PRE', 2);
76 define('GESHI_HEADER_PRE_VALID', 3);
90 define('GESHI_HEADER_PRE_TABLE', 4);
91 
92 // Capitalisation constants
94 define('GESHI_CAPS_NO_CHANGE', 0);
96 define('GESHI_CAPS_UPPER', 1);
98 define('GESHI_CAPS_LOWER', 2);
99 
100 // Link style constants
102 define('GESHI_LINK', 0);
104 define('GESHI_HOVER', 1);
106 define('GESHI_ACTIVE', 2);
108 define('GESHI_VISITED', 3);
109 
110 // Important string starter/finisher
111 // Note that if you change these, they should be as-is: i.e., don't
112 // write them as if they had been run through htmlentities()
114 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
116 define('GESHI_END_IMPORTANT', '<END GeSHi>');
117 
121 // When strict mode applies for a language
123 define('GESHI_NEVER', 0);
126 define('GESHI_MAYBE', 1);
128 define('GESHI_ALWAYS', 2);
129 
130 // Advanced regexp handling constants, used in language files
132 define('GESHI_SEARCH', 0);
135 define('GESHI_REPLACE', 1);
137 define('GESHI_MODIFIERS', 2);
140 define('GESHI_BEFORE', 3);
143 define('GESHI_AFTER', 4);
146 define('GESHI_CLASS', 5);
147 
149 define('GESHI_COMMENTS', 0);
150 
155 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
161 define('GESHI_MAX_PCRE_LENGTH', 12288);
162 
163 //Number format specification
// Every value below is a distinct power of two, so several formats can be
// combined into a single integer with the bitwise OR operator.
165 define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+
167 define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]?
169 define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB]
171 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+
173 define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+
175 define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+
177 define('GESHI_NUMBER_OCT_PREFIX_0O', 512); //0o[0-7]+
179 define('GESHI_NUMBER_OCT_PREFIX_AT', 1024); //@[0-7]+
181 define('GESHI_NUMBER_OCT_SUFFIX', 2048); //[0-7]+[oO]
183 define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+
185 define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192); //$[0-9a-fA-F]+
187 define('GESHI_NUMBER_HEX_SUFFIX', 16384); //[0-9][0-9a-fA-F]*h
189 define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+
191 define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f
193 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+
195 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+
196 //Custom formats are passed by RX array
197 
198 // Error detection - use these to analyse faults
202 define('GESHI_ERROR_NO_INPUT', 1);
204 define('GESHI_ERROR_NO_SUCH_LANG', 2);
206 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
208 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
210 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
226 class GeSHi {
227 
232  protected $source = '';
233 
238  protected $language = '';
239 
244  protected $language_data = array();
245 
251 
257  protected $error = false;
258 
263  protected $error_messages = array(
264  GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
265  GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
266  GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
267  GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
268  );
269 
274  protected $strict_mode = false;
275 
280  protected $use_classes = false;
281 
293 
299  'KEYWORDS' => array(),
300  'COMMENTS' => array('MULTI' => true),
301  'REGEXPS' => array(),
302  'ESCAPE_CHAR' => true,
303  'BRACKETS' => true,
304  'SYMBOLS' => false,
305  'STRINGS' => true,
306  'NUMBERS' => true,
307  'METHODS' => true,
308  'SCRIPT' => true
309  );
310 
315  protected $time = 0;
316 
321  protected $header_content = '';
322 
327  protected $footer_content = '';
328 
333  protected $header_content_style = '';
334 
339  protected $footer_content_style = '';
340 
346  protected $force_code_block = false;
347 
352  protected $link_styles = array();
353 
360  protected $enable_important_blocks = false;
361 
369  protected $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
370 
375  protected $add_ids = false;
376 
382 
388 
393  protected $highlight_extra_lines_style = 'background-color: #ffc;';
394 
401  protected $line_ending = null;
402 
407  protected $line_numbers_start = 1;
408 
413  protected $overall_style = 'font-family:monospace;';
414 
419  protected $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
420 
425  protected $overall_class = '';
426 
431  protected $overall_id = '';
432 
437  protected $line_style1 = 'font-weight: normal; vertical-align:top;';
438 
443  protected $line_style2 = 'font-weight: bold; vertical-align:top;';
444 
449  protected $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
450 
456 
462  protected $allow_multiline_span = true;
463 
468  protected $line_nth_row = 0;
469 
474  protected $tab_width = 8;
475 
480  protected $use_language_tab_width = false;
481 
486  protected $link_target = '';
487 
493  protected $encoding = 'utf-8';
494 
499  protected $keyword_links = true;
500 
506  protected $loaded_language = '';
507 
514  protected $parse_cache_built = false;
515 
531  protected $_kw_replace_group = 0;
532  protected $_rx_key = 0;
533 
541  protected $_hmr_before = '';
542  protected $_hmr_replace = '';
543  protected $_hmr_after = '';
544  protected $_hmr_key = 0;
545 
560  public function __construct($source = '', $language = '', $path = '') {
561  if ( is_string($source) && ($source !== '') ) {
562  $this->set_source($source);
563  }
564  if ( is_string($language) && ($language !== '') ) {
565  $this->set_language($language);
566  }
567  $this->set_language_path($path);
568  }
569 
576  public function get_version()
577  {
578  return GESHI_VERSION;
579  }
580 
588  public function error() {
589  if ($this->error) {
590  //Put some template variables for debugging here ...
591  $debug_tpl_vars = array(
592  '{LANGUAGE}' => $this->language,
593  '{PATH}' => $this->language_path
594  );
595  $msg = str_replace(
596  array_keys($debug_tpl_vars),
597  array_values($debug_tpl_vars),
598  $this->error_messages[$this->error]);
599 
600  return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
601  }
602  return false;
603  }
604 
612  public function get_language_name() {
613  if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
614  return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
615  }
616  return $this->language_data['LANG_NAME'];
617  }
618 
625  public function set_source($source) {
626  $this->source = $source;
627  $this->highlight_extra_lines = array();
628  }
629 
636  public function strip_language_name($language) {
637  $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
638  $language = strtolower($language);
639 
640  return $language;
641  }
642 
653  public function set_language($language, $force_reset = false) {
654  $this->error = false;
655  $this->strict_mode = GESHI_NEVER;
656 
657  if ($force_reset) {
658  $this->loaded_language = false;
659  }
660 
661  //Clean up the language name to prevent malicious code injection
663 
664  //Retreive the full filename
665  $file_name = $this->language_path . $language . '.php';
666  if ($file_name == $this->loaded_language) {
667  // this language is already loaded!
668  return;
669  }
670 
671  $this->language = $language;
672 
673  //Check if we can read the desired file
674  if (!is_readable($file_name)) {
676  return;
677  }
678 
679  // Load the language for parsing
680  $this->load_language($file_name);
681  }
682 
695  public function set_language_path($path) {
696  if(strpos($path,':')) {
697  //Security Fix to prevent external directories using fopen wrappers.
698  if(DIRECTORY_SEPARATOR == "\\") {
699  if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
700  return;
701  }
702  } else {
703  return;
704  }
705  }
706  if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
707  //Security Fix to prevent external directories using fopen wrappers.
708  return;
709  }
710  if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
711  //Security Fix to prevent external directories using fopen wrappers.
712  return;
713  }
714  if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
715  //Security Fix to prevent external directories using fopen wrappers.
716  return;
717  }
718  if ($path) {
719  $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
720  $this->set_language($this->language); // otherwise set_language_path has no effect
721  }
722  }
723 
729  public function get_supported_languages($full_names=false)
730  {
731  // return array
732  $back = array();
733 
734  // we walk the lang root
735  $dir = dir($this->language_path);
736 
737  // foreach entry
738  while (false !== ($entry = $dir->read()))
739  {
740  $full_path = $this->language_path.$entry;
741 
742  // Skip all dirs
743  if (is_dir($full_path)) {
744  continue;
745  }
746 
747  // we only want lang.php files
748  if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
749  continue;
750  }
751 
752  // Raw lang name is here
753  $langname = $matches[1];
754 
755  // We want the fullname too?
756  if ($full_names === true)
757  {
758  if (false !== ($fullname = $this->get_language_fullname($langname)))
759  {
760  $back[$langname] = $fullname; // we go associative
761  }
762  }
763  else
764  {
765  // just store raw langname
766  $back[] = $langname;
767  }
768  }
769 
770  $dir->close();
771 
772  return $back;
773  }
774 
781  {
782  //Clean up the language name to prevent malicious code injection
783  $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
784 
785  $language = strtolower($language);
786 
787  // get fullpath-filename for a langname
788  $fullpath = $this->language_path.$language.'.php';
789 
790  // we need to get contents :S
791  if (false === ($data = file_get_contents($fullpath))) {
792  $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
793  return false;
794  }
795 
796  // match the langname
797  if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) {
798  $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
799  return false;
800  }
801 
802  // return fullname for langname
803  return stripcslashes($matches[1]);
804  }
805 
820  public function set_header_type($type) {
821  //Check if we got a valid header type
825  return;
826  }
827 
828  //Set that new header type
829  $this->header_type = $type;
830  }
831 
841  public function set_overall_style($style, $preserve_defaults = false) {
842  if (!$preserve_defaults) {
843  $this->overall_style = $style;
844  } else {
845  $this->overall_style .= $style;
846  }
847  }
848 
857  public function set_overall_class($class) {
858  $this->overall_class = $class;
859  }
860 
868  public function set_overall_id($id) {
869  $this->overall_id = $id;
870  }
871 
879  public function enable_classes($flag = true) {
880  $this->use_classes = ($flag) ? true : false;
881  }
882 
898  public function set_code_style($style, $preserve_defaults = false) {
899  if (!$preserve_defaults) {
900  $this->code_style = $style;
901  } else {
902  $this->code_style .= $style;
903  }
904  }
905 
918  public function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
919  //Check if we got 2 or three parameters
920  if (is_bool($style2)) {
921  $preserve_defaults = $style2;
922  $style2 = '';
923  }
924 
925  //Actually set the new styles
926  if (!$preserve_defaults) {
927  $this->line_style1 = $style1;
928  $this->line_style2 = $style2;
929  } else {
930  $this->line_style1 .= $style1;
931  $this->line_style2 .= $style2;
932  }
933  }
934 
952  public function enable_line_numbers($flag, $nth_row = 5) {
953  if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
954  && GESHI_FANCY_LINE_NUMBERS != $flag) {
956  }
957  $this->line_numbers = $flag;
958  $this->line_nth_row = $nth_row;
959  }
960 
970  public function enable_multiline_span($flag) {
971  $this->allow_multiline_span = (bool) $flag;
972  }
973 
980  public function get_multiline_span() {
982  }
983 
995  public function set_keyword_group_style($key, $style, $preserve_defaults = false) {
996  //Set the style for this keyword group
997  if('*' == $key) {
998  foreach($this->language_data['STYLES']['KEYWORDS'] as $_key => $_value) {
999  if (!$preserve_defaults) {
1000  $this->language_data['STYLES']['KEYWORDS'][$_key] = $style;
1001  } else {
1002  $this->language_data['STYLES']['KEYWORDS'][$_key] .= $style;
1003  }
1004  }
1005  } else {
1006  if (!$preserve_defaults) {
1007  $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
1008  } else {
1009  $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
1010  }
1011  }
1012 
1013  //Update the lexic permissions
1014  if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
1015  $this->lexic_permissions['KEYWORDS'][$key] = true;
1016  }
1017  }
1018 
1026  public function set_keyword_group_highlighting($key, $flag = true) {
1027  $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
1028  }
1029 
1041  public function set_comments_style($key, $style, $preserve_defaults = false) {
1042  if('*' == $key) {
1043  foreach($this->language_data['STYLES']['COMMENTS'] as $_key => $_value) {
1044  if (!$preserve_defaults) {
1045  $this->language_data['STYLES']['COMMENTS'][$_key] = $style;
1046  } else {
1047  $this->language_data['STYLES']['COMMENTS'][$_key] .= $style;
1048  }
1049  }
1050  } else {
1051  if (!$preserve_defaults) {
1052  $this->language_data['STYLES']['COMMENTS'][$key] = $style;
1053  } else {
1054  $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
1055  }
1056  }
1057  }
1058 
1066  public function set_comments_highlighting($key, $flag = true) {
1067  $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
1068  }
1069 
1081  public function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
1082  if (!$preserve_defaults) {
1083  $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
1084  } else {
1085  $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
1086  }
1087  }
1088 
1095  public function set_escape_characters_highlighting($flag = true) {
1096  $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
1097  }
1098 
1113  public function set_brackets_style($style, $preserve_defaults = false) {
1114  if (!$preserve_defaults) {
1115  $this->language_data['STYLES']['BRACKETS'][0] = $style;
1116  } else {
1117  $this->language_data['STYLES']['BRACKETS'][0] .= $style;
1118  }
1119  }
1120 
1131  public function set_brackets_highlighting($flag) {
1132  $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
1133  }
1134 
1146  public function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1147  // Update the style of symbols
1148  if (!$preserve_defaults) {
1149  $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1150  } else {
1151  $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1152  }
1153 
1154  // For backward compatibility
1155  if (0 == $group) {
1156  $this->set_brackets_style ($style, $preserve_defaults);
1157  }
1158  }
1159 
1166  public function set_symbols_highlighting($flag) {
1167  // Update lexic permissions for this symbol group
1168  $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
1169 
1170  // For backward compatibility
1171  $this->set_brackets_highlighting ($flag);
1172  }
1173 
1185  public function set_strings_style($style, $preserve_defaults = false, $group = 0) {
1186  if (!$preserve_defaults) {
1187  $this->language_data['STYLES']['STRINGS'][$group] = $style;
1188  } else {
1189  $this->language_data['STYLES']['STRINGS'][$group] .= $style;
1190  }
1191  }
1192 
1199  public function set_strings_highlighting($flag) {
1200  $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
1201  }
1202 
1214  public function set_script_style($style, $preserve_defaults = false, $group = 0) {
1215  // Update the style of symbols
1216  if (!$preserve_defaults) {
1217  $this->language_data['STYLES']['SCRIPT'][$group] = $style;
1218  } else {
1219  $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
1220  }
1221  }
1222 
1234  public function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
1235  if (!$preserve_defaults) {
1236  $this->language_data['STYLES']['NUMBERS'][$group] = $style;
1237  } else {
1238  $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
1239  }
1240  }
1241 
1248  public function set_numbers_highlighting($flag) {
1249  $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
1250  }
1251 
1265  public function set_methods_style($key, $style, $preserve_defaults = false) {
1266  if (!$preserve_defaults) {
1267  $this->language_data['STYLES']['METHODS'][$key] = $style;
1268  } else {
1269  $this->language_data['STYLES']['METHODS'][$key] .= $style;
1270  }
1271  }
1272 
1279  public function set_methods_highlighting($flag) {
1280  $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
1281  }
1282 
1295  public function set_regexps_style($key, $style, $preserve_defaults = false) {
1296  if (!$preserve_defaults) {
1297  $this->language_data['STYLES']['REGEXPS'][$key] = $style;
1298  } else {
1299  $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
1300  }
1301  }
1302 
1310  public function set_regexps_highlighting($key, $flag) {
1311  $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
1312  }
1313 
1321  public function set_case_sensitivity($key, $case) {
1322  $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
1323  }
1324 
1335  public function set_case_keywords($case) {
1336  if (in_array($case, array(
1338  $this->language_data['CASE_KEYWORDS'] = $case;
1339  }
1340  }
1341 
1350  public function set_tab_width($width) {
1351  $this->tab_width = intval($width);
1352 
1353  //Check if it fit's the constraints:
1354  if ($this->tab_width < 1) {
1355  //Return it to the default
1356  $this->tab_width = 8;
1357  }
1358  }
1359 
1366  public function set_use_language_tab_width($use) {
1367  $this->use_language_tab_width = (bool) $use;
1368  }
1369 
1377  public function get_real_tab_width() {
1378  if (!$this->use_language_tab_width ||
1379  !isset($this->language_data['TAB_WIDTH'])) {
1380  return $this->tab_width;
1381  } else {
1382  return $this->language_data['TAB_WIDTH'];
1383  }
1384  }
1385 
1394  public function enable_strict_mode($mode = true) {
1395  if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
1396  $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
1397  }
1398  }
1399 
1407  public function disable_highlighting() {
1408  $this->enable_highlighting(false);
1409  }
1410 
1421  public function enable_highlighting($flag = true) {
1422  $flag = $flag ? true : false;
1423  foreach ($this->lexic_permissions as $key => $value) {
1424  if (is_array($value)) {
1425  foreach ($value as $k => $v) {
1426  $this->lexic_permissions[$key][$k] = $flag;
1427  }
1428  } else {
1429  $this->lexic_permissions[$key] = $flag;
1430  }
1431  }
1432 
1433  // Context blocks
1434  $this->enable_important_blocks = $flag;
1435  }
1436 
1448  public static function get_language_name_from_extension( $extension, $lookup = array() ) {
1449  $extension = strtolower($extension);
1450 
1451  if ( !is_array($lookup) || empty($lookup)) {
1452  $lookup = array(
1453  '6502acme' => array( 'a', 's', 'asm', 'inc' ),
1454  '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
1455  '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
1456  '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
1457  'abap' => array('abap'),
1458  'actionscript' => array('as'),
1459  'ada' => array('a', 'ada', 'adb', 'ads'),
1460  'apache' => array('conf'),
1461  'asm' => array('ash', 'asm', 'inc'),
1462  'asp' => array('asp'),
1463  'bash' => array('sh'),
1464  'bf' => array('bf'),
1465  'c' => array('c', 'h'),
1466  'c_mac' => array('c', 'h'),
1467  'caddcl' => array(),
1468  'cadlisp' => array(),
1469  'cdfg' => array('cdfg'),
1470  'cobol' => array('cbl'),
1471  'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
1472  'csharp' => array('cs'),
1473  'css' => array('css'),
1474  'd' => array('d'),
1475  'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
1476  'diff' => array('diff', 'patch'),
1477  'dos' => array('bat', 'cmd'),
1478  'gdb' => array('kcrash', 'crash', 'bt'),
1479  'gettext' => array('po', 'pot'),
1480  'gml' => array('gml'),
1481  'gnuplot' => array('plt'),
1482  'groovy' => array('groovy'),
1483  'haskell' => array('hs'),
1484  'haxe' => array('hx'),
1485  'html4strict' => array('html', 'htm'),
1486  'ini' => array('ini', 'desktop', 'vbp'),
1487  'java' => array('java'),
1488  'javascript' => array('js'),
1489  'klonec' => array('kl1'),
1490  'klonecpp' => array('klx'),
1491  'latex' => array('tex'),
1492  'lisp' => array('lisp'),
1493  'lua' => array('lua'),
1494  'matlab' => array('m'),
1495  'mpasm' => array(),
1496  'mysql' => array('sql'),
1497  'nsis' => array(),
1498  'objc' => array(),
1499  'oobas' => array(),
1500  'oracle8' => array(),
1501  'oracle10' => array(),
1502  'pascal' => array('pas'),
1503  'perl' => array('pl', 'pm'),
1504  'php' => array('php', 'php5', 'phtml', 'phps'),
1505  'povray' => array('pov'),
1506  'providex' => array('pvc', 'pvx'),
1507  'prolog' => array('pl'),
1508  'python' => array('py'),
1509  'qbasic' => array('bi'),
1510  'reg' => array('reg'),
1511  'ruby' => array('rb'),
1512  'sas' => array('sas'),
1513  'scala' => array('scala'),
1514  'scheme' => array('scm'),
1515  'scilab' => array('sci'),
1516  'smalltalk' => array('st'),
1517  'smarty' => array(),
1518  'tcl' => array('tcl'),
1519  'text' => array('txt'),
1520  'vb' => array('bas', 'ctl', 'frm'),
1521  'vbnet' => array('vb', 'sln'),
1522  'visualfoxpro' => array(),
1523  'whitespace' => array('ws'),
1524  'xml' => array('xml', 'svg', 'xrc', 'vbproj', 'csproj', 'userprefs', 'resx', 'stetic', 'settings', 'manifest', 'myapp'),
1525  'z80' => array('z80', 'asm', 'inc')
1526  );
1527  }
1528 
1529  foreach ($lookup as $lang => $extensions) {
1530  if (in_array($extension, $extensions)) {
1531  return $lang;
1532  }
1533  }
1534 
1535  return 'text';
1536  }
1537 
1555  public function load_from_file($file_name, $lookup = array()) {
1556  if (is_readable($file_name)) {
1557  $this->set_source(file_get_contents($file_name));
1558  $this->set_language(self::get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1559  } else {
1561  }
1562  }
1563 
1571  public function add_keyword($key, $word) {
1572  if (!is_array($this->language_data['KEYWORDS'][$key])) {
1573  $this->language_data['KEYWORDS'][$key] = array();
1574  }
1575  if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
1576  $this->language_data['KEYWORDS'][$key][] = $word;
1577 
1578  //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
1579  if ($this->parse_cache_built) {
1580  $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
1581  $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
1582  }
1583  }
1584  }
1585 
1599  public function remove_keyword($key, $word, $recompile = true) {
1600  $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
1601  if ($key_to_remove !== false) {
1602  unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
1603 
1604  //NEW in 1.0.8, optionally recompile keyword group
1605  if ($recompile && $this->parse_cache_built) {
1606  $this->optimize_keyword_group($key);
1607  }
1608  }
1609  }
1610 
1621  public function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
1622  $words = (array) $words;
1623  if (empty($words)) {
1624  // empty word lists mess up highlighting
1625  return false;
1626  }
1627 
1628  //Add the new keyword group internally
1629  $this->language_data['KEYWORDS'][$key] = $words;
1630  $this->lexic_permissions['KEYWORDS'][$key] = true;
1631  $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
1632  $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
1633 
1634  //NEW in 1.0.8, cache keyword regexp
1635  if ($this->parse_cache_built) {
1636  $this->optimize_keyword_group($key);
1637  }
1638  return true;
1639  }
1640 
1647  public function remove_keyword_group ($key) {
1648  //Remove the keyword group internally
1649  unset($this->language_data['KEYWORDS'][$key]);
1650  unset($this->lexic_permissions['KEYWORDS'][$key]);
1651  unset($this->language_data['CASE_SENSITIVE'][$key]);
1652  unset($this->language_data['STYLES']['KEYWORDS'][$key]);
1653 
1654  //NEW in 1.0.8
1655  unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
1656  }
1657 
1664  public function optimize_keyword_group($key) {
1665  $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
1666  $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
1667  $space_as_whitespace = false;
1668  if(isset($this->language_data['PARSER_CONTROL'])) {
1669  if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
1670  if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
1671  $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
1672  }
1673  if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1674  if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1675  $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
1676  }
1677  }
1678  }
1679  }
1680  if($space_as_whitespace) {
1681  foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
1682  $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
1683  str_replace(" ", "\\s+", $rxv);
1684  }
1685  }
1686  }
1687 
1694  public function set_header_content($content) {
1695  $this->header_content = $content;
1696  }
1697 
1704  public function set_footer_content($content) {
1705  $this->footer_content = $content;
1706  }
1707 
1715  $this->header_content_style = $style;
1716  }
1717 
1725  $this->footer_content_style = $style;
1726  }
1727 
1735  public function enable_inner_code_block($flag) {
1736  $this->force_code_block = (bool)$flag;
1737  }
1738 
1748  public function set_url_for_keyword_group($group, $url) {
1749  $this->language_data['URLS'][$group] = $url;
1750  }
1751 
1760  public function set_link_styles($type, $styles) {
1761  $this->link_styles[$type] = $styles;
1762  }
1763 
1770  public function set_link_target($target) {
1771  if (!$target) {
1772  $this->link_target = '';
1773  } else {
1774  $this->link_target = ' target="' . $target . '"';
1775  }
1776  }
1777 
1784  public function set_important_styles($styles) {
1785  $this->important_styles = $styles;
1786  }
1787 
1796  public function enable_important_blocks($flag) {
1797  $this->enable_important_blocks = ( $flag ) ? true : false;
1798  }
1799 
1806  public function enable_ids($flag = true) {
1807  $this->add_ids = ($flag) ? true : false;
1808  }
1809 
1824  public function highlight_lines_extra($lines, $style = null) {
1825  if (is_array($lines)) {
1826  //Split up the job using single lines at a time
1827  foreach ($lines as $line) {
1828  $this->highlight_lines_extra($line, $style);
1829  }
1830  } else {
1831  //Mark the line as being highlighted specially
1832  $lines = intval($lines);
1833  $this->highlight_extra_lines[$lines] = $lines;
1834 
1835  //Decide on which style to use
1836  if ($style === null) { //Check if we should use default style
1837  unset($this->highlight_extra_lines_styles[$lines]);
1838  } elseif ($style === false) { //Check if to remove this line
1839  unset($this->highlight_extra_lines[$lines]);
1840  unset($this->highlight_extra_lines_styles[$lines]);
1841  } else {
1842  $this->highlight_extra_lines_styles[$lines] = $style;
1843  }
1844  }
1845  }
1846 
1854  $this->highlight_extra_lines_style = $styles;
1855  }
1856 
1863  public function set_line_ending($line_ending) {
1864  $this->line_ending = (string)$line_ending;
1865  }
1866 
1882  public function start_line_numbers_at($number) {
1883  $this->line_numbers_start = abs(intval($number));
1884  }
1885 
1898  public function set_encoding($encoding) {
1899  if ($encoding) {
1900  $this->encoding = strtolower($encoding);
1901  }
1902  }
1903 
1910  public function enable_keyword_links($enable = true) {
1911  $this->keyword_links = (bool) $enable;
1912  }
1913 
1922  protected function build_style_cache() {
1923  //Build the style cache needed to highlight numbers appropriate
1924  if($this->lexic_permissions['NUMBERS']) {
1925  //First check what way highlighting information for numbers are given
1926  if(!isset($this->language_data['NUMBERS'])) {
1927  $this->language_data['NUMBERS'] = 0;
1928  }
1929 
1930  if(is_array($this->language_data['NUMBERS'])) {
1931  $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
1932  } else {
1933  $this->language_data['NUMBERS_CACHE'] = array();
1934  if(!$this->language_data['NUMBERS']) {
1935  $this->language_data['NUMBERS'] =
1938  }
1939 
1940  for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
1941  //Rearrange style indices if required ...
1942  if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
1943  $this->language_data['STYLES']['NUMBERS'][$i] =
1944  $this->language_data['STYLES']['NUMBERS'][1<<$i];
1945  unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
1946  }
1947 
1948  //Check if this bit is set for highlighting
1949  if($j&1) {
1950  //So this bit is set ...
1951  //Check if it belongs to group 0 or the actual stylegroup
1952  if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
1953  $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
1954  } else {
1955  if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
1956  $this->language_data['NUMBERS_CACHE'][0] = 0;
1957  }
1958  $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
1959  }
1960  }
1961  }
1962  }
1963  }
1964  }
1965 
/**
 * Builds the lookup data used while parsing and stores it in
 * $this->language_data, then marks the parse cache as built.
 *
 * Depending on $this->lexic_permissions the following entries are written:
 *  - SYMBOLS:  MULTIPLE_SYMBOL_GROUPS, SYMBOL_DATA and SYMBOL_SEARCH
 *  - KEYWORDS: CACHED_KEYWORD_LISTS is reset, then optimize_keyword_group()
 *              is called for every keyword group that is not disabled
 *  - BRACKETS: CACHE_BRACKET_MATCH and CACHE_BRACKET_REPLACE
 *  - NUMBERS:  NUMBERS_RXCACHE, plus a default for
 *              PARSER_CONTROL/NUMBERS/PRECHECK_RX if none is set
 *
 * @return bool|void false when no language data is available; otherwise
 *                   nothing is returned
 */
protected function build_parse_cache() {
    // Without language data there is nothing to cache
    if (empty($this->language_data)) {
        return false;
    }

    // Symbol regexp cache.
    // Building the expression is costly, so instead of one expression per
    // symbol group a single expression covering all symbols is built. For
    // this to work the symbol data is reorganised into a map
    // "escaped symbol => style group".
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;

        $this->language_data['SYMBOL_DATA'] = array();
        $symbol_preg_multi = array();  // multi char symbols
        $symbol_preg_single = array(); // single char symbols
        foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
            // A nested array is a symbol group that keeps its own key;
            // a bare symbol always belongs to style group 0.
            $group = is_array($symbols) ? $key : 0;
            foreach ((array)$symbols as $sym) {
                $sym = $this->hsc($sym);
                if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                    // first definition of a symbol wins
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$sym] = $group;
                if (isset($sym[1])) {
                    // multiple chars (after escaping)
                    $symbol_preg_multi[] = preg_quote($sym, '/');
                } elseif ($sym === '-') {
                    // don't trigger a "range out of order" error inside the character class
                    $symbol_preg_single[] = '\-';
                } else {
                    // single char
                    $symbol_preg_single[] = preg_quote($sym, '/');
                }
            }
        }

        // SYMBOL_DATA now maps every symbol to its style group, so the style
        // can be looked up the moment a symbol is highlighted.
        // What is left is the search expression itself: an alternation of the
        // multi char symbols followed by one character class for the rest.
        $symbol_preg = array();
        if (!empty($symbol_preg_multi)) {
            // Descending order puts a symbol ahead of any of its own
            // prefixes, so the longer alternative is tried first.
            rsort($symbol_preg_multi);
            $symbol_preg[] = implode('|', $symbol_preg_multi);
        }
        if (!empty($symbol_preg_single)) {
            rsort($symbol_preg_single);
            $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
    }

    // Keyword cache: drop the old lists and rebuild the optimised regexps
    // for every group that is not explicitly switched off.
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    if (!empty($this->language_data['KEYWORDS'])) {
        foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
            if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
                $this->lexic_permissions['KEYWORDS'][$key]) {
                $this->optimize_keyword_group($key);
            }
        }
    }

    // Brackets: search list plus the matching replacement markup
    if ($this->lexic_permissions['BRACKETS']) {
        $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

        // Inline style only when classes are off and a bracket style exists;
        // in every other case the br0 class is used.
        if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $bracket_attributes = ' style="' . $this->language_data['STYLES']['BRACKETS'][0] . '"';
        } else {
            $bracket_attributes = ' class="br0"';
        }

        // Numeric entities in the same order as CACHE_BRACKET_MATCH
        $bracket_replace = array();
        foreach (array('&#91;', '&#93;', '&#40;', '&#41;', '&#123;', '&#125;') as $entity) {
            $bracket_replace[] = '<|' . $bracket_attributes . '>' . $entity . '|>';
        }
        $this->language_data['CACHE_BRACKET_REPLACE'] = $bracket_replace;
    }

    // Build the parse cache needed to highlight numbers
    if ($this->lexic_permissions['NUMBERS']) {
        // NUMBERS_CACHE is produced by build_style_cache(); make sure it exists
        if (!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        // Number format specification, keyed by the bit flag that enables it.
        // All of these formats are matched case-insensitively!
        // NOTE(review): the GESHI_NUMBER_* flag constants are expected to be
        // defined with the other GeSHi constants at the top of this file;
        // the lookup below ($numbers_format[$flag]) only works with these
        // keys - confirm the constant names against that section.
        static $numbers_format = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX_0O =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX_AT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
        );

        // Each NUMBERS_CACHE entry is either a ready-made regexp (string) or
        // a bitfield of number flags that share one style group.
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach ($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
            if (is_string($rxdata)) {
                $regexp = $rxdata;
            } else {
                // Walk the bitfield bit by bit, collect the pattern of every
                // set flag and join them into one alternation.
                $rxuse = array();
                for ($flag = 1; $flag <= $rxdata; $flag <<= 1) {
                    // Set bits without a known format are skipped instead of
                    // raising an undefined-key notice.
                    if (($rxdata & $flag) && isset($numbers_format[$flag])) {
                        $rxuse[] = $numbers_format[$flag];
                    }
                }
                $regexp = implode("|", $rxuse);
            }

            // Wrap the pattern in look-arounds; they reference the "<|", "|>",
            // "/>", "<DOT>", "!REG3XP" and "/NUM!" markers.
            $this->language_data['NUMBERS_RXCACHE'][$key] =
                "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i";
        }

        // Default pre-check: any digit
        if (!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
            $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
        }
    }

    $this->parse_cache_built = true;
}
2148 
2159  public function parse_code() {
2160  // Start the timer
2161  $start_time = microtime();
2162 
2163  // Replace all newlines to a common form.
2164  $code = str_replace("\r\n", "\n", $this->source);
2165  $code = str_replace("\r", "\n", $code);
2166 
2167  // check whether language_data is available
2168  if (empty($this->language_data)) {
2170  }
2171 
2172  // Firstly, if there is an error, we won't highlight
2173  if ($this->error) {
2174  //Escape the source for output
2175  $result = $this->hsc($this->source);
2176 
2177  //This fix is related to SF#1923020, but has to be applied regardless of
2178  //actually highlighting symbols.
2179  $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2180 
2181  // Timing is irrelevant
2182  $this->set_time($start_time, $start_time);
2183  $this->finalise($result);
2184  return $result;
2185  }
2186 
2187  // make sure the parse cache is up2date
2188  if (!$this->parse_cache_built) {
2189  $this->build_parse_cache();
2190  }
2191 
2192  // Initialise various stuff
2193  $length = strlen($code);
2194  $COMMENT_MATCHED = false;
2195  $stuff_to_parse = '';
2196  $endresult = '';
2197 
2198  // "Important" selections are handled like multiline comments
2199  // @todo GET RID OF THIS SHIZ
2200  if ($this->enable_important_blocks) {
2201  $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2202  }
2203 
2204  if ($this->strict_mode) {
2205  // Break the source into bits. Each bit will be a portion of the code
2206  // within script delimiters - for example, HTML between < and >
2207  $k = 0;
2208  $parts = array();
2209  $matches = array();
2210  $next_match_pointer = null;
2211  // we use a copy to unset delimiters on demand (when they are not found)
2212  $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2213  $i = 0;
2214  while ($i < $length) {
2215  $next_match_pos = $length + 1; // never true
2216  foreach ($delim_copy as $dk => $delimiters) {
2217  if(is_array($delimiters)) {
2218  foreach ($delimiters as $open => $close) {
2219  // make sure the cache is setup properly
2220  if (!isset($matches[$dk][$open])) {
2221  $matches[$dk][$open] = array(
2222  'next_match' => -1,
2223  'dk' => $dk,
2224 
2225  'open' => $open, // needed for grouping of adjacent code blocks (see below)
2226  'open_strlen' => strlen($open),
2227 
2228  'close' => $close,
2229  'close_strlen' => strlen($close),
2230  );
2231  }
2232  // Get the next little bit for this opening string
2233  if ($matches[$dk][$open]['next_match'] < $i) {
2234  // only find the next pos if it was not already cached
2235  $open_pos = strpos($code, $open, $i);
2236  if ($open_pos === false) {
2237  // no match for this delimiter ever
2238  unset($delim_copy[$dk][$open]);
2239  continue;
2240  }
2241  $matches[$dk][$open]['next_match'] = $open_pos;
2242  }
2243  if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2244  //So we got a new match, update the close_pos
2245  $matches[$dk][$open]['close_pos'] =
2246  strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2247 
2248  $next_match_pointer =& $matches[$dk][$open];
2249  $next_match_pos = $matches[$dk][$open]['next_match'];
2250  }
2251  }
2252  } else {
2253  //So we should match an RegExp as Strict Block ...
2260  if(preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2261  //We got a match ...
2262  if(isset($matches_rx['start']) && isset($matches_rx['end']))
2263  {
2264  $matches[$dk] = array(
2265  'next_match' => $matches_rx['start'][1],
2266  'dk' => $dk,
2267 
2268  'close_strlen' => strlen($matches_rx['end'][0]),
2269  'close_pos' => $matches_rx['end'][1],
2270  );
2271  } else {
2272  $matches[$dk] = array(
2273  'next_match' => $matches_rx[1][1],
2274  'dk' => $dk,
2275 
2276  'close_strlen' => strlen($matches_rx[2][0]),
2277  'close_pos' => $matches_rx[2][1],
2278  );
2279  }
2280  } else {
2281  // no match for this delimiter ever
2282  unset($delim_copy[$dk]);
2283  continue;
2284  }
2285 
2286  if ($matches[$dk]['next_match'] <= $next_match_pos) {
2287  $next_match_pointer =& $matches[$dk];
2288  $next_match_pos = $matches[$dk]['next_match'];
2289  }
2290  }
2291  }
2292 
2293  // non-highlightable text
2294  $parts[$k] = array(
2295  1 => substr($code, $i, $next_match_pos - $i)
2296  );
2297  ++$k;
2298 
2299  if ($next_match_pos > $length) {
2300  // out of bounds means no next match was found
2301  break;
2302  }
2303 
2304  // highlightable code
2305  $parts[$k][0] = $next_match_pointer['dk'];
2306 
2307  //Only combine for non-rx script blocks
2308  if(is_array($delim_copy[$next_match_pointer['dk']])) {
2309  // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2310  $i = $next_match_pos + $next_match_pointer['open_strlen'];
2311  while (true) {
2312  $close_pos = strpos($code, $next_match_pointer['close'], $i);
2313  if ($close_pos == false) {
2314  break;
2315  }
2316  $i = $close_pos + $next_match_pointer['close_strlen'];
2317  if ($i == $length) {
2318  break;
2319  }
2320  if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2321  substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2322  // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2323  foreach ($matches as $submatches) {
2324  foreach ($submatches as $match) {
2325  if ($match['next_match'] == $i) {
2326  // a different block already matches here!
2327  break 3;
2328  }
2329  }
2330  }
2331  } else {
2332  break;
2333  }
2334  }
2335  } else {
2336  $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2337  $i = $close_pos;
2338  }
2339 
2340  if ($close_pos === false) {
2341  // no closing delimiter found!
2342  $parts[$k][1] = substr($code, $next_match_pos);
2343  ++$k;
2344  break;
2345  } else {
2346  $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2347  ++$k;
2348  }
2349  }
2350  unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2351  $num_parts = $k;
2352 
2353  if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2354  // when we have only one part, we don't have anything to highlight at all.
2355  // if we have a "maybe" strict language, this should be handled as highlightable code
2356  $parts = array(
2357  0 => array(
2358  0 => '',
2359  1 => ''
2360  ),
2361  1 => array(
2362  0 => null,
2363  1 => $parts[0][1]
2364  )
2365  );
2366  $num_parts = 2;
2367  }
2368 
2369  } else {
2370  // Not strict mode - simply dump the source into
2371  // the array at index 1 (the first highlightable block)
2372  $parts = array(
2373  0 => array(
2374  0 => '',
2375  1 => ''
2376  ),
2377  1 => array(
2378  0 => null,
2379  1 => $code
2380  )
2381  );
2382  $num_parts = 2;
2383  }
2384 
2385  //Unset variables we won't need any longer
2386  unset($code);
2387 
2388  //Preload some repeatedly used values regarding hardquotes ...
2389  $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2390  $hq_strlen = strlen($hq);
2391 
2392  //Preload if line numbers are to be generated afterwards
2393  //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2394  $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2395  !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2396 
2397  //preload the escape char for faster checking ...
2398  $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2399 
2400  // this is used for single-line comments
2401  $sc_disallowed_before = "";
2402  $sc_disallowed_after = "";
2403 
2404  if (isset($this->language_data['PARSER_CONTROL'])) {
2405  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2406  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2407  $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2408  }
2409  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2410  $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2411  }
2412  }
2413  }
2414 
2415  //Fix for SF#1932083: Multichar Quotemarks unsupported
2416  $is_string_starter = array();
2417  if ($this->lexic_permissions['STRINGS']) {
2418  foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2419  if (!isset($is_string_starter[$quotemark[0]])) {
2420  $is_string_starter[$quotemark[0]] = (string)$quotemark;
2421  } elseif (is_string($is_string_starter[$quotemark[0]])) {
2422  $is_string_starter[$quotemark[0]] = array(
2423  $is_string_starter[$quotemark[0]],
2424  $quotemark);
2425  } else {
2426  $is_string_starter[$quotemark[0]][] = $quotemark;
2427  }
2428  }
2429  }
2430 
2431  // Now we go through each part. We know that even-indexed parts are
2432  // code that shouldn't be highlighted, and odd-indexed parts should
2433  // be highlighted
2434  for ($key = 0; $key < $num_parts; ++$key) {
2435  $STRICTATTRS = '';
2436 
2437  // If this block should be highlighted...
2438  if (!($key & 1)) {
2439  // Else not a block to highlight
2440  $endresult .= $this->hsc($parts[$key][1]);
2441  unset($parts[$key]);
2442  continue;
2443  }
2444 
2445  $result = '';
2446  $part = $parts[$key][1];
2447 
2448  $highlight_part = true;
2449  if ($this->strict_mode && !is_null($parts[$key][0])) {
2450  // get the class key for this block of code
2451  $script_key = $parts[$key][0];
2452  $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2453  if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2454  $this->lexic_permissions['SCRIPT']) {
2455  // Add a span element around the source to
2456  // highlight the overall source block
2457  if (!$this->use_classes &&
2458  $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2459  $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2460  } else {
2461  $attributes = ' class="sc' . $script_key . '"';
2462  }
2463  $result .= "<span$attributes>";
2464  $STRICTATTRS = $attributes;
2465  }
2466  }
2467 
2468  if ($highlight_part) {
2469  // Now, highlight the code in this block. This code
2470  // is really the engine of GeSHi (along with the method
2471  // parse_non_string_part).
2472 
2473  // cache comment regexps incrementally
2474  $next_comment_regexp_key = '';
2475  $next_comment_regexp_pos = -1;
2476  $next_comment_multi_pos = -1;
2477  $next_comment_single_pos = -1;
2478  $comment_regexp_cache_per_key = array();
2479  $comment_multi_cache_per_key = array();
2480  $comment_single_cache_per_key = array();
2481  $next_open_comment_multi = '';
2482  $next_comment_single_key = '';
2483  $escape_regexp_cache_per_key = array();
2484  $next_escape_regexp_key = '';
2485  $next_escape_regexp_pos = -1;
2486 
2487  $length = strlen($part);
2488  for ($i = 0; $i < $length; ++$i) {
2489  // Get the next char
2490  $char = $part[$i];
2491  $char_len = 1;
2492 
2493  // update regexp comment cache if needed
2494  if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2495  $next_comment_regexp_pos = $length;
2496  foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2497  $match_i = false;
2498  if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2499  ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2500  $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2501  // we have already matched something
2502  if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2503  // this comment is never matched
2504  continue;
2505  }
2506  $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2507  } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) {
2508  $match_i = $match[0][1];
2509 
2510  $comment_regexp_cache_per_key[$comment_key] = array(
2511  'key' => $comment_key,
2512  'length' => strlen($match[0][0]),
2513  'pos' => $match_i
2514  );
2515  } else {
2516  $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2517  continue;
2518  }
2519 
2520  if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2521  $next_comment_regexp_pos = $match_i;
2522  $next_comment_regexp_key = $comment_key;
2523  if ($match_i === $i) {
2524  break;
2525  }
2526  }
2527  }
2528  }
2529 
2530  $string_started = false;
2531 
2532  if (isset($is_string_starter[$char])) {
2533  // Possibly the start of a new string ...
2534 
2535  //Check which starter it was ...
2536  //Fix for SF#1932083: Multichar Quotemarks unsupported
2537  if (is_array($is_string_starter[$char])) {
2538  $char_new = '';
2539  foreach ($is_string_starter[$char] as $testchar) {
2540  if ($testchar === substr($part, $i, strlen($testchar)) &&
2541  strlen($testchar) > strlen($char_new)) {
2542  $char_new = $testchar;
2543  $string_started = true;
2544  }
2545  }
2546  if ($string_started) {
2547  $char = $char_new;
2548  }
2549  } else {
2550  $testchar = $is_string_starter[$char];
2551  if ($testchar === substr($part, $i, strlen($testchar))) {
2552  $char = $testchar;
2553  $string_started = true;
2554  }
2555  }
2556  $char_len = strlen($char);
2557  }
2558 
2559  if ($string_started && ($i != $next_comment_regexp_pos)) {
2560  // Hand out the correct style information for this string
2561  $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2562  if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2563  !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2564  $string_key = 0;
2565  }
2566 
2567  // parse the stuff before this
2568  $result .= $this->parse_non_string_part($stuff_to_parse);
2569  $stuff_to_parse = '';
2570 
2571  if (!$this->use_classes) {
2572  $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2573  } else {
2574  $string_attributes = ' class="st'.$string_key.'"';
2575  }
2576 
2577  // now handle the string
2578  $string = "<span$string_attributes>" . GeSHi::hsc($char);
2579  $start = $i + $char_len;
2580  $string_open = true;
2581 
2582  if(empty($this->language_data['ESCAPE_REGEXP'])) {
2583  $next_escape_regexp_pos = $length;
2584  }
2585 
2586  do {
2587  //Get the regular ending pos ...
2588  $close_pos = strpos($part, $char, $start);
2589  if(false === $close_pos) {
2590  $close_pos = $length;
2591  }
2592 
2593  if($this->lexic_permissions['ESCAPE_CHAR']) {
2594  // update escape regexp cache if needed
2595  if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2596  $next_escape_regexp_pos = $length;
2597  foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2598  $match_i = false;
2599  if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2600  ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2601  $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2602  // we have already matched something
2603  if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2604  // this comment is never matched
2605  continue;
2606  }
2607  $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2608  } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) {
2609  $match_i = $match[0][1];
2610 
2611  $escape_regexp_cache_per_key[$escape_key] = array(
2612  'key' => $escape_key,
2613  'length' => strlen($match[0][0]),
2614  'pos' => $match_i
2615  );
2616  } else {
2617  $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2618  continue;
2619  }
2620 
2621  if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2622  $next_escape_regexp_pos = $match_i;
2623  $next_escape_regexp_key = $escape_key;
2624  if ($match_i === $start) {
2625  break;
2626  }
2627  }
2628  }
2629  }
2630 
2631  //Find the next simple escape position
2632  if('' != $this->language_data['ESCAPE_CHAR']) {
2633  $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2634  if(false === $simple_escape) {
2635  $simple_escape = $length;
2636  }
2637  } else {
2638  $simple_escape = $length;
2639  }
2640  } else {
2641  $next_escape_regexp_pos = $length;
2642  $simple_escape = $length;
2643  }
2644 
2645  if($simple_escape < $next_escape_regexp_pos &&
2646  $simple_escape < $length &&
2647  $simple_escape < $close_pos) {
2648  //The nexxt escape sequence is a simple one ...
2649  $es_pos = $simple_escape;
2650 
2651  //Add the stuff not in the string yet ...
2652  $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2653 
2654  //Get the style for this escaped char ...
2655  if (!$this->use_classes) {
2656  $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2657  } else {
2658  $escape_char_attributes = ' class="es0"';
2659  }
2660 
2661  //Add the style for the escape char ...
2662  $string .= "<span$escape_char_attributes>" .
2663  GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2664 
2665  //Get the byte AFTER the ESCAPE_CHAR we just found
2666  $es_char = $part[$es_pos + 1];
2667  if ($es_char == "\n") {
2668  // don't put a newline around newlines
2669  $string .= "</span>\n";
2670  $start = $es_pos + 2;
2671  } elseif (ord($es_char) >= 128) {
2672  //This is an non-ASCII char (UTF8 or single byte)
2673  //This code tries to work around SF#2037598 ...
2674  if(function_exists('mb_substr')) {
2675  $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2676  $string .= $es_char_m . '</span>';
2677  } elseif ('utf-8' == $this->encoding) {
2678  if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2679  "|\xE0[\xA0-\xBF][\x80-\xBF]".
2680  "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2681  "|\xED[\x80-\x9F][\x80-\xBF]".
2682  "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2683  "|[\xF1-\xF3][\x80-\xBF]{3}".
2684  "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2685  $part, $es_char_m, null, $es_pos + 1)) {
2686  $es_char_m = $es_char_m[0];
2687  } else {
2688  $es_char_m = $es_char;
2689  }
2690  $string .= $this->hsc($es_char_m) . '</span>';
2691  } else {
2692  $es_char_m = $this->hsc($es_char);
2693  }
2694  $start = $es_pos + strlen($es_char_m) + 1;
2695  } else {
2696  $string .= $this->hsc($es_char) . '</span>';
2697  $start = $es_pos + 2;
2698  }
2699  } elseif ($next_escape_regexp_pos < $length &&
2700  $next_escape_regexp_pos < $close_pos) {
2701  $es_pos = $next_escape_regexp_pos;
2702  //Add the stuff not in the string yet ...
2703  $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2704 
2705  //Get the key and length of this match ...
2706  $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2707  $escape_str = substr($part, $es_pos, $escape['length']);
2708  $escape_key = $escape['key'];
2709 
2710  //Get the style for this escaped char ...
2711  if (!$this->use_classes) {
2712  $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2713  } else {
2714  $escape_char_attributes = ' class="es' . $escape_key . '"';
2715  }
2716 
2717  //Add the style for the escape char ...
2718  $string .= "<span$escape_char_attributes>" .
2719  $this->hsc($escape_str) . '</span>';
2720 
2721  $start = $es_pos + $escape['length'];
2722  } else {
2723  //Copy the remainder of the string ...
2724  $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2725  $start = $close_pos + $char_len;
2726  $string_open = false;
2727  }
2728  } while($string_open);
2729 
2730  if ($check_linenumbers) {
2731  // Are line numbers used? If, we should end the string before
2732  // the newline and begin it again (so when <li>s are put in the source
2733  // remains XHTML compliant)
2734  // note to self: This opens up possibility of config files specifying
2735  // that languages can/cannot have multiline strings???
2736  $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2737  }
2738 
2739  $result .= $string;
2740  $string = '';
2741  $i = $start - 1;
2742  continue;
2743  } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2744  substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2745  // The start of a hard quoted string
2746  if (!$this->use_classes) {
2747  $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2748  $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2749  } else {
2750  $string_attributes = ' class="st_h"';
2751  $escape_char_attributes = ' class="es_h"';
2752  }
2753  // parse the stuff before this
2754  $result .= $this->parse_non_string_part($stuff_to_parse);
2755  $stuff_to_parse = '';
2756 
2757  // now handle the string
2758  $string = '';
2759 
2760  // look for closing quote
2761  $start = $i + $hq_strlen;
2762  while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2763  $start = $close_pos + 1;
2764  if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2765  (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2766  // make sure this quote is not escaped
2767  foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2768  if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2769  // check wether this quote is escaped or if it is something like '\\'
2770  $escape_char_pos = $close_pos - 1;
2771  while ($escape_char_pos > 0
2772  && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2773  --$escape_char_pos;
2774  }
2775  if (($close_pos - $escape_char_pos) & 1) {
2776  // uneven number of escape chars => this quote is escaped
2777  continue 2;
2778  }
2779  }
2780  }
2781  }
2782 
2783  // found closing quote
2784  break;
2785  }
2786 
2787  //Found the closing delimiter?
2788  if (!$close_pos) {
2789  // span till the end of this $part when no closing delimiter is found
2790  $close_pos = $length;
2791  }
2792 
2793  //Get the actual string
2794  $string = substr($part, $i, $close_pos - $i + 1);
2795  $i = $close_pos;
2796 
2797  // handle escape chars and encode html chars
2798  // (special because when we have escape chars within our string they may not be escaped)
2799  if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2800  $start = 0;
2801  $new_string = '';
2802  while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2803  // hmtl escape stuff before
2804  $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2805  // check if this is a hard escape
2806  foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2807  if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2808  // indeed, this is a hardescape
2809  $new_string .= "<span$escape_char_attributes>" .
2810  $this->hsc($hardescape) . '</span>';
2811  $start = $es_pos + strlen($hardescape);
2812  continue 2;
2813  }
2814  }
2815  // not a hard escape, but a normal escape
2816  // they come in pairs of two
2817  $c = 0;
2818  while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2819  && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2820  && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2821  $c += 2;
2822  }
2823  if ($c) {
2824  $new_string .= "<span$escape_char_attributes>" .
2825  str_repeat($escaped_escape_char, $c) .
2826  '</span>';
2827  $start = $es_pos + $c;
2828  } else {
2829  // this is just a single lonely escape char...
2830  $new_string .= $escaped_escape_char;
2831  $start = $es_pos + 1;
2832  }
2833  }
2834  $string = $new_string . $this->hsc(substr($string, $start));
2835  } else {
2836  $string = $this->hsc($string);
2837  }
2838 
2839  if ($check_linenumbers) {
2840  // Are line numbers used? If, we should end the string before
2841  // the newline and begin it again (so when <li>s are put in the source
2842  // remains XHTML compliant)
2843  // note to self: This opens up possibility of config files specifying
2844  // that languages can/cannot have multiline strings???
2845  $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2846  }
2847 
2848  $result .= "<span$string_attributes>" . $string . '</span>';
2849  $string = '';
2850  continue;
2851  } else {
2852  //Have a look for regexp comments
2853  if ($i == $next_comment_regexp_pos) {
2854  $COMMENT_MATCHED = true;
2855  $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2856  $test_str = $this->hsc(substr($part, $i, $comment['length']));
2857 
2858  //@todo If remove important do remove here
2859  if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2860  if (!$this->use_classes) {
2861  $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2862  } else {
2863  $attributes = ' class="co' . $comment['key'] . '"';
2864  }
2865 
2866  $test_str = "<span$attributes>" . $test_str . "</span>";
2867 
2868  // Short-cut through all the multiline code
2869  if ($check_linenumbers) {
2870  // strreplace to put close span and open span around multiline newlines
2871  $test_str = str_replace(
2872  "\n", "</span>\n<span$attributes>",
2873  str_replace("\n ", "\n&nbsp;", $test_str)
2874  );
2875  }
2876  }
2877 
2878  $i += $comment['length'] - 1;
2879 
2880  // parse the rest
2881  $result .= $this->parse_non_string_part($stuff_to_parse);
2882  $stuff_to_parse = '';
2883  }
2884 
2885  // If we haven't matched a regexp comment, try multi-line comments
2886  if (!$COMMENT_MATCHED) {
2887  // Is this a multiline comment?
2888  if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2889  $next_comment_multi_pos = $length;
2890  foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2891  $match_i = false;
2892  if (isset($comment_multi_cache_per_key[$open]) &&
2893  ($comment_multi_cache_per_key[$open] >= $i ||
2894  $comment_multi_cache_per_key[$open] === false)) {
2895  // we have already matched something
2896  if ($comment_multi_cache_per_key[$open] === false) {
2897  // this comment is never matched
2898  continue;
2899  }
2900  $match_i = $comment_multi_cache_per_key[$open];
2901  } elseif (($match_i = stripos($part, $open, $i)) !== false) {
2902  $comment_multi_cache_per_key[$open] = $match_i;
2903  } else {
2904  $comment_multi_cache_per_key[$open] = false;
2905  continue;
2906  }
2907  if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2908  $next_comment_multi_pos = $match_i;
2909  $next_open_comment_multi = $open;
2910  if ($match_i === $i) {
2911  break;
2912  }
2913  }
2914  }
2915  }
2916  if ($i == $next_comment_multi_pos) {
2917  $open = $next_open_comment_multi;
2918  $close = $this->language_data['COMMENT_MULTI'][$open];
2919  $open_strlen = strlen($open);
2920  $close_strlen = strlen($close);
2921  $COMMENT_MATCHED = true;
2922  $test_str_match = $open;
2923  //@todo If remove important do remove here
2924  if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2925  $open == GESHI_START_IMPORTANT) {
2926  if ($open != GESHI_START_IMPORTANT) {
2927  if (!$this->use_classes) {
2928  $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2929  } else {
2930  $attributes = ' class="coMULTI"';
2931  }
2932  $test_str = "<span$attributes>" . $this->hsc($open);
2933  } else {
2934  if (!$this->use_classes) {
2935  $attributes = ' style="' . $this->important_styles . '"';
2936  } else {
2937  $attributes = ' class="imp"';
2938  }
2939 
2940  // We don't include the start of the comment if it's an
2941  // "important" part
2942  $test_str = "<span$attributes>";
2943  }
2944  } else {
2945  $test_str = $this->hsc($open);
2946  }
2947 
2948  $close_pos = strpos( $part, $close, $i + $open_strlen );
2949 
2950  if ($close_pos === false) {
2951  $close_pos = $length;
2952  }
2953 
2954  // Short-cut through all the multiline code
2955  $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2956  if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2957  $test_str_match == GESHI_START_IMPORTANT) &&
2958  $check_linenumbers) {
2959 
2960  // strreplace to put close span and open span around multiline newlines
2961  $test_str .= str_replace(
2962  "\n", "</span>\n<span$attributes>",
2963  str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2964  );
2965  } else {
2966  $test_str .= $rest_of_comment;
2967  }
2968 
2969  if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2970  $test_str_match == GESHI_START_IMPORTANT) {
2971  $test_str .= '</span>';
2972  }
2973 
2974  $i = $close_pos + $close_strlen - 1;
2975 
2976  // parse the rest
2977  $result .= $this->parse_non_string_part($stuff_to_parse);
2978  $stuff_to_parse = '';
2979  }
2980  }
2981 
2982  // If we haven't matched a multiline comment, try single-line comments
2983  if (!$COMMENT_MATCHED) {
2984  // cache potential single line comment occurrences
2985  if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2986  $next_comment_single_pos = $length;
2987  foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2988  $match_i = false;
2989  if (isset($comment_single_cache_per_key[$comment_key]) &&
2990  ($comment_single_cache_per_key[$comment_key] >= $i ||
2991  $comment_single_cache_per_key[$comment_key] === false)) {
2992  // we have already matched something
2993  if ($comment_single_cache_per_key[$comment_key] === false) {
2994  // this comment is never matched
2995  continue;
2996  }
2997  $match_i = $comment_single_cache_per_key[$comment_key];
2998  } elseif (
2999  // case sensitive comments
3000  ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3001  ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
3002  // non case sensitive
3003  (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3004  (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
3005  $comment_single_cache_per_key[$comment_key] = $match_i;
3006  } else {
3007  $comment_single_cache_per_key[$comment_key] = false;
3008  continue;
3009  }
3010  if ($match_i !== false && $match_i < $next_comment_single_pos) {
3011  $next_comment_single_pos = $match_i;
3012  $next_comment_single_key = $comment_key;
3013  if ($match_i === $i) {
3014  break;
3015  }
3016  }
3017  }
3018  }
3019  if ($next_comment_single_pos == $i) {
3020  $comment_key = $next_comment_single_key;
3021  $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
3022  $com_len = strlen($comment_mark);
3023 
3024  // This check will find special variables like $# in bash
3025  // or compiler directives of Delphi beginning {$
3026  if ((empty($sc_disallowed_before) || ($i == 0) ||
3027  (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
3028  (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
3029  (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
3030  {
3031  // this is a valid comment
3032  $COMMENT_MATCHED = true;
3033  if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3034  if (!$this->use_classes) {
3035  $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
3036  } else {
3037  $attributes = ' class="co' . $comment_key . '"';
3038  }
3039  $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
3040  } else {
3041  $test_str = $this->hsc($comment_mark);
3042  }
3043 
3044  //Check if this comment is the last in the source
3045  $close_pos = strpos($part, "\n", $i);
3046  $oops = false;
3047  if ($close_pos === false) {
3048  $close_pos = $length;
3049  $oops = true;
3050  }
3051  $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
3052  if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3053  $test_str .= "</span>";
3054  }
3055 
3056  // Take into account that the comment might be the last in the source
3057  if (!$oops) {
3058  $test_str .= "\n";
3059  }
3060 
3061  $i = $close_pos;
3062 
3063  // parse the rest
3064  $result .= $this->parse_non_string_part($stuff_to_parse);
3065  $stuff_to_parse = '';
3066  }
3067  }
3068  }
3069  }
3070 
3071  // Where are we adding this char?
3072  if (!$COMMENT_MATCHED) {
3073  $stuff_to_parse .= $char;
3074  } else {
3075  $result .= $test_str;
3076  unset($test_str);
3077  $COMMENT_MATCHED = false;
3078  }
3079  }
3080  // Parse the last bit
3081  $result .= $this->parse_non_string_part($stuff_to_parse);
3082  $stuff_to_parse = '';
3083  } else {
3084  $result .= $this->hsc($part);
3085  }
3086  // Close the <span> that surrounds the block
3087  if ($STRICTATTRS != '') {
3088  $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
3089  $result .= '</span>';
3090  }
3091 
3092  $endresult .= $result;
3093  unset($part, $parts[$key], $result);
3094  }
3095 
3096  //This fix is related to SF#1923020, but has to be applied regardless of
3097  //actually highlighting symbols.
3099  $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
3100 
3101 // // Parse the last stuff (redundant?)
3102 // $result .= $this->parse_non_string_part($stuff_to_parse);
3103 
3104  // Lop off the very first and last spaces
3105 // $result = substr($result, 1, -1);
3106 
3107  // We're finished: stop timing
3108  $this->set_time($start_time, microtime());
3109 
3110  $this->finalise($endresult);
3111  return $endresult;
3112  }
3113 
/**
 * Converts the whitespace of already-highlighted code into HTML that keeps
 * its indentation when rendered outside of a preformatted container.
 *
 * Three things happen, in this order:
 *  - tabs are expanded to the next tab stop, counting only visible
 *    characters (HTML tags are skipped, an entity counts as one column);
 *  - a leading space and every run of two spaces get a non-breaking space
 *    so that browsers do not collapse them;
 *  - when no line numbers are in use and the header is not
 *    GESHI_HEADER_PRE_TABLE, newlines are turned into line breaks.
 *
 * @param string $result The highlighted code; modified in place
 */
protected function indent(&$result) {
    // Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        $result = null; // Save memory while we process the lines individually
        $tab_width = $this->get_real_tab_width();
        // "&nbsp;" followed by plain spaces; slices of this string are used
        // as the replacement for a tab (see below)
        $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

        for ($key = 0, $n = count($lines); $key < $n; $key++) {
            $line = $lines[$key];
            if (false === strpos($line, "\t")) {
                continue;
            }

            // $pos is the visible column reached so far on this line
            $pos = 0;
            $length = strlen($line);
            $lines[$key] = ''; // reduce memory

            $IN_TAG = false;
            for ($i = 0; $i < $length; ++$i) {
                $char = $line[$i];
                // Simple engine to work out whether we're in a tag.
                // If we are we do not advance $pos. This is so we ignore
                // HTML in the line and only work out the tab replacement
                // via the actual content of the string.
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG) {
                    if ('>' == $char) {
                        $IN_TAG = false;
                    }
                    $lines[$key] .= $char;
                } elseif ('<' == $char) {
                    $IN_TAG = true;
                    $lines[$key] .= '<';
                } elseif ('&' == $char) {
                    // Looks like the start of an entity: find its ';'.
                    // $pos is moved back here so that, once the loop has
                    // counted the remaining characters of the entity one
                    // by one, the whole entity has advanced $pos by 1.
                    $substr = substr($line, $i + 3, 5);
                    $posi = strpos($substr, ';');
                    if (false === $posi) {
                        ++$pos;
                    } else {
                        $pos -= $posi+2;
                    }
                    $lines[$key] .= $char;
                } elseif ("\t" == $char) {
                    // Number of columns up to the next tab stop
                    $tab_end_width = $tab_width - ($pos % $tab_width);
                    if (($pos & 1) || 1 == $tab_end_width) {
                        // Skip the 6 characters of "&nbsp;": plain spaces only
                        $str = substr($tab_string, 6, $tab_end_width);
                    } else {
                        // "&nbsp;" (6 chars) plus $tab_end_width - 1 spaces
                        $str = substr($tab_string, 0, $tab_end_width+5);
                    }
                    $lines[$key] .= $str;
                    $pos += $tab_end_width;

                    // No further tab on this line: copy the rest verbatim
                    if (false === strpos($line, "\t", $i + 1)) {
                        $lines[$key] .= substr($line, $i + 1);
                        break;
                    }
                } elseif (0 == $pos && ' ' == $char) {
                    $lines[$key] .= '&nbsp;';
                    ++$pos;
                } else {
                    $lines[$key] .= $char;
                    ++$pos;
                }
            }
        }
        $result = implode("\n", $lines);
        unset($lines); // We don't need the lines separated beyond this --- free them!
    }

    // Other whitespace
    // BenBE: Fix to reduce the number of replacements to be done
    $result = preg_replace('/^ /m', '&nbsp;', $result);
    // Only runs of TWO spaces are touched. Replacing every single space
    // would also rewrite the spaces inside the generated tags
    // (e.g. <span style="...">) and corrupt the markup.
    $result = str_replace('  ', ' &nbsp;', $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        if ($this->line_ending === null) {
            $result = nl2br($result);
        } else {
            $result = str_replace("\n", $this->line_ending, $result);
        }
    }
}
3212 
/**
 * Applies the keyword capitalisation configured for the loaded language.
 *
 * @param string $instr The text to convert
 * @return string Upper-cased for GESHI_CAPS_UPPER, lower-cased for
 *                GESHI_CAPS_LOWER, otherwise the text unchanged
 */
protected function change_case($instr) {
    $mode = $this->language_data['CASE_KEYWORDS'];

    if ($mode == GESHI_CAPS_UPPER) {
        return strtoupper($instr);
    }
    if ($mode == GESHI_CAPS_LOWER) {
        return strtolower($instr);
    }
    return $instr;
}
3230 
/**
 * preg_replace_callback() handler used by parse_non_string_part() to mark
 * up one matched keyword.
 *
 * The keyword is wrapped in the intermediate "<|/GROUP/>...|>" placeholder
 * (turned into a styled span later on). When keyword links are enabled and
 * the group has a URL template, the placeholder is additionally surrounded
 * by a "<|UR1|"...">" / "</a>" pair.
 *
 * @param array $match Match data: [0] is the full match, [1] the keyword
 * @return string The replacement text for the match
 */
protected function handle_keyword_replace($match) {
    // The keyword group being processed is published by
    // parse_non_string_part() right before it runs the regexp
    $k = $this->_kw_replace_group;
    $keyword = $match[0];
    $keyword_match = $match[1];

    $before = '';
    $after = '';

    if ($this->keyword_links) {
        // Keyword links have been enabled

        if (isset($this->language_data['URLS'][$k]) &&
            $this->language_data['URLS'][$k] != '') {
            // There is a base URL for this keyword group

            // Default to the text as it was matched; this also covers the
            // case where the lookup below finds nothing
            $word = $keyword_match;

            // For case insensitive groups whose URL uses {FNAME}, take the
            // spelling from the language file to get the correct case
            if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                    if (strcasecmp($candidate, $keyword_match) == 0) {
                        $word = $candidate;
                        break;
                    }
                }
            }

            // Dots are masked as <DOT> here and restored by the caller once
            // all highlighting passes are done
            $before = '<|UR1|"' .
                str_replace(
                    array(
                        '{FNAME}',
                        '{FNAMEL}',
                        '{FNAMEU}',
                        '{FNAMEUF}',
                        '.'),
                    array(
                        str_replace('+', '%20', urlencode($this->hsc($word))),
                        str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
                        str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
                        str_replace('+', '%20', urlencode($this->hsc(ucfirst($word)))),
                        '<DOT>'),
                    $this->language_data['URLS'][$k]
                ) . '">';
            $after = '</a>';
        }
    }

    return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
}
3291 
/**
 * preg_replace_callback() handler for regexp groups whose style is a
 * callable: asks that callable for the style of the matched text.
 *
 * @param array $matches Match data; [1] is the text between the regexp
 *                       marker and the closing "|>"
 * @return string A style attribute, the matched text and the closing "|>"
 */
protected function handle_regexps_callback($matches) {
    $matched_text = $matches[1];
    $style_callback = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $style = call_user_func($style_callback, $matched_text);

    return ' style="' . $style . '"' . $matched_text . '|>';
}
3305 
/**
 * preg_replace_callback() handler that wraps a regexp match in the
 * "<|!REG3XPkey!>...|>" placeholder, closing and reopening the placeholder
 * at every newline so each line is wrapped on its own.
 *
 * Reads _hmr_before, _hmr_after, _hmr_replace and _hmr_key, which the
 * caller sets before running the regexp.
 *
 * @param array $matches Match data from preg_replace_callback()
 * @return string The replacement text for the match
 */
protected function handle_multiline_regexps($matches) {
    $prefix = $this->_hmr_before;
    $suffix = $this->_hmr_after;
    $open_marker = '<|!REG3XP' . $this->_hmr_key . '!>';

    if (!$this->_hmr_replace) {
        // No replacement template: highlight the whole match
        $body = $matches[0];
    } else {
        // Build the back-reference tokens (\0, \1, ...) and substitute the
        // captured groups for them in all three templates
        $placeholders = array();
        foreach ($matches as $group => $unused) {
            $placeholders[] = '\\' . $group;
        }

        $prefix = str_replace($placeholders, $matches, $prefix);
        $suffix = str_replace($placeholders, $matches, $suffix);
        $body = str_replace($placeholders, $matches, $this->_hmr_replace);
    }

    $wrapped = str_replace("\n", "|>\n" . $open_marker, $body);

    return $prefix . $open_marker . $wrapped . '|>' . $suffix;
}
3338 
/**
 * Highlights a piece of source that is neither a string nor a comment.
 *
 * The text is HTML-escaped and then run through a fixed sequence of
 * passes: keywords, language regexps, numbers, object methods, brackets
 * and symbols. The passes do not emit real markup; they insert the
 * intermediate placeholders "<|...>" and "|>", which are only turned into
 * "<span...>" and "</span>" at the very end. The order of the passes
 * matters because later passes search the output of earlier ones.
 *
 * @param string $stuff_to_parse The raw code to highlight
 * @return string The highlighted code as HTML
 */
3348  protected function parse_non_string_part($stuff_to_parse) {
      // A single space is prepended here and removed again by the final
      // substr() of this method
3349  $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3350 
3351  // Highlight keywords
      // Default look-behind / look-ahead character classes; they are closed
      // with "])" below, after the quote marks have been appended
3352  $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
3353  $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3354  if ($this->lexic_permissions['STRINGS']) {
3355  $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3356  $disallowed_before .= $quotemarks;
3357  $disallowed_after .= $quotemarks;
3358  }
3359  $disallowed_before .= "])";
3360  $disallowed_after .= "])";
3361 
      // A language file may replace the defaults above, globally and/or per
      // keyword group
3362  $parser_control_pergroup = false;
3363  if (isset($this->language_data['PARSER_CONTROL'])) {
3364  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3365  $x = 0; // check whether per-keyword-group parser_control is enabled
3366  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3367  $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3368  ++$x;
3369  }
3370  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3371  $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3372  ++$x;
3373  }
      // Any entry besides the two global ones counted in $x means that
      // per-group settings exist
3374  $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3375  }
3376  }
3377 
3378  foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
      // A group without an explicit permission entry is highlighted
3379  if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3380  $this->lexic_permissions['KEYWORDS'][$k]) {
3381 
3382  $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3383  $modifiers = $case_sensitive ? '' : 'i';
3384 
3385  // NEW in 1.0.8 - per-keyword-group parser control
3386  $disallowed_before_local = $disallowed_before;
3387  $disallowed_after_local = $disallowed_after;
3388  if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3389  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3390  $disallowed_before_local =
3391  $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3392  }
3393 
3394  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3395  $disallowed_after_local =
3396  $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3397  }
3398  }
3399 
      // Publish the current group for the handle_keyword_replace callback
3400  $this->_kw_replace_group = $k;
3401 
3402  //NEW in 1.0.8, the cached regexp list
3403  // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3404  for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
3405  $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3406  // Might make a more unique string for putting the number in soon
3407  // Basically, we don't put the styles in yet because then the styles themselves will
3408  // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3409  $stuff_to_parse = preg_replace_callback(
3410  "/$disallowed_before_local({$keywordset})(?!<DOT>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
3411  array($this, 'handle_keyword_replace'),
3412  $stuff_to_parse
3413  );
3414  }
3415  }
3416  }
3417 
3418  // Regular expressions
      // A regexp entry is either an array indexed by the GESHI_SEARCH,
      // GESHI_REPLACE, GESHI_MODIFIERS, GESHI_BEFORE and GESHI_AFTER
      // constants, or a plain pattern string. With line numbers enabled
      // the callback variant is used so that matches spanning several
      // lines are wrapped line by line.
3419  foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3420  if ($this->lexic_permissions['REGEXPS'][$key]) {
3421  if (is_array($regexp)) {
3422  if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3423  // produce valid HTML when we match multiple lines
3424  $this->_hmr_replace = $regexp[GESHI_REPLACE];
3425  $this->_hmr_before = $regexp[GESHI_BEFORE];
3426  $this->_hmr_key = $key;
3427  $this->_hmr_after = $regexp[GESHI_AFTER];
3428  $stuff_to_parse = preg_replace_callback(
3429  "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3430  array($this, 'handle_multiline_regexps'),
3431  $stuff_to_parse);
      // Reset the callback state so it cannot leak into the next regexp
3432  $this->_hmr_replace = false;
3433  $this->_hmr_before = '';
3434  $this->_hmr_after = '';
3435  } else {
3436  $stuff_to_parse = preg_replace(
3437  '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3438  $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3439  $stuff_to_parse);
3440  }
3441  } else {
3442  if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3443  // produce valid HTML when we match multiple lines
3444  $this->_hmr_key = $key;
3445  $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3446  array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3447  $this->_hmr_key = '';
3448  } else {
3449  $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3450  }
3451  }
3452  }
3453  }
3454 
3455  // Highlight numbers. As of 1.0.8 we support different types of numbers
3456  $numbers_found = false;
3457 
      // PRECHECK_RX is tested first so that the per-format regexps only run
      // when it matches
3458  if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
3459  $numbers_found = true;
3460 
3461  //For each of the formats ...
3462  foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3463  //Check if it should be highlighted ...
3464  $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3465  }
3466  }
3467 
3468  //
3469  // Now that's all done, replace /[number]/ with the correct styles
3470  //
3471  foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3472  if (!$this->use_classes) {
3473  $attributes = ' style="' .
3474  (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3475  $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3476  } else {
3477  $attributes = ' class="kw' . $k . '"';
3478  }
3479  $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3480  }
3481 
3482  if ($numbers_found) {
3483  // Put number styles in
3484  foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3485  //Commented out for now, as this needs some review ...
3486  // if ($numbers_permissions & $id) {
3487  //Get the appropriate style ...
3488  //Checking for unset styles is done by the style cache builder ...
3489  if (!$this->use_classes) {
3490  $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3491  } else {
3492  $attributes = ' class="nu'.$id.'"';
3493  }
3494 
3495  //Set in the correct styles ...
3496  $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3497  // }
3498  }
3499  }
3500 
3501  // Highlight methods and fields in objects
3502  if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
      // Default patterns; each can be overridden through
      // PARSER_CONTROL / OOLANG in the language file
3503  $oolang_spaces = "[\s]*";
3504  $oolang_before = "";
3505  $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3506  if (isset($this->language_data['PARSER_CONTROL'])) {
3507  if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3508  if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3509  $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3510  }
3511  if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3512  $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3513  }
3514  if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3515  $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3516  }
3517  }
3518  }
3519 
3520  foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
      // Cheap substring test before running the regexp
3521  if (false !== strpos($stuff_to_parse, $splitter)) {
3522  if (!$this->use_classes) {
3523  $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3524  } else {
3525  $attributes = ' class="me' . $key . '"';
3526  }
      // Only the identifier after the splitter (group 4) is wrapped
3527  $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3528  }
3529  }
3530  }
3531 
3532  //
3533  // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3534  // You try it, and see what happens ;)
3535  // TODO: Fix lexic permissions not converting entities if shouldn't
3536  // be highlighting regardless
3537  //
3538  if ($this->lexic_permissions['BRACKETS']) {
3539  $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3540  $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3541  }
3542 
3543 
3544  //FIX for symbol highlighting ...
3545  if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3546  //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
3547  $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
      // The match offsets refer to the text before any replacement;
      // $global_offset accumulates the length change of the replacements
      // made so far
3548  $global_offset = 0;
3549  for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3550  $symbol_match = $pot_symbols[$s_id][0][0];
3551  if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3552  // already highlighted blocks _must_ include either < or >
3553  // so if this conditional applies, we have to skip this match
3554  // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3555  if(strpos($symbol_match, '<SEMI>') === false &&
3556  strpos($symbol_match, '<PIPE>') === false) {
3557  continue;
3558  }
3559  }
3560 
3561  // if we reach this point, we have a valid match which needs to be highlighted
3562 
3563  $symbol_length = strlen($symbol_match);
3564  $symbol_offset = $pot_symbols[$s_id][0][1];
3565  unset($pot_symbols[$s_id]);
3566  $symbol_hl = "";
3567 
3568  // if we have multiple styles, we have to handle them properly
3569  if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
      // -1 means that no placeholder has been opened yet
3570  $old_sym = -1;
3571  // Split the current stuff to replace into its atomic symbols ...
3572  preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3573  foreach ($sym_match_syms[0] as $sym_ms) {
3574  //Check if consecutive symbols belong to the same group to save output ...
3575  if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3576  && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3577  if (-1 != $old_sym) {
3578  $symbol_hl .= "|>";
3579  }
3580  $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3581  if (!$this->use_classes) {
3582  $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3583  } else {
3584  $symbol_hl .= '<| class="sy' . $old_sym . '">';
3585  }
3586  }
3587  $symbol_hl .= $sym_ms;
3588  }
3589  unset($sym_match_syms);
3590 
3591  //Close remaining tags and insert the replacement at the right position ...
3592  //Take caution if symbol_hl is empty to avoid doubled closing spans.
3593  if (-1 != $old_sym) {
3594  $symbol_hl .= "|>";
3595  }
3596  } else {
3597  if (!$this->use_classes) {
3598  $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3599  } else {
3600  $symbol_hl = '<| class="sy0">';
3601  }
3602  $symbol_hl .= $symbol_match . '|>';
3603  }
3604 
3605  $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3606 
3607  // since we replace old text with something of different size,
3608  // we'll have to keep track of the differences
3609  $global_offset += strlen($symbol_hl) - $symbol_length;
3610  }
3611  }
3612  //FIX for symbol highlighting ...
3613 
3614  // Add class/style for regexps
      // The "!REG3XPkey!" markers written by the regexp pass above are
      // resolved here, either through a style callback or with a fixed
      // style / class attribute
3615  foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3616  if ($this->lexic_permissions['REGEXPS'][$key]) {
3617  if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3618  $this->_rx_key = $key;
3619  $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3620  array($this, 'handle_regexps_callback'),
3621  $stuff_to_parse);
3622  } else {
3623  if (!$this->use_classes) {
3624  $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3625  } else {
3626  if (is_array($this->language_data['REGEXPS'][$key]) &&
3627  array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3628  $attributes = ' class="' .
3629  $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3630  } else {
3631  $attributes = ' class="re' . $key . '"';
3632  }
3633  }
3634  $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3635  }
3636  }
3637  }
3638 
3639  // Replace <DOT> with . for urls
3640  $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3641  // Replace <|UR1| with <a href= for urls also
3642  if (isset($this->link_styles[GESHI_LINK])) {
3643  if ($this->use_classes) {
3644  $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3645  } else {
3646  $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3647  }
3648  } else {
3649  $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3650  }
3651 
3652  //
3653  // NOW we add the span thingy ;)
3654  //
3655 
3656  $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3657  $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
      // Drop the space that was prepended at the top of this method
3658  return substr($stuff_to_parse, 1);
3659  }
3660 
/**
 * Stores the elapsed time between two microtime() string readings.
 *
 * Both arguments are split at the space into their two numeric parts; the
 * difference is kept in $this->time.
 *
 * @param string $start_time Reading taken when the work started
 * @param string $end_time   Reading taken when the work ended
 */
protected function set_time($start_time, $end_time) {
    list($start_first, $start_second) = explode(' ', $start_time);
    list($end_first, $end_second) = explode(' ', $end_time);

    $this->time = $end_first + $end_second - $start_first - $start_second;
}
3673 
/**
 * Returns the duration recorded by the last call to set_time().
 *
 * @return mixed The stored value of $this->time
 */
public function get_time() {
    $elapsed = $this->time;

    return $elapsed;
}
3683 
/**
 * Recursively merges any number of arrays; later arguments win.
 *
 * For every key of a later array: if the value is an array and the key is
 * already present in the result, both are merged recursively; otherwise
 * the value simply overwrites the one in the result.
 *
 * @param array ... The arrays to merge, first one being the base
 * @return array|false The merged array, or false (after raising an
 *                     E_USER_WARNING) if any argument is not an array
 */
protected function merge_arrays() {
    $arrays = func_get_args();

    // Validate the arguments up front. Without this the merge loop below
    // would emit a warning for a non-array argument.
    foreach ($arrays as $index => $candidate) {
        if (!is_array($candidate)) {
            // also array_merge_recursive returns nothing in this case
            trigger_error('Argument #' . ($index + 1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
            return false;
        }
    }

    // The first array is the base of the result in every case
    $merged = $arrays[0];

    // Lay every remaining array over the result, in argument order
    foreach (array_slice($arrays, 1) as $overlay) {
        foreach ($overlay as $key => $value) {
            if (!is_array($value) || !isset($merged[$key])) {
                $merged[$key] = $value;
                continue;
            }
            // If $merged[$key] is a scalar here, the recursive call raises
            // the warning above and $merged[$key] becomes false
            $merged[$key] = $this->merge_arrays($merged[$key], $value);
        }
    }

    return $merged;
}
3721 
/**
 * Loads a language file and resets the state that depends on it.
 *
 * The file is require'd in this method's scope and is expected to fill
 * the local $language_data array. Afterwards the lexic permissions are
 * initialised, the language's ENABLE_FLAGS are applied, and an optional
 * "<language>.style.php" file next to the language file may override the
 * styles through a $style_data array.
 *
 * @param string $file_name Path of the language file to load
 */
protected function load_language($file_name) {
    // Loading the same file twice in a row is a no-op
    if ($this->loaded_language == $file_name) {
        return;
    }

    // Prepare some stuff before actually loading the language file
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();
    $language_data = array();

    // Load the language file
    require $file_name;

    // No validation of $language_data is done at this point
    $this->language_data = $language_data;

    // Set strict mode if should be set
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Highlight everything by default; keyword groups without any words
    // are switched off
    foreach ($this->language_data['KEYWORDS'] as $group => $words) {
        $this->lexic_permissions['KEYWORDS'][$group] = !empty($words);
    }
    foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $unused) {
        $this->lexic_permissions['COMMENTS'][$comment_key] = true;
    }
    foreach ($this->language_data['REGEXPS'] as $regexp_key => $unused) {
        $this->lexic_permissions['REGEXPS'][$regexp_key] = true;
    }

    // empty() is safe here: it copes with a missing PARSER_CONTROL entry
    // as well as with false/null instead of an array, without a notice
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // Everything except GESHI_NEVER enables the lexic
            $perm = $value !== GESHI_NEVER;

            if ($flag == 'ALL') {
                $this->enable_highlighting($perm);
            } elseif (isset($this->lexic_permissions[$flag])) {
                // Unknown lexic permissions never get here
                if (is_array($this->lexic_permissions[$flag])) {
                    foreach (array_keys($this->lexic_permissions[$flag]) as $sub_key) {
                        $this->lexic_permissions[$flag][$sub_key] = $perm;
                    }
                } else {
                    $this->lexic_permissions[$flag] = $perm;
                }
            }
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    // Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
    // You need to set one for HARDESCAPES only in this case.
    if (!isset($this->language_data['HARDCHAR'])) {
        $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
    }

    // NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (!is_readable($style_filename)) {
        return;
    }

    // Clear any style_data that could have been set before ...
    unset($style_data);

    // Read the style information from the style file
    include $style_filename;

    // Apply the new styles to our current language styles
    if (isset($style_data) && is_array($style_data)) {
        $this->language_data['STYLES'] =
            $this->merge_arrays($this->language_data['STYLES'], $style_data);
    }
}
3819 
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it (container, per-line markup, header and footer).
 *
 * Depending on the configuration one of two layouts is produced:
 *  - line numbers enabled and header type is not GESHI_HEADER_PRE_TABLE:
 *    every source line becomes an <li> holding a <div> (or a <pre> for
 *    GESHI_HEADER_PRE_VALID);
 *  - otherwise: the lines are emitted one after another, wrapped in
 *    <span>s where they are "fancy" or extra-highlighted. For
 *    GESHI_HEADER_PRE_TABLE with line numbers a separate table cell with
 *    the line numbers is emitted first.
 *
 * Extra-highlighted lines are identified by their 1-based line number in
 * every layout.
 *
 * @param string $parsed_code The highlighted code. Passed by reference and
 *                            overwritten with the complete HTML output.
 */
protected function finalise(&$parsed_code) {
    // Remove end parts of important declarations
    // This is BUGGY!! My fault for bad code: fix coming in 1.2
    // @todo Remove this crap
    if ($this->enable_important_blocks &&
        (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
        $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
        $this->indent($parsed_code);
    }

    // purge some unnecessary stuff: spans that contain nothing but whitespace
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    $code = explode("\n", $parsed_code);
    $parsed_code = $this->header();

    // If we're using line numbers, we insert <li>s and appropriate
    // markup to style them (otherwise we don't need to do anything)
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

        // Foreach line... ($i is advanced inside the body, see ++$i below)
        for ($i = 0, $n = count($code); $i < $n;) {
            //Reset the attributes for a new line ...
            $attrs = array();

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            // If this is a "special line"...
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $attrs['class'][] = 'li2';
                    $def_attr = ' class="de2"';
                } else {
                    $attrs['style'][] = $this->line_style2;
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            } else {
                if ($this->use_classes) {
                    $attrs['class'][] = 'li1';
                    $def_attr = ' class="de1"';
                } else {
                    $attrs['style'][] = $this->line_style1;
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            }

            //Check which type of tag to insert for this line
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $start = "<pre$def_attr>";
                $end = '</pre>';
            } else {
                // Span or div?
                $start = "<div$def_attr>";
                $end = '</div>';
            }

            // From here on $i is the 1-based number of the current line
            ++$i;

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = "$this->overall_id-$i";
            }

            //Is this some line with extra styles???
            if (in_array($i, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$i])) {
                        $attrs['class'][] = "lx$i";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    array_push($attrs['style'], $this->get_line_style($i));
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
            // Release the line as soon as it has been emitted
            unset($code[$i - 1]);
        }
    } else {
        $n = count($code);
        if ($this->use_classes) {
            $attributes = ' class="de1"';
        } else {
            $attributes = ' style="'. $this->code_style .'"';
        }
        if ($this->header_type == GESHI_HEADER_PRE_VALID) {
            $parsed_code .= '<pre'. $attributes .'>';
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                if ($this->use_classes) {
                    $attrs = ' class="ln"';
                } else {
                    $attrs = ' style="'. $this->table_linenumber_style .'"';
                }
                $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
                // get linenumbers
                // we don't merge it with the for below, since it should be better for
                // memory consumption this way
                // @todo: but... actually it would still be somewhat nice to merge the two loops
                //        the mem peaks are at different positions
                for ($i = 0; $i < $n; ++$i) {
                    $close = 0;
                    // 1-based line number; extra-line settings are keyed by it
                    $line_no = $i + 1;
                    // fancy lines
                    if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                        $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                        // Set the attributes to style the line
                        if ($this->use_classes) {
                            $parsed_code .= '<span class="xtra li2"><span class="de2">';
                        } else {
                            // This style "covers up" the special styles set for special lines
                            // so that styles applied to special lines don't apply to the actual
                            // code on that line
                            $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                              .'<span style="' . $this->code_style .'">';
                        }
                        $close += 2;
                    }
                    //Is this some line with extra styles???
                    if (in_array($line_no, $this->highlight_extra_lines)) {
                        if ($this->use_classes) {
                            if (isset($this->highlight_extra_lines_styles[$line_no])) {
                                $parsed_code .= "<span class=\"xtra lx$line_no\">";
                            } else {
                                $parsed_code .= "<span class=\"xtra ln-xtra\">";
                            }
                        } else {
                            $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
                        }
                        ++$close;
                    }
                    $parsed_code .= $this->line_numbers_start + $i;
                    if ($close) {
                        $parsed_code .= str_repeat('</span>', $close);
                    } elseif ($i != $n) {
                        // NOTE: always true inside this loop ($i < $n), so every
                        // unwrapped number, including the last one, is followed
                        // by a newline. Kept as-is to preserve the output.
                        $parsed_code .= "\n";
                    }
                }
                $parsed_code .= '</pre></td><td'.$attributes.'>';
            }
            $parsed_code .= '<pre'. $attributes .'>';
        }
        // No line numbers, but still need to handle highlighting lines extra.
        // Have to use divs so the full width of the code is highlighted
        $close = 0;
        for ($i = 0; $i < $n; ++$i) {
            // 1-based line number; extra-line settings are keyed by it
            $line_no = $i + 1;
            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }
            // fancy lines
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $parsed_code .= '<span class="xtra li2"><span class="de2">';
                } else {
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                      .'<span style="' . $this->code_style .'">';
                }
                $close += 2;
            }
            //Is this some line with extra styles???
            if (in_array($line_no, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line_no])) {
                        $parsed_code .= "<span class=\"xtra lx$line_no\">";
                    } else {
                        $parsed_code .= "<span class=\"xtra ln-xtra\">";
                    }
                } else {
                    $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
                }
                ++$close;
            }

            $parsed_code .= $code[$i];

            if ($close) {
                // The wrapping spans are block-level, so no newline is needed
                $parsed_code .= str_repeat('</span>', $close);
                $close = 0;
            }
            elseif ($i + 1 < $n) {
                $parsed_code .= "\n";
            }
            // Release the line as soon as it has been emitted
            unset($code[$i]);
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
            $parsed_code .= '</pre>';
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $parsed_code .= '</td>';
        }
    }

    $parsed_code .= $this->footer();
}
4064 
/**
 * Creates the opening HTML for the code block: the outer container with
 * its class/id/style attributes, the optional header content and, when
 * line numbers are enabled, the opening list or table markup.
 *
 * The class attribute is always emitted, whether or not classes are used
 * for styling.
 *
 * @return string|null The opening markup. Nothing is returned when line
 *                     numbers are enabled and the header type is none of
 *                     the types handled below.
 */
protected function header() {
    // Get attributes needed
    $attributes = ' class="' . $this->_genCSSName($this->language);
    if ($this->overall_class != '') {
        $attributes .= " ".$this->_genCSSName($this->overall_class);
    }
    $attributes .= '"';

    if ($this->overall_id != '') {
        $attributes .= " id=\"{$this->overall_id}\"";
    }
    if ($this->overall_style != '' && !$this->use_classes) {
        $attributes .= ' style="' . $this->overall_style . '"';
    }

    $ol_attributes = '';

    if ($this->line_numbers_start != 1) {
        $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
    }

    // Get the header HTML. Without this assignment $header would be undefined
    // below and the configured header content would never be output.
    $header = $this->header_content;
    if ($header) {
        if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
            $header = str_replace("\n", '', $header);
        }
        $header = $this->replace_keywords($header);

        if ($this->use_classes) {
            $attr = ' class="head"';
        } else {
            $attr = " style=\"{$this->header_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
        } else {
            $header = "<div$attr>$header</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "$header<ol$attributes$ol_attributes>";
        }
        return $header . ($this->force_code_block ? '<div>' : '');
    }

    // Work out what to return and do it
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            return "<pre$attributes>$header<ol$ol_attributes>";
        } elseif ($this->header_type == GESHI_HEADER_DIV ||
            $this->header_type == GESHI_HEADER_PRE_VALID) {
            return "<div$attributes>$header<ol$ol_attributes>";
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            return "<table$attributes>$header<tbody><tr class=\"li1\">";
        }
    } else {
        if ($this->header_type == GESHI_HEADER_PRE) {
            return "<pre$attributes>$header" .
                ($this->force_code_block ? '<div>' : '');
        } else {
            return "<div$attributes>$header" .
                ($this->force_code_block ? '<div>' : '');
        }
    }
}
4143 
/**
 * Returns the footer for the code block: the optional footer content
 * followed by the closing tags matching what header() opened.
 *
 * @return string The closing markup
 */
protected function footer() {
    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            // Apply the footer class/style to the cell, as header() does
            // for the header cell
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    // Closes the <div> that header() opens when force_code_block is set
    // and no line numbers are used
    $code_block_end = $this->force_code_block ? '</div>' : '';

    if (GESHI_HEADER_NONE == $this->header_type) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return '</ol>' . $footer;
        }
        // header() emits '<div>' here as well, so it has to be closed
        return $code_block_end . $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</div>";
        }
        return $code_block_end . "$footer</div>";
    }
    elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</tr></tbody>$footer</table>";
        }
        return $code_block_end . "$footer</div>";
    }
    else {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</pre>";
        }
        return $code_block_end . "$footer</pre>";
    }
}
4196 
/**
 * Replaces certain keywords in the header and footer with certain
 * configuration values.
 *
 * Each placeholder is recognised in an angle-bracket and a curly-brace
 * spelling: TIME (result of get_time(), three decimals), LANGUAGE (the
 * language's LANG_NAME), VERSION (GESHI_VERSION) and SPEED (source length
 * divided by the time, or 'N/A' when the time is not positive).
 *
 * @param string $instr The text in which placeholders are substituted
 * @return string The text with all placeholders replaced
 */
protected function replace_keywords($instr) {
    $time = $this->get_time();
    $time_text = number_format($time, 3);
    $language_name = $this->language_data['LANG_NAME'];

    if ($time <= 0) {
        $speed_text = 'N/A';
    } else {
        $bytes_per_second = strlen($this->source) / $time;
        $speed_text = ($bytes_per_second >= 1024)
            ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
            : sprintf("%.0f B/s", $bytes_per_second);
    }

    // Order matters: str_replace() applies the pairs one after another
    $substitutions = array(
        '<TIME>'     => $time_text,
        '{TIME}'     => $time_text,
        '<LANGUAGE>' => $language_name,
        '{LANGUAGE}' => $language_name,
        '<VERSION>'  => GESHI_VERSION,
        '{VERSION}'  => GESHI_VERSION,
        '<SPEED>'    => $speed_text,
        '{SPEED}'    => $speed_text
    );

    return str_replace(array_keys($substitutions), array_values($substitutions), $instr);
}
4236 
/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * Besides the usual HTML special characters, ';' and '|' are translated
 * to the internal markers '<SEMI>' and '<PIPE>'.
 *
 * @param string $string      The text to escape
 * @param int    $quote_style ENT_COMPAT (default) converts double quotes
 *                            only, ENT_NOQUOTES leaves quotes alone,
 *                            ENT_QUOTES also converts single quotes
 * @return string The escaped text
 */
protected function hsc($string, $quote_style = ENT_COMPAT) {
    // Base table (ENT_COMPAT). It is static, so it must never be modified:
    // a change would leak into every later call.
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
        );

    // Work on a per-call copy so the quote-style adjustments stay local
    $translation = $aTransSpecchar;

    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($translation['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $translation["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $translation);
}
4319 
/**
 * Prefixes a name with an underscore when its first character is numeric.
 *
 * @param string $name The class or id name to convert
 * @return string The name, prefixed with '_' if it starts with a numeric
 *                character; an empty name is returned unchanged
 */
protected function _genCSSName($name) {
    $name = (string) $name;
    if ($name === '') {
        // Nothing to inspect; reading $name[0] would raise a warning
        return $name;
    }
    return (is_numeric($name[0]) ? '_' : '') . $name;
}
4331 
/**
 * Builds a CSS stylesheet for the current language and configuration.
 *
 * In economy mode, rules are only written for the lexic groups that are
 * enabled in $this->lexic_permissions and for line-number features that
 * are switched on; otherwise every non-empty style is written.
 *
 * @param bool $economy_mode Whether to leave out rules that are not needed
 *                           for the current settings
 * @return string The stylesheet, or an empty string if an error is set
 */
public function get_stylesheet($economy_mode = true) {
    // If there's an error, chances are that the language file
    // won't have populated the language data file, so we can't
    // risk getting a stylesheet...
    if ($this->error) {
        return '';
    }

    // Make sure the style rearrangements have been processed
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // Formats one CSS rule: "<selector> {<declarations>}\n"
    $rule = function ($css_selector, $declarations) {
        return $css_selector . ' {' . $declarations . "}\n";
    };

    // Work out the selector prefix: the ID wins, otherwise the language
    // class, optionally combined with the overall class
    if ($this->overall_id) {
        $selector = '#' . $this->_genCSSName($this->overall_id);
    } else {
        $selector = '.' . $this->_genCSSName($this->language);
        if ($this->overall_class) {
            $selector .= '.' . $this->_genCSSName($this->overall_class);
        }
    }
    $selector .= ' ';

    // Header of the stylesheet
    $credits = " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
               " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n";
    if ($economy_mode) {
        $stylesheet = "/**\n" . $credits . " */\n";
    } else {
        $separator = " * --------------------------------------\n";
        $stylesheet = "/**\n" .
            " * GeSHi Dynamically Generated Stylesheet\n" .
            $separator .
            " * Dynamically generated stylesheet for " . $this->language . "\n" .
            " * CSS class: " . $this->overall_class . ", CSS id: " . $this->overall_id . "\n" .
            $credits .
            $separator .
            " */\n";
    }

    // Line-related rules are written outside economy mode, or when line
    // numbers are actually in use
    $line_rules_wanted = !$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS;

    // Default styles for the code on each line
    if ($line_rules_wanted) {
        $stylesheet .= $rule($selector . '.de1, ' . $selector . '.de2', $this->code_style);
    }

    // Overall styles (empty styles are meaningless, so economy mode is ignored)
    if ($this->overall_style != '') {
        $stylesheet .= $rule($selector, $this->overall_style);
    }

    // Styles for links; unknown keys are skipped
    foreach ($this->link_styles as $key => $style) {
        if ($style == '') {
            continue;
        }
        $pseudo_class = null;
        switch ($key) {
            case GESHI_LINK:
                $pseudo_class = 'link';
                break;
            case GESHI_HOVER:
                $pseudo_class = 'hover';
                break;
            case GESHI_ACTIVE:
                $pseudo_class = 'active';
                break;
            case GESHI_VISITED:
                $pseudo_class = 'visited';
                break;
        }
        if ($pseudo_class !== null) {
            $stylesheet .= $rule($selector . 'a:' . $pseudo_class, $style);
        }
    }

    // Header and footer
    if ($this->header_content_style != '') {
        $stylesheet .= $rule($selector . '.head', $this->header_content_style);
    }
    if ($this->footer_content_style != '') {
        $stylesheet .= $rule($selector . '.foot', $this->footer_content_style);
    }

    // Styles for important stuff
    if ($this->important_styles != '') {
        $stylesheet .= $rule($selector . '.imp', $this->important_styles);
    }

    // Simple line number styles
    if ($line_rules_wanted && $this->line_style1 != '') {
        $stylesheet .= $rule($selector . 'li, ' . $selector . '.li1', $this->line_style1);
    }
    if ($line_rules_wanted && $this->table_linenumber_style != '') {
        $stylesheet .= $rule($selector . '.ln', $this->table_linenumber_style);
    }
    // If there is a style set for fancy line numbers, echo it out
    if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $stylesheet .= $rule($selector . '.li2', $this->line_style2);
    }

    // Keyword groups: permission is tracked per group
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if (!$economy_mode || !empty($this->lexic_permissions['KEYWORDS'][$group])) {
            $stylesheet .= $rule($selector . '.kw' . $group, $styles);
        }
    }

    // Comment groups: enabled per group, or backed by a comment regexp
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if (!$economy_mode ||
            (isset($this->lexic_permissions['COMMENTS'][$group]) &&
                $this->lexic_permissions['COMMENTS'][$group]) ||
            (!empty($this->language_data['COMMENT_REGEXP']) &&
                !empty($this->language_data['COMMENT_REGEXP'][$group]))) {
            $stylesheet .= $rule($selector . '.co' . $group, $styles);
        }
    }

    // Lexics with a single on/off permission. Each entry maps the style key
    // to its CSS class prefix and to whether the 'HARD' group is written
    // as '_h' (hard escapes / hard quotes, since 1.0.8).
    $single_permission_lexics = array(
        'ESCAPE_CHAR' => array('es', true),
        'BRACKETS'    => array('br', false),
        'SYMBOLS'     => array('sy', false),
        'STRINGS'     => array('st', true),
        'NUMBERS'     => array('nu', false),
        'METHODS'     => array('me', false)
    );
    foreach ($single_permission_lexics as $lexic => $spec) {
        $prefix = $spec[0];
        $has_hard_group = $spec[1];
        foreach ($this->language_data['STYLES'][$lexic] as $group => $styles) {
            if ($styles == '' || ($economy_mode && !$this->lexic_permissions[$lexic])) {
                continue;
            }
            if ($has_hard_group && $group === 'HARD') {
                $group = '_h';
            }
            $stylesheet .= $rule($selector . '.' . $prefix . $group, $styles);
        }
    }

    // Script delimiters (economy mode is ignored, empty styles are meaningless)
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $stylesheet .= $rule($selector . '.sc' . $group, $styles);
        }
    }

    // Regexps: a regexp may name its own CSS class via GESHI_CLASS
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($economy_mode && empty($this->lexic_permissions['REGEXPS'][$group])) {
            continue;
        }
        $regexp = $this->language_data['REGEXPS'][$group];
        if (is_array($regexp) && array_key_exists(GESHI_CLASS, $regexp)) {
            $stylesheet .= $rule($selector . '.' . $regexp[GESHI_CLASS], $styles);
        } else {
            $stylesheet .= $rule($selector . '.re' . $group, $styles);
        }
    }

    // Styles for lines being highlighted extra
    if (!$economy_mode || (count($this->highlight_extra_lines) != count($this->highlight_extra_lines_styles))) {
        $stylesheet .= $rule(
            $selector . '.ln-xtra, ' . $selector . 'li.ln-xtra, ' . $selector . 'div.ln-xtra',
            $this->highlight_extra_lines_style
        );
    }
    $stylesheet .= $selector . "span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $line_class = '.lx' . $lineid;
        $stylesheet .= $rule(
            $selector . $line_class . ', ' . $selector . 'li' . $line_class . ', ' . $selector . 'div' . $line_class,
            $linestyle
        );
    }

    return $stylesheet;
}
4537 
/**
 * Gets the CSS for a line that is highlighted extra.
 *
 * @param int $line The line number to look up
 * @return string The style registered for that line in
 *                $this->highlight_extra_lines_styles, or the general
 *                $this->highlight_extra_lines_style if none is registered
 */
protected function get_line_style($line) {
    if (isset($this->highlight_extra_lines_styles[$line])) {
        return $this->highlight_extra_lines_styles[$line];
    }

    // if no "extra" style assigned, fall back to the default one
    return $this->highlight_extra_lines_style;
}
4554 
/**
 * Converts a list of strings into one or more compact regular expression
 * alternations by merging common prefixes.
 *
 * The sorted list is turned into a tree of tokens (shared prefixes become
 * inner nodes) which is then flattened with
 * _optimize_regexp_list_tokens_to_string(). A new regexp is started when
 * the current one exceeds GESHI_MAX_PCRE_LENGTH or would exceed
 * GESHI_MAX_PCRE_SUBPATTERNS.
 *
 * @param array  $list             The strings to match; each entry is cast
 *                                 to string and quoted with preg_quote()
 * @param string $regexp_delimiter The delimiter the resulting regexps will
 *                                 be used with
 * @return array The generated regexp fragments (without delimiters)
 */
protected function optimize_regexp_list($list, $regexp_delimiter = '/') {
    $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    sort($list);
    $regexp_list = array('');
    $num_subpatterns = 0;
    $list_key = 0;

    // the tokens which we will use to generate the regexp list
    $tokens = array();
    // the path of keys (one per tree level) leading to the previous entry
    $prev_keys = array();
    // go through all entries of the list and generate the token list
    $cur_len = 0;
    for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
        if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
            // seems like the length of this pcre is growing exorbitantly
            $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
            $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
            $tokens = array();
            $cur_len = 0;
        }
        $level = 0;
        $entry = preg_quote((string) $list[$i], $regexp_delimiter);
        $pointer = &$tokens;
        // properly assign the new entry to the correct position in the token array
        // possibly generate smaller common denominator keys
        while (true) {
            // get the common denominator
            if (isset($prev_keys[$level])) {
                // Strict comparison: with == numeric-looking strings such as
                // '0' and '00' would compare equal and be dropped as duplicates
                if ($prev_keys[$level] === $entry) {
                    // this is a duplicate entry, skip it
                    continue 2;
                }
                $char = 0;
                while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                        && $entry[$char] == $prev_keys[$level][$char]) {
                    ++$char;
                }
                if ($char > 0) {
                    // this entry has at least some chars in common with the current key
                    if ($char == strlen($prev_keys[$level])) {
                        // current key is totally matched, i.e. this entry has just some bits appended
                        $pointer = &$pointer[$prev_keys[$level]];
                    } else {
                        // only part of the keys match
                        $new_key_part1 = substr($prev_keys[$level], 0, $char);
                        $new_key_part2 = substr($prev_keys[$level], $char);

                        if (in_array($new_key_part1[0], $regex_chars)
                            || in_array($new_key_part2[0], $regex_chars)) {
                            // this is bad, a regex char as first character
                            $pointer[$entry] = array('' => true);
                            array_splice($prev_keys, $level, count($prev_keys), $entry);
                            $cur_len += strlen($entry);
                            continue;
                        } else {
                            // relocate previous tokens
                            $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                            unset($pointer[$prev_keys[$level]]);
                            $pointer = &$pointer[$new_key_part1];
                            // recreate key index
                            array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                            $cur_len += strlen($new_key_part2);
                        }
                    }
                    ++$level;
                    // The cast keeps $entry a string on older PHP versions,
                    // where substr() can return false at the end of the string
                    $entry = (string) substr($entry, $char);
                    continue;
                }
                // else: fall trough, i.e. no common denominator was found
            }
            if ($level == 0 && !empty($tokens)) {
                // we can dump current tokens into the string and throw them away afterwards
                $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $new_subpatterns = substr_count($new_entry, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    $regexp_list[++$list_key] = $new_entry;
                    $num_subpatterns = $new_subpatterns;
                } else {
                    if (!empty($regexp_list[$list_key])) {
                        $new_entry = '|' . $new_entry;
                    }
                    $regexp_list[$list_key] .= $new_entry;
                    $num_subpatterns += $new_subpatterns;
                }
                $tokens = array();
                $cur_len = 0;
            }
            // no further common denominator found
            $pointer[$entry] = array('' => true);
            array_splice($prev_keys, $level, count($prev_keys), $entry);

            $cur_len += strlen($entry);
            break;
        }
        // the entry has been consumed, release it
        unset($list[$i]);
    }
    // make sure the last tokens get converted as well
    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        if ( !empty($regexp_list[$list_key]) ) {
            ++$list_key;
        }
        $regexp_list[$list_key] = $new_entry;
    } else {
        if (!empty($regexp_list[$list_key])) {
            $new_entry = '|' . $new_entry;
        }
        $regexp_list[$list_key] .= $new_entry;
    }
    return $regexp_list;
}
4681 
/**
 * Flattens a token tree into a regular expression alternation.
 *
 * Every key of $tokens is one alternative. Its sub-tokens, if any, are
 * appended as a non-capturing group, which is made optional when the key
 * itself also ends an entry (marked by the '' sub-key). On the outermost
 * call two simplifications are applied to the result:
 * "(?:p)?" becomes "p?" and "(?:a|b|c)" becomes "[abc]".
 *
 * @param array $tokens   The token tree (taken by reference, not modified)
 * @param bool  $recursed Whether this is a recursive call on a sub-tree
 * @return string The alternation, without a trailing pipe
 */
protected function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $alternation = '';
    foreach ($tokens as $prefix => $children) {
        // The '' marker says that $prefix on its own is a complete entry
        $prefix_is_entry = isset($children['']);
        unset($children['']);

        $alternation .= $prefix;
        if (!empty($children)) {
            $group = '(?:' . $this->_optimize_regexp_list_tokens_to_string($children, true) . ')';
            // make sub_tokens optional when the prefix alone is valid too
            $alternation .= $prefix_is_entry ? $group . '?' : $group;
        }
        $alternation .= '|';
    }

    if ($recursed) {
        // return without trailing pipe
        return substr($alternation, 0, -1);
    }

    // do some optimizations on the complete expression
    // (?:p)? => p?
    $alternation = preg_replace('#\(\?\:(.)\)\?#', '\1?', $alternation);

    // (?:a|b|c|d|...) => [abcd...]
    // TODO: a|bb|c => [ac]|bb
    $to_char_class = function ($matches) {
        return "[" . str_replace("|", "", $matches[1]) . "]";
    };
    $alternation = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $to_char_class, $alternation);

    // return without trailing pipe
    return substr($alternation, 0, -1);
}
4728 } // End Class GeSHi
4729 
4730 
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper that highlights a piece of source code in one call.
     *
     * A GeSHi object is created with header type GESHI_HEADER_NONE and its
     * output is wrapped in a <code> element.
     *
     * @param string $string   The source code to highlight
     * @param string $language The language to highlight it in
     * @param string $path     Passed to the GeSHi constructor as third argument
     * @param bool   $return   Whether to return the markup instead of echoing it
     * @return string|bool The markup if $return is true; otherwise the markup
     *                     is echoed and the result is true when
     *                     GeSHi::error() reports no error, false if it does
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $markup;
        }

        echo $markup;

        return !$highlighter->error();
    }
}
strip_language_name($language)
Clean up the language name to prevent malicious code injection.
Definition: geshi.php:636
replace_keywords($instr)
Replaces certain keywords in the header and footer with certain configuration values.
Definition: geshi.php:4205
$highlight_extra_lines_style
Definition: geshi.php:393
set_keyword_group_highlighting($key, $flag=true)
Turns highlighting on/off for a keyword group.
Definition: geshi.php:1026
finalise(&$parsed_code)
Takes the parsed code and various options, and creates the HTML surrounding it to make it look nice...
Definition: geshi.php:3827
set_header_content_style($style)
Sets the style for the header content.
Definition: geshi.php:1714
enable_keyword_links($enable=true)
Turns linking of keywords on or off.
Definition: geshi.php:1910
Add rich text string
enable_highlighting($flag=true)
Enables all highlighting.
Definition: geshi.php:1421
set_regexps_highlighting($key, $flag)
Turns highlighting on/off for regexps.
Definition: geshi.php:1310
$lexic_permissions
Definition: geshi.php:298
if(isset($_REQUEST['delete'])) $list
Definition: registry.php:41
set_escape_characters_style($style, $preserve_defaults=false, $group=0)
Sets the styles for escaped characters.
Definition: geshi.php:1081
$style
Definition: example_012.php:70
$use_language_tab_width
Definition: geshi.php:480
const GESHI_ACTIVE
Links in the source in the :active state.
Definition: geshi.php:106
set_strings_style($style, $preserve_defaults=false, $group=0)
Sets the styles for strings.
Definition: geshi.php:1185
const GESHI_NUMBER_INT_CSTYLE
Enhanced number format for integers like seen in C.
Definition: geshi.php:167
$code_style
Definition: geshi.php:419
const GESHI_ERROR_INVALID_LINE_NUMBER_TYPE
The line number type passed to GeSHi->enable_line_numbers() was invalid.
Definition: geshi.php:210
$footer_content
Definition: geshi.php:327
const GESHI_MAX_PCRE_LENGTH
It's also important not to generate regular expressions that are too long; be generous here...
Definition: geshi.php:161
set_time($start_time, $end_time)
Sets the time taken to parse the code.
Definition: geshi.php:3668
$line_style2
Definition: geshi.php:443
set_source($source)
Sets the source code for this object.
Definition: geshi.php:625
$source
Definition: geshi.php:232
$result
footer()
Returns the footer for the code block.
Definition: geshi.php:4150
const GESHI_MODIFIERS
The key of the regex array defining any modifiers to the regular expression.
Definition: geshi.php:137
const GESHI_LANG_ROOT(!defined('GESHI_ROOT'))
The language file directory for GeSHi private.
Definition: geshi.php:52
const GESHI_START_IMPORTANT
The starter for important parts of the source.
Definition: geshi.php:114
$type
$x
Definition: example_009.php:98
$time
Definition: geshi.php:315
hsc($string, $quote_style=ENT_COMPAT)
Secure replacement for PHP built-in function htmlspecialchars().
Definition: geshi.php:4289
set_overall_style($style, $preserve_defaults=false)
Sets the styles for the code that will be outputted when this object is parsed.
Definition: geshi.php:841
const GESHI_NORMAL_LINE_NUMBERS
Use normal line numbers when building the result.
Definition: geshi.php:64
const GESHI_NUMBER_FLT_SCI_ZERO
Number format to highlight floating-point numbers with support for scientific notation (E) and requir...
Definition: geshi.php:195
set_case_sensitivity($key, $case)
Sets whether a set of keywords are checked for in a case sensitive manner.
Definition: geshi.php:1321
load_language($file_name)
Gets language information and stores it for later use.
Definition: geshi.php:3729
$error_messages
Definition: geshi.php:263
$loaded_language
Definition: geshi.php:506
highlight_lines_extra($lines, $style=null)
Specifies which lines to highlight extra.
Definition: geshi.php:1824
build_style_cache()
Setup caches needed for styling.
Definition: geshi.php:1922
$encoding
Definition: geshi.php:493
$code
Definition: example_050.php:99
disable_highlighting()
Disables all highlighting.
Definition: geshi.php:1407
const GESHI_NUMBER_HEX_SUFFIX
Number format to highlight hex numbers with a suffix of h.
Definition: geshi.php:187
$link_target
Definition: geshi.php:486
set_symbols_style($style, $preserve_defaults=false, $group=0)
Sets the styles for symbols.
Definition: geshi.php:1146
get_version()
Returns the version of GeSHi.
Definition: geshi.php:576
const GESHI_HEADER_PRE_VALID
Use a pre to wrap lines when line numbers are enabled or to wrap the whole code.
Definition: geshi.php:76
set_footer_content_style($style)
Sets the style for the footer content.
Definition: geshi.php:1724
$end
Definition: saml1-acs.php:18
Definition: geshi.php:226
$header_type
Definition: geshi.php:292
$_hmr_after
Definition: geshi.php:543
$keyword_links
Definition: geshi.php:499
set_header_content($content)
Sets the content of the header block.
Definition: geshi.php:1694
if(!array_key_exists('StateId', $_REQUEST)) $id
set_link_target($target)
Sets the target for links in code.
Definition: geshi.php:1770
$language
Definition: geshi.php:238
$overall_class
Definition: geshi.php:425
const GESHI_BEFORE
The key of the regex array defining what bracket group in a matched search to put before the replacem...
Definition: geshi.php:140
enable_line_numbers($flag, $nth_row=5)
Sets whether line numbers should be displayed.
Definition: geshi.php:952
const GESHI_CLASS
The key of the regex array defining a custom keyword to use for this regexp's html tag class...
Definition: geshi.php:146
$line_numbers_start
Definition: geshi.php:407
const GESHI_HOVER
Links in the source in the :hover state.
Definition: geshi.php:104
$attributes
get_language_fullname($language)
Get full_name for a lang or false.
Definition: geshi.php:780
get_real_tab_width()
Returns the tab width to use, based on the current language and user preference.
Definition: geshi.php:1377
set_line_ending($line_ending)
Sets the line-ending.
Definition: geshi.php:1863
set_highlight_lines_extra_style($styles)
Sets the style for extra-highlighted lines.
Definition: geshi.php:1853
enable_classes($flag=true)
Sets whether CSS classes should be used to highlight the source.
Definition: geshi.php:879
error()
Returns an error message associated with the last GeSHi operation, or false if no error has occurred...
Definition: geshi.php:588
$table_linenumber_style
Definition: geshi.php:449
set_code_style($style, $preserve_defaults=false)
Sets the style for the actual code.
Definition: geshi.php:898
$use_classes
Definition: geshi.php:280
static get_language_name_from_extension( $extension, $lookup=array())
Given a file extension, this method returns either a valid geshi language name, or the empty string i...
Definition: geshi.php:1448
set_strings_highlighting($flag)
Turns highlighting on/off for strings.
Definition: geshi.php:1199
remove_keyword($key, $word, $recompile=true)
Removes a keyword from a keyword group.
Definition: geshi.php:1599
add_keyword_group($key, $styles, $case_sensitive=true, $words=array())
Creates a new keyword group.
Definition: geshi.php:1621
optimize_regexp_list($list, $regexp_delimiter='/')
This function creates an optimized regular expression list from an array of strings.
Definition: geshi.php:4569
_optimize_regexp_list_tokens_to_string(&$tokens, $recursed=false)
This function creates the appropriate regexp string from a token array; you should not call this functi...
Definition: geshi.php:4692
$link_styles
Definition: geshi.php:352
$line_nth_row
Definition: geshi.php:468
$_hmr_replace
Definition: geshi.php:542
const GESHI_NUMBER_FLT_NONSCI_F
Number format to highlight floating-point numbers without support for scientific notation.
Definition: geshi.php:191
get_line_style($line)
Gets the style that is used for the specified line.
Definition: geshi.php:4544
get_multiline_span()
Get current setting for multiline spans, see GeSHi->enable_multiline_span().
Definition: geshi.php:980
enable_multiline_span($flag)
Sets whether spans and other HTML markup generated by GeSHi can span over multiple lines or not...
Definition: geshi.php:970
set_numbers_highlighting($flag)
Turns highlighting on/off for numbers.
Definition: geshi.php:1248
__construct($source='', $language='', $path='')
Creates a new GeSHi object, with source and language.
Definition: geshi.php:560
const GESHI_COMMENTS
Used in language files to mark comments.
Definition: geshi.php:149
$allow_multiline_span
Definition: geshi.php:462
set_keyword_group_style($key, $style, $preserve_defaults=false)
Sets the style for a keyword group.
Definition: geshi.php:995
set_escape_characters_highlighting($flag=true)
Turns highlighting on/off for escaped characters.
Definition: geshi.php:1095
set_regexps_style($key, $style, $preserve_defaults=false)
Sets the styles for regexps.
Definition: geshi.php:1295
$linestyle
const GESHI_REPLACE
The key of the regex array defining what bracket group in a matched search to use as a replacement...
Definition: geshi.php:135
const GESHI_NUMBER_HEX_PREFIX
Number format to highlight hex numbers with a prefix 0x.
Definition: geshi.php:183
$parse_cache_built
Definition: geshi.php:514
set_comments_style($key, $style, $preserve_defaults=false)
Sets the styles for comment groups.
Definition: geshi.php:1041
$language_path
Definition: geshi.php:250
const GESHI_NUMBER_BIN_PREFIX_0B
Number format to highlight binary numbers with a prefix 0b (C)
Definition: geshi.php:173
if($format !==null) $name
Definition: metadata.php:146
const GESHI_HEADER_DIV
Use a "div" to surround the source.
Definition: geshi.php:72
change_case($instr)
Changes the case of a keyword for those languages where a change is asked for.
Definition: geshi.php:3220
enable_inner_code_block($flag)
Sets whether to force a surrounding block around the highlighted code or not.
Definition: geshi.php:1735
parse_code()
Returns the code in $this->source, highlighted and surrounded by the necessary HTML.
Definition: geshi.php:2159
const GESHI_ERROR_NO_SUCH_LANG
The language specified does not exist.
Definition: geshi.php:204
$overall_id
Definition: geshi.php:431
set_encoding($encoding)
Sets the encoding used for htmlspecialchars(), for international support.
Definition: geshi.php:1898
$overall_style
Definition: geshi.php:413
$highlight_extra_lines_styles
Definition: geshi.php:387
const GESHI_CAPS_UPPER
Uppercase keywords found.
Definition: geshi.php:96
$line_style1
Definition: geshi.php:437
const GESHI_ERROR_INVALID_HEADER_TYPE
The header type passed to GeSHi->set_header_type() was invalid.
Definition: geshi.php:208
const GESHI_MAX_PCRE_SUBPATTERNS
Some old PHP / PCRE versions only support up to xxx subpatterns in regular expressions.
Definition: geshi.php:155
set_brackets_highlighting($flag)
Turns highlighting on/off for brackets.
Definition: geshi.php:1131
const GESHI_NO_LINE_NUMBERS(!defined('GESHI_SECURITY_PARANOID'))
Use no line numbers when building the result.
Definition: geshi.php:62
const GESHI_NUMBER_OCT_PREFIX_0O
Number format to highlight octal numbers with a prefix 0o (logtalk)
Definition: geshi.php:177
const GESHI_NUMBER_BIN_SUFFIX
Number format to highlight binary numbers with a suffix "b".
Definition: geshi.php:169
const GESHI_HEADER_PRE_TABLE
Use a "table" to surround the source:
Definition: geshi.php:90
const GESHI_NUMBER_BIN_PREFIX_PERCENT
Number format to highlight binary numbers with a prefix %.
Definition: geshi.php:171
$strict_mode
Definition: geshi.php:274
get_supported_languages($full_names=false)
Get supported langs or an associative array lang=>full_name.
Definition: geshi.php:729
optimize_keyword_group($key)
compile optimized regexp list for keyword group
Definition: geshi.php:1664
$error
Definition: geshi.php:257
const GESHI_VERSION
The version of this GeSHi file.
Definition: geshi.php:43
get_language_name()
Gets a human-readable language name (thanks to Simon Patterson for the idea :))
Definition: geshi.php:612
const GESHI_HEADER_PRE
Use a "pre" to surround the source.
Definition: geshi.php:74
$n
Definition: RandomTest.php:85
enable_ids($flag=true)
Whether CSS IDs should be added to each line.
Definition: geshi.php:1806
handle_keyword_replace($match)
Handles replacements of keywords to include markup and links if requested.
Definition: geshi.php:3240
const GESHI_VISITED
Links in the source in the :visited state.
Definition: geshi.php:108
enable_strict_mode($mode=true)
Enables/disables strict highlighting.
Definition: geshi.php:1394
set_tab_width($width)
Sets how many spaces a tab is substituted for.
Definition: geshi.php:1350
parse_non_string_part($stuff_to_parse)
Takes a string that has no strings or comments in it, and highlights stuff like keywords, numbers and methods.
Definition: geshi.php:3348
$add_ids
Definition: geshi.php:375
$line_numbers
Definition: geshi.php:455
const GESHI_NUMBER_FLT_NONSCI
Number format to highlight floating-point numbers without support for scientific notation.
Definition: geshi.php:189
const GESHI_SEARCH
The key of the regex array defining what to search for.
Definition: geshi.php:132
$comment
Definition: buildRTE.php:83
set_brackets_style($style, $preserve_defaults=false)
Sets the styles for brackets.
Definition: geshi.php:1113
Create styles array
The data for the language used.
const GESHI_LINK
Links in the source in the :link state.
Definition: geshi.php:102
const GESHI_ERROR_FILE_NOT_READABLE
GeSHi could not open a file for reading (generally a language file)
Definition: geshi.php:206
$highlight_extra_lines
Definition: geshi.php:381
const GESHI_FANCY_LINE_NUMBERS
Use fancy line numbers when building the result.
Definition: geshi.php:66
set_language_path($path)
Sets the path to the directory containing the language files.
Definition: geshi.php:695
const GESHI_CAPS_LOWER
Lowercase keywords found.
Definition: geshi.php:98
set_url_for_keyword_group($group, $url)
Sets the base URL to be used for keywords.
Definition: geshi.php:1748
get_stylesheet($economy_mode=true)
Returns a stylesheet for the highlighted code.
Definition: geshi.php:4341
const GESHI_NUMBER_OCT_PREFIX_AT
Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series)...
Definition: geshi.php:179
$_kw_replace_group
Definition: geshi.php:531
set_numbers_style($style, $preserve_defaults=false, $group=0)
Sets the styles for numbers.
Definition: geshi.php:1234
const GESHI_MAYBE
Strict mode might apply, and can be enabled or disabled by GeSHi->enable_strict_mode().
Definition: geshi.php:126
$footer_content_style
Definition: geshi.php:339
set_script_style($style, $preserve_defaults=false, $group=0)
Sets the styles for strict code blocks.
Definition: geshi.php:1214
$language_data
Definition: geshi.php:244
set_overall_id($id)
Sets the overall id for this block of code.
Definition: geshi.php:868
start_line_numbers_at($number)
Sets what number line numbers should start at.
Definition: geshi.php:1882
set_header_type($type)
Sets the type of header to be used.
Definition: geshi.php:820
const GESHI_NUMBER_FLT_SCI_SHORT
Number format to highlight floating-point numbers with support for scientific notation (E) and option...
Definition: geshi.php:193
handle_multiline_regexps($matches)
handles newlines in REGEXPS matches.
Definition: geshi.php:3315
$line_ending
Definition: geshi.php:401
indent(&$result)
Swaps out spaces and tabs for HTML indentation.
Definition: geshi.php:3121
set_methods_highlighting($flag)
Turns highlighting on/off for methods.
Definition: geshi.php:1279
$_hmr_before
Definition: geshi.php:541
$_hmr_key
Definition: geshi.php:544
const GESHI_NEVER
Strict mode never applies.
Definition: geshi.php:123
$enable_important_blocks
Definition: geshi.php:360
$ret
Definition: parser.php:6
load_from_file($file_name, $lookup=array())
Given a file name, this method loads its contents in, and attempts to set the language automatically...
Definition: geshi.php:1555
$force_code_block
Definition: geshi.php:346
enable_important_blocks($flag)
Sets whether context-important blocks are highlighted.
Definition: geshi.php:1796
const GESHI_NUMBER_HEX_PREFIX_DOLLAR
Number format to highlight hex numbers with a prefix $.
Definition: geshi.php:185
$i
Definition: disco.tpl.php:19
set_methods_style($key, $style, $preserve_defaults=false)
Sets the styles for methods.
Definition: geshi.php:1265
set_footer_content($content)
Sets the content of the footer block.
Definition: geshi.php:1704
$url
Add data(end) time
Method that wraps PHPs time in order to allow simulations with the workflow.
set_comments_highlighting($key, $flag=true)
Turns highlighting on/off for comment groups.
Definition: geshi.php:1066
$tab_width
Definition: geshi.php:474
get_time()
Gets the time taken to parse the code.
Definition: geshi.php:3680
set_case_keywords($case)
Sets the case that keywords should use when found.
Definition: geshi.php:1335
set_important_styles($styles)
Sets styles for important parts of the code.
Definition: geshi.php:1784
defined( 'APPLICATION_ENV')||define( 'APPLICATION_ENV'
Definition: bootstrap.php:27
const GESHI_NUMBER_INT_BASIC
Basic number format for integers.
Definition: geshi.php:165
const GESHI_CAPS_NO_CHANGE
Leave keywords found as the case that they are.
Definition: geshi.php:94
set_link_styles($type, $styles)
Sets styles for links in code.
Definition: geshi.php:1760
remove_keyword_group($key)
Removes a keyword group.
Definition: geshi.php:1647
const GESHI_NUMBER_OCT_PREFIX
Number format to highlight octal numbers with a leading zero.
Definition: geshi.php:175
_genCSSName($name)
Generate a CSS class name from a given string.
Definition: geshi.php:4328
header()
Creates the header for the code block (with correct attributes)
Definition: geshi.php:4071
const GESHI_NUMBER_OCT_SUFFIX
Number format to highlight octal numbers with a suffix of o.
Definition: geshi.php:181
set_overall_class($class)
Sets the overall classname for this block of code.
Definition: geshi.php:857
set_line_style($style1, $style2='', $preserve_defaults=false)
Sets the styles for the line numbers.
Definition: geshi.php:918
$style2
Definition: example_012.php:71
$key
Definition: croninfo.php:18
handle_regexps_callback($matches)
handles regular expressions highlighting-definitions with callback functions
Definition: geshi.php:3301
const GESHI_END_IMPORTANT
The ender for important parts of the source.
Definition: geshi.php:116
add_keyword($key, $word)
Adds a keyword to a keyword group for highlighting.
Definition: geshi.php:1571
$important_styles
Definition: geshi.php:369
const GESHI_HEADER_NONE
Use nothing to surround the source.
Definition: geshi.php:70
$header_content
Definition: geshi.php:321
set_symbols_highlighting($flag)
Turns highlighting on/off for symbols.
Definition: geshi.php:1166
$_rx_key
Definition: geshi.php:532
$header_content_style
Definition: geshi.php:333
merge_arrays()
Merges arrays recursively, overwriting values of the first array with values of later arrays...
Definition: geshi.php:3689
const GESHI_ALWAYS
Strict mode always applies.
Definition: geshi.php:128
build_parse_cache()
Setup caches needed for parsing.
Definition: geshi.php:1972
set_use_language_tab_width($use)
Sets whether or not to use tab-stop width specified by language.
Definition: geshi.php:1366
set_language($language, $force_reset=false)
Sets the language for this object.
Definition: geshi.php:653
const GESHI_AFTER
The key of the regex array defining what bracket group in a matched search to put after the replaceme...
Definition: geshi.php:143