ILIAS  release_5-2 Revision v5.2.25-18-g3f80b828510
geshi.php
Go to the documentation of this file.
1 <?php
36 //
37 // GeSHi Constants
38 // You should use these constant names in your programs instead of
39 // their values - you never know when a value may change in a future
40 // version
41 //
42 
44 define('GESHI_VERSION', '1.0.8.12');
45 
46 // Define the root directory for the GeSHi code tree
47 if (!defined('GESHI_ROOT')) {
49  define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
50 }
53 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
54 
55 // Define if GeSHi should be paranoid about security
56 if (!defined('GESHI_SECURITY_PARANOID')) {
58  define('GESHI_SECURITY_PARANOID', false);
59 }
60 
61 // Line numbers - use with enable_line_numbers()
63 define('GESHI_NO_LINE_NUMBERS', 0);
65 define('GESHI_NORMAL_LINE_NUMBERS', 1);
67 define('GESHI_FANCY_LINE_NUMBERS', 2);
68 
69 // Container HTML type
71 define('GESHI_HEADER_NONE', 0);
73 define('GESHI_HEADER_DIV', 1);
75 define('GESHI_HEADER_PRE', 2);
77 define('GESHI_HEADER_PRE_VALID', 3);
91 define('GESHI_HEADER_PRE_TABLE', 4);
92 
93 // Capitalisation constants
95 define('GESHI_CAPS_NO_CHANGE', 0);
97 define('GESHI_CAPS_UPPER', 1);
99 define('GESHI_CAPS_LOWER', 2);
100 
101 // Link style constants
103 define('GESHI_LINK', 0);
105 define('GESHI_HOVER', 1);
107 define('GESHI_ACTIVE', 2);
109 define('GESHI_VISITED', 3);
110 
111 // Important string starter/finisher
112 // Note that if you change these, they should be as-is: i.e., don't
113 // write them as if they had been run through htmlentities()
115 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
117 define('GESHI_END_IMPORTANT', '<END GeSHi>');
118 
122 // When strict mode applies for a language
124 define('GESHI_NEVER', 0);
127 define('GESHI_MAYBE', 1);
129 define('GESHI_ALWAYS', 2);
130 
131 // Advanced regexp handling constants, used in language files
133 define('GESHI_SEARCH', 0);
136 define('GESHI_REPLACE', 1);
138 define('GESHI_MODIFIERS', 2);
141 define('GESHI_BEFORE', 3);
144 define('GESHI_AFTER', 4);
147 define('GESHI_CLASS', 5);
148 
150 define('GESHI_COMMENTS', 0);
151 
153 define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
154 
if (!function_exists('stripos')) {
    /**
     * Fallback for stripos() on PHP builds that do not provide it.
     *
     * Finds the first case-insensitive occurrence of $needle in $haystack.
     *
     * @param string   $haystack The string to search in
     * @param string   $needle   The string to search for
     * @param int|null $offset   Optional position at which to start searching
     * @return int|false Position of the match relative to the start of
     *                   $haystack, or false when there is no match
     */
    function stripos($haystack, $needle, $offset = null) {
        // The "i" modifier is what makes this a stripos() and not a strpos()
        $pattern = '/' . preg_quote($needle, '/') . '/i';

        if (GESHI_PHP_PRE_433) {
            // the offset param of preg_match is not supported below PHP 4.3.3,
            // so cut the haystack manually and shift the result back afterwards
            $shift = 0;
            if (!is_null($offset)) {
                $tail = substr($haystack, $offset);
                $shift = strlen($haystack) - strlen($tail);
                $haystack = $tail;
            }
            if (preg_match($pattern, $haystack, $match, PREG_OFFSET_CAPTURE)) {
                return $match[0][1] + $shift;
            }
            return false;
        }

        if (preg_match($pattern, $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
            return $match[0][1];
        }
        return false;
    }
}
184 
189 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
195 define('GESHI_MAX_PCRE_LENGTH', 12288);
196 
197 //Number format specification
199 define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+
201 define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]?
203 define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB]
205 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+
207 define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+
209 define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+
211 define('GESHI_NUMBER_OCT_PREFIX_0O', 512); //0o[0-7]+
213 define('GESHI_NUMBER_OCT_PREFIX_AT', 1024); //@[0-7]+
215 define('GESHI_NUMBER_OCT_SUFFIX', 2048); //[0-7]+[oO]
217 define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+
219 define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192); //$[0-9a-fA-F]+
221 define('GESHI_NUMBER_HEX_SUFFIX', 16384); //[0-9][0-9a-fA-F]*h
223 define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+
225 define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f
227 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+
229 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+
230 //Custom formats are passed by RX array
231 
232 // Error detection - use these to analyse faults
236 define('GESHI_ERROR_NO_INPUT', 1);
238 define('GESHI_ERROR_NO_SUCH_LANG', 2);
240 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
242 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
244 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
259 class GeSHi {
267  var $source = '';
268 
273  var $language = '';
274 
280 
286 
292  var $error = false;
293 
299  GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
300  GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
301  GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
302  GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
303  );
304 
309  var $strict_mode = false;
310 
315  var $use_classes = false;
316 
328 
334  'KEYWORDS' => array(),
335  'COMMENTS' => array('MULTI' => true),
336  'REGEXPS' => array(),
337  'ESCAPE_CHAR' => true,
338  'BRACKETS' => true,
339  'SYMBOLS' => false,
340  'STRINGS' => true,
341  'NUMBERS' => true,
342  'METHODS' => true,
343  'SCRIPT' => true
344  );
345 
350  var $time = 0;
351 
356  var $header_content = '';
357 
362  var $footer_content = '';
363 
369 
375 
381  var $force_code_block = false;
382 
388 
396 
404  var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
405 
410  var $add_ids = false;
411 
417 
423 
428  var $highlight_extra_lines_style = 'background-color: #ffc;';
429 
436  var $line_ending = null;
437 
443 
448  var $overall_style = 'font-family:monospace;';
449 
454  var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
455 
460  var $overall_class = '';
461 
466  var $overall_id = '';
467 
472  var $line_style1 = 'font-weight: normal; vertical-align:top;';
473 
478  var $line_style2 = 'font-weight: bold; vertical-align:top;';
479 
484  var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
485 
491 
498 
503  var $line_nth_row = 0;
504 
509  var $tab_width = 8;
510 
516 
521  var $link_target = '';
522 
528  var $encoding = 'utf-8';
529 
534  var $keyword_links = true;
535 
542 
549  var $parse_cache_built = false;
550 
567  var $_rx_key = 0;
568 
576  var $_hmr_before = '';
577  var $_hmr_replace = '';
578  var $_hmr_after = '';
579  var $_hmr_key = 0;
580 
/**
 * Creates a new highlighter object.
 *
 * Source and language are only applied when they are non-empty strings;
 * the language path is always handed to set_language_path().
 *
 * @param string $source   The source code to highlight
 * @param string $language The language to highlight the source with
 * @param string $path     The path to the language file directory
 */
function __construct($source = '', $language = '', $path = '') {
    $has_source   = is_string($source) && ($source !== '');
    $has_language = is_string($language) && ($language !== '');

    if ($has_source) {
        $this->set_source($source);
    }
    if ($has_language) {
        $this->set_language($language);
    }

    $this->set_language_path($path);
}
606 
/**
 * Returns the version of GeSHi as defined by the GESHI_VERSION constant.
 *
 * @return string
 */
function get_version() {
    $version = GESHI_VERSION;
    return $version;
}
617 
/**
 * Returns an HTML error message for the last recorded error.
 *
 * $this->error normally holds one of the GESHI_ERROR_* codes, which is
 * looked up in $this->error_messages. Other parts of the class may store a
 * ready-made message string instead; such a string is shown as-is rather
 * than being used as an (undefined) index into the message table.
 *
 * @return string|false The HTML error message, or false if no error is set
 */
function error() {
    if (!$this->error) {
        return false;
    }

    if (!isset($this->error_messages[$this->error])) {
        // Free-form message: there is no numeric code to report
        $msg = (string) $this->error;
        return "<br /><strong>GeSHi Error:</strong> $msg<br />";
    }

    //Put some template variables for debugging here ...
    $debug_tpl_vars = array(
        '{LANGUAGE}' => $this->language,
        '{PATH}' => $this->language_path
    );
    $msg = str_replace(
        array_keys($debug_tpl_vars),
        array_values($debug_tpl_vars),
        $this->error_messages[$this->error]);

    return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
}
641 
/**
 * Gets a human-readable name for the current language.
 *
 * When the last error is GESHI_ERROR_NO_SUCH_LANG the name is suffixed
 * with " (Unknown Language)".
 *
 * @return string
 */
function get_language_name() {
    $suffix = (GESHI_ERROR_NO_SUCH_LANG == $this->error) ? ' (Unknown Language)' : '';
    if ($suffix !== '') {
        return $this->language_data['LANG_NAME'] . $suffix;
    }
    return $this->language_data['LANG_NAME'];
}
655 
/**
 * Sets the source code for this object and clears the list of lines
 * marked for extra highlighting.
 *
 * @param string $source The source code to highlight
 */
function set_source($source) {
    // Line marks belong to the previous source, so drop them
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
666 
676  function set_language($language, $force_reset = false) {
677  if ($force_reset) {
678  $this->loaded_language = false;
679  }
680 
681  //Clean up the language name to prevent malicious code injection
682  $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
683 
684  $language = strtolower($language);
685 
686  //Retreive the full filename
687  $file_name = $this->language_path . $language . '.php';
688  if ($file_name == $this->loaded_language) {
689  // this language is already loaded!
690  return;
691  }
692 
693  $this->language = $language;
694 
695  $this->error = false;
696  $this->strict_mode = GESHI_NEVER;
697 
698  //Check if we can read the desired file
699  if (!is_readable($file_name)) {
701  return;
702  }
703 
704  // Load the language for parsing
705  $this->load_language($file_name);
706  }
707 
721  if(strpos($path,':')) {
722  //Security Fix to prevent external directories using fopen wrappers.
723  if(DIRECTORY_SEPARATOR == "\\") {
724  if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
725  return;
726  }
727  } else {
728  return;
729  }
730  }
731  if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
732  //Security Fix to prevent external directories using fopen wrappers.
733  return;
734  }
735  if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
736  //Security Fix to prevent external directories using fopen wrappers.
737  return;
738  }
739  if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
740  //Security Fix to prevent external directories using fopen wrappers.
741  return;
742  }
743  if ($path) {
744  $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
745  $this->set_language($this->language); // otherwise set_language_path has no effect
746  }
747  }
748 
/**
 * Lists the languages found in the current language path.
 *
 * A language is any non-directory entry of $this->language_path whose
 * name matches "<name>.php" with no dot inside <name>.
 *
 * @param bool $full_names When exactly true, the result maps each language
 *                         name to its full name (languages whose full name
 *                         cannot be determined are left out); otherwise a
 *                         plain list of language names is returned
 * @return array The supported languages; empty if the language path cannot
 *               be opened as a directory
 */
function get_supported_languages($full_names=false)
{
    $back = array();

    // dir() returns false for a missing or unreadable directory; calling
    // read() on that would be fatal, so bail out with an empty list.
    if (!is_dir($this->language_path)) {
        return $back;
    }
    $dir = dir($this->language_path);
    if (!is_object($dir)) {
        return $back;
    }

    while (false !== ($entry = $dir->read())) {
        // Skip all dirs
        if (is_dir($this->language_path . $entry)) {
            continue;
        }

        // we only want lang.php files
        if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
            continue;
        }
        $langname = $matches[1];

        if ($full_names === true) {
            $fullname = $this->get_language_fullname($langname);
            if (false !== $fullname) {
                $back[$langname] = $fullname; // we go associative
            }
        } else {
            $back[] = $langname;
        }
    }

    $dir->close();

    return $back;
}
799 
/**
 * Reads the full ('LANG_NAME') name of a language from its language file.
 *
 * The file is not executed; its text is searched for the 'LANG_NAME' entry.
 * On failure a message string is stored in $this->error.
 *
 * @param string $language Short name of the language
 * @return string|false The full name, or false if the file cannot be read
 *                      or contains no recognisable 'LANG_NAME' entry
 */
function get_language_fullname($language)
{
    //Clean up the language name to prevent malicious code injection
    $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

    // get fullpath-filename for a langname
    $fullpath = $this->language_path . $language . '.php';

    // Check readability first so a missing file does not raise a warning
    // in file_get_contents() before the failure is handled here.
    $data = is_readable($fullpath) ? file_get_contents($fullpath) : false;
    if (false === $data) {
        $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
        return false;
    }

    // match the langname; a backslash escapes the following character, so
    // names containing \' are captured completely
    if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\'\\\\]|\\\\.)+?)\'/', $data, $matches)) {
        $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
        return false;
    }

    // return fullname for langname
    return stripcslashes($matches[1]);
}
830 
845  function set_header_type($type) {
846  //Check if we got a valid header type
847  if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
850  return;
851  }
852 
853  //Set that new header type
854  $this->header_type = $type;
855  }
856 
/**
 * Sets the style stored in $this->overall_style.
 *
 * @param string $style             The style to use
 * @param bool   $preserve_defaults When true, $style is appended to the
 *                                  current value instead of replacing it
 */
function set_overall_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->overall_style .= $style;
        return;
    }
    $this->overall_style = $style;
}
873 
/**
 * Stores the given value in $this->overall_class.
 *
 * @param string $class The class name to use
 */
function set_overall_class($class)
{
    $this->overall_class = $class;
}
885 
/**
 * Stores the given value in $this->overall_id.
 *
 * @param string $id The ID to use
 */
function set_overall_id($id)
{
    $this->overall_id = $id;
}
896 
/**
 * Turns the use_classes flag on or off.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function enable_classes($flag = true) {
    $this->use_classes = (bool) $flag;
}
907 
/**
 * Sets the style stored in $this->code_style.
 *
 * @param string $style             The style to use
 * @param bool   $preserve_defaults When true, $style is appended to the
 *                                  current value instead of replacing it
 */
function set_code_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->code_style .= $style;
        return;
    }
    $this->code_style = $style;
}
930 
/**
 * Sets the two line styles ($this->line_style1 and $this->line_style2).
 *
 * May be called with two arguments as ($style1, $preserve_defaults): a
 * boolean in second position is taken as the preserve flag and the second
 * style becomes the empty string.
 *
 * @param string      $style1            Style for the first line style
 * @param string|bool $style2            Style for the second line style,
 *                                       or the preserve flag (see above)
 * @param bool        $preserve_defaults When true, styles are appended to
 *                                       the current values
 */
function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
    // Two-argument form: shift the boolean into the preserve flag
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
        return;
    }

    $this->line_style1 = $style1;
    $this->line_style2 = $style2;
}
959 
977  function enable_line_numbers($flag, $nth_row = 5) {
978  if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
979  && GESHI_FANCY_LINE_NUMBERS != $flag) {
981  }
982  $this->line_numbers = $flag;
983  $this->line_nth_row = $nth_row;
984  }
985 
/**
 * Sets the allow_multiline_span flag.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function enable_multiline_span($flag) {
    $this->allow_multiline_span = $flag ? true : false;
}
998 
1005  function get_multiline_span() {
1007  }
1008 
/**
 * Sets the style for a keyword group, or for all groups at once.
 *
 * @param int|string $key               The keyword group to change, or the
 *                                      string '*' to change every group
 *                                      that currently has a style
 * @param string     $style             The style to use
 * @param bool       $preserve_defaults When true, $style is appended to an
 *                                      existing style instead of replacing it
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    // Strict comparison: with a loose one, integer key 0 equals '*' on
    // PHP versions before 8 and would be treated as "all groups".
    if ('*' === $key) {
        $groups = isset($this->language_data['STYLES']['KEYWORDS'])
            ? array_keys($this->language_data['STYLES']['KEYWORDS'])
            : array();
    } else {
        $groups = array($key);
    }

    foreach ($groups as $group) {
        //Set the style for this keyword group
        if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$group])) {
            $this->language_data['STYLES']['KEYWORDS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['KEYWORDS'][$group] = $style;
        }

        //Update the lexic permissions for the real group (never for '*')
        if (!isset($this->lexic_permissions['KEYWORDS'][$group])) {
            $this->lexic_permissions['KEYWORDS'][$group] = true;
        }
    }
}
1043 
/**
 * Turns highlighting on or off for one keyword group.
 *
 * @param int|string $key  The keyword group
 * @param bool       $flag Any value; it is reduced to true/false
 */
function set_keyword_group_highlighting($key, $flag = true) {
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
1054 
/**
 * Sets the style for a comment group, or for all groups at once.
 *
 * @param int|string $key               The comment group to change, or the
 *                                      string '*' to change every group
 *                                      that currently has a style
 * @param string     $style             The style to use
 * @param bool       $preserve_defaults When true, $style is appended to an
 *                                      existing style instead of replacing it
 */
function set_comments_style($key, $style, $preserve_defaults = false) {
    // Strict comparison: with a loose one, integer key 0 equals '*' on
    // PHP versions before 8 and would be treated as "all groups".
    if ('*' === $key) {
        $groups = isset($this->language_data['STYLES']['COMMENTS'])
            ? array_keys($this->language_data['STYLES']['COMMENTS'])
            : array();
    } else {
        $groups = array($key);
    }

    foreach ($groups as $group) {
        if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$group])) {
            $this->language_data['STYLES']['COMMENTS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['COMMENTS'][$group] = $style;
        }
    }
}
1083 
/**
 * Turns highlighting on or off for one comment group.
 *
 * @param int|string $key  The comment group
 * @param bool       $flag Any value; it is reduced to true/false
 */
function set_comments_highlighting($key, $flag = true) {
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
1094 
/**
 * Sets the style for one escape character group.
 *
 * @param string $style             The style to use
 * @param bool   $preserve_defaults When true, $style is appended to the
 *                                  current style instead of replacing it
 * @param int    $group             The group to change
 */
function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
        return;
    }
    $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
}
1112 
/**
 * Turns highlighting of escape characters on or off.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function set_escape_characters_highlighting($flag = true) {
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
1122 
/**
 * Sets the style for brackets (stored under STYLES/BRACKETS, index 0).
 *
 * @param string $style             The style to use
 * @param bool   $preserve_defaults When true, $style is appended to the
 *                                  current style instead of replacing it
 */
function set_brackets_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
        return;
    }
    $this->language_data['STYLES']['BRACKETS'][0] = $style;
}
1144 
/**
 * Turns highlighting of brackets on or off.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function set_brackets_highlighting($flag) {
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
1158 
/**
 * Sets the style for one symbol group.
 *
 * For group 0 the same style is also passed on to set_brackets_style().
 *
 * @param string $style             The style to use
 * @param bool   $preserve_defaults When true, $style is appended to the
 *                                  current style instead of replacing it
 * @param int    $group             The symbol group to change
 */
function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
    // Update the style of symbols
    if ($preserve_defaults) {
        $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
    }

    // For backward compatibility
    if ($group == 0) {
        $this->set_brackets_style($style, $preserve_defaults);
    }
}
1183 
/**
 * Turns highlighting of symbols on or off.
 *
 * The flag is also forwarded to set_brackets_highlighting().
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function set_symbols_highlighting($flag) {
    // Update lexic permissions for this symbol group
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    // For backward compatibility
    $this->set_brackets_highlighting($flag);
}
1197 
/**
 * Sets the style for one string group.
 *
 * @param string $style             The style to use
 * @param bool   $preserve_defaults When true, $style is appended to the
 *                                  current style instead of replacing it
 * @param int    $group             The string group to change
 */
function set_strings_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['STRINGS'][$group] .= $style;
        return;
    }
    $this->language_data['STYLES']['STRINGS'][$group] = $style;
}
1216 
/**
 * Turns highlighting of strings on or off.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function set_strings_highlighting($flag) {
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
1226 
/**
 * Sets the style for one script group (stored under STYLES/SCRIPT).
 *
 * @param string $style             The style to use
 * @param bool   $preserve_defaults When true, $style is appended to the
 *                                  current style instead of replacing it
 * @param int    $group             The script group to change
 */
function set_script_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
        return;
    }
    $this->language_data['STYLES']['SCRIPT'][$group] = $style;
}
1246 
/**
 * Sets the style for one number group.
 *
 * @param string $style             The style to use
 * @param bool   $preserve_defaults When true, $style is appended to the
 *                                  current style instead of replacing it
 * @param int    $group             The number group to change
 */
function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
        return;
    }
    $this->language_data['STYLES']['NUMBERS'][$group] = $style;
}
1265 
/**
 * Turns highlighting of numbers on or off.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function set_numbers_highlighting($flag) {
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
1275 
/**
 * Sets the style for one methods group (stored under STYLES/METHODS).
 *
 * @param int|string $key               The group to change
 * @param string     $style             The style to use
 * @param bool       $preserve_defaults When true, $style is appended to the
 *                                      current style instead of replacing it
 */
function set_methods_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
        return;
    }
    $this->language_data['STYLES']['METHODS'][$key] = $style;
}
1296 
/**
 * Turns highlighting of methods on or off.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function set_methods_highlighting($flag) {
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
1306 
/**
 * Sets the style for one regular expression (stored under STYLES/REGEXPS).
 *
 * @param int|string $key               The regexp to change
 * @param string     $style             The style to use
 * @param bool       $preserve_defaults When true, $style is appended to the
 *                                      current style instead of replacing it
 */
function set_regexps_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
        return;
    }
    $this->language_data['STYLES']['REGEXPS'][$key] = $style;
}
1324 
/**
 * Turns highlighting on or off for one regular expression.
 *
 * @param int|string $key  The regexp
 * @param bool       $flag Any value; it is reduced to true/false
 */
function set_regexps_highlighting($key, $flag) {
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
1335 
/**
 * Sets whether a keyword group is case sensitive.
 *
 * @param int|string $key  The keyword group
 * @param bool       $case Any value; it is reduced to true/false
 */
function set_case_sensitivity($key, $case) {
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
1346 
1357  function set_case_keywords($case) {
1358  if (in_array($case, array(
1360  $this->language_data['CASE_KEYWORDS'] = $case;
1361  }
1362  }
1363 
/**
 * Sets the tab width.
 *
 * @param int $width The tab width; values below 1 (after conversion to
 *                   integer) fall back to the default of 8
 */
function set_tab_width($width) {
    $width = intval($width);

    // Anything below 1 is out of range, so return to the default
    $this->tab_width = ($width < 1) ? 8 : $width;
}
1381 
1389  $this->use_language_tab_width = (bool) $use;
1390  }
1391 
/**
 * Returns the tab width that is actually in effect.
 *
 * The language's own 'TAB_WIDTH' is used only when
 * $this->use_language_tab_width is set and the language defines one;
 * otherwise $this->tab_width is returned.
 *
 * @return int
 */
function get_real_tab_width() {
    $language_has_width = isset($this->language_data['TAB_WIDTH']);

    if ($this->use_language_tab_width && $language_has_width) {
        return $this->language_data['TAB_WIDTH'];
    }
    return $this->tab_width;
}
1407 
/**
 * Enables or disables strict mode.
 *
 * Has an effect only when the language's 'STRICT_MODE_APPLIES' setting
 * equals GESHI_MAYBE; the mode is then set to GESHI_ALWAYS or GESHI_NEVER.
 *
 * @param bool $mode Whether to enable strict mode
 */
function enable_strict_mode($mode = true) {
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        return;
    }

    if ($mode) {
        $this->strict_mode = GESHI_ALWAYS;
    } else {
        $this->strict_mode = GESHI_NEVER;
    }
}
1421 
1430  $this->enable_highlighting(false);
1431  }
1432 
/**
 * Enables or disables all highlighting at once.
 *
 * Every entry of $this->lexic_permissions (one level of nesting included)
 * is set to the flag, as is $this->enable_important_blocks.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function enable_highlighting($flag = true) {
    $enabled = (bool) $flag;

    foreach ($this->lexic_permissions as $section => $setting) {
        if (!is_array($setting)) {
            $this->lexic_permissions[$section] = $enabled;
            continue;
        }
        // Per-group permissions: switch each group individually
        foreach (array_keys($setting) as $group) {
            $this->lexic_permissions[$section][$group] = $enabled;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $enabled;
}
1458 
/**
 * Maps a file extension to a language name.
 *
 * The extension is lower-cased before the lookup. The first language whose
 * extension list contains it wins; 'text' is returned when none matches.
 *
 * @param string $extension The file extension, without the leading dot
 * @param array  $lookup    Optional map of language name => list of
 *                          extensions; the built-in map is used when this
 *                          is not an array or is empty
 * @return string The language name
 */
static function get_language_name_from_extension( $extension, $lookup = array() ) {
    $extension = strtolower($extension);

    $use_builtin_map = !is_array($lookup) || empty($lookup);
    if ($use_builtin_map) {
        $lookup = array(
            '6502acme' => array( 'a', 's', 'asm', 'inc' ),
            '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
            '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
            '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
            'abap' => array('abap'),
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm', 'inc'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'bf' => array('bf'),
            'c' => array('c', 'h'),
            'c_mac' => array('c', 'h'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cdfg' => array('cdfg'),
            'cobol' => array('cbl'),
            'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
            'csharp' => array('cs'),
            'css' => array('css'),
            'd' => array('d'),
            'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
            'diff' => array('diff', 'patch'),
            'dos' => array('bat', 'cmd'),
            'gdb' => array('kcrash', 'crash', 'bt'),
            'gettext' => array('po', 'pot'),
            'gml' => array('gml'),
            'gnuplot' => array('plt'),
            'groovy' => array('groovy'),
            'haskell' => array('hs'),
            'haxe' => array('hx'),
            'html4strict' => array('html', 'htm'),
            'ini' => array('ini', 'desktop'),
            'java' => array('java'),
            'javascript' => array('js'),
            'klonec' => array('kl1'),
            'klonecpp' => array('klx'),
            'latex' => array('tex'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'matlab' => array('m'),
            'mpasm' => array(),
            'mysql' => array('sql'),
            'nsis' => array(),
            'objc' => array(),
            'oobas' => array(),
            'oracle8' => array(),
            'oracle10' => array(),
            'pascal' => array('pas'),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'povray' => array('pov'),
            'providex' => array('pvc', 'pvx'),
            'prolog' => array('pl'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'reg' => array('reg'),
            'ruby' => array('rb'),
            'sas' => array('sas'),
            'scala' => array('scala'),
            'scheme' => array('scm'),
            'scilab' => array('sci'),
            'smalltalk' => array('st'),
            'smarty' => array(),
            'tcl' => array('tcl'),
            'text' => array('txt'),
            'vb' => array('bas'),
            'vbnet' => array(),
            'visualfoxpro' => array(),
            'whitespace' => array('ws'),
            'xml' => array('xml', 'svg', 'xrc'),
            'z80' => array('z80', 'asm', 'inc')
            );
    }

    // First language listing the extension wins; fall back to plain text
    $language = 'text';
    foreach ($lookup as $lang => $extensions) {
        if (in_array($extension, $extensions)) {
            $language = $lang;
            break;
        }
    }

    return $language;
}
1558 
1576  function load_from_file($file_name, $lookup = array()) {
1577  if (is_readable($file_name)) {
1578  $this->set_source(file_get_contents($file_name));
1579  $this->set_language(self::get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1580  } else {
1582  }
1583  }
1584 
/**
 * Adds a keyword to a keyword group, creating the group if necessary.
 *
 * A word already present in the group is not added again. When the parse
 * cache has been built, the cached regexp list of the group is extended
 * in place, or built from scratch if the group has no cached list yet.
 *
 * @param int|string $key  The keyword group to add the keyword to
 * @param string     $word The word to add
 */
function add_keyword($key, $word) {
    // isset() first: the group may not exist at all yet
    if (!isset($this->language_data['KEYWORDS'][$key]) ||
        !is_array($this->language_data['KEYWORDS'][$key])) {
        $this->language_data['KEYWORDS'][$key] = array();
    }

    if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
        return;
    }
    $this->language_data['KEYWORDS'][$key][] = $word;

    if (!$this->parse_cache_built) {
        return;
    }

    if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        // Nothing cached for this group: there is no last entry to append
        // to, so compile the group's list instead.
        $this->optimize_keyword_group($key);
        return;
    }

    //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
    $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
    $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
}
1606 
/**
 * Removes a keyword from a keyword group.
 *
 * Does nothing when the group does not exist or does not contain the word.
 *
 * @param int|string $key       The keyword group to remove the keyword from
 * @param string     $word      The word to remove
 * @param bool       $recompile Whether to rebuild the group's cached
 *                              regexp list (only done if the parse cache
 *                              has been built)
 */
function remove_keyword($key, $word, $recompile = true) {
    // array_search() must not be handed an undefined group
    if (!isset($this->language_data['KEYWORDS'][$key]) ||
        !is_array($this->language_data['KEYWORDS'][$key])) {
        return;
    }

    $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
    if ($key_to_remove === false) {
        return;
    }
    unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);

    //NEW in 1.0.8, optionally recompile keyword group
    if ($recompile && $this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }
}
1631 
/**
 * Creates a new keyword group, or replaces an existing one.
 *
 * @param int|string   $key            The key of the keyword group
 * @param string       $styles         The styles for the keyword group
 * @param bool         $case_sensitive Whether the group is case sensitive
 * @param array|string $words          The words of the group; a single
 *                                     value is wrapped into an array
 * @return bool false when no words were given (the group is not created),
 *              true when the group was stored
 */
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $words = (array) $words;
    if (empty($words)) {
        // empty word lists mess up highlighting
        return false;
    }

    //Add the new keyword group internally
    $this->language_data['KEYWORDS'][$key] = $words;
    $this->lexic_permissions['KEYWORDS'][$key] = true;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

    //NEW in 1.0.8, cache keyword regexp
    if ($this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }

    // Explicit success value to pair with the false returned above
    return true;
}
1659 
/**
 * Removes a keyword group together with its permission, case sensitivity,
 * style and cached regexp list entries.
 *
 * @param int|string $key The key of the keyword group to remove
 */
function remove_keyword_group ($key) {
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key],
        //NEW in 1.0.8
        $this->language_data['CACHED_KEYWORD_LISTS'][$key]
    );
}
1676 
/**
 * Builds the cached regexp list for one keyword group.
 *
 * The list comes from optimize_regexp_list(). If the language's
 * PARSER_CONTROL enables SPACE_AS_WHITESPACE (globally for keywords, or
 * for this group, the group setting taking precedence), every space in
 * the cached expressions is replaced by the pattern \s+.
 *
 * @param int|string $key The key of the keyword group to compile
 */
function optimize_keyword_group($key) {
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
        $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

    // Global keyword setting first, then the group-specific override
    $space_as_whitespace = false;
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
    }
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
    }

    if (!$space_as_whitespace) {
        return;
    }

    foreach (array_keys($this->language_data['CACHED_KEYWORD_LISTS'][$key]) as $rxk) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
            str_replace(" ", "\\s+", $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk]);
    }
}
1706 
/**
 * Stores the given content in $this->header_content.
 *
 * @param string $content The content of the header block
 */
function set_header_content($content)
{
    $this->header_content = $content;
}
1716 
/**
 * Stores the given content in $this->footer_content.
 *
 * @param string $content The content of the footer block
 */
function set_footer_content($content)
{
    $this->footer_content = $content;
}
1726 
1734  $this->header_content_style = $style;
1735  }
1736 
1744  $this->footer_content_style = $style;
1745  }
1746 
/**
 * Sets the force_code_block flag.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function enable_inner_code_block($flag) {
    $this->force_code_block = $flag ? true : false;
}
1757 
/**
 * Sets the URL stored for a keyword group (under 'URLS').
 *
 * @param int|string $group The keyword group
 * @param string     $url   The URL to store for the group
 */
function set_url_for_keyword_group($group, $url)
{
    $this->language_data['URLS'][$group] = $url;
}
1770 
/**
 * Sets the styles for one link type.
 *
 * @param int    $type   The link type; presumably one of GESHI_LINK,
 *                       GESHI_HOVER, GESHI_ACTIVE or GESHI_VISITED
 * @param string $styles The styles to use for that link type
 */
function set_link_styles($type, $styles)
{
    $this->link_styles[$type] = $styles;
}
1782 
1790  if (!$target) {
1791  $this->link_target = '';
1792  } else {
1793  $this->link_target = ' target="' . $target . '"';
1794  }
1795  }
1796 
1804  $this->important_styles = $styles;
1805  }
1806 
/**
 * Sets the enable_important_blocks flag.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function enable_important_blocks($flag) {
    $this->enable_important_blocks = (bool) $flag;
}
1818 
/**
 * Sets the add_ids flag.
 *
 * @param bool $flag Any value; it is reduced to true/false
 */
function enable_ids($flag = true) {
    $this->add_ids = (bool) $flag;
}
1828 
/**
 * Marks one or more lines for extra highlighting, or removes the mark.
 *
 * @param int|array         $lines A line number, or an array of line
 *                                 numbers handled one at a time
 * @param string|null|false $style null  - mark the line and drop any
 *                                         line-specific style
 *                                 false - remove the mark and any
 *                                         line-specific style
 *                                 other - mark the line and store this as
 *                                         its line-specific style
 */
function highlight_lines_extra($lines, $style = null) {
    if (is_array($lines)) {
        //Split up the job using single lines at a time
        foreach ($lines as $single_line) {
            $this->highlight_lines_extra($single_line, $style);
        }
        return;
    }

    $line_no = intval($lines);

    if ($style === false) {
        //Remove this line from the highlighted set entirely
        unset($this->highlight_extra_lines[$line_no]);
        unset($this->highlight_extra_lines_styles[$line_no]);
        return;
    }

    //Mark the line as being highlighted specially
    $this->highlight_extra_lines[$line_no] = $line_no;

    if ($style === null) {
        //No line-specific style is kept for this line
        unset($this->highlight_extra_lines_styles[$line_no]);
    } else {
        $this->highlight_extra_lines_styles[$line_no] = $style;
    }
}
1865 
1873  $this->highlight_extra_lines_style = $styles;
1874  }
1875 
/**
 * Sets the line ending, stored as a string in $this->line_ending.
 *
 * @param string $line_ending The new line ending
 */
function set_line_ending($line_ending) {
    $this->line_ending = strval($line_ending);
}
1885 
/**
 * Sets the number at which line numbering starts.
 *
 * The value is converted to an integer and its sign is dropped.
 *
 * @param int $number The number to start line numbers at
 */
function start_line_numbers_at($number) {
    $start = intval($number);
    $this->line_numbers_start = abs($start);
}
1904 
/**
 * Sets the encoding, stored lower-cased in $this->encoding.
 *
 * An empty (falsy) value is ignored and leaves the encoding unchanged.
 *
 * @param string $encoding The encoding to use
 */
function set_encoding($encoding) {
    if (!$encoding) {
        return;
    }
    $this->encoding = strtolower($encoding);
}
1922 
/**
 * Sets the keyword_links flag.
 *
 * @param bool $enable Any value; it is reduced to true/false
 */
function enable_keyword_links($enable = true) {
    $this->keyword_links = $enable ? true : false;
}
1932 
1942  function build_style_cache() {
1943  //Build the style cache needed to highlight numbers appropriate
1944  if($this->lexic_permissions['NUMBERS']) {
1945  //First check what way highlighting information for numbers are given
1946  if(!isset($this->language_data['NUMBERS'])) {
1947  $this->language_data['NUMBERS'] = 0;
1948  }
1949 
1950  if(is_array($this->language_data['NUMBERS'])) {
1951  $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
1952  } else {
1953  $this->language_data['NUMBERS_CACHE'] = array();
1954  if(!$this->language_data['NUMBERS']) {
1955  $this->language_data['NUMBERS'] =
1958  }
1959 
1960  for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
1961  //Rearrange style indices if required ...
1962  if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
1963  $this->language_data['STYLES']['NUMBERS'][$i] =
1964  $this->language_data['STYLES']['NUMBERS'][1<<$i];
1965  unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
1966  }
1967 
1968  //Check if this bit is set for highlighting
1969  if($j&1) {
1970  //So this bit is set ...
1971  //Check if it belongs to group 0 or the actual stylegroup
1972  if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
1973  $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
1974  } else {
1975  if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
1976  $this->language_data['NUMBERS_CACHE'][0] = 0;
1977  }
1978  $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
1979  }
1980  }
1981  }
1982  }
1983  }
1984  }
1985 
/**
 * Builds the lookup tables and regular expressions the parser needs and
 * stores them in $this->language_data:
 *
 *  - SYMBOL_DATA / SYMBOL_SEARCH / MULTIPLE_SYMBOL_GROUPS for symbols,
 *  - CACHED_KEYWORD_LISTS (filled through optimize_keyword_group()),
 *  - CACHE_BRACKET_MATCH / CACHE_BRACKET_REPLACE for brackets,
 *  - NUMBERS_RXCACHE and the default NUMBERS PRECHECK_RX for numbers.
 *
 * Each section is only built when the matching lexic permission is enabled.
 * Sets $this->parse_cache_built to true when done; parse_code() calls this
 * method whenever that flag is not set.
 */
function build_parse_cache() {
    // cache symbol regexp
    // As this is a costly operation, we avoid doing it for multiple groups ...
    // Instead we perform it for all symbols at once.
    //
    // For this to work, we need to reorganize the data arrays.
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;

        $this->language_data['SYMBOL_DATA'] = array();
        $symbol_preg_multi = array(); // multi char symbols
        $symbol_preg_single = array(); // single char symbols
        foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
            // A nested array is a style group of its own (keyed by $key);
            // a bare symbol always belongs to style group 0.
            if (is_array($symbols)) {
                $group = $key;
                $group_symbols = $symbols;
            } else {
                $group = 0;
                $group_symbols = array($symbols);
            }

            foreach ($group_symbols as $sym) {
                $sym = $this->hsc($sym);
                if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                    // first definition of a symbol wins
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$sym] = $group;

                if (isset($sym[1])) { // multiple chars
                    $symbol_preg_multi[] = preg_quote($sym, '/');
                } elseif ($sym == '-') {
                    // don't trigger range out of order error
                    $symbol_preg_single[] = '\-';
                } else { // single char
                    $symbol_preg_single[] = preg_quote($sym, '/');
                }
            }
        }

        // Now we have an array with each possible symbol as the key and the style as the actual data.
        // This way we can set the correct style just the moment we highlight ...
        //
        // Now we need to rewrite our array to get a search string that
        // matches any of the symbols: an alternation of the multi char
        // symbols followed by one character class for the single char ones.
        $symbol_preg = array();
        if (!empty($symbol_preg_multi)) {
            // reverse order, so a symbol is tried before a shorter symbol it starts with
            rsort($symbol_preg_multi);
            $symbol_preg[] = implode('|', $symbol_preg_multi);
        }
        if (!empty($symbol_preg_single)) {
            rsort($symbol_preg_single);
            $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
    }

    // cache optimized regexp for keyword matching
    // remove old cache
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        // groups without an explicit permission entry count as enabled
        if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
            $this->lexic_permissions['KEYWORDS'][$key]) {
            $this->optimize_keyword_group($key);
        }
    }

    // brackets
    if ($this->lexic_permissions['BRACKETS']) {
        $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

        // Inline style when classes are off and a bracket style exists,
        // the br0 class otherwise.
        if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $bracket_attributes = ' style="' . $this->language_data['STYLES']['BRACKETS'][0] . '"';
        } else {
            $bracket_attributes = ' class="br0"';
        }

        // Numeric entities in the same order as CACHE_BRACKET_MATCH: [ ] ( ) { }
        $this->language_data['CACHE_BRACKET_REPLACE'] = array();
        foreach (array('&#91;', '&#93;', '&#40;', '&#41;', '&#123;', '&#125;') as $bracket_entity) {
            $this->language_data['CACHE_BRACKET_REPLACE'][] =
                '<|' . $bracket_attributes . '>' . $bracket_entity . '|>';
        }
    }

    // Build the parse cache needed to highlight numbers appropriately
    if ($this->lexic_permissions['NUMBERS']) {
        // Check if the style rearrangements have been processed ...
        // This also does some preprocessing to check which style groups are useable ...
        if (!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        // Number format specification
        // All these formats are matched case-insensitively!
        //
        // The table MUST be keyed by the GESHI_NUMBER_* bit flags: the loop
        // below looks patterns up by flag value (1, 2, 16, ...), not by
        // list position.
        static $numbers_format = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX_0O =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX_AT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
        );

        // At this step we have an associative array with flag groups for a
        // specific style or a string denoting a regexp given its index.
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach ($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
            if (is_string($rxdata)) {
                // the language supplied its own regexp for this style group
                $regexp = $rxdata;
            } else {
                // This is a bitfield of number flags to highlight:
                // Build an array, implode them together and make this the actual RX
                $rxuse = array();
                for ($i = 1; $i <= $rxdata; $i <<= 1) {
                    // Skip bits that are not set, and bits without a known
                    // pattern: an empty alternative would match everywhere.
                    if (($rxdata & $i) && isset($numbers_format[$i])) {
                        $rxuse[] = $numbers_format[$i];
                    }
                }
                $regexp = implode("|", $rxuse);
            }

            $this->language_data['NUMBERS_RXCACHE'][$key] =
                "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i";
        }

        // Default pre-check: only look for numbers when there is a digit at all
        if (!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
            $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
        }
    }

    $this->parse_cache_built = true;
}
2164 
2175  function parse_code () {
2176  // Start the timer
2177  $start_time = microtime();
2178 
2179  // Replace all newlines to a common form.
2180  $code = str_replace("\r\n", "\n", $this->source);
2181  $code = str_replace("\r", "\n", $code);
2182 
2183  // Firstly, if there is an error, we won't highlight
2184  if ($this->error) {
2185  //Escape the source for output
2186  $result = $this->hsc($this->source);
2187 
2188  //This fix is related to SF#1923020, but has to be applied regardless of
2189  //actually highlighting symbols.
2190  $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2191 
2192  // Timing is irrelevant
2193  $this->set_time($start_time, $start_time);
2194  $this->finalise($result);
2195  return $result;
2196  }
2197 
2198  // make sure the parse cache is up2date
2199  if (!$this->parse_cache_built) {
2200  $this->build_parse_cache();
2201  }
2202 
2203  // Initialise various stuff
2204  $length = strlen($code);
2205  $COMMENT_MATCHED = false;
2206  $stuff_to_parse = '';
2207  $endresult = '';
2208 
2209  // "Important" selections are handled like multiline comments
2210  // @todo GET RID OF THIS SHIZ
2211  if ($this->enable_important_blocks) {
2212  $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2213  }
2214 
2215  if ($this->strict_mode) {
2216  // Break the source into bits. Each bit will be a portion of the code
2217  // within script delimiters - for example, HTML between < and >
2218  $k = 0;
2219  $parts = array();
2220  $matches = array();
2221  $next_match_pointer = null;
2222  // we use a copy to unset delimiters on demand (when they are not found)
2223  $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2224  $i = 0;
2225  while ($i < $length) {
2226  $next_match_pos = $length + 1; // never true
2227  foreach ($delim_copy as $dk => $delimiters) {
2228  if(is_array($delimiters)) {
2229  foreach ($delimiters as $open => $close) {
2230  // make sure the cache is setup properly
2231  if (!isset($matches[$dk][$open])) {
2232  $matches[$dk][$open] = array(
2233  'next_match' => -1,
2234  'dk' => $dk,
2235 
2236  'open' => $open, // needed for grouping of adjacent code blocks (see below)
2237  'open_strlen' => strlen($open),
2238 
2239  'close' => $close,
2240  'close_strlen' => strlen($close),
2241  );
2242  }
2243  // Get the next little bit for this opening string
2244  if ($matches[$dk][$open]['next_match'] < $i) {
2245  // only find the next pos if it was not already cached
2246  $open_pos = strpos($code, $open, $i);
2247  if ($open_pos === false) {
2248  // no match for this delimiter ever
2249  unset($delim_copy[$dk][$open]);
2250  continue;
2251  }
2252  $matches[$dk][$open]['next_match'] = $open_pos;
2253  }
2254  if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2255  //So we got a new match, update the close_pos
2256  $matches[$dk][$open]['close_pos'] =
2257  strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2258 
2259  $next_match_pointer =& $matches[$dk][$open];
2260  $next_match_pos = $matches[$dk][$open]['next_match'];
2261  }
2262  }
2263  } else {
2264  //So we should match an RegExp as Strict Block ...
2271  if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2272  preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2273  //We got a match ...
2274  if(isset($matches_rx['start']) && isset($matches_rx['end']))
2275  {
2276  $matches[$dk] = array(
2277  'next_match' => $matches_rx['start'][1],
2278  'dk' => $dk,
2279 
2280  'close_strlen' => strlen($matches_rx['end'][0]),
2281  'close_pos' => $matches_rx['end'][1],
2282  );
2283  } else {
2284  $matches[$dk] = array(
2285  'next_match' => $matches_rx[1][1],
2286  'dk' => $dk,
2287 
2288  'close_strlen' => strlen($matches_rx[2][0]),
2289  'close_pos' => $matches_rx[2][1],
2290  );
2291  }
2292  } else {
2293  // no match for this delimiter ever
2294  unset($delim_copy[$dk]);
2295  continue;
2296  }
2297 
2298  if ($matches[$dk]['next_match'] <= $next_match_pos) {
2299  $next_match_pointer =& $matches[$dk];
2300  $next_match_pos = $matches[$dk]['next_match'];
2301  }
2302  }
2303  }
2304 
2305  // non-highlightable text
2306  $parts[$k] = array(
2307  1 => substr($code, $i, $next_match_pos - $i)
2308  );
2309  ++$k;
2310 
2311  if ($next_match_pos > $length) {
2312  // out of bounds means no next match was found
2313  break;
2314  }
2315 
2316  // highlightable code
2317  $parts[$k][0] = $next_match_pointer['dk'];
2318 
2319  //Only combine for non-rx script blocks
2320  if(is_array($delim_copy[$next_match_pointer['dk']])) {
2321  // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2322  $i = $next_match_pos + $next_match_pointer['open_strlen'];
2323  while (true) {
2324  $close_pos = strpos($code, $next_match_pointer['close'], $i);
2325  if ($close_pos == false) {
2326  break;
2327  }
2328  $i = $close_pos + $next_match_pointer['close_strlen'];
2329  if ($i == $length) {
2330  break;
2331  }
2332  if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2333  substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2334  // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2335  foreach ($matches as $submatches) {
2336  foreach ($submatches as $match) {
2337  if ($match['next_match'] == $i) {
2338  // a different block already matches here!
2339  break 3;
2340  }
2341  }
2342  }
2343  } else {
2344  break;
2345  }
2346  }
2347  } else {
2348  $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2349  $i = $close_pos;
2350  }
2351 
2352  if ($close_pos === false) {
2353  // no closing delimiter found!
2354  $parts[$k][1] = substr($code, $next_match_pos);
2355  ++$k;
2356  break;
2357  } else {
2358  $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2359  ++$k;
2360  }
2361  }
2362  unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2363  $num_parts = $k;
2364 
2365  if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2366  // when we have only one part, we don't have anything to highlight at all.
2367  // if we have a "maybe" strict language, this should be handled as highlightable code
2368  $parts = array(
2369  0 => array(
2370  0 => '',
2371  1 => ''
2372  ),
2373  1 => array(
2374  0 => null,
2375  1 => $parts[0][1]
2376  )
2377  );
2378  $num_parts = 2;
2379  }
2380 
2381  } else {
2382  // Not strict mode - simply dump the source into
2383  // the array at index 1 (the first highlightable block)
2384  $parts = array(
2385  0 => array(
2386  0 => '',
2387  1 => ''
2388  ),
2389  1 => array(
2390  0 => null,
2391  1 => $code
2392  )
2393  );
2394  $num_parts = 2;
2395  }
2396 
2397  //Unset variables we won't need any longer
2398  unset($code);
2399 
2400  //Preload some repeatedly used values regarding hardquotes ...
2401  $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2402  $hq_strlen = strlen($hq);
2403 
2404  //Preload if line numbers are to be generated afterwards
2405  //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2406  $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2407  !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2408 
2409  //preload the escape char for faster checking ...
2410  $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2411 
2412  // this is used for single-line comments
2413  $sc_disallowed_before = "";
2414  $sc_disallowed_after = "";
2415 
2416  if (isset($this->language_data['PARSER_CONTROL'])) {
2417  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2418  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2419  $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2420  }
2421  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2422  $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2423  }
2424  }
2425  }
2426 
2427  //Fix for SF#1932083: Multichar Quotemarks unsupported
2428  $is_string_starter = array();
2429  if ($this->lexic_permissions['STRINGS']) {
2430  foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2431  if (!isset($is_string_starter[$quotemark[0]])) {
2432  $is_string_starter[$quotemark[0]] = (string)$quotemark;
2433  } elseif (is_string($is_string_starter[$quotemark[0]])) {
2434  $is_string_starter[$quotemark[0]] = array(
2435  $is_string_starter[$quotemark[0]],
2436  $quotemark);
2437  } else {
2438  $is_string_starter[$quotemark[0]][] = $quotemark;
2439  }
2440  }
2441  }
2442 
2443  // Now we go through each part. We know that even-indexed parts are
2444  // code that shouldn't be highlighted, and odd-indexed parts should
2445  // be highlighted
2446  for ($key = 0; $key < $num_parts; ++$key) {
2447  $STRICTATTRS = '';
2448 
2449  // If this block should be highlighted...
2450  if (!($key & 1)) {
2451  // Else not a block to highlight
2452  $endresult .= $this->hsc($parts[$key][1]);
2453  unset($parts[$key]);
2454  continue;
2455  }
2456 
2457  $result = '';
2458  $part = $parts[$key][1];
2459 
2460  $highlight_part = true;
2461  if ($this->strict_mode && !is_null($parts[$key][0])) {
2462  // get the class key for this block of code
2463  $script_key = $parts[$key][0];
2464  $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2465  if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2466  $this->lexic_permissions['SCRIPT']) {
2467  // Add a span element around the source to
2468  // highlight the overall source block
2469  if (!$this->use_classes &&
2470  $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2471  $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2472  } else {
2473  $attributes = ' class="sc' . $script_key . '"';
2474  }
2475  $result .= "<span$attributes>";
2476  $STRICTATTRS = $attributes;
2477  }
2478  }
2479 
2480  if ($highlight_part) {
2481  // Now, highlight the code in this block. This code
2482  // is really the engine of GeSHi (along with the method
2483  // parse_non_string_part).
2484 
2485  // cache comment regexps incrementally
2486  $next_comment_regexp_key = '';
2487  $next_comment_regexp_pos = -1;
2488  $next_comment_multi_pos = -1;
2489  $next_comment_single_pos = -1;
2490  $comment_regexp_cache_per_key = array();
2491  $comment_multi_cache_per_key = array();
2492  $comment_single_cache_per_key = array();
2493  $next_open_comment_multi = '';
2494  $next_comment_single_key = '';
2495  $escape_regexp_cache_per_key = array();
2496  $next_escape_regexp_key = '';
2497  $next_escape_regexp_pos = -1;
2498 
2499  $length = strlen($part);
2500  for ($i = 0; $i < $length; ++$i) {
2501  // Get the next char
2502  $char = $part[$i];
2503  $char_len = 1;
2504 
2505  // update regexp comment cache if needed
2506  if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2507  $next_comment_regexp_pos = $length;
2508  foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2509  $match_i = false;
2510  if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2511  ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2512  $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2513  // we have already matched something
2514  if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2515  // this comment is never matched
2516  continue;
2517  }
2518  $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2519  } elseif (
2520  //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2521  (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2522  (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2523  ) {
2524  $match_i = $match[0][1];
2525  if (GESHI_PHP_PRE_433) {
2526  $match_i += $i;
2527  }
2528 
2529  $comment_regexp_cache_per_key[$comment_key] = array(
2530  'key' => $comment_key,
2531  'length' => strlen($match[0][0]),
2532  'pos' => $match_i
2533  );
2534  } else {
2535  $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2536  continue;
2537  }
2538 
2539  if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2540  $next_comment_regexp_pos = $match_i;
2541  $next_comment_regexp_key = $comment_key;
2542  if ($match_i === $i) {
2543  break;
2544  }
2545  }
2546  }
2547  }
2548 
2549  $string_started = false;
2550 
2551  if (isset($is_string_starter[$char])) {
2552  // Possibly the start of a new string ...
2553 
2554  //Check which starter it was ...
2555  //Fix for SF#1932083: Multichar Quotemarks unsupported
2556  if (is_array($is_string_starter[$char])) {
2557  $char_new = '';
2558  foreach ($is_string_starter[$char] as $testchar) {
2559  if ($testchar === substr($part, $i, strlen($testchar)) &&
2560  strlen($testchar) > strlen($char_new)) {
2561  $char_new = $testchar;
2562  $string_started = true;
2563  }
2564  }
2565  if ($string_started) {
2566  $char = $char_new;
2567  }
2568  } else {
2569  $testchar = $is_string_starter[$char];
2570  if ($testchar === substr($part, $i, strlen($testchar))) {
2571  $char = $testchar;
2572  $string_started = true;
2573  }
2574  }
2575  $char_len = strlen($char);
2576  }
2577 
2578  if ($string_started && ($i != $next_comment_regexp_pos)) {
2579  // Hand out the correct style information for this string
2580  $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2581  if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2582  !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2583  $string_key = 0;
2584  }
2585 
2586  // parse the stuff before this
2587  $result .= $this->parse_non_string_part($stuff_to_parse);
2588  $stuff_to_parse = '';
2589 
2590  if (!$this->use_classes) {
2591  $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2592  } else {
2593  $string_attributes = ' class="st'.$string_key.'"';
2594  }
2595 
2596  // now handle the string
2597  $string = "<span$string_attributes>" . GeSHi::hsc($char);
2598  $start = $i + $char_len;
2599  $string_open = true;
2600 
2601  if(empty($this->language_data['ESCAPE_REGEXP'])) {
2602  $next_escape_regexp_pos = $length;
2603  }
2604 
2605  do {
2606  //Get the regular ending pos ...
2607  $close_pos = strpos($part, $char, $start);
2608  if(false === $close_pos) {
2609  $close_pos = $length;
2610  }
2611 
2612  if($this->lexic_permissions['ESCAPE_CHAR']) {
2613  // update escape regexp cache if needed
2614  if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2615  $next_escape_regexp_pos = $length;
2616  foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2617  $match_i = false;
2618  if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2619  ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2620  $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2621  // we have already matched something
2622  if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2623  // this comment is never matched
2624  continue;
2625  }
2626  $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2627  } elseif (
2628  //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2629  (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
2630  (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2631  ) {
2632  $match_i = $match[0][1];
2633  if (GESHI_PHP_PRE_433) {
2634  $match_i += $start;
2635  }
2636 
2637  $escape_regexp_cache_per_key[$escape_key] = array(
2638  'key' => $escape_key,
2639  'length' => strlen($match[0][0]),
2640  'pos' => $match_i
2641  );
2642  } else {
2643  $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2644  continue;
2645  }
2646 
2647  if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2648  $next_escape_regexp_pos = $match_i;
2649  $next_escape_regexp_key = $escape_key;
2650  if ($match_i === $start) {
2651  break;
2652  }
2653  }
2654  }
2655  }
2656 
2657  //Find the next simple escape position
2658  if('' != $this->language_data['ESCAPE_CHAR']) {
2659  $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2660  if(false === $simple_escape) {
2661  $simple_escape = $length;
2662  }
2663  } else {
2664  $simple_escape = $length;
2665  }
2666  } else {
2667  $next_escape_regexp_pos = $length;
2668  $simple_escape = $length;
2669  }
2670 
2671  if($simple_escape < $next_escape_regexp_pos &&
2672  $simple_escape < $length &&
2673  $simple_escape < $close_pos) {
2674  //The nexxt escape sequence is a simple one ...
2675  $es_pos = $simple_escape;
2676 
2677  //Add the stuff not in the string yet ...
2678  $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2679 
2680  //Get the style for this escaped char ...
2681  if (!$this->use_classes) {
2682  $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2683  } else {
2684  $escape_char_attributes = ' class="es0"';
2685  }
2686 
2687  //Add the style for the escape char ...
2688  $string .= "<span$escape_char_attributes>" .
2689  GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2690 
2691  //Get the byte AFTER the ESCAPE_CHAR we just found
2692  $es_char = $part[$es_pos + 1];
2693  if ($es_char == "\n") {
2694  // don't put a newline around newlines
2695  $string .= "</span>\n";
2696  $start = $es_pos + 2;
2697  } elseif (ord($es_char) >= 128) {
2698  //This is an non-ASCII char (UTF8 or single byte)
2699  //This code tries to work around SF#2037598 ...
2700  if(function_exists('mb_substr')) {
2701  $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2702  $string .= $es_char_m . '</span>';
2703  } elseif (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2704  if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2705  "|\xE0[\xA0-\xBF][\x80-\xBF]".
2706  "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2707  "|\xED[\x80-\x9F][\x80-\xBF]".
2708  "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2709  "|[\xF1-\xF3][\x80-\xBF]{3}".
2710  "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2711  $part, $es_char_m, null, $es_pos + 1)) {
2712  $es_char_m = $es_char_m[0];
2713  } else {
2714  $es_char_m = $es_char;
2715  }
2716  $string .= $this->hsc($es_char_m) . '</span>';
2717  } else {
2718  $es_char_m = $this->hsc($es_char);
2719  }
2720  $start = $es_pos + strlen($es_char_m) + 1;
2721  } else {
2722  $string .= $this->hsc($es_char) . '</span>';
2723  $start = $es_pos + 2;
2724  }
2725  } elseif ($next_escape_regexp_pos < $length &&
2726  $next_escape_regexp_pos < $close_pos) {
2727  $es_pos = $next_escape_regexp_pos;
2728  //Add the stuff not in the string yet ...
2729  $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2730 
2731  //Get the key and length of this match ...
2732  $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2733  $escape_str = substr($part, $es_pos, $escape['length']);
2734  $escape_key = $escape['key'];
2735 
2736  //Get the style for this escaped char ...
2737  if (!$this->use_classes) {
2738  $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2739  } else {
2740  $escape_char_attributes = ' class="es' . $escape_key . '"';
2741  }
2742 
2743  //Add the style for the escape char ...
2744  $string .= "<span$escape_char_attributes>" .
2745  $this->hsc($escape_str) . '</span>';
2746 
2747  $start = $es_pos + $escape['length'];
2748  } else {
2749  //Copy the remainder of the string ...
2750  $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2751  $start = $close_pos + $char_len;
2752  $string_open = false;
2753  }
2754  } while($string_open);
2755 
2756  if ($check_linenumbers) {
2757  // Are line numbers used? If, we should end the string before
2758  // the newline and begin it again (so when <li>s are put in the source
2759  // remains XHTML compliant)
2760  // note to self: This opens up possibility of config files specifying
2761  // that languages can/cannot have multiline strings???
2762  $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2763  }
2764 
2765  $result .= $string;
2766  $string = '';
2767  $i = $start - 1;
2768  continue;
2769  } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2770  substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2771  // The start of a hard quoted string
2772  if (!$this->use_classes) {
2773  $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2774  $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2775  } else {
2776  $string_attributes = ' class="st_h"';
2777  $escape_char_attributes = ' class="es_h"';
2778  }
2779  // parse the stuff before this
2780  $result .= $this->parse_non_string_part($stuff_to_parse);
2781  $stuff_to_parse = '';
2782 
2783  // now handle the string
2784  $string = '';
2785 
2786  // look for closing quote
2787  $start = $i + $hq_strlen;
2788  while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2789  $start = $close_pos + 1;
2790  if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2791  (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2792  // make sure this quote is not escaped
2793  foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2794  if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2795  // check wether this quote is escaped or if it is something like '\\'
2796  $escape_char_pos = $close_pos - 1;
2797  while ($escape_char_pos > 0
2798  && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2799  --$escape_char_pos;
2800  }
2801  if (($close_pos - $escape_char_pos) & 1) {
2802  // uneven number of escape chars => this quote is escaped
2803  continue 2;
2804  }
2805  }
2806  }
2807  }
2808 
2809  // found closing quote
2810  break;
2811  }
2812 
2813  //Found the closing delimiter?
2814  if (!$close_pos) {
2815  // span till the end of this $part when no closing delimiter is found
2816  $close_pos = $length;
2817  }
2818 
2819  //Get the actual string
2820  $string = substr($part, $i, $close_pos - $i + 1);
2821  $i = $close_pos;
2822 
2823  // handle escape chars and encode html chars
2824  // (special because when we have escape chars within our string they may not be escaped)
2825  if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2826  $start = 0;
2827  $new_string = '';
2828  while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2829  // hmtl escape stuff before
2830  $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2831  // check if this is a hard escape
2832  foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2833  if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2834  // indeed, this is a hardescape
2835  $new_string .= "<span$escape_char_attributes>" .
2836  $this->hsc($hardescape) . '</span>';
2837  $start = $es_pos + strlen($hardescape);
2838  continue 2;
2839  }
2840  }
2841  // not a hard escape, but a normal escape
2842  // they come in pairs of two
2843  $c = 0;
2844  while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2845  && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2846  && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2847  $c += 2;
2848  }
2849  if ($c) {
2850  $new_string .= "<span$escape_char_attributes>" .
2851  str_repeat($escaped_escape_char, $c) .
2852  '</span>';
2853  $start = $es_pos + $c;
2854  } else {
2855  // this is just a single lonely escape char...
2856  $new_string .= $escaped_escape_char;
2857  $start = $es_pos + 1;
2858  }
2859  }
2860  $string = $new_string . $this->hsc(substr($string, $start));
2861  } else {
2862  $string = $this->hsc($string);
2863  }
2864 
2865  if ($check_linenumbers) {
2866  // Are line numbers used? If, we should end the string before
2867  // the newline and begin it again (so when <li>s are put in the source
2868  // remains XHTML compliant)
2869  // note to self: This opens up possibility of config files specifying
2870  // that languages can/cannot have multiline strings???
2871  $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2872  }
2873 
2874  $result .= "<span$string_attributes>" . $string . '</span>';
2875  $string = '';
2876  continue;
2877  } else {
2878  //Have a look for regexp comments
2879  if ($i == $next_comment_regexp_pos) {
2880  $COMMENT_MATCHED = true;
2881  $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2882  $test_str = $this->hsc(substr($part, $i, $comment['length']));
2883 
2884  //@todo If remove important do remove here
2885  if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2886  if (!$this->use_classes) {
2887  $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2888  } else {
2889  $attributes = ' class="co' . $comment['key'] . '"';
2890  }
2891 
2892  $test_str = "<span$attributes>" . $test_str . "</span>";
2893 
2894  // Short-cut through all the multiline code
2895  if ($check_linenumbers) {
2896  // strreplace to put close span and open span around multiline newlines
2897  $test_str = str_replace(
2898  "\n", "</span>\n<span$attributes>",
2899  str_replace("\n ", "\n&nbsp;", $test_str)
2900  );
2901  }
2902  }
2903 
2904  $i += $comment['length'] - 1;
2905 
2906  // parse the rest
2907  $result .= $this->parse_non_string_part($stuff_to_parse);
2908  $stuff_to_parse = '';
2909  }
2910 
2911  // If we haven't matched a regexp comment, try multi-line comments
2912  if (!$COMMENT_MATCHED) {
2913  // Is this a multiline comment?
2914  if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2915  $next_comment_multi_pos = $length;
2916  foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2917  $match_i = false;
2918  if (isset($comment_multi_cache_per_key[$open]) &&
2919  ($comment_multi_cache_per_key[$open] >= $i ||
2920  $comment_multi_cache_per_key[$open] === false)) {
2921  // we have already matched something
2922  if ($comment_multi_cache_per_key[$open] === false) {
2923  // this comment is never matched
2924  continue;
2925  }
2926  $match_i = $comment_multi_cache_per_key[$open];
2927  } elseif (($match_i = stripos($part, $open, $i)) !== false) {
2928  $comment_multi_cache_per_key[$open] = $match_i;
2929  } else {
2930  $comment_multi_cache_per_key[$open] = false;
2931  continue;
2932  }
2933  if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2934  $next_comment_multi_pos = $match_i;
2935  $next_open_comment_multi = $open;
2936  if ($match_i === $i) {
2937  break;
2938  }
2939  }
2940  }
2941  }
2942  if ($i == $next_comment_multi_pos) {
2943  $open = $next_open_comment_multi;
2944  $close = $this->language_data['COMMENT_MULTI'][$open];
2945  $open_strlen = strlen($open);
2946  $close_strlen = strlen($close);
2947  $COMMENT_MATCHED = true;
2948  $test_str_match = $open;
2949  //@todo If remove important do remove here
2950  if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2951  $open == GESHI_START_IMPORTANT) {
2952  if ($open != GESHI_START_IMPORTANT) {
2953  if (!$this->use_classes) {
2954  $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2955  } else {
2956  $attributes = ' class="coMULTI"';
2957  }
2958  $test_str = "<span$attributes>" . $this->hsc($open);
2959  } else {
2960  if (!$this->use_classes) {
2961  $attributes = ' style="' . $this->important_styles . '"';
2962  } else {
2963  $attributes = ' class="imp"';
2964  }
2965 
2966  // We don't include the start of the comment if it's an
2967  // "important" part
2968  $test_str = "<span$attributes>";
2969  }
2970  } else {
2971  $test_str = $this->hsc($open);
2972  }
2973 
2974  $close_pos = strpos( $part, $close, $i + $open_strlen );
2975 
2976  if ($close_pos === false) {
2977  $close_pos = $length;
2978  }
2979 
2980  // Short-cut through all the multiline code
2981  $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2982  if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2983  $test_str_match == GESHI_START_IMPORTANT) &&
2984  $check_linenumbers) {
2985 
2986  // strreplace to put close span and open span around multiline newlines
2987  $test_str .= str_replace(
2988  "\n", "</span>\n<span$attributes>",
2989  str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2990  );
2991  } else {
2992  $test_str .= $rest_of_comment;
2993  }
2994 
2995  if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2996  $test_str_match == GESHI_START_IMPORTANT) {
2997  $test_str .= '</span>';
2998  }
2999 
3000  $i = $close_pos + $close_strlen - 1;
3001 
3002  // parse the rest
3003  $result .= $this->parse_non_string_part($stuff_to_parse);
3004  $stuff_to_parse = '';
3005  }
3006  }
3007 
3008  // If we haven't matched a multiline comment, try single-line comments
3009  if (!$COMMENT_MATCHED) {
3010  // cache potential single line comment occurrences
3011  if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
3012  $next_comment_single_pos = $length;
3013  foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
3014  $match_i = false;
3015  if (isset($comment_single_cache_per_key[$comment_key]) &&
3016  ($comment_single_cache_per_key[$comment_key] >= $i ||
3017  $comment_single_cache_per_key[$comment_key] === false)) {
3018  // we have already matched something
3019  if ($comment_single_cache_per_key[$comment_key] === false) {
3020  // this comment is never matched
3021  continue;
3022  }
3023  $match_i = $comment_single_cache_per_key[$comment_key];
3024  } elseif (
3025  // case sensitive comments
3026  ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3027  ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
3028  // non case sensitive
3029  (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3030  (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
3031  $comment_single_cache_per_key[$comment_key] = $match_i;
3032  } else {
3033  $comment_single_cache_per_key[$comment_key] = false;
3034  continue;
3035  }
3036  if ($match_i !== false && $match_i < $next_comment_single_pos) {
3037  $next_comment_single_pos = $match_i;
3038  $next_comment_single_key = $comment_key;
3039  if ($match_i === $i) {
3040  break;
3041  }
3042  }
3043  }
3044  }
3045  if ($next_comment_single_pos == $i) {
3046  $comment_key = $next_comment_single_key;
3047  $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
3048  $com_len = strlen($comment_mark);
3049 
3050  // This check will find special variables like $# in bash
3051  // or compiler directives of Delphi beginning {$
3052  if ((empty($sc_disallowed_before) || ($i == 0) ||
3053  (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
3054  (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
3055  (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
3056  {
3057  // this is a valid comment
3058  $COMMENT_MATCHED = true;
3059  if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3060  if (!$this->use_classes) {
3061  $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
3062  } else {
3063  $attributes = ' class="co' . $comment_key . '"';
3064  }
3065  $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
3066  } else {
3067  $test_str = $this->hsc($comment_mark);
3068  }
3069 
3070  //Check if this comment is the last in the source
3071  $close_pos = strpos($part, "\n", $i);
3072  $oops = false;
3073  if ($close_pos === false) {
3074  $close_pos = $length;
3075  $oops = true;
3076  }
3077  $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
3078  if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3079  $test_str .= "</span>";
3080  }
3081 
3082  // Take into account that the comment might be the last in the source
3083  if (!$oops) {
3084  $test_str .= "\n";
3085  }
3086 
3087  $i = $close_pos;
3088 
3089  // parse the rest
3090  $result .= $this->parse_non_string_part($stuff_to_parse);
3091  $stuff_to_parse = '';
3092  }
3093  }
3094  }
3095  }
3096 
3097  // Where are we adding this char?
3098  if (!$COMMENT_MATCHED) {
3099  $stuff_to_parse .= $char;
3100  } else {
3101  $result .= $test_str;
3102  unset($test_str);
3103  $COMMENT_MATCHED = false;
3104  }
3105  }
3106  // Parse the last bit
3107  $result .= $this->parse_non_string_part($stuff_to_parse);
3108  $stuff_to_parse = '';
3109  } else {
3110  $result .= $this->hsc($part);
3111  }
3112  // Close the <span> that surrounds the block
3113  if ($STRICTATTRS != '') {
3114  $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
3115  $result .= '</span>';
3116  }
3117 
3118  $endresult .= $result;
3119  unset($part, $parts[$key], $result);
3120  }
3121 
3122  //This fix is related to SF#1923020, but has to be applied regardless of
3123  //actually highlighting symbols.
3125  $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
3126 
3127 // // Parse the last stuff (redundant?)
3128 // $result .= $this->parse_non_string_part($stuff_to_parse);
3129 
3130  // Lop off the very first and last spaces
3131 // $result = substr($result, 1, -1);
3132 
3133  // We're finished: stop timing
3134  $this->set_time($start_time, microtime());
3135 
3136  $this->finalise($endresult);
3137  return $endresult;
3138  }
3139 
/**
 * Rewrites whitespace in highlighted code so indentation is preserved when
 * the markup is not rendered inside a <pre> element.
 *
 * Steps, in order:
 *  1. Tabs are expanded to spaces up to the next tab stop (width taken from
 *     get_real_tab_width()). The visible column is tracked while skipping
 *     HTML tags and counting each entity as a single character.
 *  2. A space at the start of a line becomes &nbsp;, and every run of two
 *     spaces becomes a space followed by &nbsp; so browsers do not collapse it.
 *  3. When neither line numbers nor the PRE_TABLE header are in use, newlines
 *     are converted with nl2br() or replaced by the configured line ending.
 *
 * @param string $result Highlighted HTML; modified in place (by reference)
 */
function indent(&$result) {
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        $result = null; // Save memory while we process the lines individually
        $tab_width = $this->get_real_tab_width();
        // '&nbsp;' (6 bytes) followed by $tab_width plain spaces; slices of
        // this string are used as the replacement for each tab below.
        $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

        for ($key = 0, $n = count($lines); $key < $n; $key++) {
            $line = $lines[$key];
            if (false === strpos($line, "\t")) {
                // Nothing to expand on this line; leave it untouched.
                continue;
            }

            $pos = 0; // visible column, ignoring markup
            $length = strlen($line);
            $lines[$key] = ''; // rebuilt character by character below

            $IN_TAG = false;
            for ($i = 0; $i < $length; ++$i) {
                $char = $line[$i];
                // Simple engine to work out whether we're in a tag, so that
                // HTML in the line does not advance the column counter.
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles.
                if ($IN_TAG) {
                    if ('>' == $char) {
                        $IN_TAG = false;
                    }
                    $lines[$key] .= $char;
                } elseif ('<' == $char) {
                    $IN_TAG = true;
                    $lines[$key] .= '<';
                } elseif ('&' == $char) {
                    // Possible entity. Look for its terminating ';' in the
                    // five bytes starting three bytes after the '&'. If found,
                    // pre-compensate $pos: the remaining entity characters
                    // are each counted by the default branch, and the net
                    // effect must be a single column.
                    $substr = substr($line, $i + 3, 5);
                    $posi = strpos($substr, ';');
                    if (false === $posi) {
                        ++$pos;
                    } else {
                        $pos -= $posi+2;
                    }
                    $lines[$key] .= $char;
                } elseif ("\t" == $char) {
                    $str = '';
                    // Number of columns up to the next tab stop.
                    $tab_end_width = $tab_width - ($pos % $tab_width);
                    if (($pos & 1) || 1 == $tab_end_width) {
                        // Plain spaces only (skip the leading '&nbsp;').
                        $str .= substr($tab_string, 6, $tab_end_width);
                    } else {
                        // '&nbsp;' plus ($tab_end_width - 1) plain spaces.
                        $str .= substr($tab_string, 0, $tab_end_width+5);
                    }
                    $lines[$key] .= $str;
                    $pos += $tab_end_width;

                    if (false === strpos($line, "\t", $i + 1)) {
                        // No more tabs on this line: copy the rest verbatim.
                        $lines[$key] .= substr($line, $i + 1);
                        break;
                    }
                } elseif (0 == $pos && ' ' == $char) {
                    $lines[$key] .= '&nbsp;';
                    ++$pos;
                } else {
                    $lines[$key] .= $char;
                    ++$pos;
                }
            }
        }
        $result = implode("\n", $lines);
        unset($lines); // We don't need the lines separated beyond this
    }

    // Other whitespace
    // BenBE: Fix to reduce the number of replacements to be done
    $result = preg_replace('/^ /m', '&nbsp;', $result);
    // Only runs of two spaces are rewritten. Replacing every single space
    // would double all whitespace and also corrupt the space that separates
    // a tag name from its attributes (e.g. in <span class="...">).
    $result = str_replace('  ', ' &nbsp;', $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        if ($this->line_ending === null) {
            $result = nl2br($result);
        } else {
            $result = str_replace("\n", $this->line_ending, $result);
        }
    }
}
3239 
/**
 * Applies the loaded language's keyword capitalisation setting to a string.
 *
 * @param string $instr The text to convert
 * @return string $instr upper-cased for GESHI_CAPS_UPPER, lower-cased for
 *                GESHI_CAPS_LOWER, and unchanged for any other setting
 */
function change_case($instr) {
    $caps_mode = $this->language_data['CASE_KEYWORDS'];

    if ($caps_mode == GESHI_CAPS_UPPER) {
        return strtoupper($instr);
    }
    if ($caps_mode == GESHI_CAPS_LOWER) {
        return strtolower($instr);
    }

    // Any other value means "leave the case alone".
    return $instr;
}
3258 
/**
 * preg_replace_callback() handler that wraps one matched keyword in GeSHi's
 * internal placeholder markup and, if keyword links are enabled and the
 * keyword group has a URL template, in a link placeholder as well.
 *
 * The keyword group being processed is not part of the regexp match, so it
 * is read from $this->_kw_replace_group, which parse_non_string_part() sets
 * immediately before running the keyword regexp for that group.
 *
 * @param array $match Regexp matches: [0] is the full match, [1] the keyword
 * @return string The keyword surrounded by '<|/GROUP/>' ... '|>' placeholders,
 *                optionally wrapped in '<|UR1|"URL">' ... '</a>'
 */
function handle_keyword_replace($match) {
    // Without this assignment every use of $k below would read an
    // undefined variable and no group-specific URL or style could be found.
    $k = $this->_kw_replace_group;
    $keyword = $match[0];
    $keyword_match = $match[1];

    $before = '';
    $after = '';

    if ($this->keyword_links) {
        // Keyword links have been enabled

        if (isset($this->language_data['URLS'][$k]) &&
            $this->language_data['URLS'][$k] != '') {
            // There is a base URL for this keyword group

            // Default to the text as it was matched. For case-insensitive
            // groups whose URL uses {FNAME}, prefer the spelling from the
            // language file so the link gets the canonical case. If no
            // keyword compares equal, the matched text is kept rather than
            // silently linking to whichever keyword happened to be last.
            $word = $keyword_match;
            if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                    if (strcasecmp($candidate, $keyword_match) == 0) {
                        $word = $candidate;
                        break;
                    }
                }
            }

            // '.' is replaced by the '<DOT>' placeholder here and turned
            // back into '.' at the end of parse_non_string_part().
            $before = '<|UR1|"' .
                str_replace(
                    array(
                        '{FNAME}',
                        '{FNAMEL}',
                        '{FNAMEU}',
                        '.'),
                    array(
                        str_replace('+', '%20', urlencode($this->hsc($word))),
                        str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
                        str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
                        '<DOT>'),
                    $this->language_data['URLS'][$k]
                ) . '">';
            $after = '</a>';
        }
    }

    return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
}
3318 
/**
 * preg_replace_callback() handler for regexps whose style is a callable.
 *
 * The callable stored under STYLES/REGEXPS for the key in $this->_rx_key is
 * invoked with the captured text, and its return value is used as the
 * inline style.
 *
 * @param array $matches Regexp matches; [1] is the text inside the placeholder
 * @return string ' style="..."' followed by the captured text and '|>'
 */
function handle_regexps_callback($matches) {
    // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
    $style_callback = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $captured = $matches[1];
    $style = call_user_func($style_callback, $captured);

    return ' style="' . $style . '"' . $captured . '|>';
}
3333 
/**
 * preg_replace_callback() handler that wraps a regexp match in REG3XP
 * placeholders, closing and re-opening the placeholder at every newline.
 *
 * The surrounding text and the replacement template come from
 * $this->_hmr_before, $this->_hmr_after and $this->_hmr_replace; the regexp
 * key comes from $this->_hmr_key. When a replacement template is set,
 * back-references of the form \0, \1, ... in all three strings are
 * substituted with the corresponding match.
 *
 * @param array $matches Regexp matches as passed by preg_replace_callback()
 * @return string The placeholder-wrapped replacement
 */
function handle_multiline_regexps($matches) {
    $prefix = $this->_hmr_before;
    $suffix = $this->_hmr_after;
    $opening = '<|!REG3XP' . $this->_hmr_key . '!>';

    if (!$this->_hmr_replace) {
        // No template: highlight the whole match as it is.
        $body = $matches[0];
    } else {
        // Build '\0', '\1', ... in the same order as $matches so that
        // str_replace() pairs each back-reference with its match.
        $backrefs = array();
        foreach ($matches as $index => $unused) {
            $backrefs[] = '\\' . $index;
        }

        $prefix = str_replace($backrefs, $matches, $prefix);
        $suffix = str_replace($backrefs, $matches, $suffix);
        $body = str_replace($backrefs, $matches, $this->_hmr_replace);
    }

    // Close the placeholder before each newline and reopen it after.
    $wrapped = str_replace("\n", "|>\n" . $opening, $body);

    return $prefix . $opening . $wrapped . '|>' . $suffix;
}
3367 
/**
 * Highlights a piece of source that is neither a string nor a comment.
 *
 * The text is first passed through $this->hsc(). Highlighting is then done
 * in two phases: each lexic (keywords, regexps, numbers, object members,
 * brackets, symbols) inserts neutral placeholders of the form '<|...>' and
 * '|>' into the text, and only afterwards are those placeholders filled in
 * with the real style/class attributes and finally converted to
 * '<span ...>' and '</span>'. Inserting the styles late keeps the style
 * text itself from being matched by a later highlighting step.
 *
 * Order of processing: keywords, regexps, numbers, keyword styles, number
 * styles, methods/fields, brackets, symbols, regexp styles, URL
 * placeholders, span conversion.
 *
 * @param string $stuff_to_parse The raw source fragment to highlight
 * @return string The highlighted fragment
 */
3377  function parse_non_string_part($stuff_to_parse) {
// A single space is prepended here and removed again by the substr(..., 1)
// in the return statement. NOTE(review): presumably this guarantees that the
// look-behind patterns below always have a preceding character - confirm.
3378  $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3379 
3380  // Highlight keywords
// Default look-behind / look-ahead character classes; the closing "])" is
// appended after the quotemarks have optionally been added.
3381  $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
3382  $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3383  if ($this->lexic_permissions['STRINGS']) {
3384  $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3385  $disallowed_before .= $quotemarks;
3386  $disallowed_after .= $quotemarks;
3387  }
3388  $disallowed_before .= "])";
3389  $disallowed_after .= "])";
3390 
3391  $parser_control_pergroup = false;
3392  if (isset($this->language_data['PARSER_CONTROL'])) {
3393  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3394  $x = 0; // check whether per-keyword-group parser_control is enabled
3395  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3396  $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3397  ++$x;
3398  }
3399  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3400  $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3401  ++$x;
3402  }
// Any entries besides the two global overrides counted in $x are treated
// as per-group settings.
3403  $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3404  }
3405  }
3406 
3407  foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
// A group without an explicit permission entry is highlighted.
3408  if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3409  $this->lexic_permissions['KEYWORDS'][$k]) {
3410 
3411  $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3412  $modifiers = $case_sensitive ? '' : 'i';
3413 
3414  // NEW in 1.0.8 - per-keyword-group parser control
3415  $disallowed_before_local = $disallowed_before;
3416  $disallowed_after_local = $disallowed_after;
3417  if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3418  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3419  $disallowed_before_local =
3420  $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3421  }
3422 
3423  if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3424  $disallowed_after_local =
3425  $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3426  }
3427  }
3428 
// Publish the current group on the object: the callback used below only
// receives the regexp matches as its argument.
3429  $this->_kw_replace_group = $k;
3430 
3431  //NEW in 1.0.8, the cached regexp list
3432  // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3433  for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
3434  $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3435  // Might make a more unique string for putting the number in soon
3436  // Basically, we don't put the styles in yet because then the styles themselves will
3437  // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3438  $stuff_to_parse = preg_replace_callback(
3439  "/$disallowed_before_local({$keywordset})(?!<DOT>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
3440  array($this, 'handle_keyword_replace'),
3441  $stuff_to_parse
3442  );
3443  }
3444  }
3445  }
3446 
3447  // Regular expressions
// An array entry carries search/replace/modifiers/before/after parts; a
// plain string entry is used as the pattern itself. With line numbers on,
// the callback variant is used so placeholders are split at newlines.
3448  foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3449  if ($this->lexic_permissions['REGEXPS'][$key]) {
3450  if (is_array($regexp)) {
3451  if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3452  // produce valid HTML when we match multiple lines
3453  $this->_hmr_replace = $regexp[GESHI_REPLACE];
3454  $this->_hmr_before = $regexp[GESHI_BEFORE];
3455  $this->_hmr_key = $key;
3456  $this->_hmr_after = $regexp[GESHI_AFTER];
3457  $stuff_to_parse = preg_replace_callback(
3458  "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3459  array($this, 'handle_multiline_regexps'),
3460  $stuff_to_parse);
// Reset the callback state so the plain-string branch below sees no template.
3461  $this->_hmr_replace = false;
3462  $this->_hmr_before = '';
3463  $this->_hmr_after = '';
3464  } else {
3465  $stuff_to_parse = preg_replace(
3466  '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3467  $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3468  $stuff_to_parse);
3469  }
3470  } else {
3471  if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3472  // produce valid HTML when we match multiple lines
3473  $this->_hmr_key = $key;
3474  $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3475  array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3476  $this->_hmr_key = '';
3477  } else {
3478  $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3479  }
3480  }
3481  }
3482  }
3483 
3484  // Highlight numbers. As of 1.0.8 we support different types of numbers
3485  $numbers_found = false;
3486 
// The precheck pattern is tested first; the per-format patterns only run
// when it matches.
3487  if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
3488  $numbers_found = true;
3489 
3490  //For each of the formats ...
3491  foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3492  //Check if it should be highlighted ...
3493  $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3494  }
3495  }
3496 
3497  //
3498  // Now that's all done, replace /[number]/ with the correct styles
3499  //
3500  foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3501  if (!$this->use_classes) {
3502  $attributes = ' style="' .
3503  (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3504  $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3505  } else {
3506  $attributes = ' class="kw' . $k . '"';
3507  }
3508  $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3509  }
3510 
3511  if ($numbers_found) {
3512  // Put number styles in
3513  foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3514  //Commented out for now, as this needs some review ...
3515  // if ($numbers_permissions & $id) {
3516  //Get the appropriate style ...
3517  //Checking for unset styles is done by the style cache builder ...
3518  if (!$this->use_classes) {
3519  $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3520  } else {
3521  $attributes = ' class="nu'.$id.'"';
3522  }
3523 
3524  //Set in the correct styles ...
3525  $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3526  // }
3527  }
3528  }
3529 
3530  // Highlight methods and fields in objects
3531  if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
// Defaults for the member-access pattern; each can be overridden through
// PARSER_CONTROL/OOLANG.
3532  $oolang_spaces = "[\s]*";
3533  $oolang_before = "";
3534  $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3535  if (isset($this->language_data['PARSER_CONTROL'])) {
3536  if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3537  if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3538  $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3539  }
3540  if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3541  $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3542  }
3543  if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3544  $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3545  }
3546  }
3547  }
3548 
3549  foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
// Cheap substring test before running the regexp for this splitter.
3550  if (false !== strpos($stuff_to_parse, $splitter)) {
3551  if (!$this->use_classes) {
3552  $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3553  } else {
3554  $attributes = ' class="me' . $key . '"';
3555  }
3556  $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3557  }
3558  }
3559  }
3560 
3561  //
3562  // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3563  // You try it, and see what happens ;)
3564  // TODO: Fix lexic permissions not converting entities if shouldn't
3565  // be highlighting regardless
3566  //
3567  if ($this->lexic_permissions['BRACKETS']) {
3568  $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3569  $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3570  }
3571 
3572 
3573  //FIX for symbol highlighting ...
3574  if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3575  //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
3576  $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
// Offsets in $pot_symbols refer to the text before any replacement;
// $global_offset accumulates the length change of replacements made so far.
3577  $global_offset = 0;
3578  for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3579  $symbol_match = $pot_symbols[$s_id][0][0];
3580  if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3581  // already highlighted blocks _must_ include either < or >
3582  // so if this conditional applies, we have to skip this match
3583  // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3584  if(strpos($symbol_match, '<SEMI>') === false &&
3585  strpos($symbol_match, '<PIPE>') === false) {
3586  continue;
3587  }
3588  }
3589 
3590  // if we reach this point, we have a valid match which needs to be highlighted
3591 
3592  $symbol_length = strlen($symbol_match);
3593  $symbol_offset = $pot_symbols[$s_id][0][1];
3594  unset($pot_symbols[$s_id]);
3595  $symbol_hl = "";
3596 
3597  // if we have multiple styles, we have to handle them properly
3598  if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3599  $old_sym = -1;
3600  // Split the current stuff to replace into its atomic symbols ...
3601  preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3602  foreach ($sym_match_syms[0] as $sym_ms) {
3603  //Check if consecutive symbols belong to the same group to save output ...
3604  if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3605  && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3606  if (-1 != $old_sym) {
3607  $symbol_hl .= "|>";
3608  }
3609  $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3610  if (!$this->use_classes) {
3611  $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3612  } else {
3613  $symbol_hl .= '<| class="sy' . $old_sym . '">';
3614  }
3615  }
3616  $symbol_hl .= $sym_ms;
3617  }
3618  unset($sym_match_syms);
3619 
3620  //Close remaining tags and insert the replacement at the right position ...
3621  //Take caution if symbol_hl is empty to avoid doubled closing spans.
3622  if (-1 != $old_sym) {
3623  $symbol_hl .= "|>";
3624  }
3625  } else {
3626  if (!$this->use_classes) {
3627  $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3628  } else {
3629  $symbol_hl = '<| class="sy0">';
3630  }
3631  $symbol_hl .= $symbol_match . '|>';
3632  }
3633 
3634  $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3635 
3636  // since we replace old text with something of different size,
3637  // we'll have to keep track of the differences
3638  $global_offset += strlen($symbol_hl) - $symbol_length;
3639  }
3640  }
3641  //FIX for symbol highlighting ...
3642 
3643  // Add class/style for regexps
3644  foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3645  if ($this->lexic_permissions['REGEXPS'][$key]) {
3646  if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
// The style is computed per match by the callable; the key is handed to
// the callback through $this->_rx_key.
3647  $this->_rx_key = $key;
3648  $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3649  array($this, 'handle_regexps_callback'),
3650  $stuff_to_parse);
3651  } else {
3652  if (!$this->use_classes) {
3653  $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3654  } else {
3655  if (is_array($this->language_data['REGEXPS'][$key]) &&
3656  array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3657  $attributes = ' class="' .
3658  $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3659  } else {
3660  $attributes = ' class="re' . $key . '"';
3661  }
3662  }
3663  $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3664  }
3665  }
3666  }
3667 
3668  // Replace <DOT> with . for urls
3669  $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3670  // Replace <|UR1| with <a href= for urls also
// An inline link style is only emitted when one is set and classes are
// not in use; the other two branches produce the same replacement.
3671  if (isset($this->link_styles[GESHI_LINK])) {
3672  if ($this->use_classes) {
3673  $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3674  } else {
3675  $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3676  }
3677  } else {
3678  $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3679  }
3680 
3681  //
3682  // NOW we add the span thingy ;)
3683  //
3684 
3685  $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3686  $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
// Drop the first character, i.e. the space that was prepended at the top.
3687  return substr($stuff_to_parse, 1);
3688  }
3689 
/**
 * Stores the elapsed time between two microtime() readings in $this->time.
 *
 * Both arguments are split on a single space into two numeric parts, as
 * produced by microtime() when called without arguments; the stored value
 * is the sum of the end parts minus the start parts.
 *
 * @param string $start_time Reading taken when the work started
 * @param string $end_time   Reading taken when the work finished
 */
function set_time($start_time, $end_time) {
    list($start_fraction, $start_seconds) = explode(' ', $start_time);
    list($end_fraction, $end_seconds) = explode(' ', $end_time);

    $this->time = $end_fraction + $end_seconds - $start_fraction - $start_seconds;
}
3703 
/**
 * Returns the duration last recorded by set_time().
 *
 * @return mixed The current value of $this->time
 */
function get_time() {
    $elapsed = $this->time;
    return $elapsed;
}
3713 
/**
 * Recursively merges any number of arrays, later arguments taking priority.
 *
 * For every key of a later array: if its value is an array and the key is
 * already set in the result, the two are merged recursively; otherwise the
 * later value replaces (or adds) the entry. Keys are preserved as given.
 *
 * If any argument is not an array, an E_USER_WARNING is triggered and
 * false is returned. The same applies to recursive calls, so merging an
 * array into an existing scalar leaves false under that key.
 *
 * @param array ...  The arrays to merge; the first is the base
 * @return array|false The merged array, or false on invalid input
 */
function merge_arrays() {
    $inputs = func_get_args();

    // Validate all arguments up front so no partial result is produced.
    foreach ($inputs as $position => $candidate) {
        if (!is_array($candidate)) {
            // also array_merge_recursive returns nothing in this case
            trigger_error('Argument #' . ($position+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
            return false;
        }
    }

    // The first array is the starting point of the result.
    $merged = $inputs[0];

    // Layer every following array on top of it.
    foreach (array_slice($inputs, 1) as $overlay) {
        foreach ($overlay as $key => $value) {
            $merged[$key] = (is_array($value) && isset($merged[$key]))
                ? $this->merge_arrays($merged[$key], $value)
                : $value;
        }
    }

    return $merged;
}
3752 
/**
 * Loads a language definition file and initialises the highlighter state
 * that depends on it.
 *
 * The file is pulled in with require and is expected to assign the local
 * variable $language_data. After loading, strict mode and the per-lexic
 * permissions are initialised, PARSER_CONTROL/ENABLE_FLAGS is applied and
 * removed, HARDCHAR defaults to ESCAPE_CHAR, and an optional sibling
 * '<name>.style.php' file may override styles through a local $style_data.
 *
 * Nothing happens if $file_name is the file that is already loaded.
 *
 * @param string $file_name Path of the language file to load
 */
function load_language($file_name) {
    if ($file_name == $this->loaded_language) {
        // this file is already loaded!
        return;
    }

    // Reset state that belongs to the previously loaded language.
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();

    // The language file fills $language_data in this function's scope, so
    // the variable name must not change.
    $language_data = array();
    require $file_name;

    // Perhaps some checking might be added here later to check that
    // $language_data is a valid thing but maybe not
    $this->language_data = $language_data;

    // Set strict mode if should be set
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Every lexic is highlighted by default; keyword groups only if they
    // actually contain keywords.
    foreach ($this->language_data['KEYWORDS'] as $group => $words) {
        $this->lexic_permissions['KEYWORDS'][$group] = !empty($words);
    }
    foreach ($this->language_data['COMMENT_SINGLE'] as $comment_id => $unused) {
        $this->lexic_permissions['COMMENTS'][$comment_id] = true;
    }
    foreach ($this->language_data['REGEXPS'] as $regexp_id => $unused) {
        $this->lexic_permissions['REGEXPS'][$regexp_id] = true;
    }

    // empty() is safe here even when PARSER_CONTROL is missing entirely or
    // is false/null: it only tests for a non-empty value and never raises
    // a notice.
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // Anything other than GESHI_NEVER (including GESHI_MAYBE)
            // counts as enabled.
            $perm = ($value !== GESHI_NEVER);

            if ($flag == 'ALL') {
                $this->enable_highlighting($perm);
            } elseif (isset($this->lexic_permissions[$flag])) {
                if (is_array($this->lexic_permissions[$flag])) {
                    foreach (array_keys($this->lexic_permissions[$flag]) as $sub_key) {
                        $this->lexic_permissions[$flag][$sub_key] = $perm;
                    }
                } else {
                    $this->lexic_permissions[$flag] = $perm;
                }
            }
            // Unknown lexic permissions are silently ignored.
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
    //You need to set one for HARDESCAPES only in this case.
    if (!isset($this->language_data['HARDCHAR'])) {
        $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
    }

    //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    // The last four characters of the path are replaced by '.style.php'.
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (!is_readable($style_filename)) {
        return;
    }

    // Clear any style_data that could have been set before, then let the
    // style file assign it in this scope.
    unset($style_data);
    include $style_filename;

    // Apply the new styles on top of the language's own styles.
    if (isset($style_data) && is_array($style_data)) {
        $this->language_data['STYLES'] =
            $this->merge_arrays($this->language_data['STYLES'], $style_data);
    }
}
3851 
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it (container, optional line numbers, header and footer).
 *
 * The finished markup is written back into $parsed_code; nothing is returned.
 *
 * @param string $parsed_code Highlighted code on input, complete HTML on output
 */
function finalise(&$parsed_code) {
    // Remove end parts of important declarations
    // This is BUGGY!! My fault for bad code: fix coming in 1.2
    // @todo Remove this crap
    if ($this->enable_important_blocks &&
        (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
        $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
        $this->indent($parsed_code);
    }

    // purge some unnecessary stuff: spans that wrap nothing but whitespace
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    $code = explode("\n", $parsed_code);
    $parsed_code = $this->header();

    // If we're using line numbers, we insert <li>s and appropriate
    // markup to style them (otherwise we don't need to do anything)
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

        // Foreach line...
        for ($i = 0, $n = count($code); $i < $n; ++$i) {
            // 1-based number of the current line; extra-line settings are keyed by it
            $line_no = $i + 1;

            //Reset the attributes for a new line ...
            $attrs = array();

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            // If this is a "special line"...
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $attrs['class'][] = 'li2';
                    $def_attr = ' class="de2"';
                } else {
                    $attrs['style'][] = $this->line_style2;
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            } else {
                if ($this->use_classes) {
                    $attrs['class'][] = 'li1';
                    $def_attr = ' class="de1"';
                } else {
                    $attrs['style'][] = $this->line_style1;
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            }

            //Check which type of tag to insert for this line
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $start = "<pre$def_attr>";
                $end = '</pre>';
            } else {
                // Span or div?
                $start = "<div$def_attr>";
                $end = '</div>';
            }

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = $this->overall_id . '-' . $line_no;
            }

            //Is this some line with extra styles???
            if (in_array($line_no, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line_no])) {
                        $attrs['class'][] = "lx$line_no";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    array_push($attrs['style'], $this->get_line_style($line_no));
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li$attr_string>$start{$code[$i]}$end</li>$ls";
            // free the line as soon as it has been emitted
            unset($code[$i]);
        }
    } else {
        $n = count($code);
        if ($this->use_classes) {
            $attributes = ' class="de1"';
        } else {
            $attributes = ' style="'. $this->code_style .'"';
        }
        if ($this->header_type == GESHI_HEADER_PRE_VALID) {
            $parsed_code .= '<pre'. $attributes .'>';
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                if ($this->use_classes) {
                    $ln_attr = ' class="ln"';
                } else {
                    $ln_attr = ' style="'. $this->table_linenumber_style .'"';
                }
                $parsed_code .= '<td'.$ln_attr.'><pre'.$attributes.'>';
                // get linenumbers
                // we don't merge it with the for below, since it should be better for
                // memory consumption this way
                // @todo: but... actually it would still be somewhat nice to merge the two loops
                //        the mem peaks are at different positions
                for ($i = 0; $i < $n; ++$i) {
                    // extra-line settings are keyed by the 1-based line number
                    $line_no = $i + 1;
                    $close = 0;
                    // fancy lines
                    if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                        $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                        // Set the attributes to style the line
                        if ($this->use_classes) {
                            $parsed_code .= '<span class="xtra li2"><span class="de2">';
                        } else {
                            // This style "covers up" the special styles set for special lines
                            // so that styles applied to special lines don't apply to the actual
                            // code on that line
                            $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                .'<span style="' . $this->code_style .'">';
                        }
                        $close += 2;
                    }
                    //Is this some line with extra styles???
                    if (in_array($line_no, $this->highlight_extra_lines)) {
                        if ($this->use_classes) {
                            if (isset($this->highlight_extra_lines_styles[$line_no])) {
                                $parsed_code .= "<span class=\"xtra lx$line_no\">";
                            } else {
                                $parsed_code .= "<span class=\"xtra ln-xtra\">";
                            }
                        } else {
                            $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
                        }
                        ++$close;
                    }
                    $parsed_code .= $this->line_numbers_start + $i;
                    if ($close) {
                        $parsed_code .= str_repeat('</span>', $close);
                    } elseif ($i != $n) {
                        // NOTE: always true inside this loop, so the last number
                        // is followed by a newline as well (kept for output stability)
                        $parsed_code .= "\n";
                    }
                }
                $parsed_code .= '</pre></td><td'.$attributes.'>';
            }
            $parsed_code .= '<pre'. $attributes .'>';
        }
        // No line numbers, but still need to handle highlighting lines extra.
        // Have to use divs so the full width of the code is highlighted
        $close = 0;
        for ($i = 0; $i < $n; ++$i) {
            // extra-line settings are keyed by the 1-based line number
            $line_no = $i + 1;

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }
            // fancy lines
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $parsed_code .= '<span class="xtra li2"><span class="de2">';
                } else {
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                        .'<span style="' . $this->code_style .'">';
                }
                $close += 2;
            }
            //Is this some line with extra styles???
            if (in_array($line_no, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line_no])) {
                        $parsed_code .= "<span class=\"xtra lx$line_no\">";
                    } else {
                        $parsed_code .= "<span class=\"xtra ln-xtra\">";
                    }
                } else {
                    $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
                }
                ++$close;
            }

            $parsed_code .= $code[$i];

            if ($close) {
                // block-level spans already break the line, no newline needed
                $parsed_code .= str_repeat('</span>', $close);
                $close = 0;
            }
            elseif ($i + 1 < $n) {
                $parsed_code .= "\n";
            }
            unset($code[$i]);
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
            $parsed_code .= '</pre>';
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $parsed_code .= '</td>';
        }
    }

    $parsed_code .= $this->footer();
}
4097 
/**
 * Creates the opening markup for the code block: the container element
 * (with class/id/style attributes), the optional header content and, when
 * line numbers are enabled, the opening <ol> or table row.
 *
 * @return string|null The opening HTML; null if line numbers are enabled
 *                     and the header type matches none of the handled types
 */
function header() {
    // Get attributes needed
    // The class is output regardless of whether classes are in use
    $attributes = ' class="' . $this->_genCSSName($this->language);
    if ($this->overall_class != '') {
        $attributes .= " ".$this->_genCSSName($this->overall_class);
    }
    $attributes .= '"';

    if ($this->overall_id != '') {
        $attributes .= " id=\"{$this->overall_id}\"";
    }
    if ($this->overall_style != '' && !$this->use_classes) {
        $attributes .= ' style="' . $this->overall_style . '"';
    }

    $ol_attributes = '';

    if ($this->line_numbers_start != 1) {
        $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
    }

    // Get the header HTML (mirrors footer(), which reads $this->footer_content)
    $header = $this->header_content;
    if ($header) {
        if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
            $header = str_replace("\n", '', $header);
        }
        $header = $this->replace_keywords($header);

        if ($this->use_classes) {
            $attr = ' class="head"';
        } else {
            $attr = " style=\"{$this->header_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
        } else {
            $header = "<div$attr>$header</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "$header<ol$attributes$ol_attributes>";
        }
        return $header . ($this->force_code_block ? '<div>' : '');
    }

    // Work out what to return and do it
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            return "<pre$attributes>$header<ol$ol_attributes>";
        } elseif ($this->header_type == GESHI_HEADER_DIV ||
            $this->header_type == GESHI_HEADER_PRE_VALID) {
            return "<div$attributes>$header<ol$ol_attributes>";
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            return "<table$attributes>$header<tbody><tr class=\"li1\">";
        }
    } else {
        if ($this->header_type == GESHI_HEADER_PRE) {
            return "<pre$attributes>$header" .
                ($this->force_code_block ? '<div>' : '');
        } else {
            return "<div$attributes>$header" .
                ($this->force_code_block ? '<div>' : '');
        }
    }
}
4177 
/**
 * Returns the footer for the code block: the optional footer content
 * followed by the closing tags matching what header() opened.
 *
 * @return string The closing HTML
 */
function footer() {
    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            // apply the footer class/style to the cell, as header() does for <thead>
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</div>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</tr></tbody>$footer</table>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    else {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</pre>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</pre>";
    }
}
4231 
/**
 * Replaces certain keywords in the header and footer with certain
 * configuration values.
 *
 * Each placeholder exists in an angle-bracket and a curly-brace spelling:
 * TIME, LANGUAGE, VERSION and SPEED.
 *
 * @param string $instr The text in which the placeholders are substituted
 * @return string The text with all placeholders replaced
 */
function replace_keywords($instr) {
    $elapsed = $this->get_time();

    // Throughput is only meaningful when a positive parse time was measured
    if ($elapsed <= 0) {
        $throughput = 'N/A';
    } else {
        $bytes_per_second = strlen($this->source) / $elapsed;
        $throughput = ($bytes_per_second >= 1024)
            ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
            : sprintf("%.0f B/s", $bytes_per_second);
    }

    // array(<angle spelling>, <curly spelling>, value), in substitution order
    $table = array(
        array('<TIME>', '{TIME}', number_format($elapsed, 3)),
        array('<LANGUAGE>', '{LANGUAGE}', $this->language_data['LANG_NAME']),
        array('<VERSION>', '{VERSION}', GESHI_VERSION),
        array('<SPEED>', '{SPEED}', $throughput),
    );

    $search = array();
    $replace = array();
    foreach ($table as $row) {
        $search[] = $row[0];
        $replace[] = $row[2];
        $search[] = $row[1];
        $replace[] = $row[2];
    }

    return str_replace($search, $replace, $instr);
}
4272 
/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * Besides the usual HTML special characters, ';' and '|' are replaced by
 * the placeholders '<SEMI>' and '<PIPE>' so that they are later processed
 * as entities (works around a symbol highlighting problem, SF#1923020).
 *
 * @param string $string      The text to escape
 * @param int    $quote_style ENT_COMPAT (default): escape double quotes only;
 *                            ENT_NOQUOTES: leave quotes alone;
 *                            ENT_QUOTES: escape single quotes as well
 * @return string The escaped text
 */
function hsc($string, $quote_style = ENT_COMPAT) {
    // Base table for ENT_COMPAT. It is static only to avoid rebuilding it on
    // every call and must never be modified: changes would leak into all
    // subsequent calls.
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
    );

    // Work on a per-call copy so quote-style tweaks stay local to this call
    $table = $aTransSpecchar;

    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($table['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $table["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $table);
}
4356 
/**
 * Turns a name into one usable as a CSS class/id by prefixing an
 * underscore when its first character is numeric.
 *
 * @param string $name The raw name (may be empty)
 * @return string The name, prefixed with '_' if it starts with a digit
 */
function _genCSSName($name){
    // isset() guards the offset access so an empty name raises no notice
    $needs_prefix = isset($name[0]) && is_numeric($name[0]);
    return ($needs_prefix ? '_' : '') . $name;
}
4360 
/**
 * Builds a CSS stylesheet for the current language and style settings.
 *
 * @param bool $economy_mode When true, rules are only emitted for features
 *                           that are enabled; when false, every non-empty
 *                           style is emitted along with a longer banner
 * @return string The stylesheet, or '' if $this->error is set
 */
function get_stylesheet($economy_mode = true) {
    // If there's an error, chances are that the language file
    // won't have populated the language data file, so we can't
    // risk getting a stylesheet...
    if ($this->error) {
        return '';
    }

    //Check if the style rearrangements have been processed ...
    //This also does some preprocessing to check which style groups are useable ...
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // First, work out what the selector should be. If there's an ID,
    // that should be used, the same for a class. Otherwise, a selector
    // of '' means that these styles will be applied anywhere
    if ($this->overall_id) {
        $selector = '#' . $this->_genCSSName($this->overall_id);
    } else {
        $selector = '.' . $this->_genCSSName($this->language);
        if ($this->overall_class) {
            $selector .= '.' . $this->_genCSSName($this->overall_class);
        }
    }
    $selector .= ' ';

    // Header of the stylesheet
    if (!$economy_mode) {
        $stylesheet = "/**\n".
            " * GeSHi Dynamically Generated Stylesheet\n".
            " * --------------------------------------\n".
            " * Dynamically generated stylesheet for {$this->language}\n".
            " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
            " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
            " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
            " * --------------------------------------\n".
            " */\n";
    } else {
        $stylesheet = "/**\n".
            " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
            " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
            " */\n";
    }

    // Line-number related rules are emitted when not economising or when
    // line numbers are actually switched on
    $want_line_rules = !$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS;

    // Set the <ol> to have no effect at all if there are line numbers
    // (<ol>s have margins that should be destroyed so all layout is
    // controlled by the set_overall_style method, which works on the
    // <pre> or <div> container). Additionally, set default styles for lines
    if ($want_line_rules) {
        $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
    }

    // Add overall styles
    // note: neglect economy_mode, empty styles are meaningless
    if ($this->overall_style != '') {
        $stylesheet .= "$selector {{$this->overall_style}}\n";
    }

    // Add styles for links
    // note: economy mode does not make _any_ sense here
    //       either the style is empty and thus no selector is needed
    //       or the appropriate key is given.
    foreach ($this->link_styles as $key => $style) {
        if ($style != '') {
            switch ($key) {
                case GESHI_LINK:
                    $stylesheet .= "{$selector}a:link {{$style}}\n";
                    break;
                case GESHI_HOVER:
                    $stylesheet .= "{$selector}a:hover {{$style}}\n";
                    break;
                case GESHI_ACTIVE:
                    $stylesheet .= "{$selector}a:active {{$style}}\n";
                    break;
                case GESHI_VISITED:
                    $stylesheet .= "{$selector}a:visited {{$style}}\n";
                    break;
            }
        }
    }

    // Header and footer
    // note: neglect economy_mode, empty styles are meaningless
    if ($this->header_content_style != '') {
        $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
    }
    if ($this->footer_content_style != '') {
        $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
    }

    // Styles for important stuff
    // note: neglect economy_mode, empty styles are meaningless
    if ($this->important_styles != '') {
        $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
    }

    // Simple line number styles
    if ($want_line_rules && $this->line_style1 != '') {
        $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
    }
    if ($want_line_rules && $this->table_linenumber_style != '') {
        $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
    }
    // If there is a style set for fancy line numbers, echo it out
    if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
    }

    // note: empty styles are meaningless
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode ||
            (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
            $this->lexic_permissions['KEYWORDS'][$group]))) {
            $stylesheet .= "$selector.kw$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode ||
            (isset($this->lexic_permissions['COMMENTS'][$group]) &&
            $this->lexic_permissions['COMMENTS'][$group]) ||
            (!empty($this->language_data['COMMENT_REGEXP']) &&
            !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
            $stylesheet .= "$selector.co$group {{$styles}}\n";
        }
    }

    // Style types that are switched by one plain on/off lexic permission.
    // STYLES key => array(CSS class prefix, map group 'HARD' to '_h'?)
    // NEW: since 1.0.8 hardescapes and hardquotes use the '_h' suffix
    $flag_gated = array(
        'ESCAPE_CHAR' => array('es', true),
        'BRACKETS'    => array('br', false),
        'SYMBOLS'     => array('sy', false),
        'STRINGS'     => array('st', true),
        'NUMBERS'     => array('nu', false),
        'METHODS'     => array('me', false),
    );
    foreach ($flag_gated as $type => $spec) {
        foreach ($this->language_data['STYLES'][$type] as $group => $styles) {
            if ($styles != '' && (!$economy_mode || $this->lexic_permissions[$type])) {
                if ($spec[1] && $group === 'HARD') {
                    $group = '_h';
                }
                $stylesheet .= $selector . '.' . $spec[0] . $group . ' {' . $styles . "}\n";
            }
        }
    }

    // note: neglect economy_mode, empty styles are meaningless
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $stylesheet .= "$selector.sc$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode ||
            (isset($this->lexic_permissions['REGEXPS'][$group]) &&
            $this->lexic_permissions['REGEXPS'][$group]))) {
            // A style group may exist without a matching regexp definition;
            // guard the lookup instead of triggering an undefined-index notice
            $regexp = isset($this->language_data['REGEXPS'][$group])
                ? $this->language_data['REGEXPS'][$group]
                : null;
            if (is_array($regexp) && array_key_exists(GESHI_CLASS, $regexp)) {
                // the regexp declares its own CSS class name
                $stylesheet .= "$selector.";
                $stylesheet .= $regexp[GESHI_CLASS];
                $stylesheet .= " {{$styles}}\n";
            } else {
                $stylesheet .= "$selector.re$group {{$styles}}\n";
            }
        }
    }
    // Styles for lines being highlighted extra
    if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
        $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
    }
    $stylesheet .= "{$selector}span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
    }

    return $stylesheet;
}
4566 
/**
 * Returns the CSS style to use for an extra-highlighted line.
 *
 * @param int $line The line number, as used to key $highlight_extra_lines_styles
 * @return string The style set for this particular line or, if none was
 *                assigned, the shared style for extra-highlighted lines
 */
function get_line_style($line) {
    if (isset($this->highlight_extra_lines_styles[$line])) {
        return $this->highlight_extra_lines_styles[$line];
    }

    // if no "extra" style assigned, fall back to the shared one
    return $this->highlight_extra_lines_style;
}
4585 
/**
 * Converts a list of words into one or more regular-expression alternations
 * in which common prefixes are factored out.
 *
 * The words are sorted, quoted with preg_quote() and inserted into a nested
 * token array keyed by shared prefixes. Whenever a word shares nothing with
 * its predecessor the finished tokens are flushed into the current pattern.
 * A new pattern is started once GESHI_MAX_PCRE_SUBPATTERNS would be exceeded
 * or the running length passes GESHI_MAX_PCRE_LENGTH.
 *
 * @param array  $list             The words to match
 * @param string $regexp_delimiter Delimiter of the regexp the result is used in
 * @return array List of alternation strings (without delimiters)
 */
function optimize_regexp_list($list, $regexp_delimiter = '/') {
    // characters that must not become the first character of a split key
    $meta_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    sort($list);
    $patterns = array('');
    $group_count = 0;
    $slot = 0;

    // the tokens which we will use to generate the regexp list
    $trie = array();
    // keys leading to the most recently inserted word, one per nesting level
    $path = array();
    // go through all entries of the list and generate the token list
    $pattern_len = 0;
    for ($idx = 0, $total = count($list); $idx < $total; ++$idx) {
        if ($pattern_len > GESHI_MAX_PCRE_LENGTH) {
            // seems like the length of this pcre is growing exorbitantly
            $patterns[++$slot] = $this->_optimize_regexp_list_tokens_to_string($trie);
            $group_count = substr_count($patterns[$slot], '(?:');
            $trie = array();
            $pattern_len = 0;
        }
        $depth = 0;
        $word = preg_quote((string) $list[$idx], $regexp_delimiter);
        $node = &$trie;
        // properly assign the new entry to the correct position in the token array
        // possibly generate smaller common denominator keys
        while (true) {
            // get the common denominator
            if (isset($path[$depth])) {
                if ($path[$depth] == $word) {
                    // this is a duplicate entry, skip it
                    continue 2;
                }
                // length of the prefix shared with the key at this level
                $shared = 0;
                while (isset($word[$shared]) && isset($path[$depth][$shared])
                    && $word[$shared] == $path[$depth][$shared]) {
                    ++$shared;
                }
                if ($shared > 0) {
                    // this entry has at least some chars in common with the current key
                    if ($shared == strlen($path[$depth])) {
                        // current key is totally matched, i.e. this entry has just some bits appended
                        $node = &$node[$path[$depth]];
                    } else {
                        // only part of the keys match
                        $head = substr($path[$depth], 0, $shared);
                        $tail = substr($path[$depth], $shared);

                        if (in_array($head[0], $meta_chars)
                            || in_array($tail[0], $meta_chars)) {
                            // this is bad, a regex char as first character
                            $node[$word] = array('' => true);
                            array_splice($path, $depth, count($path), $word);
                            $pattern_len += strlen($word);
                            // re-enter the loop; the word now equals the key at
                            // this level and is skipped as a duplicate
                            continue;
                        } else {
                            // relocate previous tokens
                            $node[$head] = array($tail => $node[$path[$depth]]);
                            unset($node[$path[$depth]]);
                            $node = &$node[$head];
                            // recreate key index
                            array_splice($path, $depth, count($path), array($head, $tail));
                            $pattern_len += strlen($tail);
                        }
                    }
                    ++$depth;
                    $word = substr($word, $shared);
                    continue;
                }
                // else: fall through, i.e. no common denominator was found
            }
            if ($depth == 0 && !empty($trie)) {
                // we can dump current tokens into the string and throw them away afterwards
                $chunk = $this->_optimize_regexp_list_tokens_to_string($trie);
                $chunk_groups = substr_count($chunk, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $group_count + $chunk_groups > GESHI_MAX_PCRE_SUBPATTERNS) {
                    $patterns[++$slot] = $chunk;
                    $group_count = $chunk_groups;
                } else {
                    if (!empty($patterns[$slot])) {
                        $chunk = '|' . $chunk;
                    }
                    $patterns[$slot] .= $chunk;
                    $group_count += $chunk_groups;
                }
                $trie = array();
                $pattern_len = 0;
            }
            // no further common denominator found
            $node[$word] = array('' => true);
            array_splice($path, $depth, count($path), $word);

            $pattern_len += strlen($word);
            break;
        }
        unset($list[$idx]);
    }
    // make sure the last tokens get converted as well
    $chunk = $this->_optimize_regexp_list_tokens_to_string($trie);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $group_count + substr_count($chunk, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        if (!empty($patterns[$slot])) {
            ++$slot;
        }
        $patterns[$slot] = $chunk;
    } else {
        if (!empty($patterns[$slot])) {
            $chunk = '|' . $chunk;
        }
        $patterns[$slot] .= $chunk;
    }
    return $patterns;
}
/**
 * Serialises the nested token array built by optimize_regexp_list() into
 * a regexp alternation string.
 *
 * Each key is emitted followed by its sub-tokens in a non-capturing group;
 * the group is made optional when the key itself is a complete word (marked
 * by an '' entry). At the top level two simplifications are applied:
 * "(?:p)?" becomes "p?" and "(?:a|b|c)" becomes "[abc]".
 *
 * @param array $tokens   The token array (taken by reference, not modified here)
 * @param bool  $recursed True for nested calls; skips the simplifications
 * @return string The alternation without a trailing pipe
 */
function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $list = '';
    foreach ($tokens as $token => $sub_tokens) {
        $list .= $token;
        // an '' entry marks that the key itself terminates a word
        $close_entry = isset($sub_tokens['']);
        unset($sub_tokens['']);
        if (!empty($sub_tokens)) {
            $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
            if ($close_entry) {
                // make sub_tokens optional
                $list .= '?';
            }
        }
        $list .= '|';
    }
    if (!$recursed) {
        // do some optimizations
        // (merging common trailing strings was tried upstream but is BUGGY
        // and therefore not done)

        // (?:p)? => p?
        $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
        // (?:a|b|c|d|...)? => [abcd...]?
        // TODO: a|bb|c => [ac]|bb
        // A closure replaces create_function(), which is deprecated since
        // PHP 7.2 and no longer exists in PHP 8.
        $list = preg_replace_callback(
            '#\(\?\:((?:.\|)+.)\)#',
            function ($matches) {
                return "[" . str_replace("|", "", $matches[1]) . "]";
            },
            $list
        );
    }
    // return $list without trailing pipe
    return substr($list, 0, -1);
}
4758 } // End Class GeSHi
4759 
4760 
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper: highlights a piece of source with a throw-away
     * GeSHi object (header type GESHI_HEADER_NONE) and wraps the result in
     * <code> tags.
     *
     * @param string $string   The source code to highlight
     * @param string $language The language to highlight it in
     * @param string $path     Passed through as the third GeSHi constructor
     *                         argument (presumably the language file
     *                         directory - confirm against the constructor)
     * @param bool   $return   True to return the markup instead of printing it
     * @return string|bool The markup if $return is true; otherwise the markup
     *                     is echoed and the result is false when the GeSHi
     *                     object reports an error, true if not
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $markup;
        }

        echo $markup;

        // error() is truthy when something went wrong
        return !$highlighter->error();
    }
}
replace_keywords($instr)
Replaces certain keywords in the header and footer with certain configuration values.
Definition: geshi.php:4241
$highlight_extra_lines_style
Definition: geshi.php:428
set_keyword_group_highlighting($key, $flag=true)
Turns highlighting on/off for a keyword group.
Definition: geshi.php:1051
finalise(&$parsed_code)
Takes the parsed code and various options, and creates the HTML surrounding it to make it look nice...
Definition: geshi.php:3860
set_header_content_style($style)
Sets the style for the header content.
Definition: geshi.php:1733
enable_keyword_links($enable=true)
Turns linking of keywords on or off.
Definition: geshi.php:1929
enable_highlighting($flag=true)
Enables all highlighting.
Definition: geshi.php:1443
$path
Definition: aliased.php:25
set_regexps_highlighting($key, $flag)
Turns highlighting on/off for regexps.
Definition: geshi.php:1332
$lexic_permissions
Definition: geshi.php:333
set_escape_characters_style($style, $preserve_defaults=false, $group=0)
Sets the styles for escaped characters.
Definition: geshi.php:1105
$style
Definition: example_012.php:70
$use_language_tab_width
Definition: geshi.php:515
const GESHI_ACTIVE
Links in the source in the :active state.
Definition: geshi.php:107
set_strings_style($style, $preserve_defaults=false, $group=0)
Sets the styles for strings.
Definition: geshi.php:1209
const GESHI_NUMBER_INT_CSTYLE
Enhanced number format for integers like seen in C.
Definition: geshi.php:201
$code_style
Definition: geshi.php:454
const GESHI_ERROR_INVALID_LINE_NUMBER_TYPE
The line number type passed to GeSHi->enable_line_numbers() was invalid.
Definition: geshi.php:244
$footer_content
Definition: geshi.php:362
const GESHI_MAX_PCRE_LENGTH
It's also important not to generate regular expressions that are too long; be generous here...
Definition: geshi.php:195
set_time($start_time, $end_time)
Sets the time taken to parse the code.
Definition: geshi.php:3698
$line_style2
Definition: geshi.php:478
set_source($source)
Sets the source code for this object.
Definition: geshi.php:662
$source
Definition: geshi.php:267
$result
footer()
Returns the footer for the code block.
Definition: geshi.php:4185
const GESHI_MODIFIERS
The key of the regex array defining any modifiers to the regular expression.
Definition: geshi.php:138
const GESHI_LANG_ROOT(!defined('GESHI_ROOT'))
The language file directory for GeSHi (private).
Definition: geshi.php:53
const GESHI_PHP_PRE_433
Used to work around missing PHP features.
Definition: geshi.php:153
const GESHI_START_IMPORTANT
The starter for important parts of the source.
Definition: geshi.php:115
$x
Definition: example_009.php:98
$time
Definition: geshi.php:350
hsc($string, $quote_style=ENT_COMPAT)
Secure replacement for PHP built-in function htmlspecialchars().
Definition: geshi.php:4326
set_overall_style($style, $preserve_defaults=false)
Sets the styles for the code that will be outputted when this object is parsed.
Definition: geshi.php:866
const GESHI_NORMAL_LINE_NUMBERS
Use normal line numbers when building the result.
Definition: geshi.php:65
const GESHI_NUMBER_FLT_SCI_ZERO
Number format to highlight floating-point numbers with support for scientific notation (E) and requir...
Definition: geshi.php:229
set_case_sensitivity($key, $case)
Sets whether a set of keywords are checked for in a case sensitive manner.
Definition: geshi.php:1343
load_language($file_name)
Gets language information and stores it for later use.
Definition: geshi.php:3761
if(strncmp($real_path, SOURCE_ROOT, $base_path_len)) if(!file_exists($path)) $geshi
Definition: aliased.php:40
$error_messages
Definition: geshi.php:298
$loaded_language
Definition: geshi.php:541
highlight_lines_extra($lines, $style=null)
Specifies which lines to highlight extra.
Definition: geshi.php:1843
build_style_cache()
Setup caches needed for styling.
Definition: geshi.php:1942
$encoding
Definition: geshi.php:528
$code
Definition: example_050.php:99
disable_highlighting()
Disables all highlighting.
Definition: geshi.php:1429
const GESHI_NUMBER_HEX_SUFFIX
Number format to highlight hex numbers with a suffix of h.
Definition: geshi.php:221
$link_target
Definition: geshi.php:521
set_symbols_style($style, $preserve_defaults=false, $group=0)
Sets the styles for symbols.
Definition: geshi.php:1170
get_version()
Returns the version of GeSHi.
Definition: geshi.php:613
const GESHI_HEADER_PRE_VALID
Use a pre to wrap lines when line numbers are enabled or to wrap the whole code.
Definition: geshi.php:77
set_footer_content_style($style)
Sets the style for the footer content.
Definition: geshi.php:1743
$header_type
Definition: geshi.php:327
$_hmr_after
Definition: geshi.php:578
$keyword_links
Definition: geshi.php:534
set_header_content($content)
Sets the content of the header block.
Definition: geshi.php:1713
set_link_target($target)
Sets the target for links in code.
Definition: geshi.php:1789
$language
Definition: geshi.php:273
$overall_class
Definition: geshi.php:460
const GESHI_BEFORE
The key of the regex array defining what bracket group in a matched search to put before the replacem...
Definition: geshi.php:141
enable_line_numbers($flag, $nth_row=5)
Sets whether line numbers should be displayed.
Definition: geshi.php:977
const GESHI_CLASS
The key of the regex array defining a custom keyword to use for this regexp's html tag class...
Definition: geshi.php:147
$line_numbers_start
Definition: geshi.php:442
const GESHI_HOVER
Links in the source in the :hover state.
Definition: geshi.php:105
get_language_fullname($language)
Get full_name for a lang or false.
Definition: geshi.php:805
get_real_tab_width()
Returns the tab width to use, based on the current language and user preference.
Definition: geshi.php:1399
Add rich text string
The name of the decorator.
$url
Definition: shib_logout.php:72
set_line_ending($line_ending)
Sets the line-ending.
Definition: geshi.php:1882
set_highlight_lines_extra_style($styles)
Sets the style for extra-highlighted lines.
Definition: geshi.php:1872
enable_classes($flag=true)
Sets whether CSS classes should be used to highlight the source.
Definition: geshi.php:904
error()
Returns an error message associated with the last GeSHi operation, or false if no error has occurred...
Definition: geshi.php:625
$table_linenumber_style
Definition: geshi.php:484
set_code_style($style, $preserve_defaults=false)
Sets the style for the actual code.
Definition: geshi.php:923
$use_classes
Definition: geshi.php:315
static get_language_name_from_extension( $extension, $lookup=array())
Given a file extension, this method returns either a valid geshi language name, or the empty string i...
Definition: geshi.php:1469
set_strings_highlighting($flag)
Turns highlighting on/off for strings.
Definition: geshi.php:1223
remove_keyword($key, $word, $recompile=true)
Removes a keyword from a keyword group.
Definition: geshi.php:1620
add_keyword_group($key, $styles, $case_sensitive=true, $words=array())
Creates a new keyword group.
Definition: geshi.php:1641
optimize_regexp_list($list, $regexp_delimiter='/')
This function creates an optimized regular expression list from an array of strings.
Definition: geshi.php:4601
_optimize_regexp_list_tokens_to_string(&$tokens, $recursed=false)
This function creates the appropriate regexp string from a token array; you should not call this functi...
Definition: geshi.php:4724
$link_styles
Definition: geshi.php:387
$line_nth_row
Definition: geshi.php:503
$_hmr_replace
Definition: geshi.php:577
const GESHI_NUMBER_FLT_NONSCI_F
Number format to highlight floating-point numbers without support for scientific notation.
Definition: geshi.php:225
get_line_style($line)
Gets the style that is used for the specified line.
Definition: geshi.php:4574
get_multiline_span()
Get current setting for multiline spans, see GeSHi->enable_multiline_span().
Definition: geshi.php:1005
enable_multiline_span($flag)
Sets whether spans and other HTML markup generated by GeSHi can span over multiple lines or not...
Definition: geshi.php:995
set_numbers_highlighting($flag)
Turns highlighting on/off for numbers.
Definition: geshi.php:1272
__construct($source='', $language='', $path='')
Creates a new GeSHi object from the given source, language and path.
Definition: geshi.php:597
const GESHI_COMMENTS
Used in language files to mark comments.
Definition: geshi.php:150
$allow_multiline_span
Definition: geshi.php:497
set_keyword_group_style($key, $style, $preserve_defaults=false)
Sets the style for a keyword group.
Definition: geshi.php:1020
set_escape_characters_highlighting($flag=true)
Turns highlighting on/off for escaped characters.
Definition: geshi.php:1119
set_regexps_style($key, $style, $preserve_defaults=false)
Sets the styles for regexps.
Definition: geshi.php:1317
$linestyle
const GESHI_REPLACE
The key of the regex array defining what bracket group in a matched search to use as a replacement...
Definition: geshi.php:136
const GESHI_NUMBER_HEX_PREFIX
Number format to highlight hex numbers with a prefix 0x.
Definition: geshi.php:217
$parse_cache_built
Definition: geshi.php:549
set_comments_style($key, $style, $preserve_defaults=false)
Sets the styles for comment groups.
Definition: geshi.php:1066
$language_path
Definition: geshi.php:285
const GESHI_NUMBER_BIN_PREFIX_0B
Number format to highlight binary numbers with a prefix 0b (C)
Definition: geshi.php:207
const GESHI_MAX_PCRE_SUBPATTERNS(!function_exists('stripos'))
make sure we can call stripos
Definition: geshi.php:189
const GESHI_HEADER_DIV
Use a "div" to surround the source.
Definition: geshi.php:73
change_case($instr)
Changes the case of a keyword for those languages where a change is asked for.
Definition: geshi.php:3248
enable_inner_code_block($flag)
Sets whether to force a surrounding block around the highlighted code or not.
Definition: geshi.php:1754
parse_code()
Returns the code in $this->source, highlighted and surrounded by the necessary HTML.
Definition: geshi.php:2175
const GESHI_ERROR_NO_SUCH_LANG
The language specified does not exist.
Definition: geshi.php:238
$overall_id
Definition: geshi.php:466
set_encoding($encoding)
Sets the encoding used for htmlspecialchars(), for international support.
Definition: geshi.php:1917
$overall_style
Definition: geshi.php:448
$header
$highlight_extra_lines_styles
Definition: geshi.php:422
const GESHI_CAPS_UPPER
Uppercase keywords found.
Definition: geshi.php:97
$line_style1
Definition: geshi.php:472
const GESHI_ERROR_INVALID_HEADER_TYPE
The header type passed to GeSHi->set_header_type() was invalid.
Definition: geshi.php:242
set_brackets_highlighting($flag)
Turns highlighting on/off for brackets.
Definition: geshi.php:1155
const GESHI_NO_LINE_NUMBERS(!defined('GESHI_SECURITY_PARANOID'))
Use no line numbers when building the result.
Definition: geshi.php:63
const GESHI_NUMBER_OCT_PREFIX_0O
Number format to highlight octal numbers with a prefix 0o (logtalk)
Definition: geshi.php:211
const GESHI_NUMBER_BIN_SUFFIX
Number format to highlight binary numbers with a suffix "b".
Definition: geshi.php:203
const GESHI_HEADER_PRE_TABLE
Use a "table" to surround the source:
Definition: geshi.php:91
const GESHI_NUMBER_BIN_PREFIX_PERCENT
Number format to highlight binary numbers with a prefix %.
Definition: geshi.php:205
$strict_mode
Definition: geshi.php:309
get_supported_languages($full_names=false)
Get supported langs or an associative array lang=>full_name.
Definition: geshi.php:754
optimize_keyword_group($key)
Compiles an optimized regexp list for a keyword group.
Definition: geshi.php:1683
$error
Definition: geshi.php:292
const GESHI_VERSION
The version of this GeSHi file.
Definition: geshi.php:44
get_language_name()
Gets a human-readable language name (thanks to Simon Patterson for the idea :))
Definition: geshi.php:649
const GESHI_HEADER_PRE
Use a "pre" to surround the source.
Definition: geshi.php:75
$n
Definition: RandomTest.php:80
enable_ids($flag=true)
Whether CSS IDs should be added to each line.
Definition: geshi.php:1825
handle_keyword_replace($match)
Handles replacements of keywords to include markup and links if requested.
Definition: geshi.php:3269
const GESHI_VISITED
Links in the source in the :visited state.
Definition: geshi.php:109
enable_strict_mode($mode=true)
Enables/disables strict highlighting.
Definition: geshi.php:1416
set_tab_width($width)
Sets how many spaces a tab is substituted for.
Definition: geshi.php:1372
parse_non_string_part($stuff_to_parse)
Takes a string that has no strings or comments in it, and highlights stuff like keywords, numbers and methods.
Definition: geshi.php:3377
$add_ids
Definition: geshi.php:410
$line_numbers
Definition: geshi.php:490
const GESHI_NUMBER_FLT_NONSCI
Number format to highlight floating-point numbers without support for scientific notation.
Definition: geshi.php:223
const GESHI_SEARCH
The key of the regex array defining what to search for.
Definition: geshi.php:133
$comment
Definition: buildRTE.php:83
set_brackets_style($style, $preserve_defaults=false)
Sets the styles for brackets.
Definition: geshi.php:1137
Create styles array
The data for the language used.
const GESHI_LINK
Links in the source in the :link state.
Definition: geshi.php:103
const GESHI_ERROR_FILE_NOT_READABLE
GeSHi could not open a file for reading (generally a language file)
Definition: geshi.php:240
$highlight_extra_lines
Definition: geshi.php:416
const GESHI_FANCY_LINE_NUMBERS
Use fancy line numbers when building the result.
Definition: geshi.php:67
set_language_path($path)
Sets the path to the directory containing the language files.
Definition: geshi.php:720
const GESHI_CAPS_LOWER
Lowercase keywords found.
Definition: geshi.php:99
set_url_for_keyword_group($group, $url)
Sets the base URL to be used for keywords.
Definition: geshi.php:1767
get_stylesheet($economy_mode=true)
Returns a stylesheet for the highlighted code.
Definition: geshi.php:4370
const GESHI_NUMBER_OCT_PREFIX_AT
Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series)...
Definition: geshi.php:213
$_kw_replace_group
Definition: geshi.php:566
set_numbers_style($style, $preserve_defaults=false, $group=0)
Sets the styles for numbers.
Definition: geshi.php:1258
const GESHI_MAYBE
Strict mode might apply, and can be enabled or disabled by GeSHi->enable_strict_mode().
Definition: geshi.php:127
$footer_content_style
Definition: geshi.php:374
set_script_style($style, $preserve_defaults=false, $group=0)
Sets the styles for strict code blocks.
Definition: geshi.php:1238
$language_data
Definition: geshi.php:279
set_overall_id($id)
Sets the overall id for this block of code.
Definition: geshi.php:893
start_line_numbers_at($number)
Sets what number line numbers should start at.
Definition: geshi.php:1901
set_header_type($type)
Sets the type of header to be used.
Definition: geshi.php:845
const GESHI_NUMBER_FLT_SCI_SHORT
Number format to highlight floating-point numbers with support for scientific notation (E) and option...
Definition: geshi.php:227
handle_multiline_regexps($matches)
Handles newlines in REGEXPS matches.
Definition: geshi.php:3344
$line_ending
Definition: geshi.php:436
indent(&$result)
Swaps out spaces and tabs for HTML indentation.
Definition: geshi.php:3148
set_methods_highlighting($flag)
Turns highlighting on/off for methods.
Definition: geshi.php:1303
$_hmr_before
Definition: geshi.php:576
$_hmr_key
Definition: geshi.php:579
const GESHI_NEVER
Strict mode never applies.
Definition: geshi.php:124
$enable_important_blocks
Definition: geshi.php:395
$ret
Definition: parser.php:6
load_from_file($file_name, $lookup=array())
Given a file name, this method loads its contents in, and attempts to set the language automatically...
Definition: geshi.php:1576
$force_code_block
Definition: geshi.php:381
for($i=1; $i<=count($kw_cases_sel); $i+=1) $lang
Definition: langwiz.php:349
enable_important_blocks($flag)
Sets whether context-important blocks are highlighted.
Definition: geshi.php:1815
const GESHI_NUMBER_HEX_PREFIX_DOLLAR
Number format to highlight hex numbers with a prefix $.
Definition: geshi.php:219
set_methods_style($key, $style, $preserve_defaults=false)
Sets the styles for methods.
Definition: geshi.php:1289
set_footer_content($content)
Sets the content of the footer block.
Definition: geshi.php:1723
Add data(end) time
Method that wraps PHPs time in order to allow simulations with the workflow.
set_comments_highlighting($key, $flag=true)
Turns highlighting on/off for comment groups.
Definition: geshi.php:1091
$tab_width
Definition: geshi.php:509
get_time()
Gets the time taken to parse the code.
Definition: geshi.php:3710
set_case_keywords($case)
Sets the case that keywords should use when found.
Definition: geshi.php:1357
set_important_styles($styles)
Sets styles for important parts of the code.
Definition: geshi.php:1803
defined( 'APPLICATION_ENV')||define( 'APPLICATION_ENV'
Definition: bootstrap.php:27
const GESHI_NUMBER_INT_BASIC
Basic number format for integers.
Definition: geshi.php:199
const GESHI_CAPS_NO_CHANGE
Leave keywords found as the case that they are.
Definition: geshi.php:95
set_link_styles($type, $styles)
Sets styles for links in code.
Definition: geshi.php:1779
remove_keyword_group($key)
Removes a keyword group.
Definition: geshi.php:1666
const GESHI_NUMBER_OCT_PREFIX
Number format to highlight octal numbers with a leading zero.
Definition: geshi.php:209
_genCSSName($name)
Definition: geshi.php:4357
header()
Creates the header for the code block (with correct attributes)
Definition: geshi.php:4105
const GESHI_NUMBER_OCT_SUFFIX
Number format to highlight octal numbers with a suffix of o.
Definition: geshi.php:215
set_overall_class($class)
Sets the overall classname for this block of code.
Definition: geshi.php:882
set_line_style($style1, $style2='', $preserve_defaults=false)
Sets the styles for the line numbers.
Definition: geshi.php:943
$style2
Definition: example_012.php:71
handle_regexps_callback($matches)
Handles regular-expression highlighting definitions with callback functions.
Definition: geshi.php:3329
const GESHI_END_IMPORTANT
The ender for important parts of the source.
Definition: geshi.php:117
add_keyword($key, $word)
Adds a keyword to a keyword group for highlighting.
Definition: geshi.php:1592
$important_styles
Definition: geshi.php:404
const GESHI_HEADER_NONE
Use nothing to surround the source.
Definition: geshi.php:71
$header_content
Definition: geshi.php:356
set_symbols_highlighting($flag)
Turns highlighting on/off for symbols.
Definition: geshi.php:1190
$_rx_key
Definition: geshi.php:567
$header_content_style
Definition: geshi.php:368
merge_arrays()
Merges arrays recursively, overwriting values of the first array with values of later arrays...
Definition: geshi.php:3720
const GESHI_ALWAYS
Strict mode always applies.
Definition: geshi.php:129
build_parse_cache()
Setup caches needed for parsing.
Definition: geshi.php:1993
set_use_language_tab_width($use)
Sets whether or not to use the tab-stop width specified by the language.
Definition: geshi.php:1388
set_language($language, $force_reset=false)
Sets the language for this object.
Definition: geshi.php:676
const GESHI_AFTER
The key of the regex array defining what bracket group in a matched search to put after the replaceme...
Definition: geshi.php:144