ILIAS  release_5-3 Revision v5.3.23-19-g915713cf615
geshi.php
Go to the documentation of this file.
1<?php
35//
36// GeSHi Constants
37// You should use these constant names in your programs instead of
38// their values - you never know when a value may change in a future
39// version
40//
41
43define('GESHI_VERSION', '1.0.9.0');
44
45// Define the root directory for the GeSHi code tree
46if (!defined('GESHI_ROOT')) {
48 define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
49}
52define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
53
54// Define if GeSHi should be paranoid about security
55if (!defined('GESHI_SECURITY_PARANOID')) {
57 define('GESHI_SECURITY_PARANOID', false);
58}
59
60// Line numbers - use with enable_line_numbers()
62define('GESHI_NO_LINE_NUMBERS', 0);
64define('GESHI_NORMAL_LINE_NUMBERS', 1);
66define('GESHI_FANCY_LINE_NUMBERS', 2);
67
68// Container HTML type
70define('GESHI_HEADER_NONE', 0);
72define('GESHI_HEADER_DIV', 1);
74define('GESHI_HEADER_PRE', 2);
76define('GESHI_HEADER_PRE_VALID', 3);
90define('GESHI_HEADER_PRE_TABLE', 4);
91
92// Capitalisation constants
94define('GESHI_CAPS_NO_CHANGE', 0);
96define('GESHI_CAPS_UPPER', 1);
98define('GESHI_CAPS_LOWER', 2);
99
100// Link style constants
102define('GESHI_LINK', 0);
104define('GESHI_HOVER', 1);
106define('GESHI_ACTIVE', 2);
108define('GESHI_VISITED', 3);
109
110// Important string starter/finisher
111// Note that if you change these, they should be as-is: i.e., don't
112// write them as if they had been run through htmlentities()
114define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
116define('GESHI_END_IMPORTANT', '<END GeSHi>');
117
121// When strict mode applies for a language
123define('GESHI_NEVER', 0);
126define('GESHI_MAYBE', 1);
128define('GESHI_ALWAYS', 2);
129
130// Advanced regexp handling constants, used in language files
132define('GESHI_SEARCH', 0);
135define('GESHI_REPLACE', 1);
137define('GESHI_MODIFIERS', 2);
140define('GESHI_BEFORE', 3);
143define('GESHI_AFTER', 4);
146define('GESHI_CLASS', 5);
147
149define('GESHI_COMMENTS', 0);
150
155define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
161define('GESHI_MAX_PCRE_LENGTH', 12288);
162
163//Number format specification
// Every format value below is a distinct power of two; build_style_cache() walks the
// bits of $language_data['NUMBERS'] one by one, so several formats can share one integer.
165define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+
167define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]?
169define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB]
171define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+
173define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+
175define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+
177define('GESHI_NUMBER_OCT_PREFIX_0O', 512); //0o[0-7]+ -- NOTE(review): inferred from the constant name; the comment used to repeat OCT_PREFIX's pattern, confirm against the number regexp table
179define('GESHI_NUMBER_OCT_PREFIX_AT', 1024); //@[0-7]+
181define('GESHI_NUMBER_OCT_SUFFIX', 2048); //[0-7]+[oO]
183define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+
185define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192); //$[0-9a-fA-F]+
187define('GESHI_NUMBER_HEX_SUFFIX', 16384); //[0-9][0-9a-fA-F]*h
189define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+
191define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f
193define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+
195define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+
196//Custom formats are passed by RX array
197
198// Error detection - use these to analyse faults
202define('GESHI_ERROR_NO_INPUT', 1);
204define('GESHI_ERROR_NO_SUCH_LANG', 2);
206define('GESHI_ERROR_FILE_NOT_READABLE', 3);
208define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
210define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
226class GeSHi {
227
232 protected $source = '';
233
238 protected $language = '';
239
244 protected $language_data = array();
245
251
257 protected $error = false;
258
263 protected $error_messages = array(
264 GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
265 GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
266 GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
267 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
268 );
269
274 protected $strict_mode = false;
275
280 protected $use_classes = false;
281
293
298 protected $lexic_permissions = array(
299 'KEYWORDS' => array(),
300 'COMMENTS' => array('MULTI' => true),
301 'REGEXPS' => array(),
302 'ESCAPE_CHAR' => true,
303 'BRACKETS' => true,
304 'SYMBOLS' => false,
305 'STRINGS' => true,
306 'NUMBERS' => true,
307 'METHODS' => true,
308 'SCRIPT' => true
309 );
310
315 protected $time = 0;
316
321 protected $header_content = '';
322
327 protected $footer_content = '';
328
333 protected $header_content_style = '';
334
339 protected $footer_content_style = '';
340
346 protected $force_code_block = false;
347
352 protected $link_styles = array();
353
360 protected $enable_important_blocks = false;
361
369 protected $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
370
375 protected $add_ids = false;
376
381 protected $highlight_extra_lines = array();
382
387 protected $highlight_extra_lines_styles = array();
388
393 protected $highlight_extra_lines_style = 'background-color: #ffc;';
394
401 protected $line_ending = null;
402
407 protected $line_numbers_start = 1;
408
413 protected $overall_style = 'font-family:monospace;';
414
419 protected $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
420
425 protected $overall_class = '';
426
431 protected $overall_id = '';
432
437 protected $line_style1 = 'font-weight: normal; vertical-align:top;';
438
443 protected $line_style2 = 'font-weight: bold; vertical-align:top;';
444
449 protected $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
450
456
462 protected $allow_multiline_span = true;
463
468 protected $line_nth_row = 0;
469
474 protected $tab_width = 8;
475
480 protected $use_language_tab_width = false;
481
486 protected $link_target = '';
487
493 protected $encoding = 'utf-8';
494
499 protected $keyword_links = true;
500
506 protected $loaded_language = '';
507
514 protected $parse_cache_built = false;
515
531 protected $_kw_replace_group = 0;
532 protected $_rx_key = 0;
533
541 protected $_hmr_before = '';
542 protected $_hmr_replace = '';
543 protected $_hmr_after = '';
544 protected $_hmr_key = 0;
545
/**
 * Creates a highlighter, optionally priming it with source, language and language path.
 *
 * @param string $source   Code to highlight; applied only when it is a non-empty string.
 * @param string $language Language name; applied only when it is a non-empty string.
 * @param string $path     Language directory; always handed to set_language_path().
 */
public function __construct($source = '', $language = '', $path = '') {
    $has_source = is_string($source) && ('' !== $source);
    if ($has_source) {
        $this->set_source($source);
    }

    $has_language = is_string($language) && ('' !== $language);
    if ($has_language) {
        $this->set_language($language);
    }

    // The path setter does its own validation, so it is called unconditionally.
    $this->set_language_path($path);
}
569
/**
 * Reports the version of this GeSHi file.
 *
 * @return string Value of the GESHI_VERSION constant.
 */
public function get_version()
{
    $version = GESHI_VERSION;

    return $version;
}
580
/**
 * Returns an HTML-formatted message for the last error, or false when there is none.
 *
 * The message template is looked up in $error_messages by the current error code and
 * the placeholders {LANGUAGE} and {PATH} are filled in. Codes without a template
 * (GESHI_ERROR_NO_INPUT, or the free-text errors stored by get_language_fullname())
 * produce an empty message instead of an undefined-index notice; the raw code is still
 * shown in the "(code ...)" suffix, so the returned string is unchanged for those cases.
 *
 * @return string|false Error HTML, or false when no error is recorded.
 */
public function error() {
    if (!$this->error) {
        return false;
    }

    $code = $this->error;

    // Only integer/string codes can be array keys; anything else has no template.
    $has_template = (is_int($code) || is_string($code))
        && isset($this->error_messages[$code]);
    $template = $has_template ? $this->error_messages[$code] : '';

    //Put some template variables for debugging here ...
    $debug_tpl_vars = array(
        '{LANGUAGE}' => $this->language,
        '{PATH}' => $this->language_path
    );
    $msg = str_replace(
        array_keys($debug_tpl_vars),
        array_values($debug_tpl_vars),
        $template
    );

    return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
}
604
/**
 * Returns the human-readable name of the current language.
 *
 * When the recorded error equals GESHI_ERROR_NO_SUCH_LANG the name is suffixed with
 * " (Unknown Language)". If no 'LANG_NAME' entry is present in the language data the
 * name is treated as null (the value the unguarded read used to yield), but without
 * raising an undefined-index warning.
 *
 * @return string|null Language name, possibly suffixed; null when no name is loaded
 *                     and no "no such language" error is recorded.
 */
public function get_language_name() {
    $name = isset($this->language_data['LANG_NAME'])
        ? $this->language_data['LANG_NAME']
        : null;

    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        return $name . ' (Unknown Language)';
    }

    return $name;
}
618
/**
 * Replaces the source code to highlight and clears the list of extra highlighted lines.
 *
 * @param string $source The new source code.
 */
public function set_source($source) {
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
629
637 $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
638 $language = strtolower($language);
639
640 return $language;
641 }
642
653 public function set_language($language, $force_reset = false) {
654 $this->error = false;
655 $this->strict_mode = GESHI_NEVER;
656
657 if ($force_reset) {
658 $this->loaded_language = false;
659 }
660
661 //Clean up the language name to prevent malicious code injection
663
664 //Retreive the full filename
665 $file_name = $this->language_path . $language . '.php';
666 if ($file_name == $this->loaded_language) {
667 // this language is already loaded!
668 return;
669 }
670
671 $this->language = $language;
672
673 //Check if we can read the desired file
674 if (!is_readable($file_name)) {
676 return;
677 }
678
679 // Load the language for parsing
680 $this->load_language($file_name);
681 }
682
/**
 * Sets the directory that language files are loaded from, after validating it.
 *
 * The path is silently ignored when:
 *  - it contains a colon after its first character, unless DIRECTORY_SEPARATOR is a
 *    backslash, the path starts with a drive letter plus colon, and no further colon
 *    follows (guards against fopen wrappers);
 *  - it contains a character outside the allowed set;
 *  - GESHI_SECURITY_PARANOID is on and the path contains "/." or "..";
 *  - it is empty.
 * An accepted path gets a trailing "/" if it lacks one, and the current language is
 * set again so that the new path takes effect.
 *
 * @param string $path The language directory.
 */
public function set_language_path($path) {
    //Security Fix to prevent external directories using fopen wrappers.
    if (strpos($path, ':')) {
        if (DIRECTORY_SEPARATOR != "\\") {
            return;
        }
        $has_drive_prefix = preg_match('#^[a-zA-Z]:#', $path);
        if (!$has_drive_prefix || false !== strpos($path, ':', 2)) {
            return;
        }
    }

    //Security Fix to prevent external directories using fopen wrappers.
    if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
        return;
    }

    //Security Fix to prevent external directories using fopen wrappers.
    if (GESHI_SECURITY_PARANOID
        && (false !== strpos($path, '/.') || false !== strpos($path, '..'))) {
        return;
    }

    if (!$path) {
        return;
    }

    $last_char = $path[strlen($path) - 1];
    if ('/' == $last_char) {
        $this->language_path = $path;
    } else {
        $this->language_path = $path . '/';
    }

    // otherwise set_language_path has no effect
    $this->set_language($this->language);
}
723
/**
 * Lists the languages available in the current language path.
 *
 * Every non-directory entry named "<name>.php" (with no dot inside <name>) counts as
 * a language.
 *
 * @param bool $full_names When exactly true, the result maps each language name to the
 *                         full name reported by get_language_fullname(); languages whose
 *                         full name cannot be resolved are left out. Otherwise the
 *                         result is a plain list of language names.
 * @return array The languages found; empty when the language path is not a directory
 *               or cannot be opened.
 */
public function get_supported_languages($full_names=false)
{
    // return array
    $back = array();

    // dir() yields false when the directory cannot be opened; calling read() on that
    // would be a fatal error, so return the empty list instead.
    if (!is_dir($this->language_path)) {
        return $back;
    }
    $dir = dir($this->language_path);
    if (!$dir) {
        return $back;
    }

    while (false !== ($entry = $dir->read())) {
        // Skip all dirs
        if (is_dir($this->language_path . $entry)) {
            continue;
        }

        // we only want lang.php files
        if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
            continue;
        }

        // Raw lang name is here
        $langname = $matches[1];

        if ($full_names !== true) {
            // just store raw langname
            $back[] = $langname;
            continue;
        }

        // We want the fullname too, so we go associative
        $fullname = $this->get_language_fullname($langname);
        if (false !== $fullname) {
            $back[$langname] = $fullname;
        }
    }

    $dir->close();

    return $back;
}
774
781 {
782 //Clean up the language name to prevent malicious code injection
783 $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
784
785 $language = strtolower($language);
786
787 // get fullpath-filename for a langname
788 $fullpath = $this->language_path.$language.'.php';
789
790 // we need to get contents :S
791 if (false === ($data = file_get_contents($fullpath))) {
792 $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
793 return false;
794 }
795
796 // match the langname
797 if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) {
798 $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
799 return false;
800 }
801
802 // return fullname for langname
803 return stripcslashes($matches[1]);
804 }
805
820 public function set_header_type($type) {
821 //Check if we got a valid header type
822 if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
825 return;
826 }
827
828 //Set that new header type
829 $this->header_type = $type;
830 }
831
/**
 * Sets the style string stored in $overall_style.
 *
 * @param string $style             The style text.
 * @param bool   $preserve_defaults When truthy, $style is appended to the current value
 *                                  instead of replacing it.
 */
public function set_overall_style($style, $preserve_defaults = false) {
    $this->overall_style = $preserve_defaults
        ? $this->overall_style . $style
        : $style;
}
848
/**
 * Stores the given value in $overall_class.
 *
 * @param string $class The class name to remember.
 */
public function set_overall_class($class) {
    $this->overall_class = $class;
}
860
/**
 * Stores the given value in $overall_id.
 *
 * @param string $id The id to remember.
 */
public function set_overall_id($id) {
    $this->overall_id = $id;
}
871
/**
 * Switches the $use_classes flag on or off.
 *
 * @param mixed $flag Any value; it is reduced to a boolean. Defaults to true.
 */
public function enable_classes($flag = true) {
    $this->use_classes = (bool) $flag;
}
882
/**
 * Sets the style string stored in $code_style.
 *
 * @param string $style             The style text.
 * @param bool   $preserve_defaults When truthy, $style is appended to the current value
 *                                  instead of replacing it.
 */
public function set_code_style($style, $preserve_defaults = false) {
    $this->code_style = $preserve_defaults
        ? $this->code_style . $style
        : $style;
}
905
/**
 * Sets the two line style strings ($line_style1 and $line_style2).
 *
 * May be called with two arguments as set_line_style($style1, $preserve_defaults):
 * a boolean second argument is taken as $preserve_defaults and $style2 becomes ''.
 *
 * @param string      $style1            Text for $line_style1.
 * @param string|bool $style2            Text for $line_style2, or the preserve flag.
 * @param bool        $preserve_defaults When truthy, both texts are appended to the
 *                                       current values instead of replacing them.
 */
public function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
    // Two-argument form: shift the boolean into the flag.
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
        return;
    }

    $this->line_style1 = $style1;
    $this->line_style2 = $style2;
}
934
952 public function enable_line_numbers($flag, $nth_row = 5) {
953 if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
954 && GESHI_FANCY_LINE_NUMBERS != $flag) {
956 }
957 $this->line_numbers = $flag;
958 $this->line_nth_row = $nth_row;
959 }
960
/**
 * Switches the $allow_multiline_span flag on or off.
 *
 * @param mixed $flag Any value; it is cast to a boolean.
 */
public function enable_multiline_span($flag) {
    $allow = (bool) $flag;
    $this->allow_multiline_span = $allow;
}
973
980 public function get_multiline_span() {
982 }
983
/**
 * Sets the style of one keyword group, or of every keyword group.
 *
 * Changes compared with the previous implementation:
 *  - the wildcard is matched with ===, so the integer key 0 is no longer mistaken
 *    for '*' on PHP versions where 0 == '*' evaluates to true;
 *  - with '*', the lexic permission is initialised for each affected group rather
 *    than for a literal '*' entry;
 *  - appending to a group that has no style yet assigns the style without raising
 *    an undefined-index notice.
 *
 * @param int|string $key               Keyword group key, or the string '*' for all
 *                                      groups that currently have a style.
 * @param string     $style             The style text.
 * @param bool       $preserve_defaults When truthy, $style is appended to the existing
 *                                      style instead of replacing it.
 */
public function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    if ('*' === $key) {
        $groups = array_keys($this->language_data['STYLES']['KEYWORDS']);
    } else {
        $groups = array($key);
    }

    foreach ($groups as $group) {
        //Set the style for this keyword group
        if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$group])) {
            $this->language_data['STYLES']['KEYWORDS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['KEYWORDS'][$group] = $style;
        }

        //Update the lexic permissions
        if (!isset($this->lexic_permissions['KEYWORDS'][$group])) {
            $this->lexic_permissions['KEYWORDS'][$group] = true;
        }
    }
}
1018
/**
 * Turns highlighting for one keyword group on or off in $lexic_permissions.
 *
 * @param int|string $key  Keyword group key.
 * @param mixed      $flag Reduced to a boolean. Defaults to true.
 */
public function set_keyword_group_highlighting($key, $flag = true) {
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
1029
/**
 * Sets the style of one comment group, or of every comment group.
 *
 * The wildcard is matched with ===, so the integer key 0 is no longer mistaken for
 * '*' on PHP versions where 0 == '*' evaluates to true. Appending to a group that has
 * no style yet assigns the style without raising an undefined-index notice.
 *
 * @param int|string $key               Comment group key, or the string '*' for all
 *                                      groups that currently have a style.
 * @param string     $style             The style text.
 * @param bool       $preserve_defaults When truthy, $style is appended to the existing
 *                                      style instead of replacing it.
 */
public function set_comments_style($key, $style, $preserve_defaults = false) {
    if ('*' === $key) {
        $groups = array_keys($this->language_data['STYLES']['COMMENTS']);
    } else {
        $groups = array($key);
    }

    foreach ($groups as $group) {
        if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$group])) {
            $this->language_data['STYLES']['COMMENTS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['COMMENTS'][$group] = $style;
        }
    }
}
1058
/**
 * Turns highlighting for one comment group on or off in $lexic_permissions.
 *
 * @param int|string $key  Comment group key.
 * @param mixed      $flag Reduced to a boolean. Defaults to true.
 */
public function set_comments_highlighting($key, $flag = true) {
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
1069
/**
 * Sets the style stored under STYLES / ESCAPE_CHAR for one group.
 *
 * @param string $style             The style text.
 * @param bool   $preserve_defaults When truthy, $style is appended to the existing
 *                                  style instead of replacing it.
 * @param int    $group             Group index; defaults to 0.
 */
public function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
        return;
    }

    $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
}
1088
/**
 * Turns the ESCAPE_CHAR entry of $lexic_permissions on or off.
 *
 * @param mixed $flag Reduced to a boolean. Defaults to true.
 */
public function set_escape_characters_highlighting($flag = true) {
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
1098
/**
 * Sets the style stored under STYLES / BRACKETS, index 0.
 *
 * @param string $style             The style text.
 * @param bool   $preserve_defaults When truthy, $style is appended to the existing
 *                                  style instead of replacing it.
 */
public function set_brackets_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
        return;
    }

    $this->language_data['STYLES']['BRACKETS'][0] = $style;
}
1120
/**
 * Turns the BRACKETS entry of $lexic_permissions on or off.
 *
 * @param mixed $flag Reduced to a boolean.
 */
public function set_brackets_highlighting($flag) {
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
1134
/**
 * Sets the style stored under STYLES / SYMBOLS for one group.
 *
 * For backward compatibility, a $group that compares equal to 0 also forwards the
 * same arguments to set_brackets_style().
 *
 * @param string $style             The style text.
 * @param bool   $preserve_defaults When truthy, $style is appended to the existing
 *                                  style instead of replacing it.
 * @param int    $group             Symbol group index; defaults to 0.
 */
public function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
    }

    if (0 != $group) {
        return;
    }

    // Group 0 doubles as the bracket style.
    $this->set_brackets_style ($style, $preserve_defaults);
}
1159
/**
 * Turns the SYMBOLS entry of $lexic_permissions on or off and, for backward
 * compatibility, passes the same flag to set_brackets_highlighting().
 *
 * @param mixed $flag Reduced to a boolean.
 */
public function set_symbols_highlighting($flag) {
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    $this->set_brackets_highlighting ($flag);
}
1173
/**
 * Sets the style stored under STYLES / STRINGS for one group.
 *
 * @param string $style             The style text.
 * @param bool   $preserve_defaults When truthy, $style is appended to the existing
 *                                  style instead of replacing it.
 * @param int    $group             Group index; defaults to 0.
 */
public function set_strings_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['STRINGS'][$group] .= $style;
        return;
    }

    $this->language_data['STYLES']['STRINGS'][$group] = $style;
}
1192
/**
 * Turns the STRINGS entry of $lexic_permissions on or off.
 *
 * @param mixed $flag Reduced to a boolean.
 */
public function set_strings_highlighting($flag) {
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
1202
/**
 * Sets the style stored under STYLES / SCRIPT for one group.
 *
 * @param string $style             The style text.
 * @param bool   $preserve_defaults When truthy, $style is appended to the existing
 *                                  style instead of replacing it.
 * @param int    $group             Group index; defaults to 0.
 */
public function set_script_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
        return;
    }

    $this->language_data['STYLES']['SCRIPT'][$group] = $style;
}
1222
/**
 * Sets the style stored under STYLES / NUMBERS for one group.
 *
 * @param string $style             The style text.
 * @param bool   $preserve_defaults When truthy, $style is appended to the existing
 *                                  style instead of replacing it.
 * @param int    $group             Group index; defaults to 0.
 */
public function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
        return;
    }

    $this->language_data['STYLES']['NUMBERS'][$group] = $style;
}
1241
/**
 * Turns the NUMBERS entry of $lexic_permissions on or off.
 *
 * @param mixed $flag Reduced to a boolean.
 */
public function set_numbers_highlighting($flag) {
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
1251
/**
 * Sets the style stored under STYLES / METHODS for one key.
 *
 * @param int|string $key               Key of the entry to style.
 * @param string     $style             The style text.
 * @param bool       $preserve_defaults When truthy, $style is appended to the existing
 *                                      style instead of replacing it.
 */
public function set_methods_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
        return;
    }

    $this->language_data['STYLES']['METHODS'][$key] = $style;
}
1272
/**
 * Turns the METHODS entry of $lexic_permissions on or off.
 *
 * @param mixed $flag Reduced to a boolean.
 */
public function set_methods_highlighting($flag) {
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
1282
/**
 * Sets the style stored under STYLES / REGEXPS for one key.
 *
 * @param int|string $key               Key of the regexp entry to style.
 * @param string     $style             The style text.
 * @param bool       $preserve_defaults When truthy, $style is appended to the existing
 *                                      style instead of replacing it.
 */
public function set_regexps_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
        return;
    }

    $this->language_data['STYLES']['REGEXPS'][$key] = $style;
}
1302
/**
 * Turns highlighting for one regexp entry on or off in $lexic_permissions.
 *
 * @param int|string $key  Key of the regexp entry.
 * @param mixed      $flag Reduced to a boolean.
 */
public function set_regexps_highlighting($key, $flag) {
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
1313
/**
 * Sets the CASE_SENSITIVE entry of the language data for one key.
 *
 * @param int|string $key  Key of the entry (a keyword group key, judging by
 *                         add_keyword_group(), which writes the same array).
 * @param mixed      $case Reduced to a boolean.
 */
public function set_case_sensitivity($key, $case) {
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
1324
1335 public function set_case_keywords($case) {
1336 if (in_array($case, array(
1338 $this->language_data['CASE_KEYWORDS'] = $case;
1339 }
1340 }
1341
/**
 * Sets $tab_width from the integer value of $width.
 *
 * @param mixed $width Converted with intval(); results below 1 are replaced by the
 *                     default of 8.
 */
public function set_tab_width($width) {
    $requested = intval($width);

    // Anything below one column falls back to the default width.
    $this->tab_width = ($requested < 1) ? 8 : $requested;
}
1359
/**
 * Switches the $use_language_tab_width flag on or off.
 *
 * @param mixed $use Any value; it is cast to a boolean.
 */
public function set_use_language_tab_width($use) {
    $enabled = (bool) $use;
    $this->use_language_tab_width = $enabled;
}
1369
/**
 * Returns the tab width that is actually in effect.
 *
 * @return int The language data's TAB_WIDTH when $use_language_tab_width is on and
 *             that entry is set; otherwise $tab_width.
 */
public function get_real_tab_width() {
    $language_width_applies = $this->use_language_tab_width
        && isset($this->language_data['TAB_WIDTH']);

    if ($language_width_applies) {
        return $this->language_data['TAB_WIDTH'];
    }

    return $this->tab_width;
}
1385
/**
 * Sets $strict_mode, but only when the language data's STRICT_MODE_APPLIES entry
 * equals GESHI_MAYBE; otherwise the call does nothing.
 *
 * @param mixed $mode Truthy selects GESHI_ALWAYS, falsy selects GESHI_NEVER.
 *                    Defaults to true.
 */
public function enable_strict_mode($mode = true) {
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        return;
    }

    if ($mode) {
        $this->strict_mode = GESHI_ALWAYS;
    } else {
        $this->strict_mode = GESHI_NEVER;
    }
}
1399
/**
 * Shorthand for enable_highlighting(false).
 */
public function disable_highlighting() {
    $flag = false;
    $this->enable_highlighting($flag);
}
1410
/**
 * Sets every entry of $lexic_permissions, including the entries of nested arrays,
 * and the $enable_important_blocks flag to the same boolean.
 *
 * @param mixed $flag Reduced to a boolean. Defaults to true.
 */
public function enable_highlighting($flag = true) {
    $enabled = (bool) $flag;

    foreach (array_keys($this->lexic_permissions) as $section) {
        if (!is_array($this->lexic_permissions[$section])) {
            $this->lexic_permissions[$section] = $enabled;
            continue;
        }

        // Per-group permissions: switch each group individually.
        foreach (array_keys($this->lexic_permissions[$section]) as $group) {
            $this->lexic_permissions[$section][$group] = $enabled;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $enabled;
}
1436
/**
 * Maps a file extension to a language name.
 *
 * The extension is lower-cased and compared against each language's extension list
 * in order; the first language whose list contains it wins.
 *
 * @param string $extension File extension without the dot.
 * @param array  $lookup    Optional language => list-of-extensions map. When it is not
 *                          an array or is empty, the built-in table is used.
 * @return string The matching language name, or 'text' when nothing matches.
 */
public static function get_language_name_from_extension( $extension, $lookup = array() ) {
    $wanted = strtolower($extension);

    $use_builtin_table = !is_array($lookup) || empty($lookup);
    if ($use_builtin_table) {
        $lookup = array(
            '6502acme' => array( 'a', 's', 'asm', 'inc' ),
            '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
            '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
            '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
            'abap' => array('abap'),
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm', 'inc'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'bf' => array('bf'),
            'c' => array('c', 'h'),
            'c_mac' => array('c', 'h'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cdfg' => array('cdfg'),
            'cobol' => array('cbl'),
            'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
            'csharp' => array('cs'),
            'css' => array('css'),
            'd' => array('d'),
            'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
            'diff' => array('diff', 'patch'),
            'dos' => array('bat', 'cmd'),
            'gdb' => array('kcrash', 'crash', 'bt'),
            'gettext' => array('po', 'pot'),
            'gml' => array('gml'),
            'gnuplot' => array('plt'),
            'groovy' => array('groovy'),
            'haskell' => array('hs'),
            'haxe' => array('hx'),
            'html4strict' => array('html', 'htm'),
            'ini' => array('ini', 'desktop', 'vbp'),
            'java' => array('java'),
            'javascript' => array('js'),
            'klonec' => array('kl1'),
            'klonecpp' => array('klx'),
            'latex' => array('tex'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'matlab' => array('m'),
            'mpasm' => array(),
            'mysql' => array('sql'),
            'nsis' => array(),
            'objc' => array(),
            'oobas' => array(),
            'oracle8' => array(),
            'oracle10' => array(),
            'pascal' => array('pas'),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'povray' => array('pov'),
            'providex' => array('pvc', 'pvx'),
            'prolog' => array('pl'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'reg' => array('reg'),
            'ruby' => array('rb'),
            'sas' => array('sas'),
            'scala' => array('scala'),
            'scheme' => array('scm'),
            'scilab' => array('sci'),
            'smalltalk' => array('st'),
            'smarty' => array(),
            'tcl' => array('tcl'),
            'text' => array('txt'),
            'vb' => array('bas', 'ctl', 'frm'),
            'vbnet' => array('vb', 'sln'),
            'visualfoxpro' => array(),
            'whitespace' => array('ws'),
            'xml' => array('xml', 'svg', 'xrc', 'vbproj', 'csproj', 'userprefs', 'resx', 'stetic', 'settings', 'manifest', 'myapp'),
            'z80' => array('z80', 'asm', 'inc')
        );
    }

    // First language listing the extension wins; table order therefore matters.
    foreach ($lookup as $lang => $extensions) {
        if (!in_array($wanted, $extensions)) {
            continue;
        }
        return $lang;
    }

    return 'text';
}
1537
1555 public function load_from_file($file_name, $lookup = array()) {
1556 if (is_readable($file_name)) {
1557 $this->set_source(file_get_contents($file_name));
1558 $this->set_language(self::get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1559 } else {
1561 }
1562 }
1563
/**
 * Adds a word to a keyword group, creating the group when it does not exist yet.
 *
 * A word already present in the group is not added again. When the parse cache has
 * been built, the cached regexp list of the group is kept in step:
 *  - if the group has a non-empty cached list, the quoted word is appended to its
 *    last entry (no full recompile);
 *  - otherwise the list is built with optimize_keyword_group(), unless highlighting
 *    for the group is explicitly disabled - the same rule build_parse_cache() applies.
 * Previously a missing cached list led to an entry at index -1 starting with "|",
 * and a missing group raised an undefined-index notice.
 *
 * @param int|string $key  Keyword group key.
 * @param string     $word The word to add.
 */
public function add_keyword($key, $word) {
    if (!isset($this->language_data['KEYWORDS'][$key])
        || !is_array($this->language_data['KEYWORDS'][$key])) {
        $this->language_data['KEYWORDS'][$key] = array();
    }

    if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
        return;
    }
    $this->language_data['KEYWORDS'][$key][] = $word;

    if (!$this->parse_cache_built) {
        return;
    }

    //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
    if (!empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
        return;
    }

    // No cached list to append to: build one, but leave disabled groups uncached.
    if (!isset($this->lexic_permissions['KEYWORDS'][$key])
        || $this->lexic_permissions['KEYWORDS'][$key]) {
        $this->optimize_keyword_group($key);
    }
}
1585
/**
 * Removes a word from a keyword group, if it is present.
 *
 * @param int|string $key       Keyword group key.
 * @param string     $word      The word to remove.
 * @param bool       $recompile When truthy and the parse cache has been built, the
 *                              group's cached regexp list is rebuilt after removal.
 */
public function remove_keyword($key, $word, $recompile = true) {
    $position = array_search($word, $this->language_data['KEYWORDS'][$key]);
    if (false === $position) {
        return;
    }

    unset($this->language_data['KEYWORDS'][$key][$position]);

    //NEW in 1.0.8, optionally recompile keyword group
    $must_recompile = $recompile && $this->parse_cache_built;
    if ($must_recompile) {
        $this->optimize_keyword_group($key);
    }
}
1610
1621 public function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
1622 $words = (array) $words;
1623 if (empty($words)) {
1624 // empty word lists mess up highlighting
1625 return false;
1626 }
1627
1628 //Add the new keyword group internally
1629 $this->language_data['KEYWORDS'][$key] = $words;
1630 $this->lexic_permissions['KEYWORDS'][$key] = true;
1631 $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
1632 $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
1633
1634 //NEW in 1.0.8, cache keyword regexp
1635 if ($this->parse_cache_built) {
1637 }
1638 return true;
1639 }
1640
/**
 * Removes a keyword group: its words, highlighting permission, case-sensitivity
 * setting, style and cached regexp list.
 *
 * @param int|string $key Keyword group key.
 */
public function remove_keyword_group ($key) {
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key],
        //NEW in 1.0.8
        $this->language_data['CACHED_KEYWORD_LISTS'][$key]
    );
}
1657
/**
 * Rebuilds the cached regexp list of one keyword group.
 *
 * The list comes from optimize_regexp_list(). When SPACE_AS_WHITESPACE is enabled in
 * PARSER_CONTROL / KEYWORDS - globally, or for this group, the group setting taking
 * precedence - every space in the cached entries is replaced by "\s+".
 *
 * @param int|string $key Keyword group key.
 */
public function optimize_keyword_group($key) {
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
        $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

    $space_as_whitespace = false;
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
        $keyword_control = $this->language_data['PARSER_CONTROL']['KEYWORDS'];

        if (isset($keyword_control['SPACE_AS_WHITESPACE'])) {
            $space_as_whitespace = $keyword_control['SPACE_AS_WHITESPACE'];
        }
        // A per-group setting overrides the global one.
        if (isset($keyword_control[$key]['SPACE_AS_WHITESPACE'])) {
            $space_as_whitespace = $keyword_control[$key]['SPACE_AS_WHITESPACE'];
        }
    }

    if (!$space_as_whitespace) {
        return;
    }

    foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $index => $pattern) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] =
            str_replace(" ", "\\s+", $pattern);
    }
}
1687
/**
 * Stores the given value in $header_content.
 *
 * @param string $content The header content.
 */
public function set_header_content($content) {
    $this->header_content = $content;
}
1697
/**
 * Stores the given value in $footer_content.
 *
 * @param string $content The footer content.
 */
public function set_footer_content($content) {
    $this->footer_content = $content;
}
1707
1715 $this->header_content_style = $style;
1716 }
1717
1725 $this->footer_content_style = $style;
1726 }
1727
/**
 * Switches the $force_code_block flag on or off.
 *
 * @param mixed $flag Any value; it is cast to a boolean.
 */
public function enable_inner_code_block($flag) {
    $forced = (bool) $flag;
    $this->force_code_block = $forced;
}
1738
/**
 * Stores a URL for one keyword group in the language data's URLS array.
 *
 * @param int|string $group Keyword group key.
 * @param string     $url   The URL to store.
 */
public function set_url_for_keyword_group($group, $url) {
    $this->language_data['URLS'][$group] = $url;
}
1751
/**
 * Stores the styles for one link type in $link_styles.
 *
 * @param int    $type   Link type; presumably one of GESHI_LINK, GESHI_HOVER,
 *                       GESHI_ACTIVE or GESHI_VISITED - not enforced here.
 * @param string $styles The style text.
 */
public function set_link_styles($type, $styles) {
    $this->link_styles[$type] = $styles;
}
1763
/**
 * Sets $link_target to a ready-made ' target="..."' attribute fragment.
 *
 * @param string $target The target name; a falsy value clears the fragment.
 *                       The value is inserted as given, without escaping.
 */
public function set_link_target($target) {
    $this->link_target = $target
        ? ' target="' . $target . '"'
        : '';
}
1777
1785 $this->important_styles = $styles;
1786 }
1787
/**
 * Switches the $enable_important_blocks flag on or off.
 *
 * @param mixed $flag Reduced to a boolean.
 */
public function enable_important_blocks($flag) {
    $this->enable_important_blocks = (bool) $flag;
}
1799
/**
 * Switches the $add_ids flag on or off.
 *
 * @param mixed $flag Reduced to a boolean. Defaults to true.
 */
public function enable_ids($flag = true) {
    $this->add_ids = (bool) $flag;
}
1809
/**
 * Marks one or more lines for extra highlighting, or removes such a mark.
 *
 * @param int|array         $lines A line number (converted with intval()) or an array
 *                                 of them; arrays are processed element by element.
 * @param string|null|false $style null: mark the line and drop any individual style
 *                                 for it; false: remove the mark and its individual
 *                                 style; anything else: mark the line and store the
 *                                 value as its individual style.
 */
public function highlight_lines_extra($lines, $style = null) {
    if (is_array($lines)) {
        //Split up the job using single lines at a time
        foreach ($lines as $single_line) {
            $this->highlight_lines_extra($single_line, $style);
        }
        return;
    }

    $line_no = intval($lines);

    if (false === $style) {
        // Removal: forget both the mark and its style.
        unset($this->highlight_extra_lines[$line_no]);
        unset($this->highlight_extra_lines_styles[$line_no]);
        return;
    }

    //Mark the line as being highlighted specially
    $this->highlight_extra_lines[$line_no] = $line_no;

    if (null === $style) {
        unset($this->highlight_extra_lines_styles[$line_no]);
    } else {
        $this->highlight_extra_lines_styles[$line_no] = $style;
    }
}
1846
1854 $this->highlight_extra_lines_style = $styles;
1855 }
1856
1864 $this->line_ending = (string)$line_ending;
1865 }
1866
/**
 * Sets $line_numbers_start to the absolute integer value of $number.
 *
 * @param mixed $number Converted with intval(); the sign is dropped.
 */
public function start_line_numbers_at($number) {
    $start = intval($number);
    $this->line_numbers_start = abs($start);
}
1885
/**
 * Sets $encoding to the lower-cased form of the given name.
 *
 * @param string $encoding Encoding name; a falsy value leaves the current setting
 *                         untouched.
 */
public function set_encoding($encoding) {
    if (!$encoding) {
        return;
    }

    $this->encoding = strtolower($encoding);
}
1903
/**
 * Switches the $keyword_links flag on or off.
 *
 * @param mixed $enable Any value; it is cast to a boolean. Defaults to true.
 */
public function enable_keyword_links($enable = true) {
    $linked = (bool) $enable;
    $this->keyword_links = $linked;
}
1913
1922 protected function build_style_cache() {
1923 //Build the style cache needed to highlight numbers appropriate
1924 if($this->lexic_permissions['NUMBERS']) {
1925 //First check what way highlighting information for numbers are given
1926 if(!isset($this->language_data['NUMBERS'])) {
1927 $this->language_data['NUMBERS'] = 0;
1928 }
1929
1930 if(is_array($this->language_data['NUMBERS'])) {
1931 $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
1932 } else {
1933 $this->language_data['NUMBERS_CACHE'] = array();
1934 if(!$this->language_data['NUMBERS']) {
1935 $this->language_data['NUMBERS'] =
1938 }
1939
1940 for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
1941 //Rearrange style indices if required ...
1942 if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
1943 $this->language_data['STYLES']['NUMBERS'][$i] =
1944 $this->language_data['STYLES']['NUMBERS'][1<<$i];
1945 unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
1946 }
1947
1948 //Check if this bit is set for highlighting
1949 if($j&1) {
1950 //So this bit is set ...
1951 //Check if it belongs to group 0 or the actual stylegroup
1952 if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
1953 $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
1954 } else {
1955 if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
1956 $this->language_data['NUMBERS_CACHE'][0] = 0;
1957 }
1958 $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
1959 }
1960 }
1961 }
1962 }
1963 }
1964 }
1965
/**
 * Builds the lookup data that is derived from the loaded language definition
 * and stores it in $this->language_data:
 *
 *  - SYMBOL_DATA / SYMBOL_SEARCH / MULTIPLE_SYMBOL_GROUPS for symbol matching,
 *  - CACHED_KEYWORD_LISTS (filled through optimize_keyword_group()),
 *  - CACHE_BRACKET_MATCH / CACHE_BRACKET_REPLACE for bracket highlighting,
 *  - NUMBERS_RXCACHE (one regexp per number style group).
 *
 * Sets $this->parse_cache_built to true once everything has been built.
 *
 * @return bool|null false when no language data is loaded, nothing otherwise
 */
protected function build_parse_cache() {
    // Nothing can be cached without a loaded language definition
    if (empty($this->language_data)) {
        return false;
    }

    // --- Symbols -----------------------------------------------------------
    // Matching symbols is costly, so instead of one pass per symbol group a
    // single search pattern covering all symbols is built. SYMBOL_DATA maps
    // each (HTML-escaped) symbol to its group so the style can be resolved at
    // highlight time.
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;

        $this->language_data['SYMBOL_DATA'] = array();
        $symbol_preg_multi = array();  // multi char symbols
        $symbol_preg_single = array(); // single char symbols
        foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
            // A nested array is a symbol group identified by its key;
            // a bare symbol belongs to group 0.
            if (is_array($symbols)) {
                $group = $key;
                $group_symbols = $symbols;
            } else {
                $group = 0;
                $group_symbols = array($symbols);
            }

            foreach ($group_symbols as $sym) {
                $sym = $this->hsc($sym);
                if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                    // first definition of a symbol wins
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$sym] = $group;

                if (isset($sym[1])) { // multiple chars
                    $symbol_preg_multi[] = preg_quote($sym, '/');
                } elseif ($sym == '-') {
                    // don't trigger range out of order error inside [...]
                    $symbol_preg_single[] = '\-';
                } else { // single char
                    $symbol_preg_single[] = preg_quote($sym, '/');
                }
            }
        }

        // Assemble the search pattern: multi char symbols as alternatives,
        // single char symbols as one character class. Both lists are sorted
        // in reverse order before being joined.
        $symbol_preg = array();
        if (!empty($symbol_preg_multi)) {
            rsort($symbol_preg_multi);
            $symbol_preg[] = implode('|', $symbol_preg_multi);
        }
        if (!empty($symbol_preg_single)) {
            rsort($symbol_preg_single);
            $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
    }

    // --- Keywords ----------------------------------------------------------
    // Drop any old cache, then rebuild it for every keyword group that is not
    // explicitly disabled.
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
            $this->lexic_permissions['KEYWORDS'][$key]) {
            $this->optimize_keyword_group($key);
        }
    }

    // --- Brackets ----------------------------------------------------------
    if ($this->lexic_permissions['BRACKETS']) {
        $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

        // Inline style when classes are off and a style is configured,
        // otherwise fall back to the CSS class.
        if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $bracket_attributes = ' style="' . $this->language_data['STYLES']['BRACKETS'][0] . '"';
        } else {
            $bracket_attributes = ' class="br0"';
        }

        // Entities in the same order as CACHE_BRACKET_MATCH: [ ] ( ) { }
        $bracket_replace = array();
        foreach (array('&#91;', '&#93;', '&#40;', '&#41;', '&#123;', '&#125;') as $entity) {
            $bracket_replace[] = '<|' . $bracket_attributes . '>' . $entity . '|>';
        }
        $this->language_data['CACHE_BRACKET_REPLACE'] = $bracket_replace;
    }

    // --- Numbers -----------------------------------------------------------
    if ($this->lexic_permissions['NUMBERS']) {
        // NUMBERS_CACHE is produced by the style cache; build it on demand.
        if (!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        // Number format specification, keyed by the number flag bit each
        // pattern belongs to. The lookup below indexes this table with
        // power-of-two flag values, so it must NOT be a plain list.
        // NOTE(review): the GESHI_NUMBER_* constants are expected to be
        // defined with the other GeSHi constants of this file - confirm.
        // All these formats are matched case-insensitively!
        static $numbers_format = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX_0O =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_PREFIX_AT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
        );

        // Each NUMBERS_CACHE entry is either a ready-made regexp (string) or
        // a bitfield of number flags that has to be expanded into one.
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach ($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
            if (is_string($rxdata)) {
                $regexp = $rxdata;
            } else {
                // Walk every bit up to the highest one set and collect the
                // patterns of the flags that are enabled.
                $rxuse = array();
                for ($i = 1; $i <= $rxdata; $i <<= 1) {
                    // Skip bits without a known format: an undefined entry
                    // would add an empty alternative to the final regexp.
                    if (($rxdata & $i) && isset($numbers_format[$i])) {
                        $rxuse[] = $numbers_format[$i];
                    }
                }
                $regexp = implode("|", $rxuse);
            }

            $this->language_data['NUMBERS_RXCACHE'][$key] =
                "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i";
        }

        // Default precheck: only text containing a digit is worth scanning
        if (!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
            $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
        }
    }

    $this->parse_cache_built = true;
}
2148
2159 public function parse_code() {
2160 // Start the timer
2161 $start_time = microtime();
2162
2163 // Replace all newlines to a common form.
2164 $code = str_replace("\r\n", "\n", $this->source);
2165 $code = str_replace("\r", "\n", $code);
2166
2167 // check whether language_data is available
2168 if (empty($this->language_data)) {
2170 }
2171
2172 // Firstly, if there is an error, we won't highlight
2173 if ($this->error) {
2174 //Escape the source for output
2175 $result = $this->hsc($this->source);
2176
2177 //This fix is related to SF#1923020, but has to be applied regardless of
2178 //actually highlighting symbols.
2179 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2180
2181 // Timing is irrelevant
2182 $this->set_time($start_time, $start_time);
2183 $this->finalise($result);
2184 return $result;
2185 }
2186
2187 // make sure the parse cache is up2date
2188 if (!$this->parse_cache_built) {
2189 $this->build_parse_cache();
2190 }
2191
2192 // Initialise various stuff
2193 $length = strlen($code);
2194 $COMMENT_MATCHED = false;
2195 $stuff_to_parse = '';
2196 $endresult = '';
2197
2198 // "Important" selections are handled like multiline comments
2199 // @todo GET RID OF THIS SHIZ
2200 if ($this->enable_important_blocks) {
2201 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2202 }
2203
2204 if ($this->strict_mode) {
2205 // Break the source into bits. Each bit will be a portion of the code
2206 // within script delimiters - for example, HTML between < and >
2207 $k = 0;
2208 $parts = array();
2209 $matches = array();
2210 $next_match_pointer = null;
2211 // we use a copy to unset delimiters on demand (when they are not found)
2212 $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2213 $i = 0;
2214 while ($i < $length) {
2215 $next_match_pos = $length + 1; // never true
2216 foreach ($delim_copy as $dk => $delimiters) {
2217 if(is_array($delimiters)) {
2218 foreach ($delimiters as $open => $close) {
2219 // make sure the cache is setup properly
2220 if (!isset($matches[$dk][$open])) {
2221 $matches[$dk][$open] = array(
2222 'next_match' => -1,
2223 'dk' => $dk,
2224
2225 'open' => $open, // needed for grouping of adjacent code blocks (see below)
2226 'open_strlen' => strlen($open),
2227
2228 'close' => $close,
2229 'close_strlen' => strlen($close),
2230 );
2231 }
2232 // Get the next little bit for this opening string
2233 if ($matches[$dk][$open]['next_match'] < $i) {
2234 // only find the next pos if it was not already cached
2235 $open_pos = strpos($code, $open, $i);
2236 if ($open_pos === false) {
2237 // no match for this delimiter ever
2238 unset($delim_copy[$dk][$open]);
2239 continue;
2240 }
2241 $matches[$dk][$open]['next_match'] = $open_pos;
2242 }
2243 if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2244 //So we got a new match, update the close_pos
2245 $matches[$dk][$open]['close_pos'] =
2246 strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2247
2248 $next_match_pointer =& $matches[$dk][$open];
2249 $next_match_pos = $matches[$dk][$open]['next_match'];
2250 }
2251 }
2252 } else {
2253 //So we should match an RegExp as Strict Block ...
2260 if(preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2261 //We got a match ...
2262 if(isset($matches_rx['start']) && isset($matches_rx['end']))
2263 {
2264 $matches[$dk] = array(
2265 'next_match' => $matches_rx['start'][1],
2266 'dk' => $dk,
2267
2268 'close_strlen' => strlen($matches_rx['end'][0]),
2269 'close_pos' => $matches_rx['end'][1],
2270 );
2271 } else {
2272 $matches[$dk] = array(
2273 'next_match' => $matches_rx[1][1],
2274 'dk' => $dk,
2275
2276 'close_strlen' => strlen($matches_rx[2][0]),
2277 'close_pos' => $matches_rx[2][1],
2278 );
2279 }
2280 } else {
2281 // no match for this delimiter ever
2282 unset($delim_copy[$dk]);
2283 continue;
2284 }
2285
2286 if ($matches[$dk]['next_match'] <= $next_match_pos) {
2287 $next_match_pointer =& $matches[$dk];
2288 $next_match_pos = $matches[$dk]['next_match'];
2289 }
2290 }
2291 }
2292
2293 // non-highlightable text
2294 $parts[$k] = array(
2295 1 => substr($code, $i, $next_match_pos - $i)
2296 );
2297 ++$k;
2298
2299 if ($next_match_pos > $length) {
2300 // out of bounds means no next match was found
2301 break;
2302 }
2303
2304 // highlightable code
2305 $parts[$k][0] = $next_match_pointer['dk'];
2306
2307 //Only combine for non-rx script blocks
2308 if(is_array($delim_copy[$next_match_pointer['dk']])) {
2309 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2310 $i = $next_match_pos + $next_match_pointer['open_strlen'];
2311 while (true) {
2312 $close_pos = strpos($code, $next_match_pointer['close'], $i);
2313 if ($close_pos == false) {
2314 break;
2315 }
2316 $i = $close_pos + $next_match_pointer['close_strlen'];
2317 if ($i == $length) {
2318 break;
2319 }
2320 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2321 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2322 // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2323 foreach ($matches as $submatches) {
2324 foreach ($submatches as $match) {
2325 if ($match['next_match'] == $i) {
2326 // a different block already matches here!
2327 break 3;
2328 }
2329 }
2330 }
2331 } else {
2332 break;
2333 }
2334 }
2335 } else {
2336 $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2337 $i = $close_pos;
2338 }
2339
2340 if ($close_pos === false) {
2341 // no closing delimiter found!
2342 $parts[$k][1] = substr($code, $next_match_pos);
2343 ++$k;
2344 break;
2345 } else {
2346 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2347 ++$k;
2348 }
2349 }
2350 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2351 $num_parts = $k;
2352
2353 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2354 // when we have only one part, we don't have anything to highlight at all.
2355 // if we have a "maybe" strict language, this should be handled as highlightable code
2356 $parts = array(
2357 0 => array(
2358 0 => '',
2359 1 => ''
2360 ),
2361 1 => array(
2362 0 => null,
2363 1 => $parts[0][1]
2364 )
2365 );
2366 $num_parts = 2;
2367 }
2368
2369 } else {
2370 // Not strict mode - simply dump the source into
2371 // the array at index 1 (the first highlightable block)
2372 $parts = array(
2373 0 => array(
2374 0 => '',
2375 1 => ''
2376 ),
2377 1 => array(
2378 0 => null,
2379 1 => $code
2380 )
2381 );
2382 $num_parts = 2;
2383 }
2384
2385 //Unset variables we won't need any longer
2386 unset($code);
2387
2388 //Preload some repeatedly used values regarding hardquotes ...
2389 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2390 $hq_strlen = strlen($hq);
2391
2392 //Preload if line numbers are to be generated afterwards
2393 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2394 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2395 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2396
2397 //preload the escape char for faster checking ...
2398 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2399
2400 // this is used for single-line comments
2401 $sc_disallowed_before = "";
2402 $sc_disallowed_after = "";
2403
2404 if (isset($this->language_data['PARSER_CONTROL'])) {
2405 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2406 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2407 $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2408 }
2409 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2410 $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2411 }
2412 }
2413 }
2414
2415 //Fix for SF#1932083: Multichar Quotemarks unsupported
2416 $is_string_starter = array();
2417 if ($this->lexic_permissions['STRINGS']) {
2418 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2419 if (!isset($is_string_starter[$quotemark[0]])) {
2420 $is_string_starter[$quotemark[0]] = (string)$quotemark;
2421 } elseif (is_string($is_string_starter[$quotemark[0]])) {
2422 $is_string_starter[$quotemark[0]] = array(
2423 $is_string_starter[$quotemark[0]],
2424 $quotemark);
2425 } else {
2426 $is_string_starter[$quotemark[0]][] = $quotemark;
2427 }
2428 }
2429 }
2430
2431 // Now we go through each part. We know that even-indexed parts are
2432 // code that shouldn't be highlighted, and odd-indexed parts should
2433 // be highlighted
2434 for ($key = 0; $key < $num_parts; ++$key) {
2435 $STRICTATTRS = '';
2436
2437 // If this block should be highlighted...
2438 if (!($key & 1)) {
2439 // Else not a block to highlight
2440 $endresult .= $this->hsc($parts[$key][1]);
2441 unset($parts[$key]);
2442 continue;
2443 }
2444
2445 $result = '';
2446 $part = $parts[$key][1];
2447
2448 $highlight_part = true;
2449 if ($this->strict_mode && !is_null($parts[$key][0])) {
2450 // get the class key for this block of code
2451 $script_key = $parts[$key][0];
2452 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2453 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2454 $this->lexic_permissions['SCRIPT']) {
2455 // Add a span element around the source to
2456 // highlight the overall source block
2457 if (!$this->use_classes &&
2458 $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2459 $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2460 } else {
2461 $attributes = ' class="sc' . $script_key . '"';
2462 }
2463 $result .= "<span$attributes>";
2464 $STRICTATTRS = $attributes;
2465 }
2466 }
2467
2468 if ($highlight_part) {
2469 // Now, highlight the code in this block. This code
2470 // is really the engine of GeSHi (along with the method
2471 // parse_non_string_part).
2472
2473 // cache comment regexps incrementally
2474 $next_comment_regexp_key = '';
2475 $next_comment_regexp_pos = -1;
2476 $next_comment_multi_pos = -1;
2477 $next_comment_single_pos = -1;
2478 $comment_regexp_cache_per_key = array();
2479 $comment_multi_cache_per_key = array();
2480 $comment_single_cache_per_key = array();
2481 $next_open_comment_multi = '';
2482 $next_comment_single_key = '';
2483 $escape_regexp_cache_per_key = array();
2484 $next_escape_regexp_key = '';
2485 $next_escape_regexp_pos = -1;
2486
2487 $length = strlen($part);
2488 for ($i = 0; $i < $length; ++$i) {
2489 // Get the next char
2490 $char = $part[$i];
2491 $char_len = 1;
2492
2493 // update regexp comment cache if needed
2494 if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2495 $next_comment_regexp_pos = $length;
2496 foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2497 $match_i = false;
2498 if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2499 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2500 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2501 // we have already matched something
2502 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2503 // this comment is never matched
2504 continue;
2505 }
2506 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2507 } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) {
2508 $match_i = $match[0][1];
2509
2510 $comment_regexp_cache_per_key[$comment_key] = array(
2511 'key' => $comment_key,
2512 'length' => strlen($match[0][0]),
2513 'pos' => $match_i
2514 );
2515 } else {
2516 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2517 continue;
2518 }
2519
2520 if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2521 $next_comment_regexp_pos = $match_i;
2522 $next_comment_regexp_key = $comment_key;
2523 if ($match_i === $i) {
2524 break;
2525 }
2526 }
2527 }
2528 }
2529
2530 $string_started = false;
2531
2532 if (isset($is_string_starter[$char])) {
2533 // Possibly the start of a new string ...
2534
2535 //Check which starter it was ...
2536 //Fix for SF#1932083: Multichar Quotemarks unsupported
2537 if (is_array($is_string_starter[$char])) {
2538 $char_new = '';
2539 foreach ($is_string_starter[$char] as $testchar) {
2540 if ($testchar === substr($part, $i, strlen($testchar)) &&
2541 strlen($testchar) > strlen($char_new)) {
2542 $char_new = $testchar;
2543 $string_started = true;
2544 }
2545 }
2546 if ($string_started) {
2547 $char = $char_new;
2548 }
2549 } else {
2550 $testchar = $is_string_starter[$char];
2551 if ($testchar === substr($part, $i, strlen($testchar))) {
2552 $char = $testchar;
2553 $string_started = true;
2554 }
2555 }
2556 $char_len = strlen($char);
2557 }
2558
2559 if ($string_started && ($i != $next_comment_regexp_pos)) {
2560 // Hand out the correct style information for this string
2561 $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2562 if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2563 !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2564 $string_key = 0;
2565 }
2566
2567 // parse the stuff before this
2568 $result .= $this->parse_non_string_part($stuff_to_parse);
2569 $stuff_to_parse = '';
2570
2571 if (!$this->use_classes) {
2572 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2573 } else {
2574 $string_attributes = ' class="st'.$string_key.'"';
2575 }
2576
2577 // now handle the string
2578 $string = "<span$string_attributes>" . GeSHi::hsc($char);
2579 $start = $i + $char_len;
2580 $string_open = true;
2581
2582 if(empty($this->language_data['ESCAPE_REGEXP'])) {
2583 $next_escape_regexp_pos = $length;
2584 }
2585
2586 do {
2587 //Get the regular ending pos ...
2588 $close_pos = strpos($part, $char, $start);
2589 if(false === $close_pos) {
2590 $close_pos = $length;
2591 }
2592
2593 if($this->lexic_permissions['ESCAPE_CHAR']) {
2594 // update escape regexp cache if needed
2595 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2596 $next_escape_regexp_pos = $length;
2597 foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2598 $match_i = false;
2599 if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2600 ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2601 $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2602 // we have already matched something
2603 if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2604 // this comment is never matched
2605 continue;
2606 }
2607 $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2608 } elseif (preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) {
2609 $match_i = $match[0][1];
2610
2611 $escape_regexp_cache_per_key[$escape_key] = array(
2612 'key' => $escape_key,
2613 'length' => strlen($match[0][0]),
2614 'pos' => $match_i
2615 );
2616 } else {
2617 $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2618 continue;
2619 }
2620
2621 if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2622 $next_escape_regexp_pos = $match_i;
2623 $next_escape_regexp_key = $escape_key;
2624 if ($match_i === $start) {
2625 break;
2626 }
2627 }
2628 }
2629 }
2630
2631 //Find the next simple escape position
2632 if('' != $this->language_data['ESCAPE_CHAR']) {
2633 $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2634 if(false === $simple_escape) {
2635 $simple_escape = $length;
2636 }
2637 } else {
2638 $simple_escape = $length;
2639 }
2640 } else {
2641 $next_escape_regexp_pos = $length;
2642 $simple_escape = $length;
2643 }
2644
2645 if($simple_escape < $next_escape_regexp_pos &&
2646 $simple_escape < $length &&
2647 $simple_escape < $close_pos) {
2648 //The nexxt escape sequence is a simple one ...
2649 $es_pos = $simple_escape;
2650
2651 //Add the stuff not in the string yet ...
2652 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2653
2654 //Get the style for this escaped char ...
2655 if (!$this->use_classes) {
2656 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2657 } else {
2658 $escape_char_attributes = ' class="es0"';
2659 }
2660
2661 //Add the style for the escape char ...
2662 $string .= "<span$escape_char_attributes>" .
2663 GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2664
2665 //Get the byte AFTER the ESCAPE_CHAR we just found
2666 $es_char = $part[$es_pos + 1];
2667 if ($es_char == "\n") {
2668 // don't put a newline around newlines
2669 $string .= "</span>\n";
2670 $start = $es_pos + 2;
2671 } elseif (ord($es_char) >= 128) {
2672 //This is an non-ASCII char (UTF8 or single byte)
2673 //This code tries to work around SF#2037598 ...
2674 if(function_exists('mb_substr')) {
2675 $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2676 $string .= $es_char_m . '</span>';
2677 } elseif ('utf-8' == $this->encoding) {
2678 if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2679 "|\xE0[\xA0-\xBF][\x80-\xBF]".
2680 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2681 "|\xED[\x80-\x9F][\x80-\xBF]".
2682 "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2683 "|[\xF1-\xF3][\x80-\xBF]{3}".
2684 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2685 $part, $es_char_m, null, $es_pos + 1)) {
2686 $es_char_m = $es_char_m[0];
2687 } else {
2688 $es_char_m = $es_char;
2689 }
2690 $string .= $this->hsc($es_char_m) . '</span>';
2691 } else {
2692 $es_char_m = $this->hsc($es_char);
2693 }
2694 $start = $es_pos + strlen($es_char_m) + 1;
2695 } else {
2696 $string .= $this->hsc($es_char) . '</span>';
2697 $start = $es_pos + 2;
2698 }
2699 } elseif ($next_escape_regexp_pos < $length &&
2700 $next_escape_regexp_pos < $close_pos) {
2701 $es_pos = $next_escape_regexp_pos;
2702 //Add the stuff not in the string yet ...
2703 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2704
2705 //Get the key and length of this match ...
2706 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2707 $escape_str = substr($part, $es_pos, $escape['length']);
2708 $escape_key = $escape['key'];
2709
2710 //Get the style for this escaped char ...
2711 if (!$this->use_classes) {
2712 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2713 } else {
2714 $escape_char_attributes = ' class="es' . $escape_key . '"';
2715 }
2716
2717 //Add the style for the escape char ...
2718 $string .= "<span$escape_char_attributes>" .
2719 $this->hsc($escape_str) . '</span>';
2720
2721 $start = $es_pos + $escape['length'];
2722 } else {
2723 //Copy the remainder of the string ...
2724 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2725 $start = $close_pos + $char_len;
2726 $string_open = false;
2727 }
2728 } while($string_open);
2729
2730 if ($check_linenumbers) {
2731 // Are line numbers used? If, we should end the string before
2732 // the newline and begin it again (so when <li>s are put in the source
2733 // remains XHTML compliant)
2734 // note to self: This opens up possibility of config files specifying
2735 // that languages can/cannot have multiline strings???
2736 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2737 }
2738
2739 $result .= $string;
2740 $string = '';
2741 $i = $start - 1;
2742 continue;
2743 } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2744 substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2745 // The start of a hard quoted string
2746 if (!$this->use_classes) {
2747 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2748 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2749 } else {
2750 $string_attributes = ' class="st_h"';
2751 $escape_char_attributes = ' class="es_h"';
2752 }
2753 // parse the stuff before this
2754 $result .= $this->parse_non_string_part($stuff_to_parse);
2755 $stuff_to_parse = '';
2756
2757 // now handle the string
2758 $string = '';
2759
2760 // look for closing quote
2761 $start = $i + $hq_strlen;
2762 while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2763 $start = $close_pos + 1;
2764 if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2765 (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2766 // make sure this quote is not escaped
2767 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2768 if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2769 // check wether this quote is escaped or if it is something like '\\'
2770 $escape_char_pos = $close_pos - 1;
2771 while ($escape_char_pos > 0
2772 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2773 --$escape_char_pos;
2774 }
2775 if (($close_pos - $escape_char_pos) & 1) {
2776 // uneven number of escape chars => this quote is escaped
2777 continue 2;
2778 }
2779 }
2780 }
2781 }
2782
2783 // found closing quote
2784 break;
2785 }
2786
2787 //Found the closing delimiter?
2788 if (!$close_pos) {
2789 // span till the end of this $part when no closing delimiter is found
2790 $close_pos = $length;
2791 }
2792
2793 //Get the actual string
2794 $string = substr($part, $i, $close_pos - $i + 1);
2795 $i = $close_pos;
2796
2797 // handle escape chars and encode html chars
2798 // (special because when we have escape chars within our string they may not be escaped)
2799 if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2800 $start = 0;
2801 $new_string = '';
2802 while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2803 // hmtl escape stuff before
2804 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2805 // check if this is a hard escape
2806 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2807 if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2808 // indeed, this is a hardescape
2809 $new_string .= "<span$escape_char_attributes>" .
2810 $this->hsc($hardescape) . '</span>';
2811 $start = $es_pos + strlen($hardescape);
2812 continue 2;
2813 }
2814 }
2815 // not a hard escape, but a normal escape
2816 // they come in pairs of two
2817 $c = 0;
2818 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2819 && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2820 && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2821 $c += 2;
2822 }
2823 if ($c) {
2824 $new_string .= "<span$escape_char_attributes>" .
2825 str_repeat($escaped_escape_char, $c) .
2826 '</span>';
2827 $start = $es_pos + $c;
2828 } else {
2829 // this is just a single lonely escape char...
2830 $new_string .= $escaped_escape_char;
2831 $start = $es_pos + 1;
2832 }
2833 }
2834 $string = $new_string . $this->hsc(substr($string, $start));
2835 } else {
2836 $string = $this->hsc($string);
2837 }
2838
2839 if ($check_linenumbers) {
2840 // Are line numbers used? If, we should end the string before
2841 // the newline and begin it again (so when <li>s are put in the source
2842 // remains XHTML compliant)
2843 // note to self: This opens up possibility of config files specifying
2844 // that languages can/cannot have multiline strings???
2845 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2846 }
2847
2848 $result .= "<span$string_attributes>" . $string . '</span>';
2849 $string = '';
2850 continue;
2851 } else {
2852 //Have a look for regexp comments
2853 if ($i == $next_comment_regexp_pos) {
2854 $COMMENT_MATCHED = true;
2855 $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2856 $test_str = $this->hsc(substr($part, $i, $comment['length']));
2857
2858 //@todo If remove important do remove here
2859 if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2860 if (!$this->use_classes) {
2861 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2862 } else {
2863 $attributes = ' class="co' . $comment['key'] . '"';
2864 }
2865
2866 $test_str = "<span$attributes>" . $test_str . "</span>";
2867
2868 // Short-cut through all the multiline code
2869 if ($check_linenumbers) {
2870 // strreplace to put close span and open span around multiline newlines
2871 $test_str = str_replace(
2872 "\n", "</span>\n<span$attributes>",
2873 str_replace("\n ", "\n&nbsp;", $test_str)
2874 );
2875 }
2876 }
2877
2878 $i += $comment['length'] - 1;
2879
2880 // parse the rest
2881 $result .= $this->parse_non_string_part($stuff_to_parse);
2882 $stuff_to_parse = '';
2883 }
2884
2885 // If we haven't matched a regexp comment, try multi-line comments
2886 if (!$COMMENT_MATCHED) {
2887 // Is this a multiline comment?
2888 if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2889 $next_comment_multi_pos = $length;
2890 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2891 $match_i = false;
2892 if (isset($comment_multi_cache_per_key[$open]) &&
2893 ($comment_multi_cache_per_key[$open] >= $i ||
2894 $comment_multi_cache_per_key[$open] === false)) {
2895 // we have already matched something
2896 if ($comment_multi_cache_per_key[$open] === false) {
2897 // this comment is never matched
2898 continue;
2899 }
2900 $match_i = $comment_multi_cache_per_key[$open];
2901 } elseif (($match_i = stripos($part, $open, $i)) !== false) {
2902 $comment_multi_cache_per_key[$open] = $match_i;
2903 } else {
2904 $comment_multi_cache_per_key[$open] = false;
2905 continue;
2906 }
2907 if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2908 $next_comment_multi_pos = $match_i;
2909 $next_open_comment_multi = $open;
2910 if ($match_i === $i) {
2911 break;
2912 }
2913 }
2914 }
2915 }
2916 if ($i == $next_comment_multi_pos) {
2917 $open = $next_open_comment_multi;
2918 $close = $this->language_data['COMMENT_MULTI'][$open];
2919 $open_strlen = strlen($open);
2920 $close_strlen = strlen($close);
2921 $COMMENT_MATCHED = true;
2922 $test_str_match = $open;
2923 //@todo If remove important do remove here
2924 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2925 $open == GESHI_START_IMPORTANT) {
2926 if ($open != GESHI_START_IMPORTANT) {
2927 if (!$this->use_classes) {
2928 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2929 } else {
2930 $attributes = ' class="coMULTI"';
2931 }
2932 $test_str = "<span$attributes>" . $this->hsc($open);
2933 } else {
2934 if (!$this->use_classes) {
2935 $attributes = ' style="' . $this->important_styles . '"';
2936 } else {
2937 $attributes = ' class="imp"';
2938 }
2939
2940 // We don't include the start of the comment if it's an
2941 // "important" part
2942 $test_str = "<span$attributes>";
2943 }
2944 } else {
2945 $test_str = $this->hsc($open);
2946 }
2947
2948 $close_pos = strpos( $part, $close, $i + $open_strlen );
2949
2950 if ($close_pos === false) {
2951 $close_pos = $length;
2952 }
2953
2954 // Short-cut through all the multiline code
2955 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2956 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2957 $test_str_match == GESHI_START_IMPORTANT) &&
2958 $check_linenumbers) {
2959
2960 // strreplace to put close span and open span around multiline newlines
2961 $test_str .= str_replace(
2962 "\n", "</span>\n<span$attributes>",
2963 str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2964 );
2965 } else {
2966 $test_str .= $rest_of_comment;
2967 }
2968
2969 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2970 $test_str_match == GESHI_START_IMPORTANT) {
2971 $test_str .= '</span>';
2972 }
2973
2974 $i = $close_pos + $close_strlen - 1;
2975
2976 // parse the rest
2977 $result .= $this->parse_non_string_part($stuff_to_parse);
2978 $stuff_to_parse = '';
2979 }
2980 }
2981
2982 // If we haven't matched a multiline comment, try single-line comments
2983 if (!$COMMENT_MATCHED) {
2984 // cache potential single line comment occurrences
2985 if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2986 $next_comment_single_pos = $length;
2987 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2988 $match_i = false;
2989 if (isset($comment_single_cache_per_key[$comment_key]) &&
2990 ($comment_single_cache_per_key[$comment_key] >= $i ||
2991 $comment_single_cache_per_key[$comment_key] === false)) {
2992 // we have already matched something
2993 if ($comment_single_cache_per_key[$comment_key] === false) {
2994 // this comment is never matched
2995 continue;
2996 }
2997 $match_i = $comment_single_cache_per_key[$comment_key];
2998 } elseif (
2999 // case sensitive comments
3000 ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3001 ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
3002 // non case sensitive
3003 (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
3004 (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
3005 $comment_single_cache_per_key[$comment_key] = $match_i;
3006 } else {
3007 $comment_single_cache_per_key[$comment_key] = false;
3008 continue;
3009 }
3010 if ($match_i !== false && $match_i < $next_comment_single_pos) {
3011 $next_comment_single_pos = $match_i;
3012 $next_comment_single_key = $comment_key;
3013 if ($match_i === $i) {
3014 break;
3015 }
3016 }
3017 }
3018 }
3019 if ($next_comment_single_pos == $i) {
3020 $comment_key = $next_comment_single_key;
3021 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
3022 $com_len = strlen($comment_mark);
3023
3024 // This check will find special variables like $# in bash
3025 // or compiler directives of Delphi beginning {$
3026 if ((empty($sc_disallowed_before) || ($i == 0) ||
3027 (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
3028 (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
3029 (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
3030 {
3031 // this is a valid comment
3032 $COMMENT_MATCHED = true;
3033 if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3034 if (!$this->use_classes) {
3035 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
3036 } else {
3037 $attributes = ' class="co' . $comment_key . '"';
3038 }
3039 $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
3040 } else {
3041 $test_str = $this->hsc($comment_mark);
3042 }
3043
3044 //Check if this comment is the last in the source
3045 $close_pos = strpos($part, "\n", $i);
3046 $oops = false;
3047 if ($close_pos === false) {
3048 $close_pos = $length;
3049 $oops = true;
3050 }
3051 $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
3052 if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
3053 $test_str .= "</span>";
3054 }
3055
3056 // Take into account that the comment might be the last in the source
3057 if (!$oops) {
3058 $test_str .= "\n";
3059 }
3060
3061 $i = $close_pos;
3062
3063 // parse the rest
3064 $result .= $this->parse_non_string_part($stuff_to_parse);
3065 $stuff_to_parse = '';
3066 }
3067 }
3068 }
3069 }
3070
3071 // Where are we adding this char?
3072 if (!$COMMENT_MATCHED) {
3073 $stuff_to_parse .= $char;
3074 } else {
3075 $result .= $test_str;
3076 unset($test_str);
3077 $COMMENT_MATCHED = false;
3078 }
3079 }
3080 // Parse the last bit
3081 $result .= $this->parse_non_string_part($stuff_to_parse);
3082 $stuff_to_parse = '';
3083 } else {
3084 $result .= $this->hsc($part);
3085 }
3086 // Close the <span> that surrounds the block
3087 if ($STRICTATTRS != '') {
3088 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
3089 $result .= '</span>';
3090 }
3091
3092 $endresult .= $result;
3093 unset($part, $parts[$key], $result);
3094 }
3095
3096 //This fix is related to SF#1923020, but has to be applied regardless of
3097 //actually highlighting symbols.
3099 $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
3100
3101// // Parse the last stuff (redundant?)
3102// $result .= $this->parse_non_string_part($stuff_to_parse);
3103
3104 // Lop off the very first and last spaces
3105// $result = substr($result, 1, -1);
3106
3107 // We're finished: stop timing
3108 $this->set_time($start_time, microtime());
3109
3110 $this->finalise($endresult);
3111 return $endresult;
3112 }
3113
/**
 * Makes whitespace in the highlighted code survive HTML rendering.
 *
 * Tabs are expanded to the next tab stop (width taken from
 * get_real_tab_width()), counting only visible characters: everything
 * between "<" and ">" is skipped and a character entity counts as a single
 * column. A space that starts a line becomes "&nbsp;", and every pair of
 * consecutive spaces becomes a space followed by "&nbsp;". When line numbers
 * are off and the header is not GESHI_HEADER_PRE_TABLE, nl2br() is applied or
 * newlines are replaced by the configured line ending.
 *
 * @param string $result The highlighted code; modified in place
 */
protected function indent(&$result) {
    // Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        $result = null; // Save memory while we process the lines individually
        $tab_width = $this->get_real_tab_width();
        // One non-breaking space followed by plain spaces; the tab
        // replacements below are slices of this string.
        $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

        for ($key = 0, $n = count($lines); $key < $n; $key++) {
            $line = $lines[$key];
            if (false === strpos($line, "\t")) {
                continue;
            }

            $pos = 0; // visible column reached so far
            $length = strlen($line);
            $lines[$key] = ''; // rebuilt piece by piece below

            $IN_TAG = false;
            for ($i = 0; $i < $length; ++$i) {
                $char = $line[$i];
                // Simple engine to work out whether we're in a tag.
                // Tag characters are copied without advancing $pos, so the
                // tab replacement is worked out from the actual content only.
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG) {
                    if ('>' == $char) {
                        $IN_TAG = false;
                    }
                    $lines[$key] .= $char;
                } elseif ('<' == $char) {
                    $IN_TAG = true;
                    $lines[$key] .= '<';
                } elseif ('&' == $char) {
                    // Start of a character entity. The characters after the
                    // "&" are each counted by the default branch below, so
                    // $pos is pre-adjusted here to make the whole entity net
                    // one column. The terminating ";" is only searched for
                    // in the 5 bytes starting 3 bytes after the "&".
                    $substr = substr($line, $i + 3, 5);
                    $posi = strpos($substr, ';');
                    if (false === $posi) {
                        ++$pos;
                    } else {
                        $pos -= $posi + 2;
                    }
                    $lines[$key] .= $char;
                } elseif ("\t" == $char) {
                    // Number of columns up to the next tab stop
                    $tab_end_width = $tab_width - ($pos % $tab_width);
                    if (($pos & 1) || 1 == $tab_end_width) {
                        // plain spaces only (skip the leading '&nbsp;')
                        $lines[$key] .= substr($tab_string, 6, $tab_end_width);
                    } else {
                        // '&nbsp;' (6 bytes, one column) plus plain spaces
                        $lines[$key] .= substr($tab_string, 0, $tab_end_width + 5);
                    }
                    $pos += $tab_end_width;

                    // No further tab on this line: copy the rest verbatim
                    if (false === strpos($line, "\t", $i + 1)) {
                        $lines[$key] .= substr($line, $i + 1);
                        break;
                    }
                } elseif (0 == $pos && ' ' == $char) {
                    $lines[$key] .= '&nbsp;';
                    ++$pos;
                } else {
                    $lines[$key] .= $char;
                    ++$pos;
                }
            }
        }
        $result = implode("\n", $lines);
        unset($lines); // We don't need the lines separated beyond this --- free them!
    }

    // Other whitespace
    // BenBE: Fix to reduce the number of replacements to be done
    $result = preg_replace('/^ /m', '&nbsp;', $result);
    // Only *pairs* of spaces are rewritten. Replacing single spaces would also
    // hit the spaces that separate attributes inside the generated tags.
    $result = str_replace('  ', ' &nbsp;', $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        if ($this->line_ending === null) {
            $result = nl2br($result);
        } else {
            $result = str_replace("\n", $this->line_ending, $result);
        }
    }
}
3212
/**
 * Applies the language's CASE_KEYWORDS setting to a string.
 *
 * @param string $instr The string to convert
 * @return string Upper-cased for GESHI_CAPS_UPPER, lower-cased for
 *                GESHI_CAPS_LOWER, otherwise unchanged
 */
protected function change_case($instr)
{
    $caps_mode = $this->language_data['CASE_KEYWORDS'];

    if ($caps_mode == GESHI_CAPS_UPPER) {
        return strtoupper($instr);
    }
    if ($caps_mode == GESHI_CAPS_LOWER) {
        return strtolower($instr);
    }

    return $instr;
}
3230
/**
 * preg_replace_callback() handler for keywords: wraps the matched keyword in
 * the intermediate keyword marker and, if keyword links are enabled and the
 * group has a URL, in a link placeholder.
 *
 * @param array $match Match array: [0] is the full match, [1] the text
 *                     captured by the first group of the pattern
 * @return string The replacement for the match
 */
protected function handle_keyword_replace($match) {
    // The callback only receives the match array; the keyword group being
    // processed is stored in this property by parse_non_string_part() right
    // before it installs this callback.
    $k = $this->_kw_replace_group;
    $keyword = $match[0];
    $keyword_match = $match[1];

    $before = '';
    $after = '';

    if ($this->keyword_links) {
        // Keyword links have been enabled

        if (isset($this->language_data['URLS'][$k]) &&
            $this->language_data['URLS'][$k] != '') {
            // There is a base URL for this keyword group

            // Default to the text as matched; for case insensitive groups
            // whose URL uses {FNAME}, prefer the spelling from the language
            // file so the link gets the correct case.
            $word = $keyword_match;
            if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                    if (strcasecmp($candidate, $keyword_match) == 0) {
                        $word = $candidate;
                        break;
                    }
                }
            }

            // Dots are masked as <DOT> and the opening tag is written as the
            // <|UR1| placeholder; both are still literal text at this point.
            $before = '<|UR1|"' .
                str_replace(
                    array(
                        '{FNAME}',
                        '{FNAMEL}',
                        '{FNAMEU}',
                        '{FNAMEUF}',
                        '.'),
                    array(
                        str_replace('+', '%20', urlencode($this->hsc($word))),
                        str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
                        str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
                        str_replace('+', '%20', urlencode($this->hsc(ucfirst($word)))),
                        '<DOT>'),
                    $this->language_data['URLS'][$k]
                ) . '">';
            $after = '</a>';
        }
    }

    return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
}
3291
/**
 * preg_replace_callback() handler for regexps whose style is a callable:
 * the callable stored in STYLES/REGEXPS for the current key ($this->_rx_key)
 * is invoked with the captured text and its result becomes the style value.
 *
 * @param array $matches Match array; [1] is the captured text
 * @return string ' style="..."' followed by the captured text and '|>'
 */
protected function handle_regexps_callback($matches)
{
    $captured = $matches[1];
    $style_callback = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $style = call_user_func($style_callback, $captured);

    return ' style="' . $style . '"' . $captured . '|>';
}
3305
/**
 * preg_replace_callback() handler for regexp matches: wraps the replacement
 * in the regexp marker for $this->_hmr_key and closes/reopens that marker at
 * every newline.
 *
 * If $this->_hmr_replace is set, "\N" references in the before, after and
 * replace templates are substituted with the corresponding match groups;
 * otherwise the full match is used as the replacement.
 *
 * @param array $matches Match array from preg_replace_callback()
 * @return string The marked-up replacement
 */
protected function handle_multiline_regexps($matches)
{
    $marker = '<|!REG3XP' . $this->_hmr_key . '!>';
    $before = $this->_hmr_before;
    $after = $this->_hmr_after;

    if (!$this->_hmr_replace) {
        $replace = $matches[0];
    } else {
        // One "\N" search token per match group, in the order of $matches
        $search = array();
        foreach ($matches as $group => $unused) {
            $search[] = '\\' . $group;
        }

        $before = str_replace($search, $matches, $before);
        $after = str_replace($search, $matches, $after);
        $replace = str_replace($search, $matches, $this->_hmr_replace);
    }

    $marked = str_replace("\n", "|>\n" . $marker, $replace);

    return $before . $marker . $marked . '|>' . $after;
}
3338
/**
 * Highlights one fragment of code and returns it with markup applied.
 *
 * The fragment is passed through hsc() and then processed in a fixed order:
 * keywords, language regexps, numbers, object methods, brackets and symbols.
 * The passes insert placeholder markers ("<|" ... ">" to open, "|>" to close)
 * which are turned into <span ...> and </span> in the final step.
 *
 * @param string $stuff_to_parse The code fragment to highlight
 * @return string The highlighted fragment
 */
3348 protected function parse_non_string_part($stuff_to_parse) {
// A single space is prepended here and removed again by the final
// substr(..., 1) -- presumably so the look-behind patterns also work at the
// very start of the fragment.
3349 $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3350
3351 // Highlight keywords
3352 $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
3353 $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3354 if ($this->lexic_permissions['STRINGS']) {
3355 $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3356 $disallowed_before .= $quotemarks;
3357 $disallowed_after .= $quotemarks;
3358 }
3359 $disallowed_before .= "])";
3360 $disallowed_after .= "])";
3361
3362 $parser_control_pergroup = false;
3363 if (isset($this->language_data['PARSER_CONTROL'])) {
3364 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3365 $x = 0; // check whether per-keyword-group parser_control is enabled
3366 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3367 $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3368 ++$x;
3369 }
3370 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3371 $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3372 ++$x;
3373 }
// Any entry besides the two global keys counted in $x is treated as a
// per-group setting.
3374 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3375 }
3376 }
3377
3378 foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3379 if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3380 $this->lexic_permissions['KEYWORDS'][$k]) {
3381
3382 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3383 $modifiers = $case_sensitive ? '' : 'i';
3384
3385 // NEW in 1.0.8 - per-keyword-group parser control
3386 $disallowed_before_local = $disallowed_before;
3387 $disallowed_after_local = $disallowed_after;
3388 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3389 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3390 $disallowed_before_local =
3391 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3392 }
3393
3394 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3395 $disallowed_after_local =
3396 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3397 }
3398 }
3399
// The current group is stored on the object, presumably for the
// handle_keyword_replace callback used below, which only receives the
// match array.
3400 $this->_kw_replace_group = $k;
3401
3402 //NEW in 1.0.8, the cached regexp list
3403 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3404 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
3405 $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3406 // Might make a more unique string for putting the number in soon
3407 // Basically, we don't put the styles in yet because then the styles themselves will
3408 // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3409 $stuff_to_parse = preg_replace_callback(
3410 "/$disallowed_before_local({$keywordset})(?!<DOT>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
3411 array($this, 'handle_keyword_replace'),
3412 $stuff_to_parse
3413 );
3414 }
3415 }
3416 }
3417
3418 // Regular expressions
// An array entry carries search/replace/modifiers/before/after parts; a plain
// string entry is used as the pattern itself and wrapped in one capture group.
// When line numbers are enabled the callback variant is used so the marker is
// closed and reopened at every newline.
3419 foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3420 if ($this->lexic_permissions['REGEXPS'][$key]) {
3421 if (is_array($regexp)) {
3422 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3423 // produce valid HTML when we match multiple lines
3424 $this->_hmr_replace = $regexp[GESHI_REPLACE];
3425 $this->_hmr_before = $regexp[GESHI_BEFORE];
3426 $this->_hmr_key = $key;
3427 $this->_hmr_after = $regexp[GESHI_AFTER];
3428 $stuff_to_parse = preg_replace_callback(
3429 "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3430 array($this, 'handle_multiline_regexps'),
3431 $stuff_to_parse);
3432 $this->_hmr_replace = false;
3433 $this->_hmr_before = '';
3434 $this->_hmr_after = '';
3435 } else {
3436 $stuff_to_parse = preg_replace(
3437 '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3438 $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3439 $stuff_to_parse);
3440 }
3441 } else {
3442 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3443 // produce valid HTML when we match multiple lines
3444 $this->_hmr_key = $key;
3445 $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3446 array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3447 $this->_hmr_key = '';
3448 } else {
3449 $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3450 }
3451 }
3452 }
3453 }
3454
3455 // Highlight numbers. As of 1.0.8 we support different types of numbers
3456 $numbers_found = false;
3457
// The number patterns are only applied when the PRECHECK_RX pattern matches
// the fragment at all.
3458 if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
3459 $numbers_found = true;
3460
3461 //For each of the formats ...
3462 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3463 //Check if it should be highlighted ...
3464 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3465 }
3466 }
3467
3468 //
3469 // Now that's all done, replace /[number]/ with the correct styles
3470 //
3471 foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3472 if (!$this->use_classes) {
3473 $attributes = ' style="' .
3474 (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3475 $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3476 } else {
3477 $attributes = ' class="kw' . $k . '"';
3478 }
3479 $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3480 }
3481
3482 if ($numbers_found) {
3483 // Put number styles in
3484 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3485 //Commented out for now, as this needs some review ...
3486 // if ($numbers_permissions & $id) {
3487 //Get the appropriate style ...
3488 //Checking for unset styles is done by the style cache builder ...
3489 if (!$this->use_classes) {
3490 $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3491 } else {
3492 $attributes = ' class="nu'.$id.'"';
3493 }
3494
3495 //Set in the correct styles ...
// Only the /NUM!id/ part is swapped for the attributes; the surrounding
// "<|" and ">" of the placeholder stay in place.
3496 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3497 // }
3498 }
3499 }
3500
3501 // Highlight methods and fields in objects
3502 if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3503 $oolang_spaces = "[\s]*";
3504 $oolang_before = "";
3505 $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3506 if (isset($this->language_data['PARSER_CONTROL'])) {
3507 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3508 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3509 $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3510 }
3511 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3512 $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3513 }
3514 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3515 $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3516 }
3517 }
3518 }
3519
3520 foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3521 if (false !== strpos($stuff_to_parse, $splitter)) {
3522 if (!$this->use_classes) {
3523 $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3524 } else {
3525 $attributes = ' class="me' . $key . '"';
3526 }
// Only the fourth group (the name after the splitter) is wrapped; the text
// before it, the splitter and the whitespace are put back unchanged.
3527 $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3528 }
3529 }
3530 }
3531
3532 //
3533 // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3534 // You try it, and see what happens ;)
3535 // TODO: Fix lexic permissions not converting entities if shouldn't
3536 // be highlighting regardless
3537 //
3538 if ($this->lexic_permissions['BRACKETS']) {
3539 $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3540 $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3541 }
3542
3543
3544 //FIX for symbol highlighting ...
3545 if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3546 //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
3547 $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
3548 $global_offset = 0;
3549 for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3550 $symbol_match = $pot_symbols[$s_id][0][0];
3551 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3552 // already highlighted blocks _must_ include either < or >
3553 // so if this conditional applies, we have to skip this match
3554 // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3555 if(strpos($symbol_match, '<SEMI>') === false &&
3556 strpos($symbol_match, '<PIPE>') === false) {
3557 continue;
3558 }
3559 }
3560
3561 // if we reach this point, we have a valid match which needs to be highlighted
3562
3563 $symbol_length = strlen($symbol_match);
3564 $symbol_offset = $pot_symbols[$s_id][0][1];
3565 unset($pot_symbols[$s_id]);
3566 $symbol_hl = "";
3567
3568 // if we have multiple styles, we have to handle them properly
3569 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3570 $old_sym = -1;
3571 // Split the current stuff to replace into its atomic symbols ...
3572 preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3573 foreach ($sym_match_syms[0] as $sym_ms) {
3574 //Check if consecutive symbols belong to the same group to save output ...
3575 if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3576 && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3577 if (-1 != $old_sym) {
3578 $symbol_hl .= "|>";
3579 }
3580 $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3581 if (!$this->use_classes) {
3582 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3583 } else {
3584 $symbol_hl .= '<| class="sy' . $old_sym . '">';
3585 }
3586 }
3587 $symbol_hl .= $sym_ms;
3588 }
3589 unset($sym_match_syms);
3590
3591 //Close remaining tags and insert the replacement at the right position ...
3592 //Take caution if symbol_hl is empty to avoid doubled closing spans.
3593 if (-1 != $old_sym) {
3594 $symbol_hl .= "|>";
3595 }
3596 } else {
3597 if (!$this->use_classes) {
3598 $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3599 } else {
3600 $symbol_hl = '<| class="sy0">';
3601 }
3602 $symbol_hl .= $symbol_match . '|>';
3603 }
3604
3605 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3606
3607 // since we replace old text with something of different size,
3608 // we'll have to keep track of the differences
3609 $global_offset += strlen($symbol_hl) - $symbol_length;
3610 }
3611 }
3612 //FIX for symbol highlighting ...
3613
3614 // Add class/style for regexps
3615 foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3616 if ($this->lexic_permissions['REGEXPS'][$key]) {
3617 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3618 $this->_rx_key = $key;
// The U modifier makes (.*) ungreedy, so each marker is matched up to the
// nearest following "|>".
3619 $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3620 array($this, 'handle_regexps_callback'),
3621 $stuff_to_parse);
3622 } else {
3623 if (!$this->use_classes) {
3624 $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3625 } else {
3626 if (is_array($this->language_data['REGEXPS'][$key]) &&
3627 array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3628 $attributes = ' class="' .
3629 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3630 } else {
3631 $attributes = ' class="re' . $key . '"';
3632 }
3633 }
3634 $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3635 }
3636 }
3637 }
3638
3639 // Replace <DOT> with . for urls
3640 $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3641 // Replace <|UR1| with <a href= for urls also
3642 if (isset($this->link_styles[GESHI_LINK])) {
3643 if ($this->use_classes) {
3644 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3645 } else {
3646 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3647 }
3648 } else {
3649 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3650 }
3651
3652 //
3653 // NOW we add the span thingy ;)
3654 //
3655
3656 $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3657 $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
// Drop the space that was prepended at the top of the method.
3658 return substr($stuff_to_parse, 1);
3659 }
3660
/**
 * Stores the time elapsed between two timestamps in $this->time.
 *
 * Both arguments are split on a single space and the two parts of each are
 * summed, which matches the "msec sec" string returned by microtime().
 *
 * @param string $start_time Timestamp taken before the work started
 * @param string $end_time   Timestamp taken after the work finished
 */
protected function set_time($start_time, $end_time)
{
    $began = explode(' ', $start_time);
    $finished = explode(' ', $end_time);

    // Same left-to-right evaluation as a single chained expression
    $elapsed = $finished[0] + $finished[1];
    $elapsed = $elapsed - $began[0];
    $elapsed = $elapsed - $began[1];

    $this->time = $elapsed;
}
3673
/**
 * Returns the value last stored by set_time().
 *
 * @return mixed The content of $this->time
 */
public function get_time()
{
    return $this->time;
}
3683
/**
 * Recursively merges any number of arrays, later arguments taking precedence.
 *
 * A value of a later array replaces the value stored under the same key,
 * except when the new value is an array and the key already exists: then both
 * values are merged recursively. If the existing value is not an array in
 * that situation, the recursive call raises an E_USER_WARNING and the key
 * ends up holding false.
 *
 * @return array|false The merged array, or false (after an E_USER_WARNING)
 *                     if any argument is not an array
 */
protected function merge_arrays()
{
    $arrays = func_get_args();

    // Validate the arguments up front; skipping this would be faster, but the
    // merge loop below would then emit its own warning on a non-array.
    foreach ($arrays as $position => $candidate) {
        if (!is_array($candidate)) {
            // also array_merge_recursive returns nothing in this case
            trigger_error('Argument #' . ($position+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
            return false;
        }
    }

    // The first array is always the base of the result
    $ret = $arrays[0];

    // Fold every remaining array into the result
    foreach (array_slice($arrays, 1) as $overlay) {
        foreach ($overlay as $key => $value) {
            $ret[$key] = (is_array($value) && isset($ret[$key]))
                ? $this->merge_arrays($ret[$key], $value)
                : $value;
        }
    }

    return $ret;
}
3721
/**
 * Loads a language file and initialises the highlighting permissions for it.
 *
 * Does nothing if $file_name is already the loaded language. Otherwise the
 * file is require'd (it is expected to fill $language_data), permissions for
 * keywords, single-line comments and regexps are set, the language's
 * ENABLE_FLAGS are applied, HARDCHAR defaults to ESCAPE_CHAR, and an optional
 * "<name>.style.php" file next to the language file may override styles
 * through $style_data.
 *
 * @param string $file_name Path of the language file to load
 */
protected function load_language($file_name)
{
    if ($file_name == $this->loaded_language) {
        // nothing to do, this file is already loaded
        return;
    }

    // Reset state before the language file is pulled in
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();
    $language_data = array();

    // The language file runs in this scope and fills $language_data
    require $file_name;

    // Perhaps some checking might be added here later to check that
    // $language_data is a valid thing but maybe not
    $this->language_data = $language_data;

    // Set strict mode if should be set
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Keyword groups are enabled by default unless they are empty
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        $this->lexic_permissions['KEYWORDS'][$key] = !empty($this->language_data['KEYWORDS'][$key]);
    }

    // Single-line comments and regexps are always enabled by default
    foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
        $this->lexic_permissions['COMMENTS'][$key] = true;
    }
    foreach (array_keys($this->language_data['REGEXPS']) as $key) {
        $this->lexic_permissions['REGEXPS'][$key] = true;
    }

    // for BenBE and future code reviews:
    // we can use empty here since we only check for existence and emptiness of an array
    // if it is not an array at all but rather false or null this will work as intended as well
    // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // everything except GESHI_NEVER counts as enabled
            $perm = $value !== GESHI_NEVER;

            if ($flag == 'ALL') {
                $this->enable_highlighting($perm);
            } elseif (!isset($this->lexic_permissions[$flag])) {
                // unknown lexic permission: ignore it
            } elseif (is_array($this->lexic_permissions[$flag])) {
                // apply the flag to every entry of the group
                $this->lexic_permissions[$flag] = array_fill_keys(
                    array_keys($this->lexic_permissions[$flag]),
                    $perm
                );
            } else {
                $this->lexic_permissions[$flag] = $perm;
            }
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
    //You need to set one for HARDESCAPES only in this case.
    if (!isset($this->language_data['HARDCHAR'])) {
        $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
    }

    //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (!is_readable($style_filename)) {
        return;
    }

    // Make sure no $style_data from the language file leaks into the check below
    unset($style_data);

    // The style file runs in this scope and may define $style_data
    include $style_filename;

    // Apply the new styles on top of the language's own styles
    if (isset($style_data) && is_array($style_data)) {
        $this->language_data['STYLES'] =
            $this->merge_arrays($this->language_data['STYLES'], $style_data);
    }
}
3819
/**
 * Wraps the parsed code in its final output markup and writes the result
 * back into $parsed_code.
 *
 * Depending on the header type and line-number mode the code is emitted as
 * an ordered list (one <li> per line), as a two-column table (line numbers
 * and code in separate <pre> cells) or as plain lines. Lines registered for
 * extra highlighting and "fancy" every-nth lines get their own styling.
 *
 * @param string $parsed_code The highlighted code; replaced in place by the finished HTML
 */
protected function finalise(&$parsed_code) {
    // Remove end parts of important declarations when no start marker exists.
    // The original author flags this handling as buggy.
    if ($this->enable_important_blocks &&
        (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
        $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
        $this->indent($parsed_code);
    }

    // Purge spans that wrap nothing but whitespace
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    $code = explode("\n", $parsed_code);
    $parsed_code = $this->header();

    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        // Line numbers via <ol>/<li>.
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

        for ($i = 0, $n = count($code); $i < $n; ++$i) {
            $line_no = $i + 1;
            $attrs = array();

            // Make lines have at least one space in them if they're empty
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            // Every nth line is a "special line" when fancy numbering is on
            $is_special = $this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1);

            if ($this->use_classes) {
                $attrs['class'][] = $is_special ? 'li2' : 'li1';
                $def_attr = $is_special ? ' class="de2"' : ' class="de1"';
            } else {
                $attrs['style'][] = $is_special ? $this->line_style2 : $this->line_style1;
                // The code style "covers up" the line style so that styles
                // applied to the line don't apply to the actual code on it
                $def_attr = ' style="' . $this->code_style . '"';
            }

            // Check which type of tag to insert for this line
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $start = "<pre$def_attr>";
                $end = '</pre>';
            } else {
                $start = "<div$def_attr>";
                $end = '</div>';
            }

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = $this->overall_id . '-' . $line_no;
            }

            // Is this some line with extra styles?
            if (in_array($line_no, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line_no])) {
                        $attrs['class'][] = "lx$line_no";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    $attrs['style'][] = $this->get_line_style($line_no);
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li$attr_string>$start{$code[$i]}$end</li>$ls";
            // Release the line as soon as it has been emitted
            unset($code[$i]);
        }
    } else {
        $n = count($code);
        if ($this->use_classes) {
            $attributes = ' class="de1"';
        } else {
            $attributes = ' style="'. $this->code_style .'"';
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID) {
            $parsed_code .= '<pre'. $attributes .'>';
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                if ($this->use_classes) {
                    $attrs = ' class="ln"';
                } else {
                    $attrs = ' style="'. $this->table_linenumber_style .'"';
                }
                $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';

                // Emit the line-number column. This is deliberately a separate
                // pass from the code column below (the original author notes
                // this is meant to help memory consumption).
                for ($i = 0; $i < $n; ++$i) {
                    list($open_html, $close) = $this->finalise_line_span_prefix($i);
                    $parsed_code .= $open_html;
                    $parsed_code .= $this->line_numbers_start + $i;
                    if ($close) {
                        $parsed_code .= str_repeat('</span>', $close);
                    } elseif ($i != $n) {
                        // NOTE(review): always true inside this loop, so the
                        // last number is followed by a newline too; kept as-is.
                        $parsed_code .= "\n";
                    }
                }
                $parsed_code .= '</pre></td><td'.$attributes.'>';
            }
            $parsed_code .= '<pre'. $attributes .'>';
        }

        // Emit the code lines; highlighted lines are wrapped in block spans
        for ($i = 0; $i < $n; ++$i) {
            // Make lines have at least one space in them if they're empty
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            list($open_html, $close) = $this->finalise_line_span_prefix($i);
            $parsed_code .= $open_html;
            $parsed_code .= $code[$i];

            if ($close) {
                $parsed_code .= str_repeat('</span>', $close);
            } elseif ($i + 1 < $n) {
                $parsed_code .= "\n";
            }
            unset($code[$i]);
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
            $parsed_code .= '</pre>';
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $parsed_code .= '</td>';
        }
    }

    $parsed_code .= $this->footer();
}

/**
 * Builds the opening <span> tags for one line of the table / plain output.
 *
 * Extra-line styles are looked up by the 1-based line number, matching the
 * <ol> branch of finalise() and the ".lx<line>" rules written by
 * get_stylesheet().
 *
 * @param int $i Zero-based index of the line
 * @return array Two elements: the opening HTML and the number of spans opened
 */
protected function finalise_line_span_prefix($i) {
    $line_no = $i + 1;
    $html = '';
    $opened = 0;

    // fancy lines
    if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
        $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
        if ($this->use_classes) {
            $html .= '<span class="xtra li2"><span class="de2">';
        } else {
            // The inner span "covers up" the special line style so that it
            // doesn't apply to the actual content of the line
            $html .= '<span style="display:block;' . $this->line_style2 . '">'
                . '<span style="' . $this->code_style . '">';
        }
        $opened += 2;
    }

    // Is this some line with extra styles?
    if (in_array($line_no, $this->highlight_extra_lines)) {
        if ($this->use_classes) {
            if (isset($this->highlight_extra_lines_styles[$line_no])) {
                $html .= "<span class=\"xtra lx$line_no\">";
            } else {
                $html .= "<span class=\"xtra ln-xtra\">";
            }
        } else {
            $html .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
        }
        ++$opened;
    }

    return array($html, $opened);
}
4064
/**
 * Creates the opening markup for the code block: the outer container with
 * its class/id/style attributes, the optional header content and, when
 * line numbers are enabled, the opening <ol> or table row.
 *
 * @return string|null The opening HTML (null if line numbers are enabled
 *                     with a header type this method does not know)
 */
protected function header() {
    // Attributes of the outer container
    $attributes = ' class="' . $this->_genCSSName($this->language);
    if ($this->overall_class != '') {
        $attributes .= " ".$this->_genCSSName($this->overall_class);
    }
    $attributes .= '"';

    if ($this->overall_id != '') {
        $attributes .= " id=\"{$this->overall_id}\"";
    }
    if ($this->overall_style != '' && !$this->use_classes) {
        $attributes .= ' style="' . $this->overall_style . '"';
    }

    $ol_attributes = '';

    if ($this->line_numbers_start != 1) {
        $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
    }

    // Get the header HTML (same treatment footer() gives the footer content)
    $header = $this->header_content;
    if ($header) {
        if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
            $header = str_replace("\n", '', $header);
        }
        $header = $this->replace_keywords($header);

        if ($this->use_classes) {
            $attr = ' class="head"';
        } else {
            $attr = " style=\"{$this->header_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
        } else {
            $header = "<div$attr>$header</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "$header<ol$attributes$ol_attributes>";
        }
        return $header . ($this->force_code_block ? '<div>' : '');
    }

    // Work out what to return and do it
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            return "<pre$attributes>$header<ol$ol_attributes>";
        } elseif ($this->header_type == GESHI_HEADER_DIV ||
            $this->header_type == GESHI_HEADER_PRE_VALID) {
            return "<div$attributes>$header<ol$ol_attributes>";
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            return "<table$attributes>$header<tbody><tr class=\"li1\">";
        }
    } else {
        if ($this->header_type == GESHI_HEADER_PRE) {
            return "<pre$attributes>$header" .
                ($this->force_code_block ? '<div>' : '');
        } else {
            return "<div$attributes>$header" .
                ($this->force_code_block ? '<div>' : '');
        }
    }
}
4143
/**
 * Creates the closing markup for the code block: the optional footer
 * content followed by the tags that close whatever header() opened.
 *
 * @return string The closing HTML
 */
protected function footer() {
    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            // Apply the footer class/style to the table cell, as header()
            // does for the <thead> cell
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</div>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</tr></tbody>$footer</table>";
        }
        // Without line numbers header() opens a <div> for this header type
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    } else {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</pre>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</pre>";
    }
}
4196
/**
 * Substitutes the placeholders that may appear in header/footer content.
 *
 * Each placeholder is accepted in angle-bracket and curly-brace form:
 * TIME (parse time, three decimals), LANGUAGE (language name),
 * VERSION (GESHI_VERSION) and SPEED (source bytes per second of parse time).
 *
 * @param string $instr The text containing placeholders
 * @return string The text with all placeholders replaced
 */
protected function replace_keywords($instr) {
    $elapsed = $this->get_time();
    $formatted_time = number_format($elapsed, 3);
    $language_name = $this->language_data['LANG_NAME'];

    // Throughput is only meaningful when some time was actually measured
    if ($elapsed <= 0) {
        $throughput = 'N/A';
    } else {
        $bytes_per_second = strlen($this->source) / $elapsed;
        $throughput = ($bytes_per_second >= 1024)
            ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
            : sprintf("%.0f B/s", $bytes_per_second);
    }

    // Insertion order matters: str_replace applies the pairs sequentially
    $substitutions = array(
        '<TIME>'     => $formatted_time,
        '{TIME}'     => $formatted_time,
        '<LANGUAGE>' => $language_name,
        '{LANGUAGE}' => $language_name,
        '<VERSION>'  => GESHI_VERSION,
        '{VERSION}'  => GESHI_VERSION,
        '<SPEED>'    => $throughput,
        '{SPEED}'    => $throughput,
    );

    return str_replace(array_keys($substitutions), array_values($substitutions), $instr);
}
4236
/**
 * Secure replacement for htmlspecialchars() that additionally turns ";" and
 * "|" into the internal placeholders <SEMI> and <PIPE>.
 *
 * @param string $string      The string to be converted
 * @param int    $quote_style ENT_COMPAT (default) converts double quotes only,
 *                            ENT_NOQUOTES converts no quotes,
 *                            ENT_QUOTES converts single quotes as well
 * @return string The converted string
 */
protected function hsc($string, $quote_style = ENT_COMPAT) {
    // Base table for ENT_COMPAT; never modified so that it stays valid
    // across calls
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
    );

    // Work on a per-call copy: changing the static table itself would make
    // one ENT_NOQUOTES / ENT_QUOTES call alter every later call.
    $translation = $aTransSpecchar;

    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($translation['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $translation["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $translation);
}
4319
/**
 * Returns a name usable as a CSS class/ID: names whose first character is
 * numeric get an underscore prepended.
 *
 * @param string $name The name to convert
 * @return string The name, prefixed with "_" if it starts with a numeric character
 */
protected function _genCSSName($name) {
    // isset() guards the offset access so an empty name doesn't raise an
    // "uninitialized string offset" notice/warning
    return (isset($name[0]) && is_numeric($name[0]) ? '_' : '') . $name;
}
4331
/**
 * Returns a stylesheet for the currently loaded language and settings.
 *
 * @param bool $economy_mode When true, rules for features that are switched
 *                           off (line numbers, disabled lexic groups, ...)
 *                           are left out and a shorter header comment is used
 * @return string The stylesheet, or '' if the object is in an error state
 */
public function get_stylesheet($economy_mode = true) {
    // With an error the language data is probably not populated, so no
    // stylesheet can be produced
    if ($this->error) {
        return '';
    }

    // Make sure the style rearrangements have been processed
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // Selector prefix: the overall ID if there is one, otherwise the
    // language class (plus the overall class when set)
    if ($this->overall_id) {
        $selector = '#' . $this->_genCSSName($this->overall_id);
    } else {
        $selector = '.' . $this->_genCSSName($this->language);
        if ($this->overall_class) {
            $selector .= '.' . $this->_genCSSName($this->overall_class);
        }
    }
    $selector .= ' ';

    // Header comment of the stylesheet
    $stylesheet = "/**\n";
    if (!$economy_mode) {
        $stylesheet .= " * GeSHi Dynamically Generated Stylesheet\n" .
            " * --------------------------------------\n" .
            " * Dynamically generated stylesheet for {$this->language}\n" .
            " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n";
    }
    $stylesheet .= " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2014 Benny Baumann\n" .
        " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n";
    if (!$economy_mode) {
        $stylesheet .= " * --------------------------------------\n";
    }
    $stylesheet .= " */\n";

    // Rules tied to line numbering are wanted in full mode or whenever
    // line numbers are enabled
    $want_line_rules = !$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS;

    // Default style for the code of a line
    if ($want_line_rules) {
        $stylesheet .= $selector . '.de1, ' . $selector . '.de2 {' . $this->code_style . "}\n";
    }

    // Overall style (empty styles are meaningless in either mode)
    if ($this->overall_style != '') {
        $stylesheet .= $selector . ' {' . $this->overall_style . "}\n";
    }

    // Link styles; loose comparison mirrors switch semantics
    foreach ($this->link_styles as $key => $style) {
        if ($style == '') {
            continue;
        }
        if ($key == GESHI_LINK) {
            $pseudo = 'link';
        } elseif ($key == GESHI_HOVER) {
            $pseudo = 'hover';
        } elseif ($key == GESHI_ACTIVE) {
            $pseudo = 'active';
        } elseif ($key == GESHI_VISITED) {
            $pseudo = 'visited';
        } else {
            continue;
        }
        $stylesheet .= $selector . 'a:' . $pseudo . ' {' . $style . "}\n";
    }

    // Header, footer and "important" styles
    if ($this->header_content_style != '') {
        $stylesheet .= $selector . '.head {' . $this->header_content_style . "}\n";
    }
    if ($this->footer_content_style != '') {
        $stylesheet .= $selector . '.foot {' . $this->footer_content_style . "}\n";
    }
    if ($this->important_styles != '') {
        $stylesheet .= $selector . '.imp {' . $this->important_styles . "}\n";
    }

    // Line number styles
    if ($want_line_rules && $this->line_style1 != '') {
        $stylesheet .= $selector . 'li, ' . $selector . '.li1 {' . $this->line_style1 . "}\n";
    }
    if ($want_line_rules && $this->table_linenumber_style != '') {
        $stylesheet .= $selector . '.ln {' . $this->table_linenumber_style . "}\n";
    }
    if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $stylesheet .= $selector . '.li2 {' . $this->line_style2 . "}\n";
    }

    // Keyword groups
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode ||
            !empty($this->lexic_permissions['KEYWORDS'][$group]))) {
            $stylesheet .= $selector . '.kw' . $group . ' {' . $styles . "}\n";
        }
    }

    // Comment groups (also wanted when a comment regexp exists for the group)
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode ||
            !empty($this->lexic_permissions['COMMENTS'][$group]) ||
            (!empty($this->language_data['COMMENT_REGEXP']) &&
            !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
            $stylesheet .= $selector . '.co' . $group . ' {' . $styles . "}\n";
        }
    }

    // Groups governed by a single permission flag each, in output order.
    // The third element marks groups whose 'HARD' key maps to the "_h" suffix.
    $flat_groups = array(
        array('ESCAPE_CHAR', 'es', true),
        array('BRACKETS', 'br', false),
        array('SYMBOLS', 'sy', false),
        array('STRINGS', 'st', true),
        array('NUMBERS', 'nu', false),
        array('METHODS', 'me', false),
    );
    foreach ($flat_groups as $spec) {
        list($type, $prefix, $maps_hard) = $spec;
        foreach ($this->language_data['STYLES'][$type] as $group => $styles) {
            if ($styles == '' || ($economy_mode && !$this->lexic_permissions[$type])) {
                continue;
            }
            if ($maps_hard && $group === 'HARD') {
                $group = '_h';
            }
            $stylesheet .= $selector . '.' . $prefix . $group . ' {' . $styles . "}\n";
        }
    }

    // Script styles (economy mode is ignored here)
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $stylesheet .= $selector . '.sc' . $group . ' {' . $styles . "}\n";
        }
    }

    // Regexp groups; a regexp may name its own CSS class via GESHI_CLASS
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles == '' || ($economy_mode &&
            empty($this->lexic_permissions['REGEXPS'][$group]))) {
            continue;
        }
        if (is_array($this->language_data['REGEXPS'][$group]) &&
            array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
            $stylesheet .= $selector . '.' .
                $this->language_data['REGEXPS'][$group][GESHI_CLASS] .
                ' {' . $styles . "}\n";
        } else {
            $stylesheet .= $selector . '.re' . $group . ' {' . $styles . "}\n";
        }
    }

    // Styles for lines being highlighted extra
    if (!$economy_mode || (count($this->highlight_extra_lines) != count($this->highlight_extra_lines_styles))) {
        $stylesheet .= $selector . '.ln-xtra, ' . $selector . 'li.ln-xtra, ' .
            $selector . 'div.ln-xtra {' . $this->highlight_extra_lines_style . "}\n";
    }
    $stylesheet .= $selector . "span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $stylesheet .= $selector . '.lx' . $lineid . ', ' . $selector . 'li.lx' . $lineid . ', ' .
            $selector . 'div.lx' . $lineid . ' {' . $linestyle . "}\n";
    }

    return $stylesheet;
}
4537
/**
 * Gets the style that is used for the specified extra-highlighted line.
 *
 * @param int $line The line number to look up
 * @return string The style registered for that line, or the default
 *                extra-line style if the line has none of its own
 */
protected function get_line_style($line) {
    if (isset($this->highlight_extra_lines_styles[$line])) {
        return $this->highlight_extra_lines_styles[$line];
    }

    // No individual style assigned: fall back to the default extra-line style
    return $this->highlight_extra_lines_style;
}
4554
/**
 * Turns a list of literal strings into one or more compact regular
 * expression alternations by folding common prefixes into nested groups.
 *
 * The result is split into several patterns when the pending pattern grows
 * beyond GESHI_MAX_PCRE_LENGTH or would exceed GESHI_MAX_PCRE_SUBPATTERNS
 * non-capturing groups.
 *
 * @param array  $list             The strings to match
 * @param string $regexp_delimiter The delimiter the final regexp will use
 * @return array The list of optimised regexp fragments
 */
protected function optimize_regexp_list($list, $regexp_delimiter = '/') {
    // Characters that must not end up first in a split-off key part
    $special_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    sort($list);

    $patterns = array('');
    $subpattern_count = 0;
    $pattern_idx = 0;

    // Prefix tree of the entries not yet written out, the key path of the
    // previously inserted entry, and the length collected so far
    $tokens = array();
    $key_path = array();
    $pending_len = 0;

    $total = count($list);
    for ($idx = 0; $idx < $total; ++$idx) {
        if ($pending_len > GESHI_MAX_PCRE_LENGTH) {
            // The pending pattern is getting too long: start a new one
            $patterns[++$pattern_idx] = $this->_optimize_regexp_list_tokens_to_string($tokens);
            $subpattern_count = substr_count($patterns[$pattern_idx], '(?:');
            $tokens = array();
            $pending_len = 0;
        }

        $depth = 0;
        $entry = preg_quote((string) $list[$idx], $regexp_delimiter);
        $node = &$tokens;

        // Descend along the previous entry's key path for as long as the new
        // entry shares a prefix with it, splitting keys where needed
        while (true) {
            if (isset($key_path[$depth])) {
                $key = $key_path[$depth];
                if ($key == $entry) {
                    // duplicate entry, skip it
                    continue 2;
                }

                // Length of the common prefix of entry and key
                for ($common = 0;
                    isset($entry[$common]) && isset($key[$common]) && $entry[$common] == $key[$common];
                    ++$common);

                if ($common > 0) {
                    if ($common == strlen($key)) {
                        // Key fully matched: the entry only appends to it
                        $node = &$node[$key];
                    } else {
                        // Only part of the key matches
                        $head = substr($key, 0, $common);
                        $tail = substr($key, $common);

                        if (in_array($head[0], $special_chars)
                            || in_array($tail[0], $special_chars)) {
                            // A regex char would become a first character:
                            // store the entry unsplit at this level instead
                            $node[$entry] = array('' => true);
                            array_splice($key_path, $depth, count($key_path), $entry);
                            $pending_len += strlen($entry);
                            continue;
                        }

                        // Move the previous tokens below the shared head
                        $node[$head] = array($tail => $node[$key]);
                        unset($node[$key]);
                        $node = &$node[$head];
                        // Rebuild the key path from this level on
                        array_splice($key_path, $depth, count($key_path), array($head, $tail));
                        $pending_len += strlen($tail);
                    }
                    ++$depth;
                    $entry = substr($entry, $common);
                    continue;
                }
                // otherwise fall through: nothing in common at this level
            }

            if ($depth == 0 && !empty($tokens)) {
                // Nothing shared with the pending tree: flush it to the
                // pattern list and start over
                $chunk = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $chunk_subpatterns = substr_count($chunk, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $subpattern_count + $chunk_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    $patterns[++$pattern_idx] = $chunk;
                    $subpattern_count = $chunk_subpatterns;
                } else {
                    if (!empty($patterns[$pattern_idx])) {
                        $chunk = '|' . $chunk;
                    }
                    $patterns[$pattern_idx] .= $chunk;
                    $subpattern_count += $chunk_subpatterns;
                }
                $tokens = array();
                $pending_len = 0;
            }

            // Insert the remainder of the entry as a new leaf
            $node[$entry] = array('' => true);
            array_splice($key_path, $depth, count($key_path), $entry);

            $pending_len += strlen($entry);
            break;
        }
        unset($list[$idx]);
    }

    // Make sure the last tokens get converted as well
    $chunk = $this->_optimize_regexp_list_tokens_to_string($tokens);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $subpattern_count + substr_count($chunk, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        if (!empty($patterns[$pattern_idx])) {
            ++$pattern_idx;
        }
        $patterns[$pattern_idx] = $chunk;
    } else {
        if (!empty($patterns[$pattern_idx])) {
            $chunk = '|' . $chunk;
        }
        $patterns[$pattern_idx] .= $chunk;
    }
    return $patterns;
}
4681
/**
 * Converts the token tree built by optimize_regexp_list() into a regexp
 * alternation string, recursing into sub-token arrays.
 *
 * @param array $tokens   Token tree: key => array of sub-tokens, where an
 *                        empty-string key marks that the key itself is a
 *                        complete entry
 * @param bool  $recursed Whether this is a recursive call (the final
 *                        simplifications run only at the top level)
 * @return string The regexp fragment, without a trailing pipe
 */
protected function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $list = '';
    foreach ($tokens as $token => $sub_tokens) {
        $list .= $token;
        $close_entry = isset($sub_tokens['']);
        unset($sub_tokens['']);
        if (!empty($sub_tokens)) {
            $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
            if ($close_entry) {
                // the token alone is an entry too: make the sub-tokens optional
                $list .= '?';
            }
        }
        $list .= '|';
    }
    if (!$recursed) {
        // NOTE: an optimisation for common trailing strings used to live
        // here but was disabled by the original author as buggy.

        // (?:p)? => p?
        $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
        // (?:a|b|c|d|...)? => [abcd...]?
        // TODO: a|bb|c => [ac]|bb
        static $callback_2;
        if (!isset($callback_2)) {
            $callback_2 = function($matches) {
                return "[" . str_replace("|", "", $matches[1]) . "]";
            };
        }
        $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
    }
    // return $list without trailing pipe
    return substr($list, 0, -1);
}
4728} // End Class GeSHi
4729
4730
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper: highlights a string with GeSHi using no outer
     * container (GESHI_HEADER_NONE) and wraps the result in <code> tags.
     *
     * @param string      $string   The code to highlight
     * @param string      $language The language to highlight the code in
     * @param string|null $path     Passed to the GeSHi constructor as the language path
     * @param bool        $return   Whether to return the HTML instead of echoing it
     * @return string|bool The HTML if $return is true; otherwise true on
     *                     success and false if GeSHi reports an error
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $html = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $html;
        }

        echo $html;

        return $highlighter->error() ? false : true;
    }
}
sprintf('%.4f', $callTime)
$result
$n
Definition: RandomTest.php:85
$comment
Definition: buildRTE.php:83
An exception for terminating execution or to throw for unit testing.
Definition: geshi.php:226
get_line_style($line)
Gets the style that is used for the specified line.
Definition: geshi.php:4544
enable_multiline_span($flag)
Sets whether spans and other HTML markup generated by GeSHi can span over multiple lines or not.
Definition: geshi.php:970
static get_language_name_from_extension( $extension, $lookup=array())
Given a file extension, this method returns either a valid geshi language name, or the empty string i...
Definition: geshi.php:1448
$error_messages
Definition: geshi.php:263
$time
Definition: geshi.php:315
get_language_name()
Gets a human-readable language name (thanks to Simon Patterson for the idea :))
Definition: geshi.php:612
$footer_content
Definition: geshi.php:327
set_language($language, $force_reset=false)
Sets the language for this object.
Definition: geshi.php:653
$link_target
Definition: geshi.php:486
build_parse_cache()
Setup caches needed for parsing.
Definition: geshi.php:1972
$force_code_block
Definition: geshi.php:346
$line_style1
Definition: geshi.php:437
$error
Definition: geshi.php:257
enable_highlighting($flag=true)
Enables all highlighting.
Definition: geshi.php:1421
$header_content
Definition: geshi.php:321
header()
Creates the header for the code block (with correct attributes)
Definition: geshi.php:4071
set_tab_width($width)
Sets how many spaces a tab is substituted for.
Definition: geshi.php:1350
$source
Definition: geshi.php:232
handle_regexps_callback($matches)
handles regular expressions highlighting-definitions with callback functions
Definition: geshi.php:3301
set_script_style($style, $preserve_defaults=false, $group=0)
Sets the styles for strict code blocks.
Definition: geshi.php:1214
get_time()
Gets the time taken to parse the code.
Definition: geshi.php:3680
merge_arrays()
Merges arrays recursively, overwriting values of the first array with values of later arrays.
Definition: geshi.php:3689
$_hmr_replace
Definition: geshi.php:542
enable_line_numbers($flag, $nth_row=5)
Sets whether line numbers should be displayed.
Definition: geshi.php:952
set_use_language_tab_width($use)
Sets whether or not to use the tab-stop width specified by the language.
Definition: geshi.php:1366
set_url_for_keyword_group($group, $url)
Sets the base URL to be used for keywords.
Definition: geshi.php:1748
$table_linenumber_style
Definition: geshi.php:449
get_supported_languages($full_names=false)
Get supported langs or an associative array lang=>full_name.
Definition: geshi.php:729
_optimize_regexp_list_tokens_to_string(&$tokens, $recursed=false)
this function creates the appropriate regexp string of an token array you should not call this functi...
Definition: geshi.php:4692
$loaded_language
Definition: geshi.php:506
$encoding
Definition: geshi.php:493
set_time($start_time, $end_time)
Sets the time taken to parse the code.
Definition: geshi.php:3668
$_hmr_after
Definition: geshi.php:543
optimize_keyword_group($key)
compile optimized regexp list for keyword group
Definition: geshi.php:1664
build_style_cache()
Setup caches needed for styling.
Definition: geshi.php:1922
$lexic_permissions
Definition: geshi.php:298
_genCSSName($name)
Generate a CSS class name from a given string.
Definition: geshi.php:4328
$_hmr_key
Definition: geshi.php:544
enable_important_blocks($flag)
Sets whether context-important blocks are highlighted.
Definition: geshi.php:1796
$overall_id
Definition: geshi.php:431
set_regexps_style($key, $style, $preserve_defaults=false)
Sets the styles for regexps.
Definition: geshi.php:1295
set_overall_style($style, $preserve_defaults=false)
Sets the styles for the code that will be outputted when this object is parsed.
Definition: geshi.php:841
$line_numbers
Definition: geshi.php:455
$language_data
Definition: geshi.php:244
set_overall_class($class)
Sets the overall classname for this block of code.
Definition: geshi.php:857
set_important_styles($styles)
Sets styles for important parts of the code.
Definition: geshi.php:1784
handle_multiline_regexps($matches)
handles newlines in REGEXPS matches.
Definition: geshi.php:3315
start_line_numbers_at($number)
Sets what number line numbers should start at.
Definition: geshi.php:1882
$footer_content_style
Definition: geshi.php:339
$line_numbers_start
Definition: geshi.php:407
set_brackets_highlighting($flag)
Turns highlighting on/off for brackets.
Definition: geshi.php:1131
$header_content_style
Definition: geshi.php:333
indent(&$result)
Swaps out spaces and tabs for HTML indentation.
Definition: geshi.php:3121
set_case_sensitivity($key, $case)
Sets whether a set of keywords are checked for in a case sensitive manner.
Definition: geshi.php:1321
$_rx_key
Definition: geshi.php:532
$line_style2
Definition: geshi.php:443
set_numbers_style($style, $preserve_defaults=false, $group=0)
Sets the styles for numbers.
Definition: geshi.php:1234
$strict_mode
Definition: geshi.php:274
set_encoding($encoding)
Sets the encoding used for htmlspecialchars(), for international support.
Definition: geshi.php:1898
set_regexps_highlighting($key, $flag)
Turns highlighting on/off for regexps.
Definition: geshi.php:1310
set_header_content($content)
Sets the content of the header block.
Definition: geshi.php:1694
$add_ids
Definition: geshi.php:375
set_numbers_highlighting($flag)
Turns highlighting on/off for numbers.
Definition: geshi.php:1248
$line_ending
Definition: geshi.php:401
optimize_regexp_list($list, $regexp_delimiter='/')
This function creates an optimized regular expression list from an array of strings.
Definition: geshi.php:4569
$parse_cache_built
Definition: geshi.php:514
replace_keywords($instr)
Replaces certain keywords in the header and footer with certain configuration values.
Definition: geshi.php:4205
load_language($file_name)
Gets language information and stores it for later use.
Definition: geshi.php:3729
$_kw_replace_group
Definition: geshi.php:531
enable_classes($flag=true)
Sets whether CSS classes should be used to highlight the source.
Definition: geshi.php:879
remove_keyword_group($key)
Removes a keyword group.
Definition: geshi.php:1647
$code_style
Definition: geshi.php:419
$allow_multiline_span
Definition: geshi.php:462
set_footer_content_style($style)
Sets the style for the footer content.
Definition: geshi.php:1724
$line_nth_row
Definition: geshi.php:468
set_keyword_group_highlighting($key, $flag=true)
Turns highlighting on/off for a keyword group.
Definition: geshi.php:1026
parse_code()
Returns the code in $this->source, highlighted and surrounded by the necessary HTML.
Definition: geshi.php:2159
$header_type
Definition: geshi.php:292
$language_path
Definition: geshi.php:250
$highlight_extra_lines_style
Definition: geshi.php:393
set_strings_style($style, $preserve_defaults=false, $group=0)
Sets the styles for strings.
Definition: geshi.php:1185
get_version()
Returns the version of GeSHi.
Definition: geshi.php:576
get_multiline_span()
Get current setting for multiline spans, see GeSHi->enable_multiline_span().
Definition: geshi.php:980
enable_keyword_links($enable=true)
Turns linking of keywords on or off.
Definition: geshi.php:1910
$language
Definition: geshi.php:238
remove_keyword($key, $word, $recompile=true)
Removes a keyword from a keyword group.
Definition: geshi.php:1599
set_footer_content($content)
Sets the content of the footer block.
Definition: geshi.php:1704
add_keyword_group($key, $styles, $case_sensitive=true, $words=array())
Creates a new keyword group.
Definition: geshi.php:1621
parse_non_string_part($stuff_to_parse)
Takes a string that has no strings or comments in it, and highlights stuff like keywords,...
Definition: geshi.php:3348
finalise(&$parsed_code)
Takes the parsed code and various options, and creates the HTML surrounding it to make it look nice.
Definition: geshi.php:3827
$highlight_extra_lines
Definition: geshi.php:381
$use_classes
Definition: geshi.php:280
set_line_style($style1, $style2='', $preserve_defaults=false)
Sets the styles for the line numbers.
Definition: geshi.php:918
disable_highlighting()
Disables all highlighting.
Definition: geshi.php:1407
change_case($instr)
Changes the case of a keyword for those languages where a change is asked for.
Definition: geshi.php:3220
$overall_style
Definition: geshi.php:413
set_symbols_style($style, $preserve_defaults=false, $group=0)
Sets the styles for symbols.
Definition: geshi.php:1146
set_escape_characters_highlighting($flag=true)
Turns highlighting on/off for escaped characters.
Definition: geshi.php:1095
$tab_width
Definition: geshi.php:474
set_case_keywords($case)
Sets the case that keywords should use when found.
Definition: geshi.php:1335
set_link_styles($type, $styles)
Sets styles for links in code.
Definition: geshi.php:1760
set_language_path($path)
Sets the path to the directory containing the language files.
Definition: geshi.php:695
$_hmr_before
Definition: geshi.php:541
$overall_class
Definition: geshi.php:425
footer()
Returns the footer for the code block.
Definition: geshi.php:4150
set_comments_highlighting($key, $flag=true)
Turns highlighting on/off for comment groups.
Definition: geshi.php:1066
$enable_important_blocks
Definition: geshi.php:360
enable_strict_mode($mode=true)
Enables/disables strict highlighting.
Definition: geshi.php:1394
hsc($string, $quote_style=ENT_COMPAT)
Secure replacement for PHP built-in function htmlspecialchars().
Definition: geshi.php:4289
set_strings_highlighting($flag)
Turns highlighting on/off for strings.
Definition: geshi.php:1199
add_keyword($key, $word)
Adds a keyword to a keyword group for highlighting.
Definition: geshi.php:1571
handle_keyword_replace($match)
Handles replacements of keywords to include markup and links if requested.
Definition: geshi.php:3240
set_overall_id($id)
Sets the overall id for this block of code.
Definition: geshi.php:868
$keyword_links
Definition: geshi.php:499
enable_inner_code_block($flag)
Sets whether to force a surrounding block around the highlighted code or not.
Definition: geshi.php:1735
set_header_type($type)
Sets the type of header to be used.
Definition: geshi.php:820
__construct($source='', $language='', $path='')
Creates a new GeSHi object, with source and language.
Definition: geshi.php:560
$important_styles
Definition: geshi.php:369
set_symbols_highlighting($flag)
Turns highlighting on/off for symbols.
Definition: geshi.php:1166
load_from_file($file_name, $lookup=array())
Given a file name, this method loads its contents in, and attempts to set the language automatically.
Definition: geshi.php:1555
set_methods_style($key, $style, $preserve_defaults=false)
Sets the styles for methods.
Definition: geshi.php:1265
enable_ids($flag=true)
Whether CSS IDs should be added to each line.
Definition: geshi.php:1806
get_real_tab_width()
Returns the tab width to use, based on the current language and user preference.
Definition: geshi.php:1377
set_line_ending($line_ending)
Sets the line-ending.
Definition: geshi.php:1863
set_header_content_style($style)
Sets the style for the header content.
Definition: geshi.php:1714
set_brackets_style($style, $preserve_defaults=false)
Sets the styles for brackets.
Definition: geshi.php:1113
set_comments_style($key, $style, $preserve_defaults=false)
Sets the styles for comment groups.
Definition: geshi.php:1041
highlight_lines_extra($lines, $style=null)
Specifies which lines to highlight extra.
Definition: geshi.php:1824
error()
Returns an error message associated with the last GeSHi operation, or false if no error has occurred.
Definition: geshi.php:588
set_keyword_group_style($key, $style, $preserve_defaults=false)
Sets the style for a keyword group.
Definition: geshi.php:995
set_link_target($target)
Sets the target for links in code.
Definition: geshi.php:1770
$link_styles
Definition: geshi.php:352
get_language_fullname($language)
Get full_name for a lang or false.
Definition: geshi.php:780
set_escape_characters_style($style, $preserve_defaults=false, $group=0)
Sets the styles for escaped characters.
Definition: geshi.php:1081
get_stylesheet($economy_mode=true)
Returns a stylesheet for the highlighted code.
Definition: geshi.php:4341
$use_language_tab_width
Definition: geshi.php:480
set_highlight_lines_extra_style($styles)
Sets the style for extra-highlighted lines.
Definition: geshi.php:1853
set_code_style($style, $preserve_defaults=false)
Sets the style for the actual code.
Definition: geshi.php:898
set_source($source)
Sets the source code for this object.
Definition: geshi.php:625
set_methods_highlighting($flag)
Turns highlighting on/off for methods.
Definition: geshi.php:1279
strip_language_name($language)
Clean up the language name to prevent malicious code injection.
Definition: geshi.php:636
$highlight_extra_lines_styles
Definition: geshi.php:387
$lang
Definition: consent.php:3
$key
Definition: croninfo.php:18
$i
Definition: disco.tpl.php:19
$x
Definition: example_009.php:98
$style
Definition: example_012.php:70
$style2
Definition: example_012.php:71
$code
Definition: example_050.php:99
$linestyle
if(!array_key_exists('StateId', $_REQUEST)) $id
const GESHI_NUMBER_INT_BASIC
Basic number format for integers.
Definition: geshi.php:165
const GESHI_MAYBE
Strict mode might apply, and can be enabled or disabled by GeSHi->enable_strict_mode().
Definition: geshi.php:126
const GESHI_HOVER
Links in the source in the :hover state.
Definition: geshi.php:104
const GESHI_HEADER_PRE_TABLE
Use a "table" to surround the source:
Definition: geshi.php:90
const GESHI_NUMBER_BIN_PREFIX_PERCENT
Number format to highlight binary numbers with a prefix %.
Definition: geshi.php:171
const GESHI_VISITED
Links in the source in the :visited state.
Definition: geshi.php:108
const GESHI_NUMBER_OCT_PREFIX_0O
Number format to highlight octal numbers with a prefix 0o (logtalk)
Definition: geshi.php:177
const GESHI_NUMBER_FLT_NONSCI
Number format to highlight floating-point numbers without support for scientific notation.
Definition: geshi.php:189
const GESHI_LINK
Links in the source in the :link state.
Definition: geshi.php:102
const GESHI_NUMBER_FLT_NONSCI_F
Number format to highlight floating-point numbers without support for scientific notation.
Definition: geshi.php:191
const GESHI_NUMBER_INT_CSTYLE
Enhanced number format for integers like seen in C.
Definition: geshi.php:167
const GESHI_CAPS_NO_CHANGE
Leave keywords found as the case that they are.
Definition: geshi.php:94
const GESHI_HEADER_PRE_VALID
Use a pre to wrap lines when line numbers are enabled or to wrap the whole code.
Definition: geshi.php:76
const GESHI_NUMBER_BIN_PREFIX_0B
Number format to highlight binary numbers with a prefix 0b (C)
Definition: geshi.php:173
const GESHI_ACTIVE
Links in the source in the :active state.
Definition: geshi.php:106
const GESHI_BEFORE
The key of the regex array defining what bracket group in a matched search to put before the replacem...
Definition: geshi.php:140
const GESHI_ERROR_INVALID_HEADER_TYPE
The header type passed to GeSHi->set_header_type() was invalid.
Definition: geshi.php:208
const GESHI_NUMBER_OCT_PREFIX
Number format to highlight octal numbers with a leading zero.
Definition: geshi.php:175
const GESHI_NUMBER_OCT_PREFIX_AT
Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series).
Definition: geshi.php:179
const GESHI_NUMBER_HEX_SUFFIX
Number format to highlight hex numbers with a suffix of h.
Definition: geshi.php:187
const GESHI_SEARCH
The key of the regex array defining what to search for.
Definition: geshi.php:132
const GESHI_NUMBER_BIN_SUFFIX
Number format to highlight binary numbers with a suffix "b".
Definition: geshi.php:169
const GESHI_NUMBER_OCT_SUFFIX
Number format to highlight octal numbers with a suffix of o.
Definition: geshi.php:181
const GESHI_LANG_ROOT(!defined('GESHI_ROOT'))
The language file directory for GeSHi @access private.
Definition: geshi.php:52
const GESHI_HEADER_NONE
Use nothing to surround the source.
Definition: geshi.php:70
const GESHI_CLASS
The key of the regex array defining a custom keyword to use for this regexp's html tag class.
Definition: geshi.php:146
const GESHI_NUMBER_HEX_PREFIX
Number format to highlight hex numbers with a prefix 0x.
Definition: geshi.php:183
const GESHI_NUMBER_HEX_PREFIX_DOLLAR
Number format to highlight hex numbers with a prefix $.
Definition: geshi.php:185
const GESHI_AFTER
The key of the regex array defining what bracket group in a matched search to put after the replaceme...
Definition: geshi.php:143
const GESHI_MAX_PCRE_SUBPATTERNS
some old PHP / PCRE subpatterns only support up to xxx subpatterns in regular expressions.
Definition: geshi.php:155
const GESHI_HEADER_DIV
Use a "div" to surround the source.
Definition: geshi.php:72
const GESHI_NUMBER_FLT_SCI_ZERO
Number format to highlight floating-point numbers with support for scientific notation (E) and requir...
Definition: geshi.php:195
const GESHI_NORMAL_LINE_NUMBERS
Use normal line numbers when building the result.
Definition: geshi.php:64
const GESHI_ERROR_FILE_NOT_READABLE
GeSHi could not open a file for reading (generally a language file)
Definition: geshi.php:206
const GESHI_NO_LINE_NUMBERS(!defined('GESHI_SECURITY_PARANOID'))
Use no line numbers when building the result.
Definition: geshi.php:62
const GESHI_ALWAYS
Strict mode always applies.
Definition: geshi.php:128
const GESHI_REPLACE
The key of the regex array defining what bracket group in a matched search to use as a replacement.
Definition: geshi.php:135
const GESHI_VERSION
The version of this GeSHi file.
Definition: geshi.php:43
const GESHI_FANCY_LINE_NUMBERS
Use fancy line numbers when building the result.
Definition: geshi.php:66
const GESHI_ERROR_INVALID_LINE_NUMBER_TYPE
The line number type passed to GeSHi->enable_line_numbers() was invalid.
Definition: geshi.php:210
const GESHI_ERROR_NO_SUCH_LANG
The language specified does not exist.
Definition: geshi.php:204
const GESHI_CAPS_LOWER
Lowercase keywords found.
Definition: geshi.php:98
const GESHI_START_IMPORTANT
The starter for important parts of the source.
Definition: geshi.php:114
const GESHI_HEADER_PRE
Use a "pre" to surround the source.
Definition: geshi.php:74
const GESHI_COMMENTS
Used in language files to mark comments.
Definition: geshi.php:149
const GESHI_NEVER
Strict mode never applies (@access private).
Definition: geshi.php:123
const GESHI_MAX_PCRE_LENGTH
it's also important not to generate too long regular expressions be generous here....
Definition: geshi.php:161
const GESHI_NUMBER_FLT_SCI_SHORT
Number format to highlight floating-point numbers with support for scientific notation (E) and option...
Definition: geshi.php:193
const GESHI_END_IMPORTANT
The ender for important parts of the source.
Definition: geshi.php:116
const GESHI_MODIFIERS
The key of the regex array defining any modifiers to the regular expression.
Definition: geshi.php:137
const GESHI_CAPS_UPPER
Uppercase keywords found.
Definition: geshi.php:96
if($format !==null) $name
Definition: metadata.php:146
$end
Definition: saml1-acs.php:18
$ret
Definition: parser.php:6
defined( 'APPLICATION_ENV')||define( 'APPLICATION_ENV'
Definition: bootstrap.php:27
$type
$url
if(isset($_REQUEST['delete'])) $list
Definition: registry.php:41
$attributes