ILIAS  Release_4_1_x_branch Revision 61804
 All Data Structures Namespaces Files Functions Variables Groups Pages
json.php
Go to the documentation of this file.
1 <?php
2 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
3 
/**
 * Decoder stack marker: a top-level slice of the array/object body currently
 * being split on commas by Services_JSON::decode().
 */
define('SERVICES_JSON_SLICE', 1);

/**
 * Decoder stack marker: the scanner is inside a quoted string.
 */
define('SERVICES_JSON_IN_STR', 2);

/**
 * Decoder stack marker: the scanner is inside a nested array ("[ ... ]").
 */
define('SERVICES_JSON_IN_ARR', 3);

/**
 * Decoder stack marker: the scanner is inside a nested object ("{ ... }").
 */
define('SERVICES_JSON_IN_OBJ', 4);

/**
 * Decoder stack marker: the scanner is inside a block comment.
 */
define('SERVICES_JSON_IN_CMT', 5);

/**
 * Behaviour flag for the Services_JSON constructor: decode() returns JSON
 * objects as associative arrays instead of stdClass instances.
 */
define('SERVICES_JSON_LOOSE_TYPE', 0x10);

/**
 * Behaviour flag for the Services_JSON constructor: encode() returns the
 * string 'null' for values it cannot encode instead of an error object.
 */
define('SERVICES_JSON_SUPPRESS_ERRORS', 0x20);
93 
/**
 * Converts between PHP values and JSON text without requiring ext/json.
 *
 * encode() accepts booleans, null, integers, floats, ASCII/UTF-8 strings,
 * arrays and objects. decode() accepts JSON text plus a few lenient
 * extensions (single-quoted strings, unquoted object keys, comments).
 */
class Services_JSON
{
    /**
     * Bit mask of SERVICES_JSON_* behaviour flags.
     *
     * Declared explicitly so assigning it is not a dynamic-property write.
     *
     * @var int
     */
    public $use = 0;

    /**
     * Creates a converter.
     *
     * @param int $use Bit mask combining SERVICES_JSON_LOOSE_TYPE (decode()
     *                 returns objects as associative arrays) and/or
     *                 SERVICES_JSON_SUPPRESS_ERRORS (encode() returns 'null'
     *                 for values it cannot encode).
     */
    public function __construct($use = 0)
    {
        $this->use = $use;
    }

    /**
     * Legacy PHP 4 constructor name.
     *
     * PHP 8 no longer treats a method named after the class as a constructor;
     * it is retained only so existing parent::Services_JSON() calls keep
     * working. Declared after __construct() on purpose.
     *
     * @param int $use See __construct().
     */
    public function Services_JSON($use = 0)
    {
        $this->__construct($use);
    }

    /**
     * Converts one big-endian UTF-16 character to UTF-8.
     *
     * @param string $utf16 Two bytes (one code unit) or four bytes (a
     *                      surrogate pair), big-endian, without a BOM.
     *
     * @return string UTF-8 bytes, or '' if fewer than two bytes were supplied
     *                and mbstring is unavailable.
     */
    public function utf162utf8($utf16)
    {
        if (function_exists('mb_convert_encoding')) {
            // 'UTF-16BE' rather than 'UTF-16': the latter sniffs a BOM and
            // would swallow U+FEFF instead of converting it.
            return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16BE');
        }

        if (strlen($utf16) < 2) {
            return '';
        }

        $bytes = (ord($utf16[0]) << 8) | ord($utf16[1]);

        // Surrogate pair: recombine into one supplementary-plane code point.
        if (strlen($utf16) >= 4 && $bytes >= 0xD800 && $bytes <= 0xDBFF) {
            $low = (ord($utf16[2]) << 8) | ord($utf16[3]);

            if ($low >= 0xDC00 && $low <= 0xDFFF) {
                $cp = 0x10000 + (($bytes - 0xD800) << 10) + ($low - 0xDC00);

                return chr(0xF0 | ($cp >> 18))
                     . chr(0x80 | (($cp >> 12) & 0x3F))
                     . chr(0x80 | (($cp >> 6) & 0x3F))
                     . chr(0x80 | ($cp & 0x3F));
            }
        }

        if ($bytes <= 0x7F) {
            // ASCII range: a single byte
            return chr($bytes);
        }

        if ($bytes <= 0x07FF) {
            // 2-byte UTF-8 character
            return chr(0xC0 | (($bytes >> 6) & 0x1F))
                 . chr(0x80 | ($bytes & 0x3F));
        }

        // 3-byte UTF-8 character ($bytes is at most 0xFFFF here)
        return chr(0xE0 | (($bytes >> 12) & 0x0F))
             . chr(0x80 | (($bytes >> 6) & 0x3F))
             . chr(0x80 | ($bytes & 0x3F));
    }

    /**
     * Converts one UTF-8 character to big-endian UTF-16.
     *
     * @param string $utf8 The bytes of a single UTF-8 character.
     *
     * @return string Two bytes for BMP characters, four bytes (a surrogate
     *                pair) for supplementary-plane characters. Without
     *                mbstring, '' is returned for anything that cannot be
     *                represented (5/6-byte forms, out-of-range values).
     */
    public function utf82utf16($utf8)
    {
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($utf8, 'UTF-16BE', 'UTF-8');
        }

        switch (strlen($utf8)) {
            case 1:
                // ASCII: zero high byte, so the result is a full code unit
                // just like the mbstring branch produces.
                return chr(0x00) . $utf8;

            case 2:
                // 110xxxxx 10yyyyyy -> 00000xxx xxyyyyyy
                return chr(0x07 & (ord($utf8[0]) >> 2))
                     . chr((0xC0 & (ord($utf8[0]) << 6))
                         | (0x3F & ord($utf8[1])));

            case 3:
                // 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyy yyzzzzzz
                return chr((0xF0 & (ord($utf8[0]) << 4))
                         | (0x0F & (ord($utf8[1]) >> 2)))
                     . chr((0xC0 & (ord($utf8[1]) << 6))
                         | (0x3F & ord($utf8[2])));

            case 4:
                // 11110xxx + three continuation bytes -> surrogate pair
                $cp = ((ord($utf8[0]) & 0x07) << 18)
                    | ((ord($utf8[1]) & 0x3F) << 12)
                    | ((ord($utf8[2]) & 0x3F) << 6)
                    | (ord($utf8[3]) & 0x3F);

                if ($cp < 0x10000 || $cp > 0x10FFFF) {
                    return '';
                }

                $cp -= 0x10000;
                $high = 0xD800 | ($cp >> 10);
                $low = 0xDC00 | ($cp & 0x03FF);

                return chr($high >> 8) . chr($high & 0xFF)
                     . chr($low >> 8) . chr($low & 0xFF);
        }

        // 5- and 6-byte forms have no UTF-16 representation
        return '';
    }

    /**
     * Encodes a PHP value as JSON.
     *
     * Arrays whose keys are exactly 0..n-1 become JSON arrays; every other
     * non-empty array, and every object (via get_object_vars()), becomes a
     * JSON object.
     *
     * @param mixed $var Value to encode; strings must be ASCII or UTF-8.
     *
     * @return mixed JSON text as a string; integers and floats are returned
     *               as int/float; a Services_JSON_Error for unsupported types
     *               unless SERVICES_JSON_SUPPRESS_ERRORS is set, in which
     *               case the string 'null'.
     */
    public function encode($var)
    {
        switch (gettype($var)) {
            case 'boolean':
                return $var ? 'true' : 'false';

            case 'NULL':
                return 'null';

            case 'integer':
                return (int) $var;

            case 'double':
            case 'float':
                return (float) $var;

            case 'string':
                return '"' . $this->escapeString($var) . '"';

            case 'array':
                // Any non-sequential key forces object notation: a sparse
                // numeric array would otherwise be re-indexed, and some JS
                // engines allocate every index up to the maximum.
                if (count($var) && (array_keys($var) !== range(0, count($var) - 1))) {
                    return $this->encodeProperties($var);
                }

                $elements = array_map(array($this, 'encode'), $var);

                foreach ($elements as $element) {
                    if (self::isError($element)) {
                        return $element;
                    }
                }

                return '[' . join(',', $elements) . ']';

            case 'object':
                return $this->encodeProperties(get_object_vars($var));

            default:
                return ($this->use & SERVICES_JSON_SUPPRESS_ERRORS)
                    ? 'null'
                    : new Services_JSON_Error(gettype($var) . " can not be encoded as JSON string");
        }
    }

    /**
     * Encodes a key/value map as a JSON object.
     *
     * @param array $vars Property names mapped to values.
     *
     * @return mixed The JSON object text, or the first error produced while
     *               encoding a member.
     */
    private function encodeProperties($vars)
    {
        $properties = array_map(array($this, 'name_value'),
                                array_keys($vars),
                                array_values($vars));

        foreach ($properties as $property) {
            if (self::isError($property)) {
                return $property;
            }
        }

        return '{' . join(',', $properties) . '}';
    }

    /**
     * Escapes the contents of a string for use inside a JSON string literal.
     *
     * Output is pure ASCII: every non-ASCII character becomes one or two
     * \uXXXX escapes.
     *
     * @param string $var ASCII or UTF-8 text.
     *
     * @return string Escaped text without the surrounding quotes.
     */
    private function escapeString($var)
    {
        $ascii = '';
        $length = strlen($var);

        for ($c = 0; $c < $length; ++$c) {
            $byte = $var[$c];
            $ord = ord($byte);

            switch (true) {
                case $ord == 0x08:
                    $ascii .= '\b';
                    break;
                case $ord == 0x09:
                    $ascii .= '\t';
                    break;
                case $ord == 0x0A:
                    $ascii .= '\n';
                    break;
                case $ord == 0x0C:
                    $ascii .= '\f';
                    break;
                case $ord == 0x0D:
                    $ascii .= '\r';
                    break;

                case $ord == 0x22:
                case $ord == 0x2F:
                case $ord == 0x5C:
                    // double quote, slash, backslash
                    $ascii .= '\\' . $byte;
                    break;

                case $ord < 0x20:
                    // Remaining control characters have no short escape and
                    // must not appear raw in JSON; previously they were
                    // silently dropped.
                    $ascii .= sprintf('\u%04x', $ord);
                    break;

                case $ord <= 0x7F:
                    // printable ASCII (and DEL) passes through unchanged
                    $ascii .= $byte;
                    break;

                default:
                    $size = $this->utf8SequenceLength($ord);

                    if ($size === 0) {
                        // Stray continuation byte or 0xFE/0xFF: skipped, as
                        // this encoder has always done.
                        break;
                    }

                    $char = substr($var, $c, $size);
                    $c += $size - 1;

                    // A sequence cut off by the end of the string cannot be
                    // decoded; emit U+FFFD instead of reading out of bounds.
                    $ascii .= (strlen($char) === $size)
                        ? $this->unicodeEscape($char)
                        : '\ufffd';
                    break;
            }
        }

        return $ascii;
    }

    /**
     * Returns the length announced by a UTF-8 lead byte.
     *
     * @param int $lead Byte value of the first byte of a sequence.
     *
     * @return int 2 to 6 for a multi-byte lead byte, 0 for any other byte.
     */
    private function utf8SequenceLength($lead)
    {
        if (($lead & 0xE0) == 0xC0) {
            return 2; // 110XXXXX: U-00000080 - U-000007FF
        }
        if (($lead & 0xF0) == 0xE0) {
            return 3; // 1110XXXX: U-00000800 - U-0000FFFF
        }
        if (($lead & 0xF8) == 0xF0) {
            return 4; // 11110XXX: U-00010000 - U-001FFFFF
        }
        if (($lead & 0xFC) == 0xF8) {
            return 5; // 111110XX: U-00200000 - U-03FFFFFF
        }
        if (($lead & 0xFE) == 0xFC) {
            return 6; // 1111110X: U-04000000 - U-7FFFFFFF
        }

        return 0;
    }

    /**
     * Renders one UTF-8 character as JSON \uXXXX escape(s).
     *
     * @param string $utf8 The bytes of a single UTF-8 character.
     *
     * @return string One escape per UTF-16 code unit (two for a surrogate
     *                pair), or the U+FFFD escape if conversion yields nothing.
     */
    private function unicodeEscape($utf8)
    {
        $utf16 = $this->utf82utf16($utf8);

        if (!is_string($utf16) || $utf16 === '') {
            return '\ufffd';
        }

        // One escape per 16-bit unit; formatting all bytes at once produced
        // an invalid 8-digit escape for surrogate pairs.
        $escaped = '';
        $size = strlen($utf16);

        for ($i = 0; $i < $size; $i += 2) {
            $escaped .= sprintf('\u%04s', bin2hex(substr($utf16, $i, 2)));
        }

        return $escaped;
    }

    /**
     * Encodes one object member as '"name":value'.
     *
     * Public because it is used as an array_map() callback.
     *
     * @param string $name  Member name; converted with strval().
     * @param mixed  $value Member value.
     *
     * @return mixed The JSON text, or the error produced while encoding the
     *               value.
     */
    public function name_value($name, $value)
    {
        $encoded_value = $this->encode($value);

        if (self::isError($encoded_value)) {
            return $encoded_value;
        }

        return $this->encode(strval($name)) . ':' . $encoded_value;
    }

    /**
     * Strips comments and surrounding whitespace from a JSON fragment.
     *
     * @param string $str JSON text.
     *
     * @return string The text without '//' comment lines, without a leading
     *                or trailing block comment, and trimmed.
     */
    public function reduce_string($str)
    {
        $str = preg_replace(array(

                // eliminate single line comments in '// ...' form
                '#^\s*//(.+)$#m',

                // eliminate multi-line comments in '/* ... */' form, at start of string
                '#^\s*/\*(.+)\*/#Us',

                // eliminate multi-line comments in '/* ... */' form, at end of string
                '#/\*(.+)\*/\s*$#Us'

            ), '', $str);

        // eliminate extraneous space
        return trim($str);
    }

    /**
     * Decodes JSON text into a PHP value.
     *
     * @param string $str JSON text.
     *
     * @return mixed bool, null, int, float, UTF-8 string, array, or for JSON
     *               objects a stdClass (an associative array when
     *               SERVICES_JSON_LOOSE_TYPE is set). Unrecognised input
     *               yields null.
     */
    public function decode($str)
    {
        $str = $this->reduce_string($str);

        switch (strtolower($str)) {
            case 'true':
                return true;

            case 'false':
                return false;

            case 'null':
                return null;
        }

        if (is_numeric($str)) {
            // Return an int where the value is integral, otherwise a float.
            return ((float) $str == (int) $str)
                ? (int) $str
                : (float) $str;
        }

        $m = array();

        if (preg_match('/^("|\').*(\1)$/s', $str, $m) && $m[1] == $m[2]) {
            return $this->decodeString($str);
        }

        if (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
            return $this->decodeContainer($str);
        }

        return null;
    }

    /**
     * Decodes a quoted JSON string literal to UTF-8.
     *
     * @param string $str The literal including its delimiting quotes.
     *
     * @return string The unescaped UTF-8 text.
     */
    private function decodeString($str)
    {
        $delim = substr($str, 0, 1);
        $chrs = substr($str, 1, -1);
        $utf8 = '';
        $strlen_chrs = strlen($chrs);

        for ($c = 0; $c < $strlen_chrs; ++$c) {
            $pair = substr($chrs, $c, 2);
            $ord = ord($chrs[$c]);

            switch (true) {
                case $pair === '\b':
                    $utf8 .= chr(0x08);
                    ++$c;
                    break;
                case $pair === '\t':
                    $utf8 .= chr(0x09);
                    ++$c;
                    break;
                case $pair === '\n':
                    $utf8 .= chr(0x0A);
                    ++$c;
                    break;
                case $pair === '\f':
                    $utf8 .= chr(0x0C);
                    ++$c;
                    break;
                case $pair === '\r':
                    $utf8 .= chr(0x0D);
                    ++$c;
                    break;

                case $pair === '\\"':
                case $pair === '\\\'':
                case $pair === '\\\\':
                case $pair === '\\/':
                    // An escaped quote of the *other* kind is not unescaped
                    // here: only the backslash is dropped and the quote is
                    // picked up on the next iteration.
                    if (($delim == '"' && $pair != '\\\'') ||
                        ($delim == "'" && $pair != '\\"')) {
                        $utf8 .= $chrs[++$c];
                    }
                    break;

                case preg_match('/^\\\\u[0-9a-f]{4}/i', substr($chrs, $c, 6)) === 1:
                    // escaped UTF-16 code unit
                    $unit = hexdec(substr($chrs, $c + 2, 4));
                    $utf16 = chr(hexdec(substr($chrs, $c + 2, 2)))
                           . chr(hexdec(substr($chrs, $c + 4, 2)));
                    $c += 5;

                    // A high surrogate directly followed by an escaped low
                    // surrogate encodes a single character; convert both
                    // units together.
                    $low = array();
                    if ($unit >= 0xD800 && $unit <= 0xDBFF
                        && preg_match('/^\\\\u(d[c-f][0-9a-f]{2})/i', substr($chrs, $c + 1, 6), $low)) {
                        $utf16 .= chr(hexdec(substr($low[1], 0, 2)))
                                . chr(hexdec(substr($low[1], 2, 2)));
                        $c += 6;
                    }

                    $utf8 .= $this->utf162utf8($utf16);
                    break;

                case ($ord >= 0x20) && ($ord <= 0x7F):
                    $utf8 .= $chrs[$c];
                    break;

                default:
                    // Raw UTF-8 multi-byte sequence: copy it through intact.
                    // Any other byte (raw control characters, stray
                    // continuation bytes) is skipped.
                    $size = $this->utf8SequenceLength($ord);

                    if ($size > 0) {
                        $utf8 .= substr($chrs, $c, $size);
                        $c += $size - 1;
                    }
                    break;
            }
        }

        return $utf8;
    }

    /**
     * Decodes a JSON array or object.
     *
     * The body is scanned once with a stack of "what am I inside" markers so
     * that it is split only on commas that are not nested in a string, array,
     * object or comment; each slice is then decoded recursively.
     *
     * @param string $str Text starting with '[' or '{' and ending with the
     *                    matching bracket.
     *
     * @return mixed array for '[...]'; stdClass for '{...}', or an
     *               associative array when SERVICES_JSON_LOOSE_TYPE is set.
     */
    private function decodeContainer($str)
    {
        $isList = ($str[0] == '[');

        if ($isList || ($this->use & SERVICES_JSON_LOOSE_TYPE)) {
            $result = array();
        } else {
            $result = new stdClass();
        }

        $stk = array(array('what'  => SERVICES_JSON_SLICE,
                           'where' => 0,
                           'delim' => false));

        $chrs = $this->reduce_string(substr($str, 1, -1));

        if ($chrs === '') {
            return $result;
        }

        $nestable = array(SERVICES_JSON_SLICE, SERVICES_JSON_IN_ARR, SERVICES_JSON_IN_OBJ);
        $strlen_chrs = strlen($chrs);

        // <= on purpose: the extra iteration flushes the final slice
        for ($c = 0; $c <= $strlen_chrs; ++$c) {

            $top = end($stk);
            $atEnd = ($c == $strlen_chrs);
            $chr = $atEnd ? '' : $chrs[$c];
            $pair = substr($chrs, $c, 2);

            if ($atEnd || (($chr == ',') && ($top['what'] == SERVICES_JSON_SLICE))) {
                // found a comma that is not inside a string, array, etc.,
                // OR we've reached the end of the character list
                $slice = substr($chrs, $top['where'], ($c - $top['where']));
                array_push($stk, array('what' => SERVICES_JSON_SLICE, 'where' => ($c + 1), 'delim' => false));

                if ($isList) {
                    array_push($result, $this->decode($slice));
                } else {
                    $parts = array();
                    $matched = false;

                    if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                        // "name":value pair
                        $key = $this->decode($parts[1]);
                        $matched = true;
                    } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                        // name:value pair, where name is unquoted
                        $key = $parts[1];
                        $matched = true;
                    }

                    // slices matching neither form are ignored
                    if ($matched) {
                        $val = $this->decode($parts[2]);

                        if (is_array($result)) {
                            $result[$key] = $val;
                        } else {
                            $result->$key = $val;
                        }
                    }
                }

            } elseif ((($chr == '"') || ($chr == "'")) && ($top['what'] != SERVICES_JSON_IN_STR)) {
                // found a quote, and we are not inside a string
                array_push($stk, array('what' => SERVICES_JSON_IN_STR, 'where' => $c, 'delim' => $chr));

            } elseif (($top['what'] == SERVICES_JSON_IN_STR) &&
                      ($chr === $top['delim']) &&
                      !$this->isEscapedAt($chrs, $c)) {
                // found the closing quote of the current string
                array_pop($stk);

            } elseif (($chr == '[') && in_array($top['what'], $nestable, true)) {
                // found a left-bracket, and we are in an array, object, or slice
                array_push($stk, array('what' => SERVICES_JSON_IN_ARR, 'where' => $c, 'delim' => false));

            } elseif (($chr == ']') && ($top['what'] == SERVICES_JSON_IN_ARR)) {
                // found a right-bracket, and we're in an array
                array_pop($stk);

            } elseif (($chr == '{') && in_array($top['what'], $nestable, true)) {
                // found a left-brace, and we are in an array, object, or slice
                array_push($stk, array('what' => SERVICES_JSON_IN_OBJ, 'where' => $c, 'delim' => false));

            } elseif (($chr == '}') && ($top['what'] == SERVICES_JSON_IN_OBJ)) {
                // found a right-brace, and we're in an object
                array_pop($stk);

            } elseif (($pair == '/*') && in_array($top['what'], $nestable, true)) {
                // found a comment start, and we are in an array, object, or slice
                array_push($stk, array('what' => SERVICES_JSON_IN_CMT, 'where' => $c, 'delim' => false));
                $c++;

            } elseif (($pair == '*/') && ($top['what'] == SERVICES_JSON_IN_CMT)) {
                // found a comment end, and we're in one now
                array_pop($stk);
                $c++;

                // Blank the comment out in place so the enclosing slice
                // keeps its offsets but no longer contains the comment.
                for ($i = $top['where']; $i <= $c; ++$i) {
                    $chrs = substr_replace($chrs, ' ', $i, 1);
                }
            }
        }

        return $result;
    }

    /**
     * Tells whether the character at $pos is escaped by a backslash.
     *
     * Counts backwards from $pos instead of copying the whole prefix, so
     * scanning long strings stays linear.
     *
     * @param string $chrs Text being scanned.
     * @param int    $pos  Offset of the character in question.
     *
     * @return bool True when an odd number of backslashes directly precedes
     *              the character.
     */
    private function isEscapedAt($chrs, $pos)
    {
        $backslashes = 0;

        for ($i = $pos - 1; $i >= 0 && $chrs[$i] === '\\'; --$i) {
            ++$backslashes;
        }

        return ($backslashes % 2) === 1;
    }

    /**
     * Tells whether a value is an error produced by this library.
     *
     * Static because the class itself, and typical client code, call it as
     * Services_JSON::isError(); instance calls keep working as well.
     *
     * @param mixed $data Value to inspect.
     * @param mixed $code Passed through to PEAR::isError() when the PEAR
     *                    class is available; otherwise unused.
     *
     * @return bool
     */
    public static function isError($data, $code = null)
    {
        if (class_exists('pear')) {
            return PEAR::isError($data, $code);
        }

        // get_class() returns the declared (mixed-case) name on PHP 5+, so a
        // comparison against a lowercase literal never matched; instanceof
        // also covers subclasses.
        return $data instanceof Services_JSON_Error;
    }
}
778 
if (class_exists('PEAR_Error')) {

    /**
     * Error value returned by Services_JSON; a PEAR_Error when PEAR is loaded.
     */
    class Services_JSON_Error extends PEAR_Error
    {
        /**
         * Forwards all arguments to the PEAR_Error constructor.
         *
         * @param string $message  Error message.
         * @param mixed  $code     Passed through to PEAR_Error.
         * @param mixed  $mode     Passed through to PEAR_Error.
         * @param mixed  $options  Passed through to PEAR_Error.
         * @param mixed  $userinfo Passed through to PEAR_Error.
         */
        public function __construct($message = 'unknown error', $code = null,
                                    $mode = null, $options = null, $userinfo = null)
        {
            // PEAR releases that predate PHP 7 support only define the
            // PHP 4-style constructor, so pick whichever the parent offers.
            if (method_exists('PEAR_Error', '__construct')) {
                parent::__construct($message, $code, $mode, $options, $userinfo);
            } else {
                parent::PEAR_Error($message, $code, $mode, $options, $userinfo);
            }
        }

        /**
         * Legacy PHP 4 constructor name, kept for existing
         * parent::Services_JSON_Error() calls.
         */
        public function Services_JSON_Error($message = 'unknown error', $code = null,
                                            $mode = null, $options = null, $userinfo = null)
        {
            $this->__construct($message, $code, $mode, $options, $userinfo);
        }
    }

} else {

    /**
     * Stand-alone error value returned by Services_JSON when PEAR is not
     * loaded. Exposes getMessage()/getCode() so callers can inspect it the
     * same way they would inspect a PEAR_Error.
     */
    class Services_JSON_Error
    {
        /**
         * @var string Error message given to the constructor.
         */
        public $message = 'unknown error';

        /**
         * @var mixed Error code given to the constructor.
         */
        public $code = null;

        /**
         * Records the message and code; the remaining parameters exist only
         * for signature compatibility with the PEAR-based variant.
         *
         * @param string $message  Error message.
         * @param mixed  $code     Error code.
         * @param mixed  $mode     Unused.
         * @param mixed  $options  Unused.
         * @param mixed  $userinfo Unused.
         */
        public function __construct($message = 'unknown error', $code = null,
                                    $mode = null, $options = null, $userinfo = null)
        {
            $this->message = $message;
            $this->code = $code;
        }

        /**
         * Legacy PHP 4 constructor name, kept for existing
         * parent::Services_JSON_Error() calls.
         */
        public function Services_JSON_Error($message = 'unknown error', $code = null,
                                            $mode = null, $options = null, $userinfo = null)
        {
            $this->__construct($message, $code, $mode, $options, $userinfo);
        }

        /**
         * @return string The error message.
         */
        public function getMessage()
        {
            return $this->message;
        }

        /**
         * @return mixed The error code.
         */
        public function getCode()
        {
            return $this->code;
        }
    }

}