ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
Diff.php
Go to the documentation of this file.
1 <?php
16 class Text_Diff
17 {
18 
24  public $_edits;
25 
33  public function __construct($from_lines, $to_lines)
34  {
35  array_walk($from_lines, array($this, '_trimNewlines'));
36  array_walk($to_lines, array($this, '_trimNewlines'));
37 
38  if (extension_loaded('xdiff')) {
40  } else {
42  }
43 
44  $this->_edits = $engine->diff($from_lines, $to_lines);
45  }
46 
50  public function getDiff()
51  {
52  return $this->_edits;
53  }
54 
69  public function reverse()
70  {
71  $rev = clone($obj);
72 
73  $rev->_edits = array();
74  foreach ($this->_edits as $edit) {
75  $rev->_edits[] = $edit->reverse();
76  }
77  return $rev;
78  }
79 
85  public function isEmpty()
86  {
87  foreach ($this->_edits as $edit) {
88  if (!$edit instanceof Text_Diff_Op_copy) {
89  return false;
90  }
91  }
92  return true;
93  }
94 
102  public function lcs()
103  {
104  $lcs = 0;
105  foreach ($this->_edits as $edit) {
106  if ($edit instanceof Text_Diff_Op_copy) {
107  $lcs += count($edit->orig);
108  }
109  }
110  return $lcs;
111  }
112 
120  public function getOriginal()
121  {
122  $lines = array();
123  foreach ($this->_edits as $edit) {
124  if ($edit->orig) {
125  array_splice($lines, count($lines), 0, $edit->orig);
126  }
127  }
128  return $lines;
129  }
130 
138  public function getFinal()
139  {
140  $lines = array();
141  foreach ($this->_edits as $edit) {
142  if ($edit->final) {
143  array_splice($lines, count($lines), 0, $edit->final);
144  }
145  }
146  return $lines;
147  }
148 
156  public function _trimNewlines(&$line, $key)
157  {
158  $line = str_replace(array("\n", "\r"), '', $line);
159  }
160 
166  public function _check($from_lines, $to_lines)
167  {
168  if (serialize($from_lines) != serialize($this->getOriginal())) {
169  trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
170  }
171  if (serialize($to_lines) != serialize($this->getFinal())) {
172  trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
173  }
174 
175  $rev = $this->reverse();
176  if (serialize($to_lines) != serialize($rev->getOriginal())) {
177  trigger_error("Reversed original doesn't match", E_USER_ERROR);
178  }
179  if (serialize($from_lines) != serialize($rev->getFinal())) {
180  trigger_error("Reversed final doesn't match", E_USER_ERROR);
181  }
182 
183  $prevtype = null;
184  foreach ($this->_edits as $edit) {
185  if ($prevtype == get_class($edit)) {
186  trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
187  }
188  $prevtype = get_class($edit);
189  }
190 
191  return true;
192  }
193 }
194 
202 {
203 
220  public function __construct(
221  $from_lines,
222  $to_lines,
223  $mapped_from_lines,
224  $mapped_to_lines
225  ) {
226  assert(count($from_lines) == count($mapped_from_lines));
227  assert(count($to_lines) == count($mapped_to_lines));
228 
229  parent::__construct($mapped_from_lines, $mapped_to_lines);
230 
231  $xi = $yi = 0;
232  for ($i = 0; $i < count($this->_edits); $i++) {
233  $orig = &$this->_edits[$i]->orig;
234  if (is_array($orig)) {
235  $orig = array_slice($from_lines, $xi, count($orig));
236  $xi += count($orig);
237  }
238 
239  $final = &$this->_edits[$i]->final;
240  if (is_array($final)) {
241  $final = array_slice($to_lines, $yi, count($final));
242  $yi += count($final);
243  }
244  }
245  }
246 }
247 
259 {
260  public function diff($from_lines, $to_lines)
261  {
262  /* Convert the two input arrays into strings for xdiff processing. */
263  $from_string = implode("\n", $from_lines);
264  $to_string = implode("\n", $to_lines);
265 
266  /* Diff the two strings and convert the result to an array. */
267  $diff = xdiff_string_diff($from_string, $to_string, count($to_lines));
268  $diff = explode("\n", $diff);
269 
270  /* Walk through the diff one line at a time. We build the $edits
271  * array of diff operations by reading the first character of the
272  * xdiff output (which is in the "unified diff" format).
273  *
274  * Note that we don't have enough information to detect "changed"
275  * lines using this approach, so we can't add Text_Diff_Op_changed
276  * instances to the $edits array. The result is still perfectly
277  * valid, albeit a little less descriptive and efficient. */
278  $edits = array();
279  foreach ($diff as $line) {
280  switch ($line[0]) {
281  case ' ':
282  $edits[] = new Text_Diff_Op_copy(array(substr($line, 1)));
283  break;
284 
285  case '+':
286  $edits[] = new Text_Diff_Op_add(array(substr($line, 1)));
287  break;
288 
289  case '-':
290  $edits[] = new Text_Diff_Op_delete(array(substr($line, 1)));
291  break;
292  }
293  }
294 
295  return $edits;
296  }
297 }
298 
324 {
325  public function diff($from_lines, $to_lines)
326  {
327  $n_from = count($from_lines);
328  $n_to = count($to_lines);
329 
330  $this->xchanged = $this->ychanged = array();
331  $this->xv = $this->yv = array();
332  $this->xind = $this->yind = array();
333  unset($this->seq);
334  unset($this->in_seq);
335  unset($this->lcs);
336 
337  // Skip leading common lines.
338  for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
339  if ($from_lines[$skip] != $to_lines[$skip]) {
340  break;
341  }
342  $this->xchanged[$skip] = $this->ychanged[$skip] = false;
343  }
344 
345  // Skip trailing common lines.
346  $xi = $n_from;
347  $yi = $n_to;
348  for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
349  if ($from_lines[$xi] != $to_lines[$yi]) {
350  break;
351  }
352  $this->xchanged[$xi] = $this->ychanged[$yi] = false;
353  }
354 
355  // Ignore lines which do not exist in both files.
356  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
357  $xhash[$from_lines[$xi]] = 1;
358  }
359  for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
360  $line = $to_lines[$yi];
361  if (($this->ychanged[$yi] = empty($xhash[$line]))) {
362  continue;
363  }
364  $yhash[$line] = 1;
365  $this->yv[] = $line;
366  $this->yind[] = $yi;
367  }
368  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
369  $line = $from_lines[$xi];
370  if (($this->xchanged[$xi] = empty($yhash[$line]))) {
371  continue;
372  }
373  $this->xv[] = $line;
374  $this->xind[] = $xi;
375  }
376 
377  // Find the LCS.
378  $this->_compareseq(0, count($this->xv), 0, count($this->yv));
379 
380  // Merge edits when possible.
381  $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
382  $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
383 
384  // Compute the edit operations.
385  $edits = array();
386  $xi = $yi = 0;
387  while ($xi < $n_from || $yi < $n_to) {
388  assert($yi < $n_to || $this->xchanged[$xi]);
389  assert($xi < $n_from || $this->ychanged[$yi]);
390 
391  // Skip matching "snake".
392  $copy = array();
393  while ($xi < $n_from && $yi < $n_to
394  && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
395  $copy[] = $from_lines[$xi++];
396  ++$yi;
397  }
398  if ($copy) {
399  $edits[] = new Text_Diff_Op_copy($copy);
400  }
401 
402  // Find deletes & adds.
403  $delete = array();
404  while ($xi < $n_from && $this->xchanged[$xi]) {
405  $delete[] = $from_lines[$xi++];
406  }
407 
408  $add = array();
409  while ($yi < $n_to && $this->ychanged[$yi]) {
410  $add[] = $to_lines[$yi++];
411  }
412 
413  if ($delete && $add) {
414  $edits[] = new Text_Diff_Op_change($delete, $add);
415  } elseif ($delete) {
416  $edits[] = new Text_Diff_Op_delete($delete);
417  } elseif ($add) {
418  $edits[] = new Text_Diff_Op_add($add);
419  }
420  }
421 
422  return $edits;
423  }
424 
441  public function _diag($xoff, $xlim, $yoff, $ylim, $nchunks)
442  {
443  $flip = false;
444 
445  if ($xlim - $xoff > $ylim - $yoff) {
446  /* Things seems faster (I'm not sure I understand why) when the
447  * shortest sequence is in X. */
448  $flip = true;
449  list($xoff, $xlim, $yoff, $ylim)
450  = array($yoff, $ylim, $xoff, $xlim);
451  }
452 
453  if ($flip) {
454  for ($i = $ylim - 1; $i >= $yoff; $i--) {
455  $ymatches[$this->xv[$i]][] = $i;
456  }
457  } else {
458  for ($i = $ylim - 1; $i >= $yoff; $i--) {
459  $ymatches[$this->yv[$i]][] = $i;
460  }
461  }
462 
463  $this->lcs = 0;
464  $this->seq[0] = $yoff - 1;
465  $this->in_seq = array();
466  $ymids[0] = array();
467 
468  $numer = $xlim - $xoff + $nchunks - 1;
469  $x = $xoff;
470  for ($chunk = 0; $chunk < $nchunks; $chunk++) {
471  if ($chunk > 0) {
472  for ($i = 0; $i <= $this->lcs; $i++) {
473  $ymids[$i][$chunk - 1] = $this->seq[$i];
474  }
475  }
476 
477  $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $chunk) / $nchunks);
478  for (; $x < $x1; $x++) {
479  $line = $flip ? $this->yv[$x] : $this->xv[$x];
480  if (empty($ymatches[$line])) {
481  continue;
482  }
483  $matches = $ymatches[$line];
484  foreach ($matches as $y) {
485  if (empty($this->in_seq[$y])) {
486  $k = $this->_lcsPos($y);
487  assert($k > 0);
488  $ymids[$k] = $ymids[$k - 1];
489  break;
490  }
491  }
492 
493  while (list($junk, $y) = each($matches)) {
494  if ($y > $this->seq[$k - 1]) {
495  assert($y < $this->seq[$k]);
496  /* Optimization: this is a common case: next match is
497  * just replacing previous match. */
498  $this->in_seq[$this->seq[$k]] = false;
499  $this->seq[$k] = $y;
500  $this->in_seq[$y] = 1;
501  } elseif (empty($this->in_seq[$y])) {
502  $k = $this->_lcsPos($y);
503  assert($k > 0);
504  $ymids[$k] = $ymids[$k - 1];
505  }
506  }
507  }
508  }
509 
510  $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
511  $ymid = $ymids[$this->lcs];
512  for ($n = 0; $n < $nchunks - 1; $n++) {
513  $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $n) / $nchunks);
514  $y1 = $ymid[$n] + 1;
515  $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
516  }
517  $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
518 
519  return array($this->lcs, $seps);
520  }
521 
522  public function _lcsPos($ypos)
523  {
524  $end = $this->lcs;
525  if ($end == 0 || $ypos > $this->seq[$end]) {
526  $this->seq[++$this->lcs] = $ypos;
527  $this->in_seq[$ypos] = 1;
528  return $this->lcs;
529  }
530 
531  $beg = 1;
532  while ($beg < $end) {
533  $mid = (int) (($beg + $end) / 2);
534  if ($ypos > $this->seq[$mid]) {
535  $beg = $mid + 1;
536  } else {
537  $end = $mid;
538  }
539  }
540 
541  assert($ypos != $this->seq[$end]);
542 
543  $this->in_seq[$this->seq[$end]] = false;
544  $this->seq[$end] = $ypos;
545  $this->in_seq[$ypos] = 1;
546  return $end;
547  }
548 
561  public function _compareseq($xoff, $xlim, $yoff, $ylim)
562  {
563  /* Slide down the bottom initial diagonal. */
564  while ($xoff < $xlim && $yoff < $ylim
565  && $this->xv[$xoff] == $this->yv[$yoff]) {
566  ++$xoff;
567  ++$yoff;
568  }
569 
570  /* Slide up the top initial diagonal. */
571  while ($xlim > $xoff && $ylim > $yoff
572  && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
573  --$xlim;
574  --$ylim;
575  }
576 
577  if ($xoff == $xlim || $yoff == $ylim) {
578  $lcs = 0;
579  } else {
580  /* This is ad hoc but seems to work well. $nchunks =
581  * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks =
582  * max(2,min(8,(int)$nchunks)); */
583  $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
584  list($lcs, $seps)
585  = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
586  }
587 
588  if ($lcs == 0) {
589  /* X and Y sequences have no common subsequence: mark all
590  * changed. */
591  while ($yoff < $ylim) {
592  $this->ychanged[$this->yind[$yoff++]] = 1;
593  }
594  while ($xoff < $xlim) {
595  $this->xchanged[$this->xind[$xoff++]] = 1;
596  }
597  } else {
598  /* Use the partitions to split this problem into subproblems. */
599  reset($seps);
600  $pt1 = $seps[0];
601  while ($pt2 = next($seps)) {
602  $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
603  $pt1 = $pt2;
604  }
605  }
606  }
607 
620  public function _shiftBoundaries($lines, &$changed, $other_changed)
621  {
622  $i = 0;
623  $j = 0;
624 
625  assert(count($lines) == count($changed));
626  $len = count($lines);
627  $other_len = count($other_changed);
628 
629  while (1) {
630  /* Scan forward to find the beginning of another run of
631  * changes. Also keep track of the corresponding point in the
632  * other file.
633  *
634  * Throughout this code, $i and $j are adjusted together so that
635  * the first $i elements of $changed and the first $j elements of
636  * $other_changed both contain the same number of zeros (unchanged
637  * lines).
638  *
639  * Furthermore, $j is always kept so that $j == $other_len or
640  * $other_changed[$j] == false. */
641  while ($j < $other_len && $other_changed[$j]) {
642  $j++;
643  }
644 
645  while ($i < $len && !$changed[$i]) {
646  assert($j < $other_len && !$other_changed[$j]);
647  $i++;
648  $j++;
649  while ($j < $other_len && $other_changed[$j]) {
650  $j++;
651  }
652  }
653 
654  if ($i == $len) {
655  break;
656  }
657 
658  $start = $i;
659 
660  /* Find the end of this run of changes. */
661  while (++$i < $len && $changed[$i]) {
662  continue;
663  }
664 
665  do {
666  /* Record the length of this run of changes, so that we can
667  * later determine whether the run has grown. */
668  $runlength = $i - $start;
669 
670  /* Move the changed region back, so long as the previous
671  * unchanged line matches the last changed one. This merges
672  * with previous changed regions. */
673  while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
674  $changed[--$start] = 1;
675  $changed[--$i] = false;
676  while ($start > 0 && $changed[$start - 1]) {
677  $start--;
678  }
679  assert($j > 0);
680  while ($other_changed[--$j]) {
681  continue;
682  }
683  assert($j >= 0 && !$other_changed[$j]);
684  }
685 
686  /* Set CORRESPONDING to the end of the changed run, at the
687  * last point where it corresponds to a changed run in the
688  * other file. CORRESPONDING == LEN means no such point has
689  * been found. */
690  $corresponding = $j < $other_len ? $i : $len;
691 
692  /* Move the changed region forward, so long as the first
693  * changed line matches the following unchanged one. This
694  * merges with following changed regions. Do this second, so
695  * that if there are no merges, the changed region is moved
696  * forward as far as possible. */
697  while ($i < $len && $lines[$start] == $lines[$i]) {
698  $changed[$start++] = false;
699  $changed[$i++] = 1;
700  while ($i < $len && $changed[$i]) {
701  $i++;
702  }
703 
704  assert($j < $other_len && !$other_changed[$j]);
705  $j++;
706  if ($j < $other_len && $other_changed[$j]) {
707  $corresponding = $i;
708  while ($j < $other_len && $other_changed[$j]) {
709  $j++;
710  }
711  }
712  }
713  } while ($runlength != $i - $start);
714 
715  /* If possible, move the fully-merged run of changes back to a
716  * corresponding run in the other file. */
717  while ($corresponding < $i) {
718  $changed[--$start] = 1;
719  $changed[--$i] = 0;
720  assert($j > 0);
721  while ($other_changed[--$j]) {
722  continue;
723  }
724  assert($j >= 0 && !$other_changed[$j]);
725  }
726  }
727  }
728 }
729 
737 {
738  public $orig;
739  public $final;
740 
741  public function reverse()
742  {
743  trigger_error('Abstract method', E_USER_ERROR);
744  }
745 
746  public function norig()
747  {
748  return $this->orig ? count($this->orig) : 0;
749  }
750 
751  public function nfinal()
752  {
753  return $this->final ? count($this->final) : 0;
754  }
755 }
756 
764 {
765  public function __construct($orig, $final = false)
766  {
767  if (!is_array($final)) {
768  $final = $orig;
769  }
770  $this->orig = $orig;
771  $this->final = $final;
772  }
773 
774  public function reverse()
775  {
776  return $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
777  }
778 }
779 
787 {
788  public function __construct($lines)
789  {
790  $this->orig = $lines;
791  $this->final = false;
792  }
793 
794  public function reverse()
795  {
796  return $reverse = new Text_Diff_Op_add($this->orig);
797  }
798 }
799 
807 {
808  public function __construct($lines)
809  {
810  $this->final = $lines;
811  $this->orig = false;
812  }
813 
814  public function reverse()
815  {
816  return $reverse = new Text_Diff_Op_delete($this->final);
817  }
818 }
819 
827 {
828  public function __construct($orig, $final)
829  {
830  $this->orig = $orig;
831  $this->final = $final;
832  }
833 
834  public function reverse()
835  {
836  return $reverse = new Text_Diff_Op_change($this->final, $this->orig);
837  }
838 }
getDiff()
Returns the array of differences.
Definition: Diff.php:50
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments.
Definition: Diff.php:441
_compareseq($xoff, $xlim, $yoff, $ylim)
Finds LCS of two sequences.
Definition: Diff.php:561
lcs()
Computes the length of the Longest Common Subsequence (LCS).
Definition: Diff.php:102
__construct($lines)
Definition: Diff.php:788
__construct( $from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines)
Computes a diff between sequences of strings.
Definition: Diff.php:220
_check($from_lines, $to_lines)
Checks a diff for validity.
Definition: Diff.php:166
$engine
Definition: workflow.php:89
$_edits
Definition: Diff.php:24
_trimNewlines(&$line, $key)
Removes trailing newlines from a line of text.
Definition: Diff.php:156
getOriginal()
Gets the original set of lines.
Definition: Diff.php:120
$start
Definition: bench.php:8
$y
Definition: example_007.php:83
__construct($from_lines, $to_lines)
Computes diffs between sequences of strings.
Definition: Diff.php:33
$n
Definition: RandomTest.php:85
diff($from_lines, $to_lines)
Definition: Diff.php:325
Text_Diff.
diff($from_lines, $to_lines)
Definition: Diff.php:260
$i
Definition: disco.tpl.php:19
getFinal()
Gets the final set of lines.
Definition: Diff.php:138
__construct($orig, $final=false)
Definition: Diff.php:765
reverse()
Computes a reversed diff.
Definition: Diff.php:69
isEmpty()
Checks for an empty diff.
Definition: Diff.php:85
$key
Definition: croninfo.php:18
$x
Definition: complexTest.php:9
_shiftBoundaries($lines, &$changed, $other_changed)
Adjusts inserts/deletes of identical lines to join changes as much as possible.
Definition: Diff.php:620
__construct($lines)
Definition: Diff.php:808
__construct($orig, $final)
Definition: Diff.php:828