ILIAS  release_5-1 Revision 5.0.0-5477-g43f3e3fab5f
Diff.php
Go to the documentation of this file.
1<?php
16class Text_Diff {
17
23 public $_edits;
24
32 function Text_Diff($from_lines, $to_lines)
33 {
34 array_walk($from_lines, array($this, '_trimNewlines'));
35 array_walk($to_lines, array($this, '_trimNewlines'));
36
37 if (extension_loaded('xdiff')) {
38 $engine = new Text_Diff_Engine_xdiff();
39 } else {
40 $engine = new Text_Diff_Engine_native();
41 }
42
43 $this->_edits = $engine->diff($from_lines, $to_lines);
44 }
45
49 function getDiff()
50 {
51 return $this->_edits;
52 }
53
68 function reverse()
69 {
70 $rev = clone($obj);
71
72 $rev->_edits = array();
73 foreach ($this->_edits as $edit) {
74 $rev->_edits[] = $edit->reverse();
75 }
76 return $rev;
77 }
78
84 function isEmpty()
85 {
86 foreach ($this->_edits as $edit) {
87 if (!$edit instanceof Text_Diff_Op_copy) {
88 return false;
89 }
90 }
91 return true;
92 }
93
101 function lcs()
102 {
103 $lcs = 0;
104 foreach ($this->_edits as $edit) {
105 if ($edit instanceof Text_Diff_Op_copy) {
106 $lcs += count($edit->orig);
107 }
108 }
109 return $lcs;
110 }
111
119 function getOriginal()
120 {
121 $lines = array();
122 foreach ($this->_edits as $edit) {
123 if ($edit->orig) {
124 array_splice($lines, count($lines), 0, $edit->orig);
125 }
126 }
127 return $lines;
128 }
129
137 function getFinal()
138 {
139 $lines = array();
140 foreach ($this->_edits as $edit) {
141 if ($edit->final) {
142 array_splice($lines, count($lines), 0, $edit->final);
143 }
144 }
145 return $lines;
146 }
147
155 function _trimNewlines(&$line, $key)
156 {
157 $line = str_replace(array("\n", "\r"), '', $line);
158 }
159
165 function _check($from_lines, $to_lines)
166 {
167 if (serialize($from_lines) != serialize($this->getOriginal())) {
168 trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
169 }
170 if (serialize($to_lines) != serialize($this->getFinal())) {
171 trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
172 }
173
174 $rev = $this->reverse();
175 if (serialize($to_lines) != serialize($rev->getOriginal())) {
176 trigger_error("Reversed original doesn't match", E_USER_ERROR);
177 }
178 if (serialize($from_lines) != serialize($rev->getFinal())) {
179 trigger_error("Reversed final doesn't match", E_USER_ERROR);
180 }
181
182 $prevtype = null;
183 foreach ($this->_edits as $edit) {
184 if ($prevtype == get_class($edit)) {
185 trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
186 }
187 $prevtype = get_class($edit);
188 }
189
190 return true;
191 }
192
193}
194
202
219 function Text_MappedDiff($from_lines, $to_lines,
220 $mapped_from_lines, $mapped_to_lines)
221 {
222 assert(count($from_lines) == count($mapped_from_lines));
223 assert(count($to_lines) == count($mapped_to_lines));
224
225 parent::Text_Diff($mapped_from_lines, $mapped_to_lines);
226
227 $xi = $yi = 0;
228 for ($i = 0; $i < count($this->_edits); $i++) {
229 $orig = &$this->_edits[$i]->orig;
230 if (is_array($orig)) {
231 $orig = array_slice($from_lines, $xi, count($orig));
232 $xi += count($orig);
233 }
234
235 $final = &$this->_edits[$i]->final;
236 if (is_array($final)) {
237 $final = array_slice($to_lines, $yi, count($final));
238 $yi += count($final);
239 }
240 }
241 }
242
243}
244
256
257 function diff($from_lines, $to_lines)
258 {
259 /* Convert the two input arrays into strings for xdiff processing. */
260 $from_string = implode("\n", $from_lines);
261 $to_string = implode("\n", $to_lines);
262
263 /* Diff the two strings and convert the result to an array. */
264 $diff = xdiff_string_diff($from_string, $to_string, count($to_lines));
265 $diff = explode("\n", $diff);
266
267 /* Walk through the diff one line at a time. We build the $edits
268 * array of diff operations by reading the first character of the
269 * xdiff output (which is in the "unified diff" format).
270 *
271 * Note that we don't have enough information to detect "changed"
272 * lines using this approach, so we can't add Text_Diff_Op_changed
273 * instances to the $edits array. The result is still perfectly
274 * valid, albeit a little less descriptive and efficient. */
275 $edits = array();
276 foreach ($diff as $line) {
277 switch ($line[0]) {
278 case ' ':
279 $edits[] = new Text_Diff_Op_copy(array(substr($line, 1)));
280 break;
281
282 case '+':
283 $edits[] = new Text_Diff_Op_add(array(substr($line, 1)));
284 break;
285
286 case '-':
287 $edits[] = new Text_Diff_Op_delete(array(substr($line, 1)));
288 break;
289 }
290 }
291
292 return $edits;
293 }
294
295}
296
322
323 function diff($from_lines, $to_lines)
324 {
325 $n_from = count($from_lines);
326 $n_to = count($to_lines);
327
328 $this->xchanged = $this->ychanged = array();
329 $this->xv = $this->yv = array();
330 $this->xind = $this->yind = array();
331 unset($this->seq);
332 unset($this->in_seq);
333 unset($this->lcs);
334
335 // Skip leading common lines.
336 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
337 if ($from_lines[$skip] != $to_lines[$skip]) {
338 break;
339 }
340 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
341 }
342
343 // Skip trailing common lines.
344 $xi = $n_from; $yi = $n_to;
345 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
346 if ($from_lines[$xi] != $to_lines[$yi]) {
347 break;
348 }
349 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
350 }
351
352 // Ignore lines which do not exist in both files.
353 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
354 $xhash[$from_lines[$xi]] = 1;
355 }
356 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
357 $line = $to_lines[$yi];
358 if (($this->ychanged[$yi] = empty($xhash[$line]))) {
359 continue;
360 }
361 $yhash[$line] = 1;
362 $this->yv[] = $line;
363 $this->yind[] = $yi;
364 }
365 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
366 $line = $from_lines[$xi];
367 if (($this->xchanged[$xi] = empty($yhash[$line]))) {
368 continue;
369 }
370 $this->xv[] = $line;
371 $this->xind[] = $xi;
372 }
373
374 // Find the LCS.
375 $this->_compareseq(0, count($this->xv), 0, count($this->yv));
376
377 // Merge edits when possible.
378 $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
379 $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
380
381 // Compute the edit operations.
382 $edits = array();
383 $xi = $yi = 0;
384 while ($xi < $n_from || $yi < $n_to) {
385 assert($yi < $n_to || $this->xchanged[$xi]);
386 assert($xi < $n_from || $this->ychanged[$yi]);
387
388 // Skip matching "snake".
389 $copy = array();
390 while ($xi < $n_from && $yi < $n_to
391 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
392 $copy[] = $from_lines[$xi++];
393 ++$yi;
394 }
395 if ($copy) {
396 $edits[] = new Text_Diff_Op_copy($copy);
397 }
398
399 // Find deletes & adds.
400 $delete = array();
401 while ($xi < $n_from && $this->xchanged[$xi]) {
402 $delete[] = $from_lines[$xi++];
403 }
404
405 $add = array();
406 while ($yi < $n_to && $this->ychanged[$yi]) {
407 $add[] = $to_lines[$yi++];
408 }
409
410 if ($delete && $add) {
411 $edits[] = new Text_Diff_Op_change($delete, $add);
412 } elseif ($delete) {
413 $edits[] = new Text_Diff_Op_delete($delete);
414 } elseif ($add) {
415 $edits[] = new Text_Diff_Op_add($add);
416 }
417 }
418
419 return $edits;
420 }
421
438 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
439 {
440 $flip = false;
441
442 if ($xlim - $xoff > $ylim - $yoff) {
443 /* Things seems faster (I'm not sure I understand why) when the
444 * shortest sequence is in X. */
445 $flip = true;
446 list ($xoff, $xlim, $yoff, $ylim)
447 = array($yoff, $ylim, $xoff, $xlim);
448 }
449
450 if ($flip) {
451 for ($i = $ylim - 1; $i >= $yoff; $i--) {
452 $ymatches[$this->xv[$i]][] = $i;
453 }
454 } else {
455 for ($i = $ylim - 1; $i >= $yoff; $i--) {
456 $ymatches[$this->yv[$i]][] = $i;
457 }
458 }
459
460 $this->lcs = 0;
461 $this->seq[0]= $yoff - 1;
462 $this->in_seq = array();
463 $ymids[0] = array();
464
465 $numer = $xlim - $xoff + $nchunks - 1;
466 $x = $xoff;
467 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
468 if ($chunk > 0) {
469 for ($i = 0; $i <= $this->lcs; $i++) {
470 $ymids[$i][$chunk - 1] = $this->seq[$i];
471 }
472 }
473
474 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
475 for (; $x < $x1; $x++) {
476 $line = $flip ? $this->yv[$x] : $this->xv[$x];
477 if (empty($ymatches[$line])) {
478 continue;
479 }
480 $matches = $ymatches[$line];
481 foreach ($matches as $y) {
482 if (empty($this->in_seq[$y])) {
483 $k = $this->_lcsPos($y);
484 assert($k > 0);
485 $ymids[$k] = $ymids[$k - 1];
486 break;
487 }
488 }
489
490 while (list($junk, $y) = each($matches)) {
491 if ($y > $this->seq[$k - 1]) {
492 assert($y < $this->seq[$k]);
493 /* Optimization: this is a common case: next match is
494 * just replacing previous match. */
495 $this->in_seq[$this->seq[$k]] = false;
496 $this->seq[$k] = $y;
497 $this->in_seq[$y] = 1;
498 } elseif (empty($this->in_seq[$y])) {
499 $k = $this->_lcsPos($y);
500 assert($k > 0);
501 $ymids[$k] = $ymids[$k - 1];
502 }
503 }
504 }
505 }
506
507 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
508 $ymid = $ymids[$this->lcs];
509 for ($n = 0; $n < $nchunks - 1; $n++) {
510 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
511 $y1 = $ymid[$n] + 1;
512 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
513 }
514 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
515
516 return array($this->lcs, $seps);
517 }
518
519 function _lcsPos($ypos)
520 {
521 $end = $this->lcs;
522 if ($end == 0 || $ypos > $this->seq[$end]) {
523 $this->seq[++$this->lcs] = $ypos;
524 $this->in_seq[$ypos] = 1;
525 return $this->lcs;
526 }
527
528 $beg = 1;
529 while ($beg < $end) {
530 $mid = (int)(($beg + $end) / 2);
531 if ($ypos > $this->seq[$mid]) {
532 $beg = $mid + 1;
533 } else {
534 $end = $mid;
535 }
536 }
537
538 assert($ypos != $this->seq[$end]);
539
540 $this->in_seq[$this->seq[$end]] = false;
541 $this->seq[$end] = $ypos;
542 $this->in_seq[$ypos] = 1;
543 return $end;
544 }
545
558 function _compareseq ($xoff, $xlim, $yoff, $ylim)
559 {
560 /* Slide down the bottom initial diagonal. */
561 while ($xoff < $xlim && $yoff < $ylim
562 && $this->xv[$xoff] == $this->yv[$yoff]) {
563 ++$xoff;
564 ++$yoff;
565 }
566
567 /* Slide up the top initial diagonal. */
568 while ($xlim > $xoff && $ylim > $yoff
569 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
570 --$xlim;
571 --$ylim;
572 }
573
574 if ($xoff == $xlim || $yoff == $ylim) {
575 $lcs = 0;
576 } else {
577 /* This is ad hoc but seems to work well. $nchunks =
578 * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks =
579 * max(2,min(8,(int)$nchunks)); */
580 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
581 list($lcs, $seps)
582 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
583 }
584
585 if ($lcs == 0) {
586 /* X and Y sequences have no common subsequence: mark all
587 * changed. */
588 while ($yoff < $ylim) {
589 $this->ychanged[$this->yind[$yoff++]] = 1;
590 }
591 while ($xoff < $xlim) {
592 $this->xchanged[$this->xind[$xoff++]] = 1;
593 }
594 } else {
595 /* Use the partitions to split this problem into subproblems. */
596 reset($seps);
597 $pt1 = $seps[0];
598 while ($pt2 = next($seps)) {
599 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
600 $pt1 = $pt2;
601 }
602 }
603 }
604
617 function _shiftBoundaries($lines, &$changed, $other_changed)
618 {
619 $i = 0;
620 $j = 0;
621
622 assert('count($lines) == count($changed)');
623 $len = count($lines);
624 $other_len = count($other_changed);
625
626 while (1) {
627 /* Scan forward to find the beginning of another run of
628 * changes. Also keep track of the corresponding point in the
629 * other file.
630 *
631 * Throughout this code, $i and $j are adjusted together so that
632 * the first $i elements of $changed and the first $j elements of
633 * $other_changed both contain the same number of zeros (unchanged
634 * lines).
635 *
636 * Furthermore, $j is always kept so that $j == $other_len or
637 * $other_changed[$j] == false. */
638 while ($j < $other_len && $other_changed[$j]) {
639 $j++;
640 }
641
642 while ($i < $len && ! $changed[$i]) {
643 assert('$j < $other_len && ! $other_changed[$j]');
644 $i++; $j++;
645 while ($j < $other_len && $other_changed[$j]) {
646 $j++;
647 }
648 }
649
650 if ($i == $len) {
651 break;
652 }
653
654 $start = $i;
655
656 /* Find the end of this run of changes. */
657 while (++$i < $len && $changed[$i]) {
658 continue;
659 }
660
661 do {
662 /* Record the length of this run of changes, so that we can
663 * later determine whether the run has grown. */
664 $runlength = $i - $start;
665
666 /* Move the changed region back, so long as the previous
667 * unchanged line matches the last changed one. This merges
668 * with previous changed regions. */
669 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
670 $changed[--$start] = 1;
671 $changed[--$i] = false;
672 while ($start > 0 && $changed[$start - 1]) {
673 $start--;
674 }
675 assert('$j > 0');
676 while ($other_changed[--$j]) {
677 continue;
678 }
679 assert('$j >= 0 && !$other_changed[$j]');
680 }
681
682 /* Set CORRESPONDING to the end of the changed run, at the
683 * last point where it corresponds to a changed run in the
684 * other file. CORRESPONDING == LEN means no such point has
685 * been found. */
686 $corresponding = $j < $other_len ? $i : $len;
687
688 /* Move the changed region forward, so long as the first
689 * changed line matches the following unchanged one. This
690 * merges with following changed regions. Do this second, so
691 * that if there are no merges, the changed region is moved
692 * forward as far as possible. */
693 while ($i < $len && $lines[$start] == $lines[$i]) {
694 $changed[$start++] = false;
695 $changed[$i++] = 1;
696 while ($i < $len && $changed[$i]) {
697 $i++;
698 }
699
700 assert('$j < $other_len && ! $other_changed[$j]');
701 $j++;
702 if ($j < $other_len && $other_changed[$j]) {
703 $corresponding = $i;
704 while ($j < $other_len && $other_changed[$j]) {
705 $j++;
706 }
707 }
708 }
709 } while ($runlength != $i - $start);
710
711 /* If possible, move the fully-merged run of changes back to a
712 * corresponding run in the other file. */
713 while ($corresponding < $i) {
714 $changed[--$start] = 1;
715 $changed[--$i] = 0;
716 assert('$j > 0');
717 while ($other_changed[--$j]) {
718 continue;
719 }
720 assert('$j >= 0 && !$other_changed[$j]');
721 }
722 }
723 }
724
725}
726
734
735 public $orig;
736 public $final;
737
738 function reverse()
739 {
740 trigger_error('Abstract method', E_USER_ERROR);
741 }
742
743 function norig()
744 {
745 return $this->orig ? count($this->orig) : 0;
746 }
747
748 function nfinal()
749 {
750 return $this->final ? count($this->final) : 0;
751 }
752
753}
754
762
763 function Text_Diff_Op_copy($orig, $final = false)
764 {
765 if (!is_array($final)) {
766 $final = $orig;
767 }
768 $this->orig = $orig;
769 $this->final = $final;
770 }
771
772 function reverse()
773 {
774 return $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
775 }
776
777}
778
786
787 function Text_Diff_Op_delete($lines)
788 {
789 $this->orig = $lines;
790 $this->final = false;
791 }
792
793 function reverse()
794 {
795 return $reverse = new Text_Diff_Op_add($this->orig);
796 }
797
798}
799
807
808 function Text_Diff_Op_add($lines)
809 {
810 $this->final = $lines;
811 $this->orig = false;
812 }
813
814 function reverse()
815 {
816 return $reverse = new Text_Diff_Op_delete($this->final);
817 }
818
819}
820
828
830 {
831 $this->orig = $orig;
832 $this->final = $final;
833 }
834
835 function reverse()
836 {
837 return $reverse = new Text_Diff_Op_change($this->final, $this->orig);
838 }
839
840}
$n
Definition: RandomTest.php:80
diff($from_lines, $to_lines)
Definition: Diff.php:323
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF,...
Definition: Diff.php:438
_compareseq($xoff, $xlim, $yoff, $ylim)
Finds LCS of two sequences.
Definition: Diff.php:558
_shiftBoundaries($lines, &$changed, $other_changed)
Adjusts inserts/deletes of identical lines to join changes as much as possible.
Definition: Diff.php:617
diff($from_lines, $to_lines)
Definition: Diff.php:257
Text_Diff_Op_add($lines)
Definition: Diff.php:808
Text_Diff_Op_change($orig, $final)
Definition: Diff.php:829
Text_Diff_Op_copy($orig, $final=false)
Definition: Diff.php:763
Text_Diff_Op_delete($lines)
Definition: Diff.php:787
getOriginal()
Gets the original set of lines.
Definition: Diff.php:119
isEmpty()
Checks for an empty diff.
Definition: Diff.php:84
reverse()
Computes a reversed diff.
Definition: Diff.php:68
$_edits
Definition: Diff.php:23
getFinal()
Gets the final set of lines.
Definition: Diff.php:137
lcs()
Computes the length of the Longest Common Subsequence (LCS).
Definition: Diff.php:101
getDiff()
Returns the array of differences.
Definition: Diff.php:49
Text_Diff($from_lines, $to_lines)
Computes diffs between sequences of strings.
Definition: Diff.php:32
_trimNewlines(&$line, $key)
Removes trailing newlines from a line of text.
Definition: Diff.php:155
_check($from_lines, $to_lines)
Checks a diff for validity.
Definition: Diff.php:165
Text_MappedDiff($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines)
Computes a diff between sequences of strings.
Definition: Diff.php:219
$y
Definition: example_007.php:83
$x
Definition: example_009.php:98
Text_Diff.