ILIAS  release_5-3 Revision v5.3.23-19-g915713cf615
Diff.php
Go to the documentation of this file.
1<?php
17{
18
24 public $_edits;
25
33 public function __construct($from_lines, $to_lines)
34 {
35 array_walk($from_lines, array($this, '_trimNewlines'));
36 array_walk($to_lines, array($this, '_trimNewlines'));
37
38 if (extension_loaded('xdiff')) {
40 } else {
42 }
43
44 $this->_edits = $engine->diff($from_lines, $to_lines);
45 }
46
50 public function getDiff()
51 {
52 return $this->_edits;
53 }
54
69 public function reverse()
70 {
71 $rev = clone($obj);
72
73 $rev->_edits = array();
74 foreach ($this->_edits as $edit) {
75 $rev->_edits[] = $edit->reverse();
76 }
77 return $rev;
78 }
79
85 public function isEmpty()
86 {
87 foreach ($this->_edits as $edit) {
88 if (!$edit instanceof Text_Diff_Op_copy) {
89 return false;
90 }
91 }
92 return true;
93 }
94
102 public function lcs()
103 {
104 $lcs = 0;
105 foreach ($this->_edits as $edit) {
106 if ($edit instanceof Text_Diff_Op_copy) {
107 $lcs += count($edit->orig);
108 }
109 }
110 return $lcs;
111 }
112
120 public function getOriginal()
121 {
122 $lines = array();
123 foreach ($this->_edits as $edit) {
124 if ($edit->orig) {
125 array_splice($lines, count($lines), 0, $edit->orig);
126 }
127 }
128 return $lines;
129 }
130
138 public function getFinal()
139 {
140 $lines = array();
141 foreach ($this->_edits as $edit) {
142 if ($edit->final) {
143 array_splice($lines, count($lines), 0, $edit->final);
144 }
145 }
146 return $lines;
147 }
148
156 public function _trimNewlines(&$line, $key)
157 {
158 $line = str_replace(array("\n", "\r"), '', $line);
159 }
160
166 public function _check($from_lines, $to_lines)
167 {
168 if (serialize($from_lines) != serialize($this->getOriginal())) {
169 trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
170 }
171 if (serialize($to_lines) != serialize($this->getFinal())) {
172 trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
173 }
174
175 $rev = $this->reverse();
176 if (serialize($to_lines) != serialize($rev->getOriginal())) {
177 trigger_error("Reversed original doesn't match", E_USER_ERROR);
178 }
179 if (serialize($from_lines) != serialize($rev->getFinal())) {
180 trigger_error("Reversed final doesn't match", E_USER_ERROR);
181 }
182
183 $prevtype = null;
184 foreach ($this->_edits as $edit) {
185 if ($prevtype == get_class($edit)) {
186 trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
187 }
188 $prevtype = get_class($edit);
189 }
190
191 return true;
192 }
193}
194
202{
203
220 public function __construct(
221 $from_lines,
222 $to_lines,
223 $mapped_from_lines,
224 $mapped_to_lines
225 ) {
226 assert(count($from_lines) == count($mapped_from_lines));
227 assert(count($to_lines) == count($mapped_to_lines));
228
229 parent::__construct($mapped_from_lines, $mapped_to_lines);
230
231 $xi = $yi = 0;
232 for ($i = 0; $i < count($this->_edits); $i++) {
233 $orig = &$this->_edits[$i]->orig;
234 if (is_array($orig)) {
235 $orig = array_slice($from_lines, $xi, count($orig));
236 $xi += count($orig);
237 }
238
239 $final = &$this->_edits[$i]->final;
240 if (is_array($final)) {
241 $final = array_slice($to_lines, $yi, count($final));
242 $yi += count($final);
243 }
244 }
245 }
246}
247
259{
260 public function diff($from_lines, $to_lines)
261 {
262 /* Convert the two input arrays into strings for xdiff processing. */
263 $from_string = implode("\n", $from_lines);
264 $to_string = implode("\n", $to_lines);
265
266 /* Diff the two strings and convert the result to an array. */
267 $diff = xdiff_string_diff($from_string, $to_string, count($to_lines));
268 $diff = explode("\n", $diff);
269
270 /* Walk through the diff one line at a time. We build the $edits
271 * array of diff operations by reading the first character of the
272 * xdiff output (which is in the "unified diff" format).
273 *
274 * Note that we don't have enough information to detect "changed"
275 * lines using this approach, so we can't add Text_Diff_Op_changed
276 * instances to the $edits array. The result is still perfectly
277 * valid, albeit a little less descriptive and efficient. */
278 $edits = array();
279 foreach ($diff as $line) {
280 switch ($line[0]) {
281 case ' ':
282 $edits[] = new Text_Diff_Op_copy(array(substr($line, 1)));
283 break;
284
285 case '+':
286 $edits[] = new Text_Diff_Op_add(array(substr($line, 1)));
287 break;
288
289 case '-':
290 $edits[] = new Text_Diff_Op_delete(array(substr($line, 1)));
291 break;
292 }
293 }
294
295 return $edits;
296 }
297}
298
324{
325 public function diff($from_lines, $to_lines)
326 {
327 $n_from = count($from_lines);
328 $n_to = count($to_lines);
329
330 $this->xchanged = $this->ychanged = array();
331 $this->xv = $this->yv = array();
332 $this->xind = $this->yind = array();
333 unset($this->seq);
334 unset($this->in_seq);
335 unset($this->lcs);
336
337 // Skip leading common lines.
338 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
339 if ($from_lines[$skip] != $to_lines[$skip]) {
340 break;
341 }
342 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
343 }
344
345 // Skip trailing common lines.
346 $xi = $n_from;
347 $yi = $n_to;
348 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
349 if ($from_lines[$xi] != $to_lines[$yi]) {
350 break;
351 }
352 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
353 }
354
355 // Ignore lines which do not exist in both files.
356 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
357 $xhash[$from_lines[$xi]] = 1;
358 }
359 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
360 $line = $to_lines[$yi];
361 if (($this->ychanged[$yi] = empty($xhash[$line]))) {
362 continue;
363 }
364 $yhash[$line] = 1;
365 $this->yv[] = $line;
366 $this->yind[] = $yi;
367 }
368 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
369 $line = $from_lines[$xi];
370 if (($this->xchanged[$xi] = empty($yhash[$line]))) {
371 continue;
372 }
373 $this->xv[] = $line;
374 $this->xind[] = $xi;
375 }
376
377 // Find the LCS.
378 $this->_compareseq(0, count($this->xv), 0, count($this->yv));
379
380 // Merge edits when possible.
381 $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
382 $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
383
384 // Compute the edit operations.
385 $edits = array();
386 $xi = $yi = 0;
387 while ($xi < $n_from || $yi < $n_to) {
388 assert($yi < $n_to || $this->xchanged[$xi]);
389 assert($xi < $n_from || $this->ychanged[$yi]);
390
391 // Skip matching "snake".
392 $copy = array();
393 while ($xi < $n_from && $yi < $n_to
394 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
395 $copy[] = $from_lines[$xi++];
396 ++$yi;
397 }
398 if ($copy) {
399 $edits[] = new Text_Diff_Op_copy($copy);
400 }
401
402 // Find deletes & adds.
403 $delete = array();
404 while ($xi < $n_from && $this->xchanged[$xi]) {
405 $delete[] = $from_lines[$xi++];
406 }
407
408 $add = array();
409 while ($yi < $n_to && $this->ychanged[$yi]) {
410 $add[] = $to_lines[$yi++];
411 }
412
413 if ($delete && $add) {
414 $edits[] = new Text_Diff_Op_change($delete, $add);
415 } elseif ($delete) {
416 $edits[] = new Text_Diff_Op_delete($delete);
417 } elseif ($add) {
418 $edits[] = new Text_Diff_Op_add($add);
419 }
420 }
421
422 return $edits;
423 }
424
441 public function _diag($xoff, $xlim, $yoff, $ylim, $nchunks)
442 {
443 $flip = false;
444
445 if ($xlim - $xoff > $ylim - $yoff) {
446 /* Things seems faster (I'm not sure I understand why) when the
447 * shortest sequence is in X. */
448 $flip = true;
449 list($xoff, $xlim, $yoff, $ylim)
450 = array($yoff, $ylim, $xoff, $xlim);
451 }
452
453 if ($flip) {
454 for ($i = $ylim - 1; $i >= $yoff; $i--) {
455 $ymatches[$this->xv[$i]][] = $i;
456 }
457 } else {
458 for ($i = $ylim - 1; $i >= $yoff; $i--) {
459 $ymatches[$this->yv[$i]][] = $i;
460 }
461 }
462
463 $this->lcs = 0;
464 $this->seq[0]= $yoff - 1;
465 $this->in_seq = array();
466 $ymids[0] = array();
467
468 $numer = $xlim - $xoff + $nchunks - 1;
469 $x = $xoff;
470 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
471 if ($chunk > 0) {
472 for ($i = 0; $i <= $this->lcs; $i++) {
473 $ymids[$i][$chunk - 1] = $this->seq[$i];
474 }
475 }
476
477 $x1 = $xoff + (int) (($numer + ($xlim-$xoff)*$chunk) / $nchunks);
478 for (; $x < $x1; $x++) {
479 $line = $flip ? $this->yv[$x] : $this->xv[$x];
480 if (empty($ymatches[$line])) {
481 continue;
482 }
483 $matches = $ymatches[$line];
484 foreach ($matches as $y) {
485 if (empty($this->in_seq[$y])) {
486 $k = $this->_lcsPos($y);
487 assert($k > 0);
488 $ymids[$k] = $ymids[$k - 1];
489 break;
490 }
491 }
492
493 while (list($junk, $y) = each($matches)) {
494 if ($y > $this->seq[$k - 1]) {
495 assert($y < $this->seq[$k]);
496 /* Optimization: this is a common case: next match is
497 * just replacing previous match. */
498 $this->in_seq[$this->seq[$k]] = false;
499 $this->seq[$k] = $y;
500 $this->in_seq[$y] = 1;
501 } elseif (empty($this->in_seq[$y])) {
502 $k = $this->_lcsPos($y);
503 assert($k > 0);
504 $ymids[$k] = $ymids[$k - 1];
505 }
506 }
507 }
508 }
509
510 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
511 $ymid = $ymids[$this->lcs];
512 for ($n = 0; $n < $nchunks - 1; $n++) {
513 $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $n) / $nchunks);
514 $y1 = $ymid[$n] + 1;
515 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
516 }
517 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
518
519 return array($this->lcs, $seps);
520 }
521
522 public function _lcsPos($ypos)
523 {
524 $end = $this->lcs;
525 if ($end == 0 || $ypos > $this->seq[$end]) {
526 $this->seq[++$this->lcs] = $ypos;
527 $this->in_seq[$ypos] = 1;
528 return $this->lcs;
529 }
530
531 $beg = 1;
532 while ($beg < $end) {
533 $mid = (int) (($beg + $end) / 2);
534 if ($ypos > $this->seq[$mid]) {
535 $beg = $mid + 1;
536 } else {
537 $end = $mid;
538 }
539 }
540
541 assert($ypos != $this->seq[$end]);
542
543 $this->in_seq[$this->seq[$end]] = false;
544 $this->seq[$end] = $ypos;
545 $this->in_seq[$ypos] = 1;
546 return $end;
547 }
548
561 public function _compareseq($xoff, $xlim, $yoff, $ylim)
562 {
563 /* Slide down the bottom initial diagonal. */
564 while ($xoff < $xlim && $yoff < $ylim
565 && $this->xv[$xoff] == $this->yv[$yoff]) {
566 ++$xoff;
567 ++$yoff;
568 }
569
570 /* Slide up the top initial diagonal. */
571 while ($xlim > $xoff && $ylim > $yoff
572 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
573 --$xlim;
574 --$ylim;
575 }
576
577 if ($xoff == $xlim || $yoff == $ylim) {
578 $lcs = 0;
579 } else {
580 /* This is ad hoc but seems to work well. $nchunks =
581 * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks =
582 * max(2,min(8,(int)$nchunks)); */
583 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
584 list($lcs, $seps)
585 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
586 }
587
588 if ($lcs == 0) {
589 /* X and Y sequences have no common subsequence: mark all
590 * changed. */
591 while ($yoff < $ylim) {
592 $this->ychanged[$this->yind[$yoff++]] = 1;
593 }
594 while ($xoff < $xlim) {
595 $this->xchanged[$this->xind[$xoff++]] = 1;
596 }
597 } else {
598 /* Use the partitions to split this problem into subproblems. */
599 reset($seps);
600 $pt1 = $seps[0];
601 while ($pt2 = next($seps)) {
602 $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
603 $pt1 = $pt2;
604 }
605 }
606 }
607
620 public function _shiftBoundaries($lines, &$changed, $other_changed)
621 {
622 $i = 0;
623 $j = 0;
624
625 assert('count($lines) == count($changed)');
626 $len = count($lines);
627 $other_len = count($other_changed);
628
629 while (1) {
630 /* Scan forward to find the beginning of another run of
631 * changes. Also keep track of the corresponding point in the
632 * other file.
633 *
634 * Throughout this code, $i and $j are adjusted together so that
635 * the first $i elements of $changed and the first $j elements of
636 * $other_changed both contain the same number of zeros (unchanged
637 * lines).
638 *
639 * Furthermore, $j is always kept so that $j == $other_len or
640 * $other_changed[$j] == false. */
641 while ($j < $other_len && $other_changed[$j]) {
642 $j++;
643 }
644
645 while ($i < $len && !$changed[$i]) {
646 assert('$j < $other_len && ! $other_changed[$j]');
647 $i++;
648 $j++;
649 while ($j < $other_len && $other_changed[$j]) {
650 $j++;
651 }
652 }
653
654 if ($i == $len) {
655 break;
656 }
657
658 $start = $i;
659
660 /* Find the end of this run of changes. */
661 while (++$i < $len && $changed[$i]) {
662 continue;
663 }
664
665 do {
666 /* Record the length of this run of changes, so that we can
667 * later determine whether the run has grown. */
668 $runlength = $i - $start;
669
670 /* Move the changed region back, so long as the previous
671 * unchanged line matches the last changed one. This merges
672 * with previous changed regions. */
673 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
674 $changed[--$start] = 1;
675 $changed[--$i] = false;
676 while ($start > 0 && $changed[$start - 1]) {
677 $start--;
678 }
679 assert('$j > 0');
680 while ($other_changed[--$j]) {
681 continue;
682 }
683 assert('$j >= 0 && !$other_changed[$j]');
684 }
685
686 /* Set CORRESPONDING to the end of the changed run, at the
687 * last point where it corresponds to a changed run in the
688 * other file. CORRESPONDING == LEN means no such point has
689 * been found. */
690 $corresponding = $j < $other_len ? $i : $len;
691
692 /* Move the changed region forward, so long as the first
693 * changed line matches the following unchanged one. This
694 * merges with following changed regions. Do this second, so
695 * that if there are no merges, the changed region is moved
696 * forward as far as possible. */
697 while ($i < $len && $lines[$start] == $lines[$i]) {
698 $changed[$start++] = false;
699 $changed[$i++] = 1;
700 while ($i < $len && $changed[$i]) {
701 $i++;
702 }
703
704 assert('$j < $other_len && ! $other_changed[$j]');
705 $j++;
706 if ($j < $other_len && $other_changed[$j]) {
707 $corresponding = $i;
708 while ($j < $other_len && $other_changed[$j]) {
709 $j++;
710 }
711 }
712 }
713 } while ($runlength != $i - $start);
714
715 /* If possible, move the fully-merged run of changes back to a
716 * corresponding run in the other file. */
717 while ($corresponding < $i) {
718 $changed[--$start] = 1;
719 $changed[--$i] = 0;
720 assert('$j > 0');
721 while ($other_changed[--$j]) {
722 continue;
723 }
724 assert('$j >= 0 && !$other_changed[$j]');
725 }
726 }
727 }
728}
729
737{
738 public $orig;
739 public $final;
740
741 public function reverse()
742 {
743 trigger_error('Abstract method', E_USER_ERROR);
744 }
745
746 public function norig()
747 {
748 return $this->orig ? count($this->orig) : 0;
749 }
750
751 public function nfinal()
752 {
753 return $this->final ? count($this->final) : 0;
754 }
755}
756
764{
765 public function __construct($orig, $final = false)
766 {
767 if (!is_array($final)) {
768 $final = $orig;
769 }
770 $this->orig = $orig;
771 $this->final = $final;
772 }
773
774 public function reverse()
775 {
776 return $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
777 }
778}
779
787{
788 public function __construct($lines)
789 {
790 $this->orig = $lines;
791 $this->final = false;
792 }
793
794 public function reverse()
795 {
796 return $reverse = new Text_Diff_Op_add($this->orig);
797 }
798}
799
807{
808 public function __construct($lines)
809 {
810 $this->final = $lines;
811 $this->orig = false;
812 }
813
814 public function reverse()
815 {
816 return $reverse = new Text_Diff_Op_delete($this->final);
817 }
818}
819
827{
828 public function __construct($orig, $final)
829 {
830 $this->orig = $orig;
831 $this->final = $final;
832 }
833
834 public function reverse()
835 {
836 return $reverse = new Text_Diff_Op_change($this->final, $this->orig);
837 }
838}
$n
Definition: RandomTest.php:85
An exception for terminatinating execution or to throw for unit testing.
diff($from_lines, $to_lines)
Definition: Diff.php:325
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF,...
Definition: Diff.php:441
_compareseq($xoff, $xlim, $yoff, $ylim)
Finds LCS of two sequences.
Definition: Diff.php:561
_shiftBoundaries($lines, &$changed, $other_changed)
Adjusts inserts/deletes of identical lines to join changes as much as possible.
Definition: Diff.php:620
diff($from_lines, $to_lines)
Definition: Diff.php:260
__construct($lines)
Definition: Diff.php:808
__construct($orig, $final)
Definition: Diff.php:828
__construct($orig, $final=false)
Definition: Diff.php:765
__construct($lines)
Definition: Diff.php:788
getOriginal()
Gets the original set of lines.
Definition: Diff.php:120
isEmpty()
Checks for an empty diff.
Definition: Diff.php:85
reverse()
Computes a reversed diff.
Definition: Diff.php:69
$_edits
Definition: Diff.php:24
getFinal()
Gets the final set of lines.
Definition: Diff.php:138
__construct($from_lines, $to_lines)
Computes diffs between sequences of strings.
Definition: Diff.php:33
lcs()
Computes the length of the Longest Common Subsequence (LCS).
Definition: Diff.php:102
getDiff()
Returns the array of differences.
Definition: Diff.php:50
_trimNewlines(&$line, $key)
Removes trailing newlines from a line of text.
Definition: Diff.php:156
_check($from_lines, $to_lines)
Checks a diff for validity.
Definition: Diff.php:166
__construct( $from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines)
Computes a diff between sequences of strings.
Definition: Diff.php:220
$key
Definition: croninfo.php:18
$i
Definition: disco.tpl.php:19
$y
Definition: example_007.php:83
$x
Definition: example_009.php:98
$end
Definition: saml1-acs.php:18
Text_Diff.
$engine
Definition: workflow.php:89