ILIAS  release_4-3 Revision
 All Data Structures Namespaces Files Functions Variables Groups Pages
Diff.php
Go to the documentation of this file.
1 <?php
16 class Text_Diff {
17 
23  public $_edits;
24 
32  function Text_Diff($from_lines, $to_lines)
33  {
34  array_walk($from_lines, array($this, '_trimNewlines'));
35  array_walk($to_lines, array($this, '_trimNewlines'));
36 
37  if (extension_loaded('xdiff')) {
38  $engine = new Text_Diff_Engine_xdiff();
39  } else {
40  $engine = new Text_Diff_Engine_native();
41  }
42 
43  $this->_edits = $engine->diff($from_lines, $to_lines);
44  }
45 
49  function getDiff()
50  {
51  return $this->_edits;
52  }
53 
68  function reverse()
69  {
70  $rev = clone($obj);
71 
72  $rev->_edits = array();
73  foreach ($this->_edits as $edit) {
74  $rev->_edits[] = $edit->reverse();
75  }
76  return $rev;
77  }
78 
84  function isEmpty()
85  {
86  foreach ($this->_edits as $edit) {
87  if (!$edit instanceof Text_Diff_Op_copy) {
88  return false;
89  }
90  }
91  return true;
92  }
93 
101  function lcs()
102  {
103  $lcs = 0;
104  foreach ($this->_edits as $edit) {
105  if ($edit instanceof Text_Diff_Op_copy) {
106  $lcs += count($edit->orig);
107  }
108  }
109  return $lcs;
110  }
111 
119  function getOriginal()
120  {
121  $lines = array();
122  foreach ($this->_edits as $edit) {
123  if ($edit->orig) {
124  array_splice($lines, count($lines), 0, $edit->orig);
125  }
126  }
127  return $lines;
128  }
129 
137  function getFinal()
138  {
139  $lines = array();
140  foreach ($this->_edits as $edit) {
141  if ($edit->final) {
142  array_splice($lines, count($lines), 0, $edit->final);
143  }
144  }
145  return $lines;
146  }
147 
155  function _trimNewlines(&$line, $key)
156  {
157  $line = str_replace(array("\n", "\r"), '', $line);
158  }
159 
165  function _check($from_lines, $to_lines)
166  {
167  if (serialize($from_lines) != serialize($this->getOriginal())) {
168  trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
169  }
170  if (serialize($to_lines) != serialize($this->getFinal())) {
171  trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
172  }
173 
174  $rev = $this->reverse();
175  if (serialize($to_lines) != serialize($rev->getOriginal())) {
176  trigger_error("Reversed original doesn't match", E_USER_ERROR);
177  }
178  if (serialize($from_lines) != serialize($rev->getFinal())) {
179  trigger_error("Reversed final doesn't match", E_USER_ERROR);
180  }
181 
182  $prevtype = null;
183  foreach ($this->_edits as $edit) {
184  if ($prevtype == get_class($edit)) {
185  trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
186  }
187  $prevtype = get_class($edit);
188  }
189 
190  return true;
191  }
192 
193 }
194 
201 class Text_MappedDiff extends Text_Diff {
202 
219  function Text_MappedDiff($from_lines, $to_lines,
220  $mapped_from_lines, $mapped_to_lines)
221  {
222  assert(count($from_lines) == count($mapped_from_lines));
223  assert(count($to_lines) == count($mapped_to_lines));
224 
225  parent::Text_Diff($mapped_from_lines, $mapped_to_lines);
226 
227  $xi = $yi = 0;
228  for ($i = 0; $i < count($this->_edits); $i++) {
229  $orig = &$this->_edits[$i]->orig;
230  if (is_array($orig)) {
231  $orig = array_slice($from_lines, $xi, count($orig));
232  $xi += count($orig);
233  }
234 
235  $final = &$this->_edits[$i]->final;
236  if (is_array($final)) {
237  $final = array_slice($to_lines, $yi, count($final));
238  $yi += count($final);
239  }
240  }
241  }
242 
243 }
244 
256 
257  function diff($from_lines, $to_lines)
258  {
259  /* Convert the two input arrays into strings for xdiff processing. */
260  $from_string = implode("\n", $from_lines);
261  $to_string = implode("\n", $to_lines);
262 
263  /* Diff the two strings and convert the result to an array. */
264  $diff = xdiff_string_diff($from_string, $to_string, count($to_lines));
265  $diff = explode("\n", $diff);
266 
267  /* Walk through the diff one line at a time. We build the $edits
268  * array of diff operations by reading the first character of the
269  * xdiff output (which is in the "unified diff" format).
270  *
271  * Note that we don't have enough information to detect "changed"
272  * lines using this approach, so we can't add Text_Diff_Op_changed
273  * instances to the $edits array. The result is still perfectly
274  * valid, albeit a little less descriptive and efficient. */
275  $edits = array();
276  foreach ($diff as $line) {
277  switch ($line[0]) {
278  case ' ':
279  $edits[] = new Text_Diff_Op_copy(array(substr($line, 1)));
280  break;
281 
282  case '+':
283  $edits[] = new Text_Diff_Op_add(array(substr($line, 1)));
284  break;
285 
286  case '-':
287  $edits[] = new Text_Diff_Op_delete(array(substr($line, 1)));
288  break;
289  }
290  }
291 
292  return $edits;
293  }
294 
295 }
296 
322 
323  function diff($from_lines, $to_lines)
324  {
325  $n_from = count($from_lines);
326  $n_to = count($to_lines);
327 
328  $this->xchanged = $this->ychanged = array();
329  $this->xv = $this->yv = array();
330  $this->xind = $this->yind = array();
331  unset($this->seq);
332  unset($this->in_seq);
333  unset($this->lcs);
334 
335  // Skip leading common lines.
336  for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
337  if ($from_lines[$skip] != $to_lines[$skip]) {
338  break;
339  }
340  $this->xchanged[$skip] = $this->ychanged[$skip] = false;
341  }
342 
343  // Skip trailing common lines.
344  $xi = $n_from; $yi = $n_to;
345  for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
346  if ($from_lines[$xi] != $to_lines[$yi]) {
347  break;
348  }
349  $this->xchanged[$xi] = $this->ychanged[$yi] = false;
350  }
351 
352  // Ignore lines which do not exist in both files.
353  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
354  $xhash[$from_lines[$xi]] = 1;
355  }
356  for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
357  $line = $to_lines[$yi];
358  if (($this->ychanged[$yi] = empty($xhash[$line]))) {
359  continue;
360  }
361  $yhash[$line] = 1;
362  $this->yv[] = $line;
363  $this->yind[] = $yi;
364  }
365  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
366  $line = $from_lines[$xi];
367  if (($this->xchanged[$xi] = empty($yhash[$line]))) {
368  continue;
369  }
370  $this->xv[] = $line;
371  $this->xind[] = $xi;
372  }
373 
374  // Find the LCS.
375  $this->_compareseq(0, count($this->xv), 0, count($this->yv));
376 
377  // Merge edits when possible.
378  $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
379  $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
380 
381  // Compute the edit operations.
382  $edits = array();
383  $xi = $yi = 0;
384  while ($xi < $n_from || $yi < $n_to) {
385  assert($yi < $n_to || $this->xchanged[$xi]);
386  assert($xi < $n_from || $this->ychanged[$yi]);
387 
388  // Skip matching "snake".
389  $copy = array();
390  while ($xi < $n_from && $yi < $n_to
391  && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
392  $copy[] = $from_lines[$xi++];
393  ++$yi;
394  }
395  if ($copy) {
396  $edits[] = new Text_Diff_Op_copy($copy);
397  }
398 
399  // Find deletes & adds.
400  $delete = array();
401  while ($xi < $n_from && $this->xchanged[$xi]) {
402  $delete[] = $from_lines[$xi++];
403  }
404 
405  $add = array();
406  while ($yi < $n_to && $this->ychanged[$yi]) {
407  $add[] = $to_lines[$yi++];
408  }
409 
410  if ($delete && $add) {
411  $edits[] = new Text_Diff_Op_change($delete, $add);
412  } elseif ($delete) {
413  $edits[] = new Text_Diff_Op_delete($delete);
414  } elseif ($add) {
415  $edits[] = new Text_Diff_Op_add($add);
416  }
417  }
418 
419  return $edits;
420  }
421 
438  function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
439  {
440  $flip = false;
441 
442  if ($xlim - $xoff > $ylim - $yoff) {
443  /* Things seems faster (I'm not sure I understand why) when the
444  * shortest sequence is in X. */
445  $flip = true;
446  list ($xoff, $xlim, $yoff, $ylim)
447  = array($yoff, $ylim, $xoff, $xlim);
448  }
449 
450  if ($flip) {
451  for ($i = $ylim - 1; $i >= $yoff; $i--) {
452  $ymatches[$this->xv[$i]][] = $i;
453  }
454  } else {
455  for ($i = $ylim - 1; $i >= $yoff; $i--) {
456  $ymatches[$this->yv[$i]][] = $i;
457  }
458  }
459 
460  $this->lcs = 0;
461  $this->seq[0]= $yoff - 1;
462  $this->in_seq = array();
463  $ymids[0] = array();
464 
465  $numer = $xlim - $xoff + $nchunks - 1;
466  $x = $xoff;
467  for ($chunk = 0; $chunk < $nchunks; $chunk++) {
468  if ($chunk > 0) {
469  for ($i = 0; $i <= $this->lcs; $i++) {
470  $ymids[$i][$chunk - 1] = $this->seq[$i];
471  }
472  }
473 
474  $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
475  for (; $x < $x1; $x++) {
476  $line = $flip ? $this->yv[$x] : $this->xv[$x];
477  if (empty($ymatches[$line])) {
478  continue;
479  }
480  $matches = $ymatches[$line];
481  foreach ($matches as $y) {
482  if (empty($this->in_seq[$y])) {
483  $k = $this->_lcsPos($y);
484  assert($k > 0);
485  $ymids[$k] = $ymids[$k - 1];
486  break;
487  }
488  }
489 
490  while (list($junk, $y) = each($matches)) {
491  if ($y > $this->seq[$k - 1]) {
492  assert($y < $this->seq[$k]);
493  /* Optimization: this is a common case: next match is
494  * just replacing previous match. */
495  $this->in_seq[$this->seq[$k]] = false;
496  $this->seq[$k] = $y;
497  $this->in_seq[$y] = 1;
498  } elseif (empty($this->in_seq[$y])) {
499  $k = $this->_lcsPos($y);
500  assert($k > 0);
501  $ymids[$k] = $ymids[$k - 1];
502  }
503  }
504  }
505  }
506 
507  $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
508  $ymid = $ymids[$this->lcs];
509  for ($n = 0; $n < $nchunks - 1; $n++) {
510  $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
511  $y1 = $ymid[$n] + 1;
512  $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
513  }
514  $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
515 
516  return array($this->lcs, $seps);
517  }
518 
519  function _lcsPos($ypos)
520  {
521  $end = $this->lcs;
522  if ($end == 0 || $ypos > $this->seq[$end]) {
523  $this->seq[++$this->lcs] = $ypos;
524  $this->in_seq[$ypos] = 1;
525  return $this->lcs;
526  }
527 
528  $beg = 1;
529  while ($beg < $end) {
530  $mid = (int)(($beg + $end) / 2);
531  if ($ypos > $this->seq[$mid]) {
532  $beg = $mid + 1;
533  } else {
534  $end = $mid;
535  }
536  }
537 
538  assert($ypos != $this->seq[$end]);
539 
540  $this->in_seq[$this->seq[$end]] = false;
541  $this->seq[$end] = $ypos;
542  $this->in_seq[$ypos] = 1;
543  return $end;
544  }
545 
558  function _compareseq ($xoff, $xlim, $yoff, $ylim)
559  {
560  /* Slide down the bottom initial diagonal. */
561  while ($xoff < $xlim && $yoff < $ylim
562  && $this->xv[$xoff] == $this->yv[$yoff]) {
563  ++$xoff;
564  ++$yoff;
565  }
566 
567  /* Slide up the top initial diagonal. */
568  while ($xlim > $xoff && $ylim > $yoff
569  && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
570  --$xlim;
571  --$ylim;
572  }
573 
574  if ($xoff == $xlim || $yoff == $ylim) {
575  $lcs = 0;
576  } else {
577  /* This is ad hoc but seems to work well. $nchunks =
578  * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks =
579  * max(2,min(8,(int)$nchunks)); */
580  $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
581  list($lcs, $seps)
582  = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
583  }
584 
585  if ($lcs == 0) {
586  /* X and Y sequences have no common subsequence: mark all
587  * changed. */
588  while ($yoff < $ylim) {
589  $this->ychanged[$this->yind[$yoff++]] = 1;
590  }
591  while ($xoff < $xlim) {
592  $this->xchanged[$this->xind[$xoff++]] = 1;
593  }
594  } else {
595  /* Use the partitions to split this problem into subproblems. */
596  reset($seps);
597  $pt1 = $seps[0];
598  while ($pt2 = next($seps)) {
599  $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
600  $pt1 = $pt2;
601  }
602  }
603  }
604 
617  function _shiftBoundaries($lines, &$changed, $other_changed)
618  {
619  $i = 0;
620  $j = 0;
621 
622  assert('count($lines) == count($changed)');
623  $len = count($lines);
624  $other_len = count($other_changed);
625 
626  while (1) {
627  /* Scan forward to find the beginning of another run of
628  * changes. Also keep track of the corresponding point in the
629  * other file.
630  *
631  * Throughout this code, $i and $j are adjusted together so that
632  * the first $i elements of $changed and the first $j elements of
633  * $other_changed both contain the same number of zeros (unchanged
634  * lines).
635  *
636  * Furthermore, $j is always kept so that $j == $other_len or
637  * $other_changed[$j] == false. */
638  while ($j < $other_len && $other_changed[$j]) {
639  $j++;
640  }
641 
642  while ($i < $len && ! $changed[$i]) {
643  assert('$j < $other_len && ! $other_changed[$j]');
644  $i++; $j++;
645  while ($j < $other_len && $other_changed[$j]) {
646  $j++;
647  }
648  }
649 
650  if ($i == $len) {
651  break;
652  }
653 
654  $start = $i;
655 
656  /* Find the end of this run of changes. */
657  while (++$i < $len && $changed[$i]) {
658  continue;
659  }
660 
661  do {
662  /* Record the length of this run of changes, so that we can
663  * later determine whether the run has grown. */
664  $runlength = $i - $start;
665 
666  /* Move the changed region back, so long as the previous
667  * unchanged line matches the last changed one. This merges
668  * with previous changed regions. */
669  while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
670  $changed[--$start] = 1;
671  $changed[--$i] = false;
672  while ($start > 0 && $changed[$start - 1]) {
673  $start--;
674  }
675  assert('$j > 0');
676  while ($other_changed[--$j]) {
677  continue;
678  }
679  assert('$j >= 0 && !$other_changed[$j]');
680  }
681 
682  /* Set CORRESPONDING to the end of the changed run, at the
683  * last point where it corresponds to a changed run in the
684  * other file. CORRESPONDING == LEN means no such point has
685  * been found. */
686  $corresponding = $j < $other_len ? $i : $len;
687 
688  /* Move the changed region forward, so long as the first
689  * changed line matches the following unchanged one. This
690  * merges with following changed regions. Do this second, so
691  * that if there are no merges, the changed region is moved
692  * forward as far as possible. */
693  while ($i < $len && $lines[$start] == $lines[$i]) {
694  $changed[$start++] = false;
695  $changed[$i++] = 1;
696  while ($i < $len && $changed[$i]) {
697  $i++;
698  }
699 
700  assert('$j < $other_len && ! $other_changed[$j]');
701  $j++;
702  if ($j < $other_len && $other_changed[$j]) {
703  $corresponding = $i;
704  while ($j < $other_len && $other_changed[$j]) {
705  $j++;
706  }
707  }
708  }
709  } while ($runlength != $i - $start);
710 
711  /* If possible, move the fully-merged run of changes back to a
712  * corresponding run in the other file. */
713  while ($corresponding < $i) {
714  $changed[--$start] = 1;
715  $changed[--$i] = 0;
716  assert('$j > 0');
717  while ($other_changed[--$j]) {
718  continue;
719  }
720  assert('$j >= 0 && !$other_changed[$j]');
721  }
722  }
723  }
724 
725 }
726 
734 
735  public $orig;
736  public $final;
737 
738  function reverse()
739  {
740  trigger_error('Abstract method', E_USER_ERROR);
741  }
742 
743  function norig()
744  {
745  return $this->orig ? count($this->orig) : 0;
746  }
747 
748  function nfinal()
749  {
750  return $this->final ? count($this->final) : 0;
751  }
752 
753 }
754 
762 
763  function Text_Diff_Op_copy($orig, $final = false)
764  {
765  if (!is_array($final)) {
766  $final = $orig;
767  }
768  $this->orig = $orig;
769  $this->final = $final;
770  }
771 
772  function reverse()
773  {
774  return $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
775  }
776 
777 }
778 
786 
787  function Text_Diff_Op_delete($lines)
788  {
789  $this->orig = $lines;
790  $this->final = false;
791  }
792 
793  function reverse()
794  {
795  return $reverse = new Text_Diff_Op_add($this->orig);
796  }
797 
798 }
799 
807 
808  function Text_Diff_Op_add($lines)
809  {
810  $this->final = $lines;
811  $this->orig = false;
812  }
813 
814  function reverse()
815  {
816  return $reverse = new Text_Diff_Op_delete($this->final);
817  }
818 
819 }
820 
828 
830  {
831  $this->orig = $orig;
832  $this->final = $final;
833  }
834 
835  function reverse()
836  {
837  return $reverse = new Text_Diff_Op_change($this->final, $this->orig);
838  }
839 
840 }