00001 <?php
00016 class Text_Diff {
00017
00023 public $_edits;
00024
00032 function Text_Diff($from_lines, $to_lines)
00033 {
00034 array_walk($from_lines, array($this, '_trimNewlines'));
00035 array_walk($to_lines, array($this, '_trimNewlines'));
00036
00037 if (extension_loaded('xdiff')) {
00038 $engine = new Text_Diff_Engine_xdiff();
00039 } else {
00040 $engine = new Text_Diff_Engine_native();
00041 }
00042
00043 $this->_edits = $engine->diff($from_lines, $to_lines);
00044 }
00045
00049 function getDiff()
00050 {
00051 return $this->_edits;
00052 }
00053
00068 function reverse()
00069 {
00070 $rev = clone($obj);
00071
00072 $rev->_edits = array();
00073 foreach ($this->_edits as $edit) {
00074 $rev->_edits[] = $edit->reverse();
00075 }
00076 return $rev;
00077 }
00078
00084 function isEmpty()
00085 {
00086 foreach ($this->_edits as $edit) {
00087 if (!$edit instanceof Text_Diff_Op_copy) {
00088 return false;
00089 }
00090 }
00091 return true;
00092 }
00093
00101 function lcs()
00102 {
00103 $lcs = 0;
00104 foreach ($this->_edits as $edit) {
00105 if ($edit instanceof Text_Diff_Op_copy) {
00106 $lcs += count($edit->orig);
00107 }
00108 }
00109 return $lcs;
00110 }
00111
00119 function getOriginal()
00120 {
00121 $lines = array();
00122 foreach ($this->_edits as $edit) {
00123 if ($edit->orig) {
00124 array_splice($lines, count($lines), 0, $edit->orig);
00125 }
00126 }
00127 return $lines;
00128 }
00129
00137 function getFinal()
00138 {
00139 $lines = array();
00140 foreach ($this->_edits as $edit) {
00141 if ($edit->final) {
00142 array_splice($lines, count($lines), 0, $edit->final);
00143 }
00144 }
00145 return $lines;
00146 }
00147
00155 function _trimNewlines(&$line, $key)
00156 {
00157 $line = str_replace(array("\n", "\r"), '', $line);
00158 }
00159
00165 function _check($from_lines, $to_lines)
00166 {
00167 if (serialize($from_lines) != serialize($this->getOriginal())) {
00168 trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
00169 }
00170 if (serialize($to_lines) != serialize($this->getFinal())) {
00171 trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
00172 }
00173
00174 $rev = $this->reverse();
00175 if (serialize($to_lines) != serialize($rev->getOriginal())) {
00176 trigger_error("Reversed original doesn't match", E_USER_ERROR);
00177 }
00178 if (serialize($from_lines) != serialize($rev->getFinal())) {
00179 trigger_error("Reversed final doesn't match", E_USER_ERROR);
00180 }
00181
00182 $prevtype = null;
00183 foreach ($this->_edits as $edit) {
00184 if ($prevtype == get_class($edit)) {
00185 trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
00186 }
00187 $prevtype = get_class($edit);
00188 }
00189
00190 return true;
00191 }
00192
00193 }
00194
00201 class Text_MappedDiff extends Text_Diff {
00202
00219 function Text_MappedDiff($from_lines, $to_lines,
00220 $mapped_from_lines, $mapped_to_lines)
00221 {
00222 assert(count($from_lines) == count($mapped_from_lines));
00223 assert(count($to_lines) == count($mapped_to_lines));
00224
00225 parent::Text_Diff($mapped_from_lines, $mapped_to_lines);
00226
00227 $xi = $yi = 0;
00228 for ($i = 0; $i < count($this->_edits); $i++) {
00229 $orig = &$this->_edits[$i]->orig;
00230 if (is_array($orig)) {
00231 $orig = array_slice($from_lines, $xi, count($orig));
00232 $xi += count($orig);
00233 }
00234
00235 $final = &$this->_edits[$i]->final;
00236 if (is_array($final)) {
00237 $final = array_slice($to_lines, $yi, count($final));
00238 $yi += count($final);
00239 }
00240 }
00241 }
00242
00243 }
00244
00255 class Text_Diff_Engine_xdiff {
00256
00257 function diff($from_lines, $to_lines)
00258 {
00259
00260 $from_string = implode("\n", $from_lines);
00261 $to_string = implode("\n", $to_lines);
00262
00263
00264 $diff = xdiff_string_diff($from_string, $to_string, count($to_lines));
00265 $diff = explode("\n", $diff);
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275 $edits = array();
00276 foreach ($diff as $line) {
00277 switch ($line[0]) {
00278 case ' ':
00279 $edits[] = new Text_Diff_Op_copy(array(substr($line, 1)));
00280 break;
00281
00282 case '+':
00283 $edits[] = new Text_Diff_Op_add(array(substr($line, 1)));
00284 break;
00285
00286 case '-':
00287 $edits[] = new Text_Diff_Op_delete(array(substr($line, 1)));
00288 break;
00289 }
00290 }
00291
00292 return $edits;
00293 }
00294
00295 }
00296
00321 class Text_Diff_Engine_native {
00322
00323 function diff($from_lines, $to_lines)
00324 {
00325 $n_from = count($from_lines);
00326 $n_to = count($to_lines);
00327
00328 $this->xchanged = $this->ychanged = array();
00329 $this->xv = $this->yv = array();
00330 $this->xind = $this->yind = array();
00331 unset($this->seq);
00332 unset($this->in_seq);
00333 unset($this->lcs);
00334
00335
00336 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
00337 if ($from_lines[$skip] != $to_lines[$skip]) {
00338 break;
00339 }
00340 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
00341 }
00342
00343
00344 $xi = $n_from; $yi = $n_to;
00345 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
00346 if ($from_lines[$xi] != $to_lines[$yi]) {
00347 break;
00348 }
00349 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
00350 }
00351
00352
00353 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
00354 $xhash[$from_lines[$xi]] = 1;
00355 }
00356 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
00357 $line = $to_lines[$yi];
00358 if (($this->ychanged[$yi] = empty($xhash[$line]))) {
00359 continue;
00360 }
00361 $yhash[$line] = 1;
00362 $this->yv[] = $line;
00363 $this->yind[] = $yi;
00364 }
00365 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
00366 $line = $from_lines[$xi];
00367 if (($this->xchanged[$xi] = empty($yhash[$line]))) {
00368 continue;
00369 }
00370 $this->xv[] = $line;
00371 $this->xind[] = $xi;
00372 }
00373
00374
00375 $this->_compareseq(0, count($this->xv), 0, count($this->yv));
00376
00377
00378 $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
00379 $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
00380
00381
00382 $edits = array();
00383 $xi = $yi = 0;
00384 while ($xi < $n_from || $yi < $n_to) {
00385 assert($yi < $n_to || $this->xchanged[$xi]);
00386 assert($xi < $n_from || $this->ychanged[$yi]);
00387
00388
00389 $copy = array();
00390 while ($xi < $n_from && $yi < $n_to
00391 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
00392 $copy[] = $from_lines[$xi++];
00393 ++$yi;
00394 }
00395 if ($copy) {
00396 $edits[] = new Text_Diff_Op_copy($copy);
00397 }
00398
00399
00400 $delete = array();
00401 while ($xi < $n_from && $this->xchanged[$xi]) {
00402 $delete[] = $from_lines[$xi++];
00403 }
00404
00405 $add = array();
00406 while ($yi < $n_to && $this->ychanged[$yi]) {
00407 $add[] = $to_lines[$yi++];
00408 }
00409
00410 if ($delete && $add) {
00411 $edits[] = new Text_Diff_Op_change($delete, $add);
00412 } elseif ($delete) {
00413 $edits[] = new Text_Diff_Op_delete($delete);
00414 } elseif ($add) {
00415 $edits[] = new Text_Diff_Op_add($add);
00416 }
00417 }
00418
00419 return $edits;
00420 }
00421
00438 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
00439 {
00440 $flip = false;
00441
00442 if ($xlim - $xoff > $ylim - $yoff) {
00443
00444
00445 $flip = true;
00446 list ($xoff, $xlim, $yoff, $ylim)
00447 = array($yoff, $ylim, $xoff, $xlim);
00448 }
00449
00450 if ($flip) {
00451 for ($i = $ylim - 1; $i >= $yoff; $i--) {
00452 $ymatches[$this->xv[$i]][] = $i;
00453 }
00454 } else {
00455 for ($i = $ylim - 1; $i >= $yoff; $i--) {
00456 $ymatches[$this->yv[$i]][] = $i;
00457 }
00458 }
00459
00460 $this->lcs = 0;
00461 $this->seq[0]= $yoff - 1;
00462 $this->in_seq = array();
00463 $ymids[0] = array();
00464
00465 $numer = $xlim - $xoff + $nchunks - 1;
00466 $x = $xoff;
00467 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
00468 if ($chunk > 0) {
00469 for ($i = 0; $i <= $this->lcs; $i++) {
00470 $ymids[$i][$chunk - 1] = $this->seq[$i];
00471 }
00472 }
00473
00474 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
00475 for (; $x < $x1; $x++) {
00476 $line = $flip ? $this->yv[$x] : $this->xv[$x];
00477 if (empty($ymatches[$line])) {
00478 continue;
00479 }
00480 $matches = $ymatches[$line];
00481 foreach ($matches as $y) {
00482 if (empty($this->in_seq[$y])) {
00483 $k = $this->_lcsPos($y);
00484 assert($k > 0);
00485 $ymids[$k] = $ymids[$k - 1];
00486 break;
00487 }
00488 }
00489
00490 while (list($junk, $y) = each($matches)) {
00491 if ($y > $this->seq[$k - 1]) {
00492 assert($y < $this->seq[$k]);
00493
00494
00495 $this->in_seq[$this->seq[$k]] = false;
00496 $this->seq[$k] = $y;
00497 $this->in_seq[$y] = 1;
00498 } elseif (empty($this->in_seq[$y])) {
00499 $k = $this->_lcsPos($y);
00500 assert($k > 0);
00501 $ymids[$k] = $ymids[$k - 1];
00502 }
00503 }
00504 }
00505 }
00506
00507 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
00508 $ymid = $ymids[$this->lcs];
00509 for ($n = 0; $n < $nchunks - 1; $n++) {
00510 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
00511 $y1 = $ymid[$n] + 1;
00512 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
00513 }
00514 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
00515
00516 return array($this->lcs, $seps);
00517 }
00518
00519 function _lcsPos($ypos)
00520 {
00521 $end = $this->lcs;
00522 if ($end == 0 || $ypos > $this->seq[$end]) {
00523 $this->seq[++$this->lcs] = $ypos;
00524 $this->in_seq[$ypos] = 1;
00525 return $this->lcs;
00526 }
00527
00528 $beg = 1;
00529 while ($beg < $end) {
00530 $mid = (int)(($beg + $end) / 2);
00531 if ($ypos > $this->seq[$mid]) {
00532 $beg = $mid + 1;
00533 } else {
00534 $end = $mid;
00535 }
00536 }
00537
00538 assert($ypos != $this->seq[$end]);
00539
00540 $this->in_seq[$this->seq[$end]] = false;
00541 $this->seq[$end] = $ypos;
00542 $this->in_seq[$ypos] = 1;
00543 return $end;
00544 }
00545
00558 function _compareseq ($xoff, $xlim, $yoff, $ylim)
00559 {
00560
00561 while ($xoff < $xlim && $yoff < $ylim
00562 && $this->xv[$xoff] == $this->yv[$yoff]) {
00563 ++$xoff;
00564 ++$yoff;
00565 }
00566
00567
00568 while ($xlim > $xoff && $ylim > $yoff
00569 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
00570 --$xlim;
00571 --$ylim;
00572 }
00573
00574 if ($xoff == $xlim || $yoff == $ylim) {
00575 $lcs = 0;
00576 } else {
00577
00578
00579
00580 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
00581 list($lcs, $seps)
00582 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
00583 }
00584
00585 if ($lcs == 0) {
00586
00587
00588 while ($yoff < $ylim) {
00589 $this->ychanged[$this->yind[$yoff++]] = 1;
00590 }
00591 while ($xoff < $xlim) {
00592 $this->xchanged[$this->xind[$xoff++]] = 1;
00593 }
00594 } else {
00595
00596 reset($seps);
00597 $pt1 = $seps[0];
00598 while ($pt2 = next($seps)) {
00599 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
00600 $pt1 = $pt2;
00601 }
00602 }
00603 }
00604
00617 function _shiftBoundaries($lines, &$changed, $other_changed)
00618 {
00619 $i = 0;
00620 $j = 0;
00621
00622 assert('count($lines) == count($changed)');
00623 $len = count($lines);
00624 $other_len = count($other_changed);
00625
00626 while (1) {
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638 while ($j < $other_len && $other_changed[$j]) {
00639 $j++;
00640 }
00641
00642 while ($i < $len && ! $changed[$i]) {
00643 assert('$j < $other_len && ! $other_changed[$j]');
00644 $i++; $j++;
00645 while ($j < $other_len && $other_changed[$j]) {
00646 $j++;
00647 }
00648 }
00649
00650 if ($i == $len) {
00651 break;
00652 }
00653
00654 $start = $i;
00655
00656
00657 while (++$i < $len && $changed[$i]) {
00658 continue;
00659 }
00660
00661 do {
00662
00663
00664 $runlength = $i - $start;
00665
00666
00667
00668
00669 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
00670 $changed[--$start] = 1;
00671 $changed[--$i] = false;
00672 while ($start > 0 && $changed[$start - 1]) {
00673 $start--;
00674 }
00675 assert('$j > 0');
00676 while ($other_changed[--$j]) {
00677 continue;
00678 }
00679 assert('$j >= 0 && !$other_changed[$j]');
00680 }
00681
00682
00683
00684
00685
00686 $corresponding = $j < $other_len ? $i : $len;
00687
00688
00689
00690
00691
00692
00693 while ($i < $len && $lines[$start] == $lines[$i]) {
00694 $changed[$start++] = false;
00695 $changed[$i++] = 1;
00696 while ($i < $len && $changed[$i]) {
00697 $i++;
00698 }
00699
00700 assert('$j < $other_len && ! $other_changed[$j]');
00701 $j++;
00702 if ($j < $other_len && $other_changed[$j]) {
00703 $corresponding = $i;
00704 while ($j < $other_len && $other_changed[$j]) {
00705 $j++;
00706 }
00707 }
00708 }
00709 } while ($runlength != $i - $start);
00710
00711
00712
00713 while ($corresponding < $i) {
00714 $changed[--$start] = 1;
00715 $changed[--$i] = 0;
00716 assert('$j > 0');
00717 while ($other_changed[--$j]) {
00718 continue;
00719 }
00720 assert('$j >= 0 && !$other_changed[$j]');
00721 }
00722 }
00723 }
00724
00725 }
00726
00733 class Text_Diff_Op {
00734
00735 public $orig;
00736 public $final;
00737
00738 function reverse()
00739 {
00740 trigger_error('Abstract method', E_USER_ERROR);
00741 }
00742
00743 function norig()
00744 {
00745 return $this->orig ? count($this->orig) : 0;
00746 }
00747
00748 function nfinal()
00749 {
00750 return $this->final ? count($this->final) : 0;
00751 }
00752
00753 }
00754
00761 class Text_Diff_Op_copy extends Text_Diff_Op {
00762
00763 function Text_Diff_Op_copy($orig, $final = false)
00764 {
00765 if (!is_array($final)) {
00766 $final = $orig;
00767 }
00768 $this->orig = $orig;
00769 $this->final = $final;
00770 }
00771
00772 function reverse()
00773 {
00774 return $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
00775 }
00776
00777 }
00778
00785 class Text_Diff_Op_delete extends Text_Diff_Op {
00786
00787 function Text_Diff_Op_delete($lines)
00788 {
00789 $this->orig = $lines;
00790 $this->final = false;
00791 }
00792
00793 function reverse()
00794 {
00795 return $reverse = new Text_Diff_Op_add($this->orig);
00796 }
00797
00798 }
00799
00806 class Text_Diff_Op_add extends Text_Diff_Op {
00807
00808 function Text_Diff_Op_add($lines)
00809 {
00810 $this->final = $lines;
00811 $this->orig = false;
00812 }
00813
00814 function reverse()
00815 {
00816 return $reverse = new Text_Diff_Op_delete($this->final);
00817 }
00818
00819 }
00820
00827 class Text_Diff_Op_change extends Text_Diff_Op {
00828
00829 function Text_Diff_Op_change($orig, $final)
00830 {
00831 $this->orig = $orig;
00832 $this->final = $final;
00833 }
00834
00835 function reverse()
00836 {
00837 return $reverse = new Text_Diff_Op_change($this->final, $this->orig);
00838 }
00839
00840 }