• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

Services/XHTMLValidator/validator/Text_Diff/Diff.php

Go to the documentation of this file.
00001 <?php
00016 class Text_Diff {
00017 
00023     public $_edits;
00024 
00032     function Text_Diff($from_lines, $to_lines)
00033     {
00034         array_walk($from_lines, array($this, '_trimNewlines'));
00035         array_walk($to_lines, array($this, '_trimNewlines'));
00036 
00037         if (extension_loaded('xdiff')) {
00038             $engine = new Text_Diff_Engine_xdiff();
00039         } else {
00040             $engine = new Text_Diff_Engine_native();
00041         }
00042 
00043         $this->_edits = $engine->diff($from_lines, $to_lines);
00044     }
00045 
00049     function getDiff()
00050     {
00051         return $this->_edits;
00052     }
00053 
00068     function reverse()
00069     {
00070         $rev = clone($obj);
00071 
00072         $rev->_edits = array();
00073         foreach ($this->_edits as $edit) {
00074             $rev->_edits[] = $edit->reverse();
00075         }
00076         return $rev;
00077     }
00078 
00084     function isEmpty()
00085     {
00086         foreach ($this->_edits as $edit) {
00087             if (!$edit instanceof Text_Diff_Op_copy) {
00088                 return false;
00089             }
00090         }
00091         return true;
00092     }
00093 
00101     function lcs()
00102     {
00103         $lcs = 0;
00104         foreach ($this->_edits as $edit) {
00105             if ($edit instanceof Text_Diff_Op_copy) {
00106                 $lcs += count($edit->orig);
00107             }
00108         }
00109         return $lcs;
00110     }
00111 
00119     function getOriginal()
00120     {
00121         $lines = array();
00122         foreach ($this->_edits as $edit) {
00123             if ($edit->orig) {
00124                 array_splice($lines, count($lines), 0, $edit->orig);
00125             }
00126         }
00127         return $lines;
00128     }
00129 
00137     function getFinal()
00138     {
00139         $lines = array();
00140         foreach ($this->_edits as $edit) {
00141             if ($edit->final) {
00142                 array_splice($lines, count($lines), 0, $edit->final);
00143             }
00144         }
00145         return $lines;
00146     }
00147 
00155     function _trimNewlines(&$line, $key)
00156     {
00157         $line = str_replace(array("\n", "\r"), '', $line);
00158     }
00159 
00165     function _check($from_lines, $to_lines)
00166     {
00167         if (serialize($from_lines) != serialize($this->getOriginal())) {
00168             trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
00169         }
00170         if (serialize($to_lines) != serialize($this->getFinal())) {
00171             trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
00172         }
00173 
00174         $rev = $this->reverse();
00175         if (serialize($to_lines) != serialize($rev->getOriginal())) {
00176             trigger_error("Reversed original doesn't match", E_USER_ERROR);
00177         }
00178         if (serialize($from_lines) != serialize($rev->getFinal())) {
00179             trigger_error("Reversed final doesn't match", E_USER_ERROR);
00180         }
00181 
00182         $prevtype = null;
00183         foreach ($this->_edits as $edit) {
00184             if ($prevtype == get_class($edit)) {
00185                 trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
00186             }
00187             $prevtype = get_class($edit);
00188         }
00189 
00190         return true;
00191     }
00192 
00193 }
00194 
00201 class Text_MappedDiff extends Text_Diff {
00202 
00219     function Text_MappedDiff($from_lines, $to_lines,
00220                              $mapped_from_lines, $mapped_to_lines)
00221     {
00222         assert(count($from_lines) == count($mapped_from_lines));
00223         assert(count($to_lines) == count($mapped_to_lines));
00224 
00225         parent::Text_Diff($mapped_from_lines, $mapped_to_lines);
00226 
00227         $xi = $yi = 0;
00228         for ($i = 0; $i < count($this->_edits); $i++) {
00229             $orig = &$this->_edits[$i]->orig;
00230             if (is_array($orig)) {
00231                 $orig = array_slice($from_lines, $xi, count($orig));
00232                 $xi += count($orig);
00233             }
00234 
00235             $final = &$this->_edits[$i]->final;
00236             if (is_array($final)) {
00237                 $final = array_slice($to_lines, $yi, count($final));
00238                 $yi += count($final);
00239             }
00240         }
00241     }
00242 
00243 }
00244 
00255 class Text_Diff_Engine_xdiff {
00256 
00257     function diff($from_lines, $to_lines)
00258     {
00259         /* Convert the two input arrays into strings for xdiff processing. */
00260         $from_string = implode("\n", $from_lines);
00261         $to_string = implode("\n", $to_lines);
00262 
00263         /* Diff the two strings and convert the result to an array. */
00264         $diff = xdiff_string_diff($from_string, $to_string, count($to_lines));
00265         $diff = explode("\n", $diff);
00266 
00267         /* Walk through the diff one line at a time.  We build the $edits
00268          * array of diff operations by reading the first character of the
00269          * xdiff output (which is in the "unified diff" format).
00270          *
00271          * Note that we don't have enough information to detect "changed"
00272          * lines using this approach, so we can't add Text_Diff_Op_changed
00273          * instances to the $edits array.  The result is still perfectly
00274          * valid, albeit a little less descriptive and efficient. */
00275         $edits = array();
00276         foreach ($diff as $line) {
00277             switch ($line[0]) {
00278             case ' ':
00279                 $edits[] = new Text_Diff_Op_copy(array(substr($line, 1)));
00280                 break;
00281 
00282             case '+':
00283                 $edits[] = new Text_Diff_Op_add(array(substr($line, 1)));
00284                 break;
00285 
00286             case '-':
00287                 $edits[] = new Text_Diff_Op_delete(array(substr($line, 1)));
00288                 break;
00289             }
00290         }
00291 
00292         return $edits;
00293     }
00294 
00295 }
00296 
00321 class Text_Diff_Engine_native {
00322 
00323     function diff($from_lines, $to_lines)
00324     {
00325         $n_from = count($from_lines);
00326         $n_to = count($to_lines);
00327 
00328         $this->xchanged = $this->ychanged = array();
00329         $this->xv = $this->yv = array();
00330         $this->xind = $this->yind = array();
00331         unset($this->seq);
00332         unset($this->in_seq);
00333         unset($this->lcs);
00334 
00335         // Skip leading common lines.
00336         for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
00337             if ($from_lines[$skip] != $to_lines[$skip]) {
00338                 break;
00339             }
00340             $this->xchanged[$skip] = $this->ychanged[$skip] = false;
00341         }
00342 
00343         // Skip trailing common lines.
00344         $xi = $n_from; $yi = $n_to;
00345         for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
00346             if ($from_lines[$xi] != $to_lines[$yi]) {
00347                 break;
00348             }
00349             $this->xchanged[$xi] = $this->ychanged[$yi] = false;
00350         }
00351 
00352         // Ignore lines which do not exist in both files.
00353         for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
00354             $xhash[$from_lines[$xi]] = 1;
00355         }
00356         for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
00357             $line = $to_lines[$yi];
00358             if (($this->ychanged[$yi] = empty($xhash[$line]))) {
00359                 continue;
00360             }
00361             $yhash[$line] = 1;
00362             $this->yv[] = $line;
00363             $this->yind[] = $yi;
00364         }
00365         for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
00366             $line = $from_lines[$xi];
00367             if (($this->xchanged[$xi] = empty($yhash[$line]))) {
00368                 continue;
00369             }
00370             $this->xv[] = $line;
00371             $this->xind[] = $xi;
00372         }
00373 
00374         // Find the LCS.
00375         $this->_compareseq(0, count($this->xv), 0, count($this->yv));
00376 
00377         // Merge edits when possible.
00378         $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
00379         $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
00380 
00381         // Compute the edit operations.
00382         $edits = array();
00383         $xi = $yi = 0;
00384         while ($xi < $n_from || $yi < $n_to) {
00385             assert($yi < $n_to || $this->xchanged[$xi]);
00386             assert($xi < $n_from || $this->ychanged[$yi]);
00387 
00388             // Skip matching "snake".
00389             $copy = array();
00390             while ($xi < $n_from && $yi < $n_to
00391                    && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
00392                 $copy[] = $from_lines[$xi++];
00393                 ++$yi;
00394             }
00395             if ($copy) {
00396                 $edits[] = new Text_Diff_Op_copy($copy);
00397             }
00398 
00399             // Find deletes & adds.
00400             $delete = array();
00401             while ($xi < $n_from && $this->xchanged[$xi]) {
00402                 $delete[] = $from_lines[$xi++];
00403             }
00404 
00405             $add = array();
00406             while ($yi < $n_to && $this->ychanged[$yi]) {
00407                 $add[] = $to_lines[$yi++];
00408             }
00409 
00410             if ($delete && $add) {
00411                 $edits[] = new Text_Diff_Op_change($delete, $add);
00412             } elseif ($delete) {
00413                 $edits[] = new Text_Diff_Op_delete($delete);
00414             } elseif ($add) {
00415                 $edits[] = new Text_Diff_Op_add($add);
00416             }
00417         }
00418 
00419         return $edits;
00420     }
00421 
00438     function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
00439     {
00440         $flip = false;
00441 
00442         if ($xlim - $xoff > $ylim - $yoff) {
00443             /* Things seems faster (I'm not sure I understand why) when the
00444              * shortest sequence is in X. */
00445             $flip = true;
00446             list ($xoff, $xlim, $yoff, $ylim)
00447                 = array($yoff, $ylim, $xoff, $xlim);
00448         }
00449 
00450         if ($flip) {
00451             for ($i = $ylim - 1; $i >= $yoff; $i--) {
00452                 $ymatches[$this->xv[$i]][] = $i;
00453             }
00454         } else {
00455             for ($i = $ylim - 1; $i >= $yoff; $i--) {
00456                 $ymatches[$this->yv[$i]][] = $i;
00457             }
00458         }
00459 
00460         $this->lcs = 0;
00461         $this->seq[0]= $yoff - 1;
00462         $this->in_seq = array();
00463         $ymids[0] = array();
00464 
00465         $numer = $xlim - $xoff + $nchunks - 1;
00466         $x = $xoff;
00467         for ($chunk = 0; $chunk < $nchunks; $chunk++) {
00468             if ($chunk > 0) {
00469                 for ($i = 0; $i <= $this->lcs; $i++) {
00470                     $ymids[$i][$chunk - 1] = $this->seq[$i];
00471                 }
00472             }
00473 
00474             $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
00475             for (; $x < $x1; $x++) {
00476                 $line = $flip ? $this->yv[$x] : $this->xv[$x];
00477                 if (empty($ymatches[$line])) {
00478                     continue;
00479                 }
00480                 $matches = $ymatches[$line];
00481                 foreach ($matches as $y) {
00482                     if (empty($this->in_seq[$y])) {
00483                         $k = $this->_lcsPos($y);
00484                         assert($k > 0);
00485                         $ymids[$k] = $ymids[$k - 1];
00486                         break;
00487                     }
00488                 }
00489 
00490                 while (list($junk, $y) = each($matches)) {
00491                     if ($y > $this->seq[$k - 1]) {
00492                         assert($y < $this->seq[$k]);
00493                         /* Optimization: this is a common case: next match is
00494                          * just replacing previous match. */
00495                         $this->in_seq[$this->seq[$k]] = false;
00496                         $this->seq[$k] = $y;
00497                         $this->in_seq[$y] = 1;
00498                     } elseif (empty($this->in_seq[$y])) {
00499                         $k = $this->_lcsPos($y);
00500                         assert($k > 0);
00501                         $ymids[$k] = $ymids[$k - 1];
00502                     }
00503                 }
00504             }
00505         }
00506 
00507         $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
00508         $ymid = $ymids[$this->lcs];
00509         for ($n = 0; $n < $nchunks - 1; $n++) {
00510             $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
00511             $y1 = $ymid[$n] + 1;
00512             $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
00513         }
00514         $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
00515 
00516         return array($this->lcs, $seps);
00517     }
00518 
00519     function _lcsPos($ypos)
00520     {
00521         $end = $this->lcs;
00522         if ($end == 0 || $ypos > $this->seq[$end]) {
00523             $this->seq[++$this->lcs] = $ypos;
00524             $this->in_seq[$ypos] = 1;
00525             return $this->lcs;
00526         }
00527 
00528         $beg = 1;
00529         while ($beg < $end) {
00530             $mid = (int)(($beg + $end) / 2);
00531             if ($ypos > $this->seq[$mid]) {
00532                 $beg = $mid + 1;
00533             } else {
00534                 $end = $mid;
00535             }
00536         }
00537 
00538         assert($ypos != $this->seq[$end]);
00539 
00540         $this->in_seq[$this->seq[$end]] = false;
00541         $this->seq[$end] = $ypos;
00542         $this->in_seq[$ypos] = 1;
00543         return $end;
00544     }
00545 
00558     function _compareseq ($xoff, $xlim, $yoff, $ylim)
00559     {
00560         /* Slide down the bottom initial diagonal. */
00561         while ($xoff < $xlim && $yoff < $ylim
00562                && $this->xv[$xoff] == $this->yv[$yoff]) {
00563             ++$xoff;
00564             ++$yoff;
00565         }
00566 
00567         /* Slide up the top initial diagonal. */
00568         while ($xlim > $xoff && $ylim > $yoff
00569                && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
00570             --$xlim;
00571             --$ylim;
00572         }
00573 
00574         if ($xoff == $xlim || $yoff == $ylim) {
00575             $lcs = 0;
00576         } else {
00577             /* This is ad hoc but seems to work well.  $nchunks =
00578              * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks =
00579              * max(2,min(8,(int)$nchunks)); */
00580             $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
00581             list($lcs, $seps)
00582                 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
00583         }
00584 
00585         if ($lcs == 0) {
00586             /* X and Y sequences have no common subsequence: mark all
00587              * changed. */
00588             while ($yoff < $ylim) {
00589                 $this->ychanged[$this->yind[$yoff++]] = 1;
00590             }
00591             while ($xoff < $xlim) {
00592                 $this->xchanged[$this->xind[$xoff++]] = 1;
00593             }
00594         } else {
00595             /* Use the partitions to split this problem into subproblems. */
00596             reset($seps);
00597             $pt1 = $seps[0];
00598             while ($pt2 = next($seps)) {
00599                 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
00600                 $pt1 = $pt2;
00601             }
00602         }
00603     }
00604 
00617     function _shiftBoundaries($lines, &$changed, $other_changed)
00618     {
00619         $i = 0;
00620         $j = 0;
00621 
00622         assert('count($lines) == count($changed)');
00623         $len = count($lines);
00624         $other_len = count($other_changed);
00625 
00626         while (1) {
00627             /* Scan forward to find the beginning of another run of
00628              * changes. Also keep track of the corresponding point in the
00629              * other file.
00630              *
00631              * Throughout this code, $i and $j are adjusted together so that
00632              * the first $i elements of $changed and the first $j elements of
00633              * $other_changed both contain the same number of zeros (unchanged
00634              * lines).
00635              *
00636              * Furthermore, $j is always kept so that $j == $other_len or
00637              * $other_changed[$j] == false. */
00638             while ($j < $other_len && $other_changed[$j]) {
00639                 $j++;
00640             }
00641 
00642             while ($i < $len && ! $changed[$i]) {
00643                 assert('$j < $other_len && ! $other_changed[$j]');
00644                 $i++; $j++;
00645                 while ($j < $other_len && $other_changed[$j]) {
00646                     $j++;
00647                 }
00648             }
00649 
00650             if ($i == $len) {
00651                 break;
00652             }
00653 
00654             $start = $i;
00655 
00656             /* Find the end of this run of changes. */
00657             while (++$i < $len && $changed[$i]) {
00658                 continue;
00659             }
00660 
00661             do {
00662                 /* Record the length of this run of changes, so that we can
00663                  * later determine whether the run has grown. */
00664                 $runlength = $i - $start;
00665 
00666                 /* Move the changed region back, so long as the previous
00667                  * unchanged line matches the last changed one.  This merges
00668                  * with previous changed regions. */
00669                 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
00670                     $changed[--$start] = 1;
00671                     $changed[--$i] = false;
00672                     while ($start > 0 && $changed[$start - 1]) {
00673                         $start--;
00674                     }
00675                     assert('$j > 0');
00676                     while ($other_changed[--$j]) {
00677                         continue;
00678                     }
00679                     assert('$j >= 0 && !$other_changed[$j]');
00680                 }
00681 
00682                 /* Set CORRESPONDING to the end of the changed run, at the
00683                  * last point where it corresponds to a changed run in the
00684                  * other file. CORRESPONDING == LEN means no such point has
00685                  * been found. */
00686                 $corresponding = $j < $other_len ? $i : $len;
00687 
00688                 /* Move the changed region forward, so long as the first
00689                  * changed line matches the following unchanged one.  This
00690                  * merges with following changed regions.  Do this second, so
00691                  * that if there are no merges, the changed region is moved
00692                  * forward as far as possible. */
00693                 while ($i < $len && $lines[$start] == $lines[$i]) {
00694                     $changed[$start++] = false;
00695                     $changed[$i++] = 1;
00696                     while ($i < $len && $changed[$i]) {
00697                         $i++;
00698                     }
00699 
00700                     assert('$j < $other_len && ! $other_changed[$j]');
00701                     $j++;
00702                     if ($j < $other_len && $other_changed[$j]) {
00703                         $corresponding = $i;
00704                         while ($j < $other_len && $other_changed[$j]) {
00705                             $j++;
00706                         }
00707                     }
00708                 }
00709             } while ($runlength != $i - $start);
00710 
00711             /* If possible, move the fully-merged run of changes back to a
00712              * corresponding run in the other file. */
00713             while ($corresponding < $i) {
00714                 $changed[--$start] = 1;
00715                 $changed[--$i] = 0;
00716                 assert('$j > 0');
00717                 while ($other_changed[--$j]) {
00718                     continue;
00719                 }
00720                 assert('$j >= 0 && !$other_changed[$j]');
00721             }
00722         }
00723     }
00724 
00725 }
00726 
00733 class Text_Diff_Op {
00734 
00735     public $orig;
00736     public $final;
00737 
00738     function reverse()
00739     {
00740         trigger_error('Abstract method', E_USER_ERROR);
00741     }
00742 
00743     function norig()
00744     {
00745         return $this->orig ? count($this->orig) : 0;
00746     }
00747 
00748     function nfinal()
00749     {
00750         return $this->final ? count($this->final) : 0;
00751     }
00752 
00753 }
00754 
00761 class Text_Diff_Op_copy extends Text_Diff_Op {
00762 
00763     function Text_Diff_Op_copy($orig, $final = false)
00764     {
00765         if (!is_array($final)) {
00766             $final = $orig;
00767         }
00768         $this->orig = $orig;
00769         $this->final = $final;
00770     }
00771 
00772     function reverse()
00773     {
00774         return $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
00775     }
00776 
00777 }
00778 
00785 class Text_Diff_Op_delete extends Text_Diff_Op {
00786 
00787     function Text_Diff_Op_delete($lines)
00788     {
00789         $this->orig = $lines;
00790         $this->final = false;
00791     }
00792 
00793     function reverse()
00794     {
00795         return $reverse = new Text_Diff_Op_add($this->orig);
00796     }
00797 
00798 }
00799 
00806 class Text_Diff_Op_add extends Text_Diff_Op {
00807 
00808     function Text_Diff_Op_add($lines)
00809     {
00810         $this->final = $lines;
00811         $this->orig = false;
00812     }
00813 
00814     function reverse()
00815     {
00816         return $reverse = new Text_Diff_Op_delete($this->final);
00817     }
00818 
00819 }
00820 
00827 class Text_Diff_Op_change extends Text_Diff_Op {
00828 
00829     function Text_Diff_Op_change($orig, $final)
00830     {
00831         $this->orig = $orig;
00832         $this->final = $final;
00833     }
00834 
00835     function reverse()
00836     {
00837         return $reverse = new Text_Diff_Op_change($this->final, $this->orig);
00838     }
00839 
00840 }

Generated on Fri Dec 13 2013 13:52:12 for ILIAS Release_3_7_x_branch .rev 46817 by  doxygen 1.7.1