ILIAS  release_5-2 Revision v5.2.25-18-g3f80b828510
Text_Diff_Engine_native Class Reference
+ Collaboration diagram for Text_Diff_Engine_native:

Public Member Functions

 diff ($from_lines, $to_lines)
 
 _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
 Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments. More...
 
 _lcsPos ($ypos)
 
 _compareseq ($xoff, $xlim, $yoff, $ylim)
 Finds LCS of two sequences. More...
 
 _shiftBoundaries ($lines, &$changed, $other_changed)
 Adjusts inserts/deletes of identical lines to join changes as much as possible. More...
 

Detailed Description

Definition at line 321 of file Diff.php.

Member Function Documentation

◆ _compareseq()

Text_Diff_Engine_native::_compareseq (   $xoff,
  $xlim,
  $yoff,
  $ylim 
)

Finds LCS of two sequences.

The results are recorded in the vectors $this->{x,y}changed[], by storing a 1 in the element for each line that is an insertion or deletion (ie. is not in the LCS).

The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1.

Note that XLIM, YLIM are exclusive bounds. All line numbers are origin-0 and discarded lines are not counted.

Definition at line 558 of file Diff.php.

559  {
560  /* Slide down the bottom initial diagonal. */
561  while ($xoff < $xlim && $yoff < $ylim
562  && $this->xv[$xoff] == $this->yv[$yoff]) {
563  ++$xoff;
564  ++$yoff;
565  }
566 
567  /* Slide up the top initial diagonal. */
568  while ($xlim > $xoff && $ylim > $yoff
569  && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
570  --$xlim;
571  --$ylim;
572  }
573 
574  if ($xoff == $xlim || $yoff == $ylim) {
575  $lcs = 0;
576  } else {
577  /* This is ad hoc but seems to work well. $nchunks =
578  * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks =
579  * max(2,min(8,(int)$nchunks)); */
580  $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
581  list($lcs, $seps)
582  = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
583  }
584 
585  if ($lcs == 0) {
586  /* X and Y sequences have no common subsequence: mark all
587  * changed. */
588  while ($yoff < $ylim) {
589  $this->ychanged[$this->yind[$yoff++]] = 1;
590  }
591  while ($xoff < $xlim) {
592  $this->xchanged[$this->xind[$xoff++]] = 1;
593  }
594  } else {
595  /* Use the partitions to split this problem into subproblems. */
596  reset($seps);
597  $pt1 = $seps[0];
598  while ($pt2 = next($seps)) {
599  $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
600  $pt1 = $pt2;
601  }
602  }
603  }
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments.
Definition: Diff.php:438
_compareseq($xoff, $xlim, $yoff, $ylim)
Finds LCS of two sequences.
Definition: Diff.php:558

◆ _diag()

Text_Diff_Engine_native::_diag (   $xoff,
  $xlim,
  $yoff,
  $ylim,
  $nchunks 
)

Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments.

Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of NCHUNKS+1 (X, Y) indexes giving the diving points between sub sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1), the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).

This function assumes that the first lines of the specified portions of the two files do not match, and likewise that the last lines do not match. The caller must trim matching lines from the beginning and end of the portions it is going to specify.

Definition at line 438 of file Diff.php.

References $n, $x, $y, array, and Text_Diff\lcs().

439  {
440  $flip = false;
441 
442  if ($xlim - $xoff > $ylim - $yoff) {
443  /* Things seems faster (I'm not sure I understand why) when the
444  * shortest sequence is in X. */
445  $flip = true;
446  list ($xoff, $xlim, $yoff, $ylim)
447  = array($yoff, $ylim, $xoff, $xlim);
448  }
449 
450  if ($flip) {
451  for ($i = $ylim - 1; $i >= $yoff; $i--) {
452  $ymatches[$this->xv[$i]][] = $i;
453  }
454  } else {
455  for ($i = $ylim - 1; $i >= $yoff; $i--) {
456  $ymatches[$this->yv[$i]][] = $i;
457  }
458  }
459 
460  $this->lcs = 0;
461  $this->seq[0]= $yoff - 1;
462  $this->in_seq = array();
463  $ymids[0] = array();
464 
465  $numer = $xlim - $xoff + $nchunks - 1;
466  $x = $xoff;
467  for ($chunk = 0; $chunk < $nchunks; $chunk++) {
468  if ($chunk > 0) {
469  for ($i = 0; $i <= $this->lcs; $i++) {
470  $ymids[$i][$chunk - 1] = $this->seq[$i];
471  }
472  }
473 
474  $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
475  for (; $x < $x1; $x++) {
476  $line = $flip ? $this->yv[$x] : $this->xv[$x];
477  if (empty($ymatches[$line])) {
478  continue;
479  }
480  $matches = $ymatches[$line];
481  foreach ($matches as $y) {
482  if (empty($this->in_seq[$y])) {
483  $k = $this->_lcsPos($y);
484  assert($k > 0);
485  $ymids[$k] = $ymids[$k - 1];
486  break;
487  }
488  }
489 
490  while (list($junk, $y) = each($matches)) {
491  if ($y > $this->seq[$k - 1]) {
492  assert($y < $this->seq[$k]);
493  /* Optimization: this is a common case: next match is
494  * just replacing previous match. */
495  $this->in_seq[$this->seq[$k]] = false;
496  $this->seq[$k] = $y;
497  $this->in_seq[$y] = 1;
498  } elseif (empty($this->in_seq[$y])) {
499  $k = $this->_lcsPos($y);
500  assert($k > 0);
501  $ymids[$k] = $ymids[$k - 1];
502  }
503  }
504  }
505  }
506 
507  $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
508  $ymid = $ymids[$this->lcs];
509  for ($n = 0; $n < $nchunks - 1; $n++) {
510  $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
511  $y1 = $ymid[$n] + 1;
512  $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
513  }
514  $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
515 
516  return array($this->lcs, $seps);
517  }
$x
Definition: example_009.php:98
$y
Definition: example_007.php:83
$n
Definition: RandomTest.php:80
Create styles array
The data for the language used.
+ Here is the call graph for this function:

◆ _lcsPos()

Text_Diff_Engine_native::_lcsPos (   $ypos)

Definition at line 519 of file Diff.php.

520  {
521  $end = $this->lcs;
522  if ($end == 0 || $ypos > $this->seq[$end]) {
523  $this->seq[++$this->lcs] = $ypos;
524  $this->in_seq[$ypos] = 1;
525  return $this->lcs;
526  }
527 
528  $beg = 1;
529  while ($beg < $end) {
530  $mid = (int)(($beg + $end) / 2);
531  if ($ypos > $this->seq[$mid]) {
532  $beg = $mid + 1;
533  } else {
534  $end = $mid;
535  }
536  }
537 
538  assert($ypos != $this->seq[$end]);
539 
540  $this->in_seq[$this->seq[$end]] = false;
541  $this->seq[$end] = $ypos;
542  $this->in_seq[$ypos] = 1;
543  return $end;
544  }

◆ _shiftBoundaries()

Text_Diff_Engine_native::_shiftBoundaries (   $lines,
$changed,
  $other_changed 
)

Adjusts inserts/deletes of identical lines to join changes as much as possible.

We do something when a run of changed lines include a line at one end and has an excluded, identical line at the other. We are free to choose which identical line is included. `compareseq' usually chooses the one at the beginning, but usually it is cleaner to consider the following identical line to be the "change".

This is extracted verbatim from analyze.c (GNU diffutils-2.7).

Definition at line 617 of file Diff.php.

References $changed, and $start.

618  {
619  $i = 0;
620  $j = 0;
621 
622  assert('count($lines) == count($changed)');
623  $len = count($lines);
624  $other_len = count($other_changed);
625 
626  while (1) {
627  /* Scan forward to find the beginning of another run of
628  * changes. Also keep track of the corresponding point in the
629  * other file.
630  *
631  * Throughout this code, $i and $j are adjusted together so that
632  * the first $i elements of $changed and the first $j elements of
633  * $other_changed both contain the same number of zeros (unchanged
634  * lines).
635  *
636  * Furthermore, $j is always kept so that $j == $other_len or
637  * $other_changed[$j] == false. */
638  while ($j < $other_len && $other_changed[$j]) {
639  $j++;
640  }
641 
642  while ($i < $len && ! $changed[$i]) {
643  assert('$j < $other_len && ! $other_changed[$j]');
644  $i++; $j++;
645  while ($j < $other_len && $other_changed[$j]) {
646  $j++;
647  }
648  }
649 
650  if ($i == $len) {
651  break;
652  }
653 
654  $start = $i;
655 
656  /* Find the end of this run of changes. */
657  while (++$i < $len && $changed[$i]) {
658  continue;
659  }
660 
661  do {
662  /* Record the length of this run of changes, so that we can
663  * later determine whether the run has grown. */
664  $runlength = $i - $start;
665 
666  /* Move the changed region back, so long as the previous
667  * unchanged line matches the last changed one. This merges
668  * with previous changed regions. */
669  while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
670  $changed[--$start] = 1;
671  $changed[--$i] = false;
672  while ($start > 0 && $changed[$start - 1]) {
673  $start--;
674  }
675  assert('$j > 0');
676  while ($other_changed[--$j]) {
677  continue;
678  }
679  assert('$j >= 0 && !$other_changed[$j]');
680  }
681 
682  /* Set CORRESPONDING to the end of the changed run, at the
683  * last point where it corresponds to a changed run in the
684  * other file. CORRESPONDING == LEN means no such point has
685  * been found. */
686  $corresponding = $j < $other_len ? $i : $len;
687 
688  /* Move the changed region forward, so long as the first
689  * changed line matches the following unchanged one. This
690  * merges with following changed regions. Do this second, so
691  * that if there are no merges, the changed region is moved
692  * forward as far as possible. */
693  while ($i < $len && $lines[$start] == $lines[$i]) {
694  $changed[$start++] = false;
695  $changed[$i++] = 1;
696  while ($i < $len && $changed[$i]) {
697  $i++;
698  }
699 
700  assert('$j < $other_len && ! $other_changed[$j]');
701  $j++;
702  if ($j < $other_len && $other_changed[$j]) {
703  $corresponding = $i;
704  while ($j < $other_len && $other_changed[$j]) {
705  $j++;
706  }
707  }
708  }
709  } while ($runlength != $i - $start);
710 
711  /* If possible, move the fully-merged run of changes back to a
712  * corresponding run in the other file. */
713  while ($corresponding < $i) {
714  $changed[--$start] = 1;
715  $changed[--$i] = 0;
716  assert('$j > 0');
717  while ($other_changed[--$j]) {
718  continue;
719  }
720  assert('$j >= 0 && !$other_changed[$j]');
721  }
722  }
723  }

◆ diff()

Text_Diff_Engine_native::diff (   $from_lines,
  $to_lines 
)

Definition at line 323 of file Diff.php.

References array, and Text_Diff\lcs().

324  {
325  $n_from = count($from_lines);
326  $n_to = count($to_lines);
327 
328  $this->xchanged = $this->ychanged = array();
329  $this->xv = $this->yv = array();
330  $this->xind = $this->yind = array();
331  unset($this->seq);
332  unset($this->in_seq);
333  unset($this->lcs);
334 
335  // Skip leading common lines.
336  for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
337  if ($from_lines[$skip] != $to_lines[$skip]) {
338  break;
339  }
340  $this->xchanged[$skip] = $this->ychanged[$skip] = false;
341  }
342 
343  // Skip trailing common lines.
344  $xi = $n_from; $yi = $n_to;
345  for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
346  if ($from_lines[$xi] != $to_lines[$yi]) {
347  break;
348  }
349  $this->xchanged[$xi] = $this->ychanged[$yi] = false;
350  }
351 
352  // Ignore lines which do not exist in both files.
353  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
354  $xhash[$from_lines[$xi]] = 1;
355  }
356  for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
357  $line = $to_lines[$yi];
358  if (($this->ychanged[$yi] = empty($xhash[$line]))) {
359  continue;
360  }
361  $yhash[$line] = 1;
362  $this->yv[] = $line;
363  $this->yind[] = $yi;
364  }
365  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
366  $line = $from_lines[$xi];
367  if (($this->xchanged[$xi] = empty($yhash[$line]))) {
368  continue;
369  }
370  $this->xv[] = $line;
371  $this->xind[] = $xi;
372  }
373 
374  // Find the LCS.
375  $this->_compareseq(0, count($this->xv), 0, count($this->yv));
376 
377  // Merge edits when possible.
378  $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
379  $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
380 
381  // Compute the edit operations.
382  $edits = array();
383  $xi = $yi = 0;
384  while ($xi < $n_from || $yi < $n_to) {
385  assert($yi < $n_to || $this->xchanged[$xi]);
386  assert($xi < $n_from || $this->ychanged[$yi]);
387 
388  // Skip matching "snake".
389  $copy = array();
390  while ($xi < $n_from && $yi < $n_to
391  && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
392  $copy[] = $from_lines[$xi++];
393  ++$yi;
394  }
395  if ($copy) {
396  $edits[] = new Text_Diff_Op_copy($copy);
397  }
398 
399  // Find deletes & adds.
400  $delete = array();
401  while ($xi < $n_from && $this->xchanged[$xi]) {
402  $delete[] = $from_lines[$xi++];
403  }
404 
405  $add = array();
406  while ($yi < $n_to && $this->ychanged[$yi]) {
407  $add[] = $to_lines[$yi++];
408  }
409 
410  if ($delete && $add) {
411  $edits[] = new Text_Diff_Op_change($delete, $add);
412  } elseif ($delete) {
413  $edits[] = new Text_Diff_Op_delete($delete);
414  } elseif ($add) {
415  $edits[] = new Text_Diff_Op_add($add);
416  }
417  }
418 
419  return $edits;
420  }
_compareseq($xoff, $xlim, $yoff, $ylim)
Finds LCS of two sequences.
Definition: Diff.php:558
Create styles array
The data for the language used.
_shiftBoundaries($lines, &$changed, $other_changed)
Adjusts inserts/deletes of identical lines to join changes as much as possible.
Definition: Diff.php:617
+ Here is the call graph for this function:

The documentation for this class was generated from the following file: