ILIAS  release_5-3 Revision v5.3.23-19-g915713cf615
Text_Diff_Engine_native Class Reference
+ Collaboration diagram for Text_Diff_Engine_native:

Public Member Functions

 diff ($from_lines, $to_lines)
 
 _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
 Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments. More...
 
 _lcsPos ($ypos)
 
 _compareseq ($xoff, $xlim, $yoff, $ylim)
 Finds LCS of two sequences. More...
 
 _shiftBoundaries ($lines, &$changed, $other_changed)
 Adjusts inserts/deletes of identical lines to join changes as much as possible. More...
 

Detailed Description

Definition at line 323 of file Diff.php.

Member Function Documentation

◆ _compareseq()

Text_Diff_Engine_native::_compareseq (   $xoff,
  $xlim,
  $yoff,
  $ylim 
)

Finds LCS of two sequences.

The results are recorded in the vectors $this->{x,y}changed[], by storing a 1 in the element for each line that is an insertion or deletion (ie. is not in the LCS).

The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1.

Note that XLIM, YLIM are exclusive bounds. All line numbers are origin-0 and discarded lines are not counted.

Definition at line 561 of file Diff.php.

562  {
563  /* Slide down the bottom initial diagonal. */
564  while ($xoff < $xlim && $yoff < $ylim
565  && $this->xv[$xoff] == $this->yv[$yoff]) {
566  ++$xoff;
567  ++$yoff;
568  }
569 
570  /* Slide up the top initial diagonal. */
571  while ($xlim > $xoff && $ylim > $yoff
572  && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
573  --$xlim;
574  --$ylim;
575  }
576 
577  if ($xoff == $xlim || $yoff == $ylim) {
578  $lcs = 0;
579  } else {
580  /* This is ad hoc but seems to work well. $nchunks =
581  * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks =
582  * max(2,min(8,(int)$nchunks)); */
583  $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
584  list($lcs, $seps)
585  = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
586  }
587 
588  if ($lcs == 0) {
589  /* X and Y sequences have no common subsequence: mark all
590  * changed. */
591  while ($yoff < $ylim) {
592  $this->ychanged[$this->yind[$yoff++]] = 1;
593  }
594  while ($xoff < $xlim) {
595  $this->xchanged[$this->xind[$xoff++]] = 1;
596  }
597  } else {
598  /* Use the partitions to split this problem into subproblems. */
599  reset($seps);
600  $pt1 = $seps[0];
601  while ($pt2 = next($seps)) {
602  $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
603  $pt1 = $pt2;
604  }
605  }
606  }
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments.
Definition: Diff.php:441
_compareseq($xoff, $xlim, $yoff, $ylim)
Finds LCS of two sequences.
Definition: Diff.php:561

◆ _diag()

Text_Diff_Engine_native::_diag (   $xoff,
  $xlim,
  $yoff,
  $ylim,
  $nchunks 
)

Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments.

Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of NCHUNKS+1 (X, Y) indexes giving the diving points between sub sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1), the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).

This function assumes that the first lines of the specified portions of the two files do not match, and likewise that the last lines do not match. The caller must trim matching lines from the beginning and end of the portions it is going to specify.

Definition at line 441 of file Diff.php.

References $i, $n, $x, $y, array, and Text_Diff\lcs().

442  {
443  $flip = false;
444 
445  if ($xlim - $xoff > $ylim - $yoff) {
446  /* Things seems faster (I'm not sure I understand why) when the
447  * shortest sequence is in X. */
448  $flip = true;
449  list($xoff, $xlim, $yoff, $ylim)
450  = array($yoff, $ylim, $xoff, $xlim);
451  }
452 
453  if ($flip) {
454  for ($i = $ylim - 1; $i >= $yoff; $i--) {
455  $ymatches[$this->xv[$i]][] = $i;
456  }
457  } else {
458  for ($i = $ylim - 1; $i >= $yoff; $i--) {
459  $ymatches[$this->yv[$i]][] = $i;
460  }
461  }
462 
463  $this->lcs = 0;
464  $this->seq[0]= $yoff - 1;
465  $this->in_seq = array();
466  $ymids[0] = array();
467 
468  $numer = $xlim - $xoff + $nchunks - 1;
469  $x = $xoff;
470  for ($chunk = 0; $chunk < $nchunks; $chunk++) {
471  if ($chunk > 0) {
472  for ($i = 0; $i <= $this->lcs; $i++) {
473  $ymids[$i][$chunk - 1] = $this->seq[$i];
474  }
475  }
476 
477  $x1 = $xoff + (int) (($numer + ($xlim-$xoff)*$chunk) / $nchunks);
478  for (; $x < $x1; $x++) {
479  $line = $flip ? $this->yv[$x] : $this->xv[$x];
480  if (empty($ymatches[$line])) {
481  continue;
482  }
483  $matches = $ymatches[$line];
484  foreach ($matches as $y) {
485  if (empty($this->in_seq[$y])) {
486  $k = $this->_lcsPos($y);
487  assert($k > 0);
488  $ymids[$k] = $ymids[$k - 1];
489  break;
490  }
491  }
492 
493  while (list($junk, $y) = each($matches)) {
494  if ($y > $this->seq[$k - 1]) {
495  assert($y < $this->seq[$k]);
496  /* Optimization: this is a common case: next match is
497  * just replacing previous match. */
498  $this->in_seq[$this->seq[$k]] = false;
499  $this->seq[$k] = $y;
500  $this->in_seq[$y] = 1;
501  } elseif (empty($this->in_seq[$y])) {
502  $k = $this->_lcsPos($y);
503  assert($k > 0);
504  $ymids[$k] = $ymids[$k - 1];
505  }
506  }
507  }
508  }
509 
510  $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
511  $ymid = $ymids[$this->lcs];
512  for ($n = 0; $n < $nchunks - 1; $n++) {
513  $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $n) / $nchunks);
514  $y1 = $ymid[$n] + 1;
515  $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
516  }
517  $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
518 
519  return array($this->lcs, $seps);
520  }
$x
Definition: example_009.php:98
$y
Definition: example_007.php:83
$n
Definition: RandomTest.php:85
Create styles array
The data for the language used.
$i
Definition: disco.tpl.php:19
+ Here is the call graph for this function:

◆ _lcsPos()

Text_Diff_Engine_native::_lcsPos (   $ypos)

Definition at line 522 of file Diff.php.

References $end.

523  {
524  $end = $this->lcs;
525  if ($end == 0 || $ypos > $this->seq[$end]) {
526  $this->seq[++$this->lcs] = $ypos;
527  $this->in_seq[$ypos] = 1;
528  return $this->lcs;
529  }
530 
531  $beg = 1;
532  while ($beg < $end) {
533  $mid = (int) (($beg + $end) / 2);
534  if ($ypos > $this->seq[$mid]) {
535  $beg = $mid + 1;
536  } else {
537  $end = $mid;
538  }
539  }
540 
541  assert($ypos != $this->seq[$end]);
542 
543  $this->in_seq[$this->seq[$end]] = false;
544  $this->seq[$end] = $ypos;
545  $this->in_seq[$ypos] = 1;
546  return $end;
547  }
$end
Definition: saml1-acs.php:18

◆ _shiftBoundaries()

Text_Diff_Engine_native::_shiftBoundaries (   $lines,
$changed,
  $other_changed 
)

Adjusts inserts/deletes of identical lines to join changes as much as possible.

We do something when a run of changed lines include a line at one end and has an excluded, identical line at the other. We are free to choose which identical line is included. `compareseq' usually chooses the one at the beginning, but usually it is cleaner to consider the following identical line to be the "change".

This is extracted verbatim from analyze.c (GNU diffutils-2.7).

Definition at line 620 of file Diff.php.

References $changed, and $i.

621  {
622  $i = 0;
623  $j = 0;
624 
625  assert('count($lines) == count($changed)');
626  $len = count($lines);
627  $other_len = count($other_changed);
628 
629  while (1) {
630  /* Scan forward to find the beginning of another run of
631  * changes. Also keep track of the corresponding point in the
632  * other file.
633  *
634  * Throughout this code, $i and $j are adjusted together so that
635  * the first $i elements of $changed and the first $j elements of
636  * $other_changed both contain the same number of zeros (unchanged
637  * lines).
638  *
639  * Furthermore, $j is always kept so that $j == $other_len or
640  * $other_changed[$j] == false. */
641  while ($j < $other_len && $other_changed[$j]) {
642  $j++;
643  }
644 
645  while ($i < $len && !$changed[$i]) {
646  assert('$j < $other_len && ! $other_changed[$j]');
647  $i++;
648  $j++;
649  while ($j < $other_len && $other_changed[$j]) {
650  $j++;
651  }
652  }
653 
654  if ($i == $len) {
655  break;
656  }
657 
658  $start = $i;
659 
660  /* Find the end of this run of changes. */
661  while (++$i < $len && $changed[$i]) {
662  continue;
663  }
664 
665  do {
666  /* Record the length of this run of changes, so that we can
667  * later determine whether the run has grown. */
668  $runlength = $i - $start;
669 
670  /* Move the changed region back, so long as the previous
671  * unchanged line matches the last changed one. This merges
672  * with previous changed regions. */
673  while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
674  $changed[--$start] = 1;
675  $changed[--$i] = false;
676  while ($start > 0 && $changed[$start - 1]) {
677  $start--;
678  }
679  assert('$j > 0');
680  while ($other_changed[--$j]) {
681  continue;
682  }
683  assert('$j >= 0 && !$other_changed[$j]');
684  }
685 
686  /* Set CORRESPONDING to the end of the changed run, at the
687  * last point where it corresponds to a changed run in the
688  * other file. CORRESPONDING == LEN means no such point has
689  * been found. */
690  $corresponding = $j < $other_len ? $i : $len;
691 
692  /* Move the changed region forward, so long as the first
693  * changed line matches the following unchanged one. This
694  * merges with following changed regions. Do this second, so
695  * that if there are no merges, the changed region is moved
696  * forward as far as possible. */
697  while ($i < $len && $lines[$start] == $lines[$i]) {
698  $changed[$start++] = false;
699  $changed[$i++] = 1;
700  while ($i < $len && $changed[$i]) {
701  $i++;
702  }
703 
704  assert('$j < $other_len && ! $other_changed[$j]');
705  $j++;
706  if ($j < $other_len && $other_changed[$j]) {
707  $corresponding = $i;
708  while ($j < $other_len && $other_changed[$j]) {
709  $j++;
710  }
711  }
712  }
713  } while ($runlength != $i - $start);
714 
715  /* If possible, move the fully-merged run of changes back to a
716  * corresponding run in the other file. */
717  while ($corresponding < $i) {
718  $changed[--$start] = 1;
719  $changed[--$i] = 0;
720  assert('$j > 0');
721  while ($other_changed[--$j]) {
722  continue;
723  }
724  assert('$j >= 0 && !$other_changed[$j]');
725  }
726  }
727  }
$i
Definition: disco.tpl.php:19

◆ diff()

Text_Diff_Engine_native::diff (   $from_lines,
  $to_lines 
)

Definition at line 325 of file Diff.php.

References array, and Text_Diff\lcs().

326  {
327  $n_from = count($from_lines);
328  $n_to = count($to_lines);
329 
330  $this->xchanged = $this->ychanged = array();
331  $this->xv = $this->yv = array();
332  $this->xind = $this->yind = array();
333  unset($this->seq);
334  unset($this->in_seq);
335  unset($this->lcs);
336 
337  // Skip leading common lines.
338  for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
339  if ($from_lines[$skip] != $to_lines[$skip]) {
340  break;
341  }
342  $this->xchanged[$skip] = $this->ychanged[$skip] = false;
343  }
344 
345  // Skip trailing common lines.
346  $xi = $n_from;
347  $yi = $n_to;
348  for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
349  if ($from_lines[$xi] != $to_lines[$yi]) {
350  break;
351  }
352  $this->xchanged[$xi] = $this->ychanged[$yi] = false;
353  }
354 
355  // Ignore lines which do not exist in both files.
356  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
357  $xhash[$from_lines[$xi]] = 1;
358  }
359  for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
360  $line = $to_lines[$yi];
361  if (($this->ychanged[$yi] = empty($xhash[$line]))) {
362  continue;
363  }
364  $yhash[$line] = 1;
365  $this->yv[] = $line;
366  $this->yind[] = $yi;
367  }
368  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
369  $line = $from_lines[$xi];
370  if (($this->xchanged[$xi] = empty($yhash[$line]))) {
371  continue;
372  }
373  $this->xv[] = $line;
374  $this->xind[] = $xi;
375  }
376 
377  // Find the LCS.
378  $this->_compareseq(0, count($this->xv), 0, count($this->yv));
379 
380  // Merge edits when possible.
381  $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
382  $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
383 
384  // Compute the edit operations.
385  $edits = array();
386  $xi = $yi = 0;
387  while ($xi < $n_from || $yi < $n_to) {
388  assert($yi < $n_to || $this->xchanged[$xi]);
389  assert($xi < $n_from || $this->ychanged[$yi]);
390 
391  // Skip matching "snake".
392  $copy = array();
393  while ($xi < $n_from && $yi < $n_to
394  && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
395  $copy[] = $from_lines[$xi++];
396  ++$yi;
397  }
398  if ($copy) {
399  $edits[] = new Text_Diff_Op_copy($copy);
400  }
401 
402  // Find deletes & adds.
403  $delete = array();
404  while ($xi < $n_from && $this->xchanged[$xi]) {
405  $delete[] = $from_lines[$xi++];
406  }
407 
408  $add = array();
409  while ($yi < $n_to && $this->ychanged[$yi]) {
410  $add[] = $to_lines[$yi++];
411  }
412 
413  if ($delete && $add) {
414  $edits[] = new Text_Diff_Op_change($delete, $add);
415  } elseif ($delete) {
416  $edits[] = new Text_Diff_Op_delete($delete);
417  } elseif ($add) {
418  $edits[] = new Text_Diff_Op_add($add);
419  }
420  }
421 
422  return $edits;
423  }
_compareseq($xoff, $xlim, $yoff, $ylim)
Finds LCS of two sequences.
Definition: Diff.php:561
Create styles array
The data for the language used.
_shiftBoundaries($lines, &$changed, $other_changed)
Adjusts inserts/deletes of identical lines to join changes as much as possible.
Definition: Diff.php:620
+ Here is the call graph for this function:

The documentation for this class was generated from the following file: