ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
Text_Diff_Engine_native Class Reference
+ Collaboration diagram for Text_Diff_Engine_native:

Public Member Functions

 diff ($from_lines, $to_lines)
 
 _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
 Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments. More...
 
 _lcsPos ($ypos)
 
 _compareseq ($xoff, $xlim, $yoff, $ylim)
 Finds LCS of two sequences. More...
 
 _shiftBoundaries ($lines, &$changed, $other_changed)
 Adjusts inserts/deletes of identical lines to join changes as much as possible. More...
 

Detailed Description

Definition at line 323 of file Diff.php.

Member Function Documentation

◆ _compareseq()

Text_Diff_Engine_native::_compareseq (   $xoff,
  $xlim,
  $yoff,
  $ylim 
)

Finds LCS of two sequences.

The results are recorded in the vectors $this->{x,y}changed[], by storing a 1 in the element for each line that is an insertion or deletion (ie. is not in the LCS).

The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1.

Note that XLIM, YLIM are exclusive bounds. All line numbers are origin-0 and discarded lines are not counted.

Definition at line 561 of file Diff.php.

562 {
563 /* Slide down the bottom initial diagonal. */
564 while ($xoff < $xlim && $yoff < $ylim
565 && $this->xv[$xoff] == $this->yv[$yoff]) {
566 ++$xoff;
567 ++$yoff;
568 }
569
570 /* Slide up the top initial diagonal. */
571 while ($xlim > $xoff && $ylim > $yoff
572 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
573 --$xlim;
574 --$ylim;
575 }
576
577 if ($xoff == $xlim || $yoff == $ylim) {
578 $lcs = 0;
579 } else {
580 /* This is ad hoc but seems to work well. $nchunks =
581 * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks =
582 * max(2,min(8,(int)$nchunks)); */
583 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
584 list($lcs, $seps)
585 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
586 }
587
588 if ($lcs == 0) {
589 /* X and Y sequences have no common subsequence: mark all
590 * changed. */
591 while ($yoff < $ylim) {
592 $this->ychanged[$this->yind[$yoff++]] = 1;
593 }
594 while ($xoff < $xlim) {
595 $this->xchanged[$this->xind[$xoff++]] = 1;
596 }
597 } else {
598 /* Use the partitions to split this problem into subproblems. */
599 reset($seps);
600 $pt1 = $seps[0];
601 while ($pt2 = next($seps)) {
602 $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
603 $pt1 = $pt2;
604 }
605 }
606 }
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF,...
Definition: Diff.php:441
_compareseq($xoff, $xlim, $yoff, $ylim)
Finds LCS of two sequences.
Definition: Diff.php:561

References _compareseq(), and _diag().

Referenced by _compareseq(), and diff().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ _diag()

Text_Diff_Engine_native::_diag (   $xoff,
  $xlim,
  $yoff,
  $ylim,
  $nchunks 
)

Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments.

Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of NCHUNKS+1 (X, Y) indexes giving the diving points between sub sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1), the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).

This function assumes that the first lines of the specified portions of the two files do not match, and likewise that the last lines do not match. The caller must trim matching lines from the beginning and end of the portions it is going to specify.

Definition at line 441 of file Diff.php.

442 {
443 $flip = false;
444
445 if ($xlim - $xoff > $ylim - $yoff) {
446 /* Things seems faster (I'm not sure I understand why) when the
447 * shortest sequence is in X. */
448 $flip = true;
449 list($xoff, $xlim, $yoff, $ylim)
450 = array($yoff, $ylim, $xoff, $xlim);
451 }
452
453 if ($flip) {
454 for ($i = $ylim - 1; $i >= $yoff; $i--) {
455 $ymatches[$this->xv[$i]][] = $i;
456 }
457 } else {
458 for ($i = $ylim - 1; $i >= $yoff; $i--) {
459 $ymatches[$this->yv[$i]][] = $i;
460 }
461 }
462
463 $this->lcs = 0;
464 $this->seq[0] = $yoff - 1;
465 $this->in_seq = array();
466 $ymids[0] = array();
467
468 $numer = $xlim - $xoff + $nchunks - 1;
469 $x = $xoff;
470 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
471 if ($chunk > 0) {
472 for ($i = 0; $i <= $this->lcs; $i++) {
473 $ymids[$i][$chunk - 1] = $this->seq[$i];
474 }
475 }
476
477 $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $chunk) / $nchunks);
478 for (; $x < $x1; $x++) {
479 $line = $flip ? $this->yv[$x] : $this->xv[$x];
480 if (empty($ymatches[$line])) {
481 continue;
482 }
483 $matches = $ymatches[$line];
484 foreach ($matches as $y) {
485 if (empty($this->in_seq[$y])) {
486 $k = $this->_lcsPos($y);
487 assert($k > 0);
488 $ymids[$k] = $ymids[$k - 1];
489 break;
490 }
491 }
492
493 while (list($junk, $y) = each($matches)) {
494 if ($y > $this->seq[$k - 1]) {
495 assert($y < $this->seq[$k]);
496 /* Optimization: this is a common case: next match is
497 * just replacing previous match. */
498 $this->in_seq[$this->seq[$k]] = false;
499 $this->seq[$k] = $y;
500 $this->in_seq[$y] = 1;
501 } elseif (empty($this->in_seq[$y])) {
502 $k = $this->_lcsPos($y);
503 assert($k > 0);
504 $ymids[$k] = $ymids[$k - 1];
505 }
506 }
507 }
508 }
509
510 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
511 $ymid = $ymids[$this->lcs];
512 for ($n = 0; $n < $nchunks - 1; $n++) {
513 $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $n) / $nchunks);
514 $y1 = $ymid[$n] + 1;
515 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
516 }
517 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
518
519 return array($this->lcs, $seps);
520 }
$n
Definition: RandomTest.php:85
$x
Definition: complexTest.php:9
$i
Definition: disco.tpl.php:19
$y
Definition: example_007.php:83

References $i, $n, $x, $y, and _lcsPos().

Referenced by _compareseq().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ _lcsPos()

Text_Diff_Engine_native::_lcsPos (   $ypos)

Definition at line 522 of file Diff.php.

523 {
524 $end = $this->lcs;
525 if ($end == 0 || $ypos > $this->seq[$end]) {
526 $this->seq[++$this->lcs] = $ypos;
527 $this->in_seq[$ypos] = 1;
528 return $this->lcs;
529 }
530
531 $beg = 1;
532 while ($beg < $end) {
533 $mid = (int) (($beg + $end) / 2);
534 if ($ypos > $this->seq[$mid]) {
535 $beg = $mid + 1;
536 } else {
537 $end = $mid;
538 }
539 }
540
541 assert($ypos != $this->seq[$end]);
542
543 $this->in_seq[$this->seq[$end]] = false;
544 $this->seq[$end] = $ypos;
545 $this->in_seq[$ypos] = 1;
546 return $end;
547 }

References $end.

Referenced by _diag().

+ Here is the caller graph for this function:

◆ _shiftBoundaries()

Text_Diff_Engine_native::_shiftBoundaries (   $lines,
$changed,
  $other_changed 
)

Adjusts inserts/deletes of identical lines to join changes as much as possible.

We do something when a run of changed lines include a line at one end and has an excluded, identical line at the other. We are free to choose which identical line is included. ‘compareseq’ usually chooses the one at the beginning, but usually it is cleaner to consider the following identical line to be the "change".

This is extracted verbatim from analyze.c (GNU diffutils-2.7).

Definition at line 620 of file Diff.php.

621 {
622 $i = 0;
623 $j = 0;
624
625 assert(count($lines) == count($changed));
626 $len = count($lines);
627 $other_len = count($other_changed);
628
629 while (1) {
630 /* Scan forward to find the beginning of another run of
631 * changes. Also keep track of the corresponding point in the
632 * other file.
633 *
634 * Throughout this code, $i and $j are adjusted together so that
635 * the first $i elements of $changed and the first $j elements of
636 * $other_changed both contain the same number of zeros (unchanged
637 * lines).
638 *
639 * Furthermore, $j is always kept so that $j == $other_len or
640 * $other_changed[$j] == false. */
641 while ($j < $other_len && $other_changed[$j]) {
642 $j++;
643 }
644
645 while ($i < $len && !$changed[$i]) {
646 assert($j < $other_len && !$other_changed[$j]);
647 $i++;
648 $j++;
649 while ($j < $other_len && $other_changed[$j]) {
650 $j++;
651 }
652 }
653
654 if ($i == $len) {
655 break;
656 }
657
658 $start = $i;
659
660 /* Find the end of this run of changes. */
661 while (++$i < $len && $changed[$i]) {
662 continue;
663 }
664
665 do {
666 /* Record the length of this run of changes, so that we can
667 * later determine whether the run has grown. */
668 $runlength = $i - $start;
669
670 /* Move the changed region back, so long as the previous
671 * unchanged line matches the last changed one. This merges
672 * with previous changed regions. */
673 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
674 $changed[--$start] = 1;
675 $changed[--$i] = false;
676 while ($start > 0 && $changed[$start - 1]) {
677 $start--;
678 }
679 assert($j > 0);
680 while ($other_changed[--$j]) {
681 continue;
682 }
683 assert($j >= 0 && !$other_changed[$j]);
684 }
685
686 /* Set CORRESPONDING to the end of the changed run, at the
687 * last point where it corresponds to a changed run in the
688 * other file. CORRESPONDING == LEN means no such point has
689 * been found. */
690 $corresponding = $j < $other_len ? $i : $len;
691
692 /* Move the changed region forward, so long as the first
693 * changed line matches the following unchanged one. This
694 * merges with following changed regions. Do this second, so
695 * that if there are no merges, the changed region is moved
696 * forward as far as possible. */
697 while ($i < $len && $lines[$start] == $lines[$i]) {
698 $changed[$start++] = false;
699 $changed[$i++] = 1;
700 while ($i < $len && $changed[$i]) {
701 $i++;
702 }
703
704 assert($j < $other_len && !$other_changed[$j]);
705 $j++;
706 if ($j < $other_len && $other_changed[$j]) {
707 $corresponding = $i;
708 while ($j < $other_len && $other_changed[$j]) {
709 $j++;
710 }
711 }
712 }
713 } while ($runlength != $i - $start);
714
715 /* If possible, move the fully-merged run of changes back to a
716 * corresponding run in the other file. */
717 while ($corresponding < $i) {
718 $changed[--$start] = 1;
719 $changed[--$i] = 0;
720 assert($j > 0);
721 while ($other_changed[--$j]) {
722 continue;
723 }
724 assert($j >= 0 && !$other_changed[$j]);
725 }
726 }
727 }
$start
Definition: bench.php:8

References $changed, $i, and $start.

Referenced by diff().

+ Here is the caller graph for this function:

◆ diff()

Text_Diff_Engine_native::diff (   $from_lines,
  $to_lines 
)

Definition at line 325 of file Diff.php.

326 {
327 $n_from = count($from_lines);
328 $n_to = count($to_lines);
329
330 $this->xchanged = $this->ychanged = array();
331 $this->xv = $this->yv = array();
332 $this->xind = $this->yind = array();
333 unset($this->seq);
334 unset($this->in_seq);
335 unset($this->lcs);
336
337 // Skip leading common lines.
338 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
339 if ($from_lines[$skip] != $to_lines[$skip]) {
340 break;
341 }
342 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
343 }
344
345 // Skip trailing common lines.
346 $xi = $n_from;
347 $yi = $n_to;
348 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
349 if ($from_lines[$xi] != $to_lines[$yi]) {
350 break;
351 }
352 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
353 }
354
355 // Ignore lines which do not exist in both files.
356 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
357 $xhash[$from_lines[$xi]] = 1;
358 }
359 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
360 $line = $to_lines[$yi];
361 if (($this->ychanged[$yi] = empty($xhash[$line]))) {
362 continue;
363 }
364 $yhash[$line] = 1;
365 $this->yv[] = $line;
366 $this->yind[] = $yi;
367 }
368 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
369 $line = $from_lines[$xi];
370 if (($this->xchanged[$xi] = empty($yhash[$line]))) {
371 continue;
372 }
373 $this->xv[] = $line;
374 $this->xind[] = $xi;
375 }
376
377 // Find the LCS.
378 $this->_compareseq(0, count($this->xv), 0, count($this->yv));
379
380 // Merge edits when possible.
381 $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
382 $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
383
384 // Compute the edit operations.
385 $edits = array();
386 $xi = $yi = 0;
387 while ($xi < $n_from || $yi < $n_to) {
388 assert($yi < $n_to || $this->xchanged[$xi]);
389 assert($xi < $n_from || $this->ychanged[$yi]);
390
391 // Skip matching "snake".
392 $copy = array();
393 while ($xi < $n_from && $yi < $n_to
394 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
395 $copy[] = $from_lines[$xi++];
396 ++$yi;
397 }
398 if ($copy) {
399 $edits[] = new Text_Diff_Op_copy($copy);
400 }
401
402 // Find deletes & adds.
403 $delete = array();
404 while ($xi < $n_from && $this->xchanged[$xi]) {
405 $delete[] = $from_lines[$xi++];
406 }
407
408 $add = array();
409 while ($yi < $n_to && $this->ychanged[$yi]) {
410 $add[] = $to_lines[$yi++];
411 }
412
413 if ($delete && $add) {
414 $edits[] = new Text_Diff_Op_change($delete, $add);
415 } elseif ($delete) {
416 $edits[] = new Text_Diff_Op_delete($delete);
417 } elseif ($add) {
418 $edits[] = new Text_Diff_Op_add($add);
419 }
420 }
421
422 return $edits;
423 }
_shiftBoundaries($lines, &$changed, $other_changed)
Adjusts inserts/deletes of identical lines to join changes as much as possible.
Definition: Diff.php:620

References _compareseq(), and _shiftBoundaries().

+ Here is the call graph for this function:

The documentation for this class was generated from the following file: