ILIAS  release_5-1 Revision 5.0.0-5477-g43f3e3fab5f
Text_Diff_Engine_native Class Reference
+ Collaboration diagram for Text_Diff_Engine_native:

Public Member Functions

 diff ($from_lines, $to_lines)
 
 _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
 Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments. More...
 
 _lcsPos ($ypos)
 
 _compareseq ($xoff, $xlim, $yoff, $ylim)
 Finds LCS of two sequences. More...
 
 _shiftBoundaries ($lines, &$changed, $other_changed)
 Adjusts inserts/deletes of identical lines to join changes as much as possible. More...
 

Detailed Description

Definition at line 321 of file Diff.php.

Member Function Documentation

◆ _compareseq()

Text_Diff_Engine_native::_compareseq (   $xoff,
  $xlim,
  $yoff,
  $ylim 
)

Finds LCS of two sequences.

The results are recorded in the vectors $this->{x,y}changed[], by storing a 1 in the element for each line that is an insertion or deletion (ie. is not in the LCS).

The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1.

Note that XLIM, YLIM are exclusive bounds. All line numbers are origin-0 and discarded lines are not counted.

Definition at line 558 of file Diff.php.

559 {
560 /* Slide down the bottom initial diagonal. */
561 while ($xoff < $xlim && $yoff < $ylim
562 && $this->xv[$xoff] == $this->yv[$yoff]) {
563 ++$xoff;
564 ++$yoff;
565 }
566
567 /* Slide up the top initial diagonal. */
568 while ($xlim > $xoff && $ylim > $yoff
569 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
570 --$xlim;
571 --$ylim;
572 }
573
574 if ($xoff == $xlim || $yoff == $ylim) {
575 $lcs = 0;
576 } else {
577 /* This is ad hoc but seems to work well. $nchunks =
578 * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks =
579 * max(2,min(8,(int)$nchunks)); */
580 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
581 list($lcs, $seps)
582 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
583 }
584
585 if ($lcs == 0) {
586 /* X and Y sequences have no common subsequence: mark all
587 * changed. */
588 while ($yoff < $ylim) {
589 $this->ychanged[$this->yind[$yoff++]] = 1;
590 }
591 while ($xoff < $xlim) {
592 $this->xchanged[$this->xind[$xoff++]] = 1;
593 }
594 } else {
595 /* Use the partitions to split this problem into subproblems. */
596 reset($seps);
597 $pt1 = $seps[0];
598 while ($pt2 = next($seps)) {
599 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
600 $pt1 = $pt2;
601 }
602 }
603 }
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF,...
Definition: Diff.php:438
_compareseq($xoff, $xlim, $yoff, $ylim)
Finds LCS of two sequences.
Definition: Diff.php:558

References _compareseq(), and _diag().

Referenced by _compareseq(), and diff().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ _diag()

Text_Diff_Engine_native::_diag (   $xoff,
  $xlim,
  $yoff,
  $ylim,
  $nchunks 
)

Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments.

Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of NCHUNKS+1 (X, Y) indexes giving the diving points between sub sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1), the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).

This function assumes that the first lines of the specified portions of the two files do not match, and likewise that the last lines do not match. The caller must trim matching lines from the beginning and end of the portions it is going to specify.

Definition at line 438 of file Diff.php.

439 {
440 $flip = false;
441
442 if ($xlim - $xoff > $ylim - $yoff) {
443 /* Things seems faster (I'm not sure I understand why) when the
444 * shortest sequence is in X. */
445 $flip = true;
446 list ($xoff, $xlim, $yoff, $ylim)
447 = array($yoff, $ylim, $xoff, $xlim);
448 }
449
450 if ($flip) {
451 for ($i = $ylim - 1; $i >= $yoff; $i--) {
452 $ymatches[$this->xv[$i]][] = $i;
453 }
454 } else {
455 for ($i = $ylim - 1; $i >= $yoff; $i--) {
456 $ymatches[$this->yv[$i]][] = $i;
457 }
458 }
459
460 $this->lcs = 0;
461 $this->seq[0]= $yoff - 1;
462 $this->in_seq = array();
463 $ymids[0] = array();
464
465 $numer = $xlim - $xoff + $nchunks - 1;
466 $x = $xoff;
467 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
468 if ($chunk > 0) {
469 for ($i = 0; $i <= $this->lcs; $i++) {
470 $ymids[$i][$chunk - 1] = $this->seq[$i];
471 }
472 }
473
474 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
475 for (; $x < $x1; $x++) {
476 $line = $flip ? $this->yv[$x] : $this->xv[$x];
477 if (empty($ymatches[$line])) {
478 continue;
479 }
480 $matches = $ymatches[$line];
481 foreach ($matches as $y) {
482 if (empty($this->in_seq[$y])) {
483 $k = $this->_lcsPos($y);
484 assert($k > 0);
485 $ymids[$k] = $ymids[$k - 1];
486 break;
487 }
488 }
489
490 while (list($junk, $y) = each($matches)) {
491 if ($y > $this->seq[$k - 1]) {
492 assert($y < $this->seq[$k]);
493 /* Optimization: this is a common case: next match is
494 * just replacing previous match. */
495 $this->in_seq[$this->seq[$k]] = false;
496 $this->seq[$k] = $y;
497 $this->in_seq[$y] = 1;
498 } elseif (empty($this->in_seq[$y])) {
499 $k = $this->_lcsPos($y);
500 assert($k > 0);
501 $ymids[$k] = $ymids[$k - 1];
502 }
503 }
504 }
505 }
506
507 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
508 $ymid = $ymids[$this->lcs];
509 for ($n = 0; $n < $nchunks - 1; $n++) {
510 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
511 $y1 = $ymid[$n] + 1;
512 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
513 }
514 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
515
516 return array($this->lcs, $seps);
517 }
$n
Definition: RandomTest.php:80
$y
Definition: example_007.php:83
$x
Definition: example_009.php:98

References $n, $x, $y, and _lcsPos().

Referenced by _compareseq().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ _lcsPos()

Text_Diff_Engine_native::_lcsPos (   $ypos)

Definition at line 519 of file Diff.php.

520 {
521 $end = $this->lcs;
522 if ($end == 0 || $ypos > $this->seq[$end]) {
523 $this->seq[++$this->lcs] = $ypos;
524 $this->in_seq[$ypos] = 1;
525 return $this->lcs;
526 }
527
528 $beg = 1;
529 while ($beg < $end) {
530 $mid = (int)(($beg + $end) / 2);
531 if ($ypos > $this->seq[$mid]) {
532 $beg = $mid + 1;
533 } else {
534 $end = $mid;
535 }
536 }
537
538 assert($ypos != $this->seq[$end]);
539
540 $this->in_seq[$this->seq[$end]] = false;
541 $this->seq[$end] = $ypos;
542 $this->in_seq[$ypos] = 1;
543 return $end;
544 }

Referenced by _diag().

+ Here is the caller graph for this function:

◆ _shiftBoundaries()

Text_Diff_Engine_native::_shiftBoundaries (   $lines,
$changed,
  $other_changed 
)

Adjusts inserts/deletes of identical lines to join changes as much as possible.

We do something when a run of changed lines include a line at one end and has an excluded, identical line at the other. We are free to choose which identical line is included. ‘compareseq’ usually chooses the one at the beginning, but usually it is cleaner to consider the following identical line to be the "change".

This is extracted verbatim from analyze.c (GNU diffutils-2.7).

Definition at line 617 of file Diff.php.

618 {
619 $i = 0;
620 $j = 0;
621
622 assert('count($lines) == count($changed)');
623 $len = count($lines);
624 $other_len = count($other_changed);
625
626 while (1) {
627 /* Scan forward to find the beginning of another run of
628 * changes. Also keep track of the corresponding point in the
629 * other file.
630 *
631 * Throughout this code, $i and $j are adjusted together so that
632 * the first $i elements of $changed and the first $j elements of
633 * $other_changed both contain the same number of zeros (unchanged
634 * lines).
635 *
636 * Furthermore, $j is always kept so that $j == $other_len or
637 * $other_changed[$j] == false. */
638 while ($j < $other_len && $other_changed[$j]) {
639 $j++;
640 }
641
642 while ($i < $len && ! $changed[$i]) {
643 assert('$j < $other_len && ! $other_changed[$j]');
644 $i++; $j++;
645 while ($j < $other_len && $other_changed[$j]) {
646 $j++;
647 }
648 }
649
650 if ($i == $len) {
651 break;
652 }
653
654 $start = $i;
655
656 /* Find the end of this run of changes. */
657 while (++$i < $len && $changed[$i]) {
658 continue;
659 }
660
661 do {
662 /* Record the length of this run of changes, so that we can
663 * later determine whether the run has grown. */
664 $runlength = $i - $start;
665
666 /* Move the changed region back, so long as the previous
667 * unchanged line matches the last changed one. This merges
668 * with previous changed regions. */
669 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
670 $changed[--$start] = 1;
671 $changed[--$i] = false;
672 while ($start > 0 && $changed[$start - 1]) {
673 $start--;
674 }
675 assert('$j > 0');
676 while ($other_changed[--$j]) {
677 continue;
678 }
679 assert('$j >= 0 && !$other_changed[$j]');
680 }
681
682 /* Set CORRESPONDING to the end of the changed run, at the
683 * last point where it corresponds to a changed run in the
684 * other file. CORRESPONDING == LEN means no such point has
685 * been found. */
686 $corresponding = $j < $other_len ? $i : $len;
687
688 /* Move the changed region forward, so long as the first
689 * changed line matches the following unchanged one. This
690 * merges with following changed regions. Do this second, so
691 * that if there are no merges, the changed region is moved
692 * forward as far as possible. */
693 while ($i < $len && $lines[$start] == $lines[$i]) {
694 $changed[$start++] = false;
695 $changed[$i++] = 1;
696 while ($i < $len && $changed[$i]) {
697 $i++;
698 }
699
700 assert('$j < $other_len && ! $other_changed[$j]');
701 $j++;
702 if ($j < $other_len && $other_changed[$j]) {
703 $corresponding = $i;
704 while ($j < $other_len && $other_changed[$j]) {
705 $j++;
706 }
707 }
708 }
709 } while ($runlength != $i - $start);
710
711 /* If possible, move the fully-merged run of changes back to a
712 * corresponding run in the other file. */
713 while ($corresponding < $i) {
714 $changed[--$start] = 1;
715 $changed[--$i] = 0;
716 assert('$j > 0');
717 while ($other_changed[--$j]) {
718 continue;
719 }
720 assert('$j >= 0 && !$other_changed[$j]');
721 }
722 }
723 }

References $changed.

Referenced by diff().

+ Here is the caller graph for this function:

◆ diff()

Text_Diff_Engine_native::diff (   $from_lines,
  $to_lines 
)

Definition at line 323 of file Diff.php.

324 {
325 $n_from = count($from_lines);
326 $n_to = count($to_lines);
327
328 $this->xchanged = $this->ychanged = array();
329 $this->xv = $this->yv = array();
330 $this->xind = $this->yind = array();
331 unset($this->seq);
332 unset($this->in_seq);
333 unset($this->lcs);
334
335 // Skip leading common lines.
336 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
337 if ($from_lines[$skip] != $to_lines[$skip]) {
338 break;
339 }
340 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
341 }
342
343 // Skip trailing common lines.
344 $xi = $n_from; $yi = $n_to;
345 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
346 if ($from_lines[$xi] != $to_lines[$yi]) {
347 break;
348 }
349 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
350 }
351
352 // Ignore lines which do not exist in both files.
353 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
354 $xhash[$from_lines[$xi]] = 1;
355 }
356 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
357 $line = $to_lines[$yi];
358 if (($this->ychanged[$yi] = empty($xhash[$line]))) {
359 continue;
360 }
361 $yhash[$line] = 1;
362 $this->yv[] = $line;
363 $this->yind[] = $yi;
364 }
365 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
366 $line = $from_lines[$xi];
367 if (($this->xchanged[$xi] = empty($yhash[$line]))) {
368 continue;
369 }
370 $this->xv[] = $line;
371 $this->xind[] = $xi;
372 }
373
374 // Find the LCS.
375 $this->_compareseq(0, count($this->xv), 0, count($this->yv));
376
377 // Merge edits when possible.
378 $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
379 $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
380
381 // Compute the edit operations.
382 $edits = array();
383 $xi = $yi = 0;
384 while ($xi < $n_from || $yi < $n_to) {
385 assert($yi < $n_to || $this->xchanged[$xi]);
386 assert($xi < $n_from || $this->ychanged[$yi]);
387
388 // Skip matching "snake".
389 $copy = array();
390 while ($xi < $n_from && $yi < $n_to
391 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
392 $copy[] = $from_lines[$xi++];
393 ++$yi;
394 }
395 if ($copy) {
396 $edits[] = new Text_Diff_Op_copy($copy);
397 }
398
399 // Find deletes & adds.
400 $delete = array();
401 while ($xi < $n_from && $this->xchanged[$xi]) {
402 $delete[] = $from_lines[$xi++];
403 }
404
405 $add = array();
406 while ($yi < $n_to && $this->ychanged[$yi]) {
407 $add[] = $to_lines[$yi++];
408 }
409
410 if ($delete && $add) {
411 $edits[] = new Text_Diff_Op_change($delete, $add);
412 } elseif ($delete) {
413 $edits[] = new Text_Diff_Op_delete($delete);
414 } elseif ($add) {
415 $edits[] = new Text_Diff_Op_add($add);
416 }
417 }
418
419 return $edits;
420 }
_shiftBoundaries($lines, &$changed, $other_changed)
Adjusts inserts/deletes of identical lines to join changes as much as possible.
Definition: Diff.php:617

References _compareseq(), and _shiftBoundaries().

+ Here is the call graph for this function:

The documentation for this class was generated from the following file: