ILIAS  trunk Revision v12.0_alpha-377-g3641b37b9db
class.WordLevelDiff.php
Go to the documentation of this file.
1<?php
2
3// Copyright holded by MediaWiki contributers, Licensed under GPL version 2 or later
4
5
13// A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
14//
15// Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
16// You may copy this code freely under the conditions of the GPL.
17//
18
19define('USE_ASSERTS', function_exists('assert'));
20
26abstract class _DiffOp
27{
28 public $type;
29 public $orig;
30 public $closing;
31
32 abstract public function reverse();
33}
34
40class _DiffOp_Copy extends _DiffOp
41{
42 public $type = 'copy';
43
44 public function __construct($orig, $closing = false)
45 {
46 if (!is_array($closing)) {
48 }
49 $this->orig = $orig;
50 $this->closing = $closing;
51 }
52
53 public function reverse()
54 {
55 return new _DiffOp_Copy($this->closing, $this->orig);
56 }
57}
58
65{
66 public $type = 'delete';
67
68 public function __construct($lines)
69 {
70 $this->orig = $lines;
71 $this->closing = false;
72 }
73
74 public function reverse()
75 {
76 return new _DiffOp_Add($this->orig);
77 }
78}
79
85class _DiffOp_Add extends _DiffOp
86{
87 public $type = 'add';
88
89 public function __construct($lines)
90 {
91 $this->closing = $lines;
92 $this->orig = false;
93 }
94
95 public function reverse()
96 {
97 return new _DiffOp_Delete($this->closing);
98 }
99}
100
107{
108 public $type = 'change';
109
110 public function __construct($orig, $closing)
111 {
112 $this->orig = $orig;
113 $this->closing = $closing;
114 }
115
116 public function reverse()
117 {
118 return new _DiffOp_Change($this->closing, $this->orig);
119 }
120}
121
122
147{
148 public const MAX_XREF_LENGTH = 10000;
149 private array $xchanged;
150 private array $xv;
151 private array $xind;
152 private array $ychanged;
153 private array $yv;
154 private array $yind;
155 private int $lcs;
156 private array $in_seq;
157 private array $seq = [];
158
159 public function diff($from_lines, $to_lines)
160 {
161 $fname = '_DiffEngine::diff';
162 //wfProfileIn( $fname );
163
164 $n_from = sizeof($from_lines);
165 $n_to = sizeof($to_lines);
166
167 $this->xchanged = $this->ychanged = array();
168 $this->xv = $this->yv = array();
169 $this->xind = $this->yind = array();
170 unset($this->seq);
171 unset($this->in_seq);
172 unset($this->lcs);
173
174 // Skip leading common lines.
175 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
176 if ($from_lines[$skip] !== $to_lines[$skip]) {
177 break;
178 }
179 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
180 }
181 // Skip trailing common lines.
182 $xi = $n_from;
183 $yi = $n_to;
184 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
185 if ($from_lines[$xi] !== $to_lines[$yi]) {
186 break;
187 }
188 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
189 }
190
191 // Ignore lines which do not exist in both files.
192 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
193 $xhash[$this->_line_hash($from_lines[$xi])] = 1;
194 }
195
196 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
197 $line = $to_lines[$yi];
198 if (($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)]))) {
199 continue;
200 }
201 $yhash[$this->_line_hash($line)] = 1;
202 $this->yv[] = $line;
203 $this->yind[] = $yi;
204 }
205 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
206 $line = $from_lines[$xi];
207 if (($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)]))) {
208 continue;
209 }
210 $this->xv[] = $line;
211 $this->xind[] = $xi;
212 }
213
214 // Find the LCS.
215 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
216
217 // Merge edits when possible
218 $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
219 $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
220
221 // Compute the edit operations.
222 $edits = array();
223 $xi = $yi = 0;
224 while ($xi < $n_from || $yi < $n_to) {
225 USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
226 USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
227
228 // Skip matching "snake".
229 $copy = array();
230 while ($xi < $n_from && $yi < $n_to
231 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
232 $copy[] = $from_lines[$xi++];
233 ++$yi;
234 }
235 if ($copy) {
236 $edits[] = new _DiffOp_Copy($copy);
237 }
238
239 // Find deletes & adds.
240 $delete = array();
241 while ($xi < $n_from && $this->xchanged[$xi]) {
242 $delete[] = $from_lines[$xi++];
243 }
244
245 $add = array();
246 while ($yi < $n_to && $this->ychanged[$yi]) {
247 $add[] = $to_lines[$yi++];
248 }
249
250 if ($delete && $add) {
251 $edits[] = new _DiffOp_Change($delete, $add);
252 } elseif ($delete) {
253 $edits[] = new _DiffOp_Delete($delete);
254 } elseif ($add) {
255 $edits[] = new _DiffOp_Add($add);
256 }
257 }
258 //wfProfileOut( $fname );
259 return $edits;
260 }
261
265 public function _line_hash($line)
266 {
267 if (strlen($line) > self::MAX_XREF_LENGTH) {
268 return md5($line);
269 } else {
270 return $line;
271 }
272 }
273
274
275 /* Divide the Largest Common Subsequence (LCS) of the sequences
276 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
277 * sized segments.
278 *
279 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
280 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
281 * sub sequences. The first sub-sequence is contained in [X0, X1),
282 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
283 * that (X0, Y0) == (XOFF, YOFF) and
284 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
285 *
286 * This function assumes that the first lines of the specified portions
287 * of the two files do not match, and likewise that the last lines do not
288 * match. The caller must trim matching lines from the beginning and end
289 * of the portions it is going to specify.
290 */
291 public function _diag($xoff, $xlim, $yoff, $ylim, $nchunks)
292 {
293 $fname = '_DiffEngine::_diag';
294 //wfProfileIn( $fname );
295 $flip = false;
296
297 if ($xlim - $xoff > $ylim - $yoff) {
298 // Things seems faster (I'm not sure I understand why)
299 // when the shortest sequence in X.
300 $flip = true;
301 list($xoff, $xlim, $yoff, $ylim)
302 = array( $yoff, $ylim, $xoff, $xlim);
303 }
304
305 if ($flip) {
306 for ($i = $ylim - 1; $i >= $yoff; $i--) {
307 $ymatches[$this->xv[$i]][] = $i;
308 }
309 } else {
310 for ($i = $ylim - 1; $i >= $yoff; $i--) {
311 $ymatches[$this->yv[$i]][] = $i;
312 }
313 }
314
315 $this->lcs = 0;
316 $this->seq[0] = $yoff - 1;
317 $this->in_seq = array();
318 $ymids[0] = array();
319
320 $numer = $xlim - $xoff + $nchunks - 1;
321 $x = $xoff;
322 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
323 //wfProfileIn( "$fname-chunk" );
324 if ($chunk > 0) {
325 for ($i = 0; $i <= $this->lcs; $i++) {
326 $ymids[$i][$chunk - 1] = $this->seq[$i];
327 }
328 }
329
330 $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $chunk) / $nchunks);
331 for (; $x < $x1; $x++) {
332 $line = $flip ? $this->yv[$x] : $this->xv[$x];
333 if (empty($ymatches[$line])) {
334 continue;
335 }
336 $matches = $ymatches[$line];
337 reset($matches);
338 foreach ($matches as $junk => $y) {
339 if (empty($this->in_seq[$y])) {
340 $k = $this->_lcs_pos($y);
341 USE_ASSERTS && assert($k > 0);
342 $ymids[$k] = $ymids[$k - 1];
343 break;
344 }
345 }
346 foreach ($matches as $y) {
347 if ($y > $this->seq[$k - 1]) {
348 USE_ASSERTS && assert($y < $this->seq[$k]);
349 // Optimization: this is a common case:
350 // next match is just replacing previous match.
351 $this->in_seq[$this->seq[$k]] = false;
352 $this->seq[$k] = $y;
353 $this->in_seq[$y] = 1;
354 } elseif (empty($this->in_seq[$y])) {
355 $k = $this->_lcs_pos($y);
356 USE_ASSERTS && assert($k > 0);
357 $ymids[$k] = $ymids[$k - 1];
358 }
359 }
360 }
361 //wfProfileOut( "$fname-chunk" );
362 }
363
364 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
365 $ymid = $ymids[$this->lcs];
366 for ($n = 0; $n < $nchunks - 1; $n++) {
367 $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $n) / $nchunks);
368 $y1 = $ymid[$n] + 1;
369 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
370 }
371 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
372
373 //wfProfileOut( $fname );
374 return array($this->lcs, $seps);
375 }
376
377 public function _lcs_pos($ypos)
378 {
379 $fname = '_DiffEngine::_lcs_pos';
380 //wfProfileIn( $fname );
381
382 $end = $this->lcs;
383 if ($end == 0 || $ypos > $this->seq[$end]) {
384 $this->seq[++$this->lcs] = $ypos;
385 $this->in_seq[$ypos] = 1;
386 //wfProfileOut( $fname );
387 return $this->lcs;
388 }
389
390 $beg = 1;
391 while ($beg < $end) {
392 $mid = (int) (($beg + $end) / 2);
393 if ($ypos > $this->seq[$mid]) {
394 $beg = $mid + 1;
395 } else {
396 $end = $mid;
397 }
398 }
399
400 USE_ASSERTS && assert($ypos != $this->seq[$end]);
401
402 $this->in_seq[$this->seq[$end]] = false;
403 $this->seq[$end] = $ypos;
404 $this->in_seq[$ypos] = 1;
405 //wfProfileOut( $fname );
406 return $end;
407 }
408
409 /* Find LCS of two sequences.
410 *
411 * The results are recorded in the vectors $this->{x,y}changed[], by
412 * storing a 1 in the element for each line that is an insertion
413 * or deletion (ie. is not in the LCS).
414 *
415 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
416 *
417 * Note that XLIM, YLIM are exclusive bounds.
418 * All line numbers are origin-0 and discarded lines are not counted.
419 */
420 public function _compareseq($xoff, $xlim, $yoff, $ylim)
421 {
422 $fname = '_DiffEngine::_compareseq';
423 //wfProfileIn( $fname );
424
425 // Slide down the bottom initial diagonal.
426 while ($xoff < $xlim && $yoff < $ylim
427 && $this->xv[$xoff] == $this->yv[$yoff]) {
428 ++$xoff;
429 ++$yoff;
430 }
431
432 // Slide up the top initial diagonal.
433 while ($xlim > $xoff && $ylim > $yoff
434 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
435 --$xlim;
436 --$ylim;
437 }
438
439 if ($xoff == $xlim || $yoff == $ylim) {
440 $lcs = 0;
441 } else {
442 // This is ad hoc but seems to work well.
443 //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
444 //$nchunks = max(2,min(8,(int)$nchunks));
445 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
446 list($lcs, $seps)
447 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
448 }
449
450 if ($lcs == 0) {
451 // X and Y sequences have no common subsequence:
452 // mark all changed.
453 while ($yoff < $ylim) {
454 $this->ychanged[$this->yind[$yoff++]] = 1;
455 }
456 while ($xoff < $xlim) {
457 $this->xchanged[$this->xind[$xoff++]] = 1;
458 }
459 } else {
460 // Use the partitions to split this problem into subproblems.
461 reset($seps);
462 $pt1 = $seps[0];
463 while ($pt2 = next($seps)) {
464 $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
465 $pt1 = $pt2;
466 }
467 }
468 //wfProfileOut( $fname );
469 }
470
471 /* Adjust inserts/deletes of identical lines to join changes
472 * as much as possible.
473 *
474 * We do something when a run of changed lines include a
475 * line at one end and has an excluded, identical line at the other.
476 * We are free to choose which identical line is included.
477 * `compareseq' usually chooses the one at the beginning,
478 * but usually it is cleaner to consider the following identical line
479 * to be the "change".
480 *
481 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
482 */
483 public function _shift_boundaries($lines, &$changed, $other_changed)
484 {
485 $fname = '_DiffEngine::_shift_boundaries';
486 //wfProfileIn( $fname );
487 $i = 0;
488 $j = 0;
489
490 USE_ASSERTS && assert(sizeof($lines) == sizeof($changed));
491 $len = sizeof($lines);
492 $other_len = sizeof($other_changed);
493
494 while (1) {
495 /*
496 * Scan forwards to find beginning of another run of changes.
497 * Also keep track of the corresponding point in the other file.
498 *
499 * Throughout this code, $i and $j are adjusted together so that
500 * the first $i elements of $changed and the first $j elements
501 * of $other_changed both contain the same number of zeros
502 * (unchanged lines).
503 * Furthermore, $j is always kept so that $j == $other_len or
504 * $other_changed[$j] == false.
505 */
506 while ($j < $other_len && $other_changed[$j]) {
507 $j++;
508 }
509
510 while ($i < $len && !$changed[$i]) {
511 USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
512 $i++;
513 $j++;
514 while ($j < $other_len && $other_changed[$j]) {
515 $j++;
516 }
517 }
518
519 if ($i == $len) {
520 break;
521 }
522
523 $start = $i;
524
525 // Find the end of this run of changes.
526 while (++$i < $len && $changed[$i]) {
527 }
528
529 do {
530 /*
531 * Record the length of this run of changes, so that
532 * we can later determine whether the run has grown.
533 */
534 $runlength = $i - $start;
535
536 /*
537 * Move the changed region back, so long as the
538 * previous unchanged line matches the last changed one.
539 * This merges with previous changed regions.
540 */
541 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
542 $changed[--$start] = 1;
543 $changed[--$i] = false;
544 while ($start > 0 && $changed[$start - 1]) {
545 $start--;
546 }
547 USE_ASSERTS && assert($j > 0);
548 while ($other_changed[--$j]) {
549 }
550 USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
551 }
552
553 /*
554 * Set CORRESPONDING to the end of the changed run, at the last
555 * point where it corresponds to a changed run in the other file.
556 * CORRESPONDING == LEN means no such point has been found.
557 */
558 $corresponding = $j < $other_len ? $i : $len;
559
560 /*
561 * Move the changed region forward, so long as the
562 * first changed line matches the following unchanged one.
563 * This merges with following changed regions.
564 * Do this second, so that if there are no merges,
565 * the changed region is moved forward as far as possible.
566 */
567 while ($i < $len && $lines[$start] == $lines[$i]) {
568 $changed[$start++] = false;
569 $changed[$i++] = 1;
570 while ($i < $len && $changed[$i]) {
571 $i++;
572 }
573
574 USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
575 $j++;
576 if ($j < $other_len && $other_changed[$j]) {
577 $corresponding = $i;
578 while ($j < $other_len && $other_changed[$j]) {
579 $j++;
580 }
581 }
582 }
583 } while ($runlength != $i - $start);
584
585 /*
586 * If possible, move the fully-merged run of changes
587 * back to a corresponding run in the other file.
588 */
589 while ($corresponding < $i) {
590 $changed[--$start] = 1;
591 $changed[--$i] = 0;
592 USE_ASSERTS && assert($j > 0);
593 while ($other_changed[--$j]) {
594 }
595 USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
596 }
597 }
598 //wfProfileOut( $fname );
599 }
600}
601
608class Diff
609{
610 public $edits;
611
620 public function __construct($from_lines, $to_lines)
621 {
622 $eng = new _DiffEngine();
623 $this->edits = $eng->diff($from_lines, $to_lines);
624 //$this->_check($from_lines, $to_lines);
625 }
626
637 public function reverse()
638 {
639 $rev = $this;
640 $rev->edits = array();
641 foreach ($this->edits as $edit) {
642 $rev->edits[] = $edit->reverse();
643 }
644 return $rev;
645 }
646
652 public function isEmpty()
653 {
654 foreach ($this->edits as $edit) {
655 if ($edit->type != 'copy') {
656 return false;
657 }
658 }
659 return true;
660 }
661
669 public function lcs()
670 {
671 $lcs = 0;
672 foreach ($this->edits as $edit) {
673 if ($edit->type == 'copy') {
674 $lcs += sizeof($edit->orig);
675 }
676 }
677 return $lcs;
678 }
679
688 public function orig()
689 {
690 $lines = array();
691
692 foreach ($this->edits as $edit) {
693 if ($edit->orig) {
694 array_splice($lines, sizeof($lines), 0, $edit->orig);
695 }
696 }
697 return $lines;
698 }
699
708 public function closing()
709 {
710 $lines = array();
711
712 foreach ($this->edits as $edit) {
713 if ($edit->closing) {
714 array_splice($lines, sizeof($lines), 0, $edit->closing);
715 }
716 }
717 return $lines;
718 }
719
725 public function _check($from_lines, $to_lines)
726 {
727 $fname = 'Diff::_check';
728 //wfProfileIn( $fname );
729 if (serialize($from_lines) != serialize($this->orig())) {
730 throw new \LogicException("Reconstructed original doesn't match");
731 }
732 if (serialize($to_lines) != serialize($this->closing())) {
733 throw new \LogicException("Reconstructed closing doesn't match");
734 }
735
736 $rev = $this->reverse();
737 if (serialize($to_lines) != serialize($rev->orig())) {
738 throw new \LogicException("Reversed original doesn't match");
739 }
740 if (serialize($from_lines) != serialize($rev->closing())) {
741 throw new \LogicException("Reversed closing doesn't match");
742 }
743
744
745 $prevtype = 'none';
746 foreach ($this->edits as $edit) {
747 if ($prevtype == $edit->type) {
748 throw new \RuntimeException("Edit sequence is non-optimal");
749 }
750 $prevtype = $edit->type;
751 }
752
753 $lcs = $this->lcs();
754 trigger_error('Diff okay: LCS = ' . $lcs, E_USER_NOTICE);
755 //wfProfileOut( $fname );
756 }
757}
758
764class MappedDiff extends Diff
765{
789 public function __construct(
790 $from_lines,
791 $to_lines,
792 $mapped_from_lines,
793 $mapped_to_lines
794 ) {
795 $fname = 'MappedDiff::MappedDiff';
796 //wfProfileIn( $fname );
797
798 assert(sizeof($from_lines) == sizeof($mapped_from_lines));
799 assert(sizeof($to_lines) == sizeof($mapped_to_lines));
800
801 parent::__construct($mapped_from_lines, $mapped_to_lines);
802
803 $xi = $yi = 0;
804 for ($i = 0; $i < sizeof($this->edits); $i++) {
805 $orig = &$this->edits[$i]->orig;
806 if (is_array($orig)) {
807 $orig = array_slice($from_lines, $xi, sizeof($orig));
808 $xi += sizeof($orig);
809 }
810
811 $closing = &$this->edits[$i]->closing;
812 if (is_array($closing)) {
813 $closing = array_slice($to_lines, $yi, sizeof($closing));
814 $yi += sizeof($closing);
815 }
816 }
817 //wfProfileOut( $fname );
818 }
819}
820
832{
840
848
855 public function format($diff)
856 {
857 $fname = 'DiffFormatter::format';
858 //wfProfileIn( $fname );
859
860 $xi = $yi = 1;
861 $block = false;
862 $context = array();
863
866
867 $this->_start_diff();
868
869 foreach ($diff->edits as $edit) {
870 if ($edit->type == 'copy') {
871 if (is_array($block)) {
872 if (sizeof($edit->orig) <= $nlead + $ntrail) {
873 $block[] = $edit;
874 } else {
875 if ($ntrail) {
876 $context = array_slice($edit->orig, 0, $ntrail);
877 $block[] = new _DiffOp_Copy($context);
878 }
879 $this->_block(
880 $x0,
881 $ntrail + $xi - $x0,
882 $y0,
883 $ntrail + $yi - $y0,
884 $block
885 );
886 $block = false;
887 }
888 }
889 $context = $edit->orig;
890 } else {
891 if (!is_array($block)) {
892 $context = array_slice($context, sizeof($context) - $nlead);
893 $x0 = $xi - sizeof($context);
894 $y0 = $yi - sizeof($context);
895 $block = array();
896 if ($context) {
897 $block[] = new _DiffOp_Copy($context);
898 }
899 }
900 $block[] = $edit;
901 }
902
903 if ($edit->orig) {
904 $xi += sizeof($edit->orig);
905 }
906 if ($edit->closing) {
907 $yi += sizeof($edit->closing);
908 }
909 }
910
911 if (is_array($block)) {
912 $this->_block(
913 $x0,
914 $xi - $x0,
915 $y0,
916 $yi - $y0,
917 $block
918 );
919 }
920
921 $end = $this->_end_diff();
922 //wfProfileOut( $fname );
923 return $end;
924 }
925
926 public function _block($xbeg, $xlen, $ybeg, $ylen, $edits)
927 {
928 $fname = 'DiffFormatter::_block';
929 //wfProfileIn( $fname );
930 $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
931 foreach ($edits as $edit) {
932 if ($edit->type == 'copy') {
933 $this->_context($edit->orig);
934 } elseif ($edit->type == 'add') {
935 $this->_added($edit->closing);
936 } elseif ($edit->type == 'delete') {
937 $this->_deleted($edit->orig);
938 } elseif ($edit->type == 'change') {
939 $this->_changed($edit->orig, $edit->closing);
940 } else {
941 throw new \RuntimeException('Unknown edit type');
942 }
943 }
944 $this->_end_block();
945 //wfProfileOut( $fname );
946 }
947
948 public function _start_diff()
949 {
950 ob_start();
951 }
952
953 public function _end_diff()
954 {
955 $val = ob_get_contents();
956 ob_end_clean();
957 return $val;
958 }
959
960 public function _block_header($xbeg, $xlen, $ybeg, $ylen)
961 {
962 if ($xlen > 1) {
963 $xbeg .= "," . ($xbeg + $xlen - 1);
964 }
965 if ($ylen > 1) {
966 $ybeg .= "," . ($ybeg + $ylen - 1);
967 }
968
969 return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
970 }
971
972 public function _start_block($header)
973 {
974 echo $header;
975 }
976
977 public function _end_block()
978 {
979 }
980
981 public function _lines($lines, $prefix = ' ')
982 {
983 foreach ($lines as $line) {
984 echo "$prefix $line\n";
985 }
986 }
987
988 public function _context($lines)
989 {
990 $this->_lines($lines);
991 }
992
993 public function _added($lines)
994 {
995 $this->_lines($lines, '>');
996 }
997 public function _deleted($lines)
998 {
999 $this->_lines($lines, '<');
1000 }
1001
1002 public function _changed($orig, $closing)
1003 {
1004 $this->_deleted($orig);
1005 echo "---\n";
1006 $this->_added($closing);
1007 }
1008}
1009
1010
1016define('NBSP', '&#160;'); // iso-8859-x non-breaking space.
1017
1024{
1025 private array $_lines;
1026 private string $_line;
1027 private string $_group;
1028 private string $_tag;
1029
1030 public function __construct()
1031 {
1032 $this->_lines = array();
1033 $this->_line = '';
1034 $this->_group = '';
1035 $this->_tag = '';
1036 }
1037
1038 public function _flushGroup($new_tag)
1039 {
1040 if ($this->_group !== '') {
1041 if ($this->_tag == 'ins') {
1042 $this->_line .= '[ilDiffInsStart]' .
1043 htmlspecialchars($this->_group) . '[ilDiffInsEnd]';
1044 } elseif ($this->_tag == 'del') {
1045 $this->_line .= '[ilDiffDelStart]' .
1046 htmlspecialchars($this->_group) . '[ilDiffDelEnd]';
1047 } else {
1048 $this->_line .= htmlspecialchars($this->_group);
1049 }
1050 }
1051 $this->_group = '';
1052 $this->_tag = $new_tag;
1053 }
1054
1055 public function _flushLine($new_tag)
1056 {
1057 $this->_flushGroup($new_tag);
1058 if ($this->_line != '') {
1059 array_push($this->_lines, $this->_line);
1060 } else {
1061 # make empty lines visible by inserting an NBSP
1062 array_push($this->_lines, NBSP);
1063 }
1064 $this->_line = '';
1065 }
1066
1067 public function addWords($words, $tag = '')
1068 {
1069 if ($tag != $this->_tag) {
1070 $this->_flushGroup($tag);
1071 }
1072
1073 foreach ($words as $word) {
1074 // new-line should only come as first char of word.
1075 if ($word == '') {
1076 continue;
1077 }
1078 if ($word[0] == "\n") {
1079 $this->_flushLine($tag);
1080 $word = substr($word, 1);
1081 }
1082 assert(!strstr($word, "\n"));
1083 $this->_group .= $word;
1084 }
1085 }
1086
1087 public function getLines()
1088 {
1089 $this->_flushLine('~done');
1090 return $this->_lines;
1091 }
1092}
1093
1100{
1101 public const MAX_LINE_LENGTH = 10000;
1102
1103 public function __construct($orig_lines, $closing_lines)
1104 {
1105 $fname = 'WordLevelDiff::WordLevelDiff';
1106 //wfProfileIn( $fname );
1107
1108 list($orig_words, $orig_stripped) = $this->_split($orig_lines);
1109 list($closing_words, $closing_stripped) = $this->_split($closing_lines);
1110
1112 $orig_words,
1113 $closing_words,
1114 $orig_stripped,
1115 $closing_stripped
1116 );
1117 //wfProfileOut( $fname );
1118 }
1119
1120 public function _split($lines)
1121 {
1122 $fname = 'WordLevelDiff::_split';
1123 //wfProfileIn( $fname );
1124
1125 $words = array();
1126 $stripped = array();
1127 $first = true;
1128 foreach ($lines as $line) {
1129 # If the line is too long, just pretend the entire line is one big word
1130 # This prevents resource exhaustion problems
1131 if ($first) {
1132 $first = false;
1133 } else {
1134 $words[] = "\n";
1135 $stripped[] = "\n";
1136 }
1137 if (strlen($line) > self::MAX_LINE_LENGTH) {
1138 $words[] = $line;
1139 $stripped[] = $line;
1140 } else {
1141 $m = array();
1142 if (preg_match_all(
1143 '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1144 $line,
1145 $m
1146 )) {
1147 $words = array_merge($words, $m[0]);
1148 $stripped = array_merge($stripped, $m[1]);
1149 }
1150 }
1151 }
1152 //wfProfileOut( $fname );
1153 return array($words, $stripped);
1154 }
1155
1156 public function orig()
1157 {
1158 $fname = 'WordLevelDiff::orig';
1159 //wfProfileIn( $fname );
1160 $orig = new _HWLDF_WordAccumulator();
1161
1162 foreach ($this->edits as $edit) {
1163 if ($edit->type == 'copy') {
1164 $orig->addWords($edit->orig);
1165 } elseif ($edit->orig) {
1166 $orig->addWords($edit->orig, 'del');
1167 }
1168 }
1169 $lines = $orig->getLines();
1170 //wfProfileOut( $fname );
1171 return $lines;
1172 }
1173
1174 public function closing()
1175 {
1176 $fname = 'WordLevelDiff::closing';
1177 //wfProfileIn( $fname );
1178 $closing = new _HWLDF_WordAccumulator();
1179
1180 foreach ($this->edits as $edit) {
1181 if ($edit->type == 'copy') {
1182 $closing->addWords($edit->closing);
1183 } elseif ($edit->closing) {
1184 $closing->addWords($edit->closing, 'ins');
1185 }
1186 }
1187 $lines = $closing->getLines();
1188 //wfProfileOut( $fname );
1189 return $lines;
1190 }
1191}
_block_header($xbeg, $xlen, $ybeg, $ylen)
_block($xbeg, $xlen, $ybeg, $ylen, $edits)
_changed($orig, $closing)
_lines($lines, $prefix=' ')
$leading_context_lines
Number of leading context "lines" to preserve.
format($diff)
Format a diff.
$trailing_context_lines
Number of trailing context "lines" to preserve.
reverse()
Compute reversed Diff.
orig()
Get the original set of lines.
__construct($from_lines, $to_lines)
Constructor.
isEmpty()
Check for empty diff.
lcs()
Compute the length of the Longest Common Subsequence (LCS).
closing()
Get the closing set of lines.
_check($from_lines, $to_lines)
Check a Diff for validity.
__construct( $from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines)
Constructor.
__construct($orig_lines, $closing_lines)
Constructor.
closing()
Get the closing set of lines.
orig()
Get the original set of lines.
const NBSP
Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3.
const USE_ASSERTS
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
diff($from_lines, $to_lines)
_shift_boundaries($lines, &$changed, $other_changed)
_line_hash($line)
Returns the whole line if it's small enough, or the MD5 hash otherwise.
_compareseq($xoff, $xlim, $yoff, $ylim)
__construct($orig, $closing)
__construct($orig, $closing=false)
__construct(Container $dic, ilPlugin $plugin)
@inheritDoc
$context
Definition: webdav.php:31