ILIAS  trunk Revision v11.0_alpha-3011-gc6b235a2e85
class.WordLevelDiff.php
Go to the documentation of this file.
1<?php
2
3// Copyright holded by MediaWiki contributers, Licensed under GPL version 2 or later
4
5
13// A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
14//
15// Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
16// You may copy this code freely under the conditions of the GPL.
17//
18
19define('USE_ASSERTS', function_exists('assert'));
20
27{
28 public $type;
29 public $orig;
30 public $closing;
31
32 public function reverse()
33 {
34 trigger_error('pure virtual', E_USER_ERROR);
35 }
36}
37
43class _DiffOp_Copy extends _DiffOp
44{
45 public $type = 'copy';
46
47 public function __construct($orig, $closing = false)
48 {
49 if (!is_array($closing)) {
51 }
52 $this->orig = $orig;
53 $this->closing = $closing;
54 }
55
56 public function reverse()
57 {
58 return new _DiffOp_Copy($this->closing, $this->orig);
59 }
60}
61
68{
69 public $type = 'delete';
70
71 public function __construct($lines)
72 {
73 $this->orig = $lines;
74 $this->closing = false;
75 }
76
77 public function reverse()
78 {
79 return new _DiffOp_Add($this->orig);
80 }
81}
82
88class _DiffOp_Add extends _DiffOp
89{
90 public $type = 'add';
91
92 public function __construct($lines)
93 {
94 $this->closing = $lines;
95 $this->orig = false;
96 }
97
98 public function reverse()
99 {
100 return new _DiffOp_Delete($this->closing);
101 }
102}
103
110{
111 public $type = 'change';
112
113 public function __construct($orig, $closing)
114 {
115 $this->orig = $orig;
116 $this->closing = $closing;
117 }
118
119 public function reverse()
120 {
121 return new _DiffOp_Change($this->closing, $this->orig);
122 }
123}
124
125
150{
151 public const MAX_XREF_LENGTH = 10000;
152 private array $xchanged;
153 private array $xv;
154 private array $xind;
155 private array $ychanged;
156 private array $yv;
157 private array $yind;
158 private int $lcs;
159 private array $in_seq;
160 private array $seq = [];
161
162 public function diff($from_lines, $to_lines)
163 {
164 $fname = '_DiffEngine::diff';
165 //wfProfileIn( $fname );
166
167 $n_from = sizeof($from_lines);
168 $n_to = sizeof($to_lines);
169
170 $this->xchanged = $this->ychanged = array();
171 $this->xv = $this->yv = array();
172 $this->xind = $this->yind = array();
173 unset($this->seq);
174 unset($this->in_seq);
175 unset($this->lcs);
176
177 // Skip leading common lines.
178 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
179 if ($from_lines[$skip] !== $to_lines[$skip]) {
180 break;
181 }
182 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
183 }
184 // Skip trailing common lines.
185 $xi = $n_from;
186 $yi = $n_to;
187 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
188 if ($from_lines[$xi] !== $to_lines[$yi]) {
189 break;
190 }
191 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
192 }
193
194 // Ignore lines which do not exist in both files.
195 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
196 $xhash[$this->_line_hash($from_lines[$xi])] = 1;
197 }
198
199 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
200 $line = $to_lines[$yi];
201 if (($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)]))) {
202 continue;
203 }
204 $yhash[$this->_line_hash($line)] = 1;
205 $this->yv[] = $line;
206 $this->yind[] = $yi;
207 }
208 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
209 $line = $from_lines[$xi];
210 if (($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)]))) {
211 continue;
212 }
213 $this->xv[] = $line;
214 $this->xind[] = $xi;
215 }
216
217 // Find the LCS.
218 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
219
220 // Merge edits when possible
221 $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
222 $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
223
224 // Compute the edit operations.
225 $edits = array();
226 $xi = $yi = 0;
227 while ($xi < $n_from || $yi < $n_to) {
228 USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
229 USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
230
231 // Skip matching "snake".
232 $copy = array();
233 while ($xi < $n_from && $yi < $n_to
234 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
235 $copy[] = $from_lines[$xi++];
236 ++$yi;
237 }
238 if ($copy) {
239 $edits[] = new _DiffOp_Copy($copy);
240 }
241
242 // Find deletes & adds.
243 $delete = array();
244 while ($xi < $n_from && $this->xchanged[$xi]) {
245 $delete[] = $from_lines[$xi++];
246 }
247
248 $add = array();
249 while ($yi < $n_to && $this->ychanged[$yi]) {
250 $add[] = $to_lines[$yi++];
251 }
252
253 if ($delete && $add) {
254 $edits[] = new _DiffOp_Change($delete, $add);
255 } elseif ($delete) {
256 $edits[] = new _DiffOp_Delete($delete);
257 } elseif ($add) {
258 $edits[] = new _DiffOp_Add($add);
259 }
260 }
261 //wfProfileOut( $fname );
262 return $edits;
263 }
264
268 public function _line_hash($line)
269 {
270 if (strlen($line) > self::MAX_XREF_LENGTH) {
271 return md5($line);
272 } else {
273 return $line;
274 }
275 }
276
277
278 /* Divide the Largest Common Subsequence (LCS) of the sequences
279 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
280 * sized segments.
281 *
282 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
283 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
284 * sub sequences. The first sub-sequence is contained in [X0, X1),
285 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
286 * that (X0, Y0) == (XOFF, YOFF) and
287 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
288 *
289 * This function assumes that the first lines of the specified portions
290 * of the two files do not match, and likewise that the last lines do not
291 * match. The caller must trim matching lines from the beginning and end
292 * of the portions it is going to specify.
293 */
294 public function _diag($xoff, $xlim, $yoff, $ylim, $nchunks)
295 {
296 $fname = '_DiffEngine::_diag';
297 //wfProfileIn( $fname );
298 $flip = false;
299
300 if ($xlim - $xoff > $ylim - $yoff) {
301 // Things seems faster (I'm not sure I understand why)
302 // when the shortest sequence in X.
303 $flip = true;
304 list($xoff, $xlim, $yoff, $ylim)
305 = array( $yoff, $ylim, $xoff, $xlim);
306 }
307
308 if ($flip) {
309 for ($i = $ylim - 1; $i >= $yoff; $i--) {
310 $ymatches[$this->xv[$i]][] = $i;
311 }
312 } else {
313 for ($i = $ylim - 1; $i >= $yoff; $i--) {
314 $ymatches[$this->yv[$i]][] = $i;
315 }
316 }
317
318 $this->lcs = 0;
319 $this->seq[0] = $yoff - 1;
320 $this->in_seq = array();
321 $ymids[0] = array();
322
323 $numer = $xlim - $xoff + $nchunks - 1;
324 $x = $xoff;
325 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
326 //wfProfileIn( "$fname-chunk" );
327 if ($chunk > 0) {
328 for ($i = 0; $i <= $this->lcs; $i++) {
329 $ymids[$i][$chunk - 1] = $this->seq[$i];
330 }
331 }
332
333 $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $chunk) / $nchunks);
334 for (; $x < $x1; $x++) {
335 $line = $flip ? $this->yv[$x] : $this->xv[$x];
336 if (empty($ymatches[$line])) {
337 continue;
338 }
339 $matches = $ymatches[$line];
340 reset($matches);
341 foreach ($matches as $junk => $y) {
342 if (empty($this->in_seq[$y])) {
343 $k = $this->_lcs_pos($y);
344 USE_ASSERTS && assert($k > 0);
345 $ymids[$k] = $ymids[$k - 1];
346 break;
347 }
348 }
349 foreach ($matches as $y) {
350 if ($y > $this->seq[$k - 1]) {
351 USE_ASSERTS && assert($y < $this->seq[$k]);
352 // Optimization: this is a common case:
353 // next match is just replacing previous match.
354 $this->in_seq[$this->seq[$k]] = false;
355 $this->seq[$k] = $y;
356 $this->in_seq[$y] = 1;
357 } elseif (empty($this->in_seq[$y])) {
358 $k = $this->_lcs_pos($y);
359 USE_ASSERTS && assert($k > 0);
360 $ymids[$k] = $ymids[$k - 1];
361 }
362 }
363 }
364 //wfProfileOut( "$fname-chunk" );
365 }
366
367 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
368 $ymid = $ymids[$this->lcs];
369 for ($n = 0; $n < $nchunks - 1; $n++) {
370 $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $n) / $nchunks);
371 $y1 = $ymid[$n] + 1;
372 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
373 }
374 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
375
376 //wfProfileOut( $fname );
377 return array($this->lcs, $seps);
378 }
379
380 public function _lcs_pos($ypos)
381 {
382 $fname = '_DiffEngine::_lcs_pos';
383 //wfProfileIn( $fname );
384
385 $end = $this->lcs;
386 if ($end == 0 || $ypos > $this->seq[$end]) {
387 $this->seq[++$this->lcs] = $ypos;
388 $this->in_seq[$ypos] = 1;
389 //wfProfileOut( $fname );
390 return $this->lcs;
391 }
392
393 $beg = 1;
394 while ($beg < $end) {
395 $mid = (int) (($beg + $end) / 2);
396 if ($ypos > $this->seq[$mid]) {
397 $beg = $mid + 1;
398 } else {
399 $end = $mid;
400 }
401 }
402
403 USE_ASSERTS && assert($ypos != $this->seq[$end]);
404
405 $this->in_seq[$this->seq[$end]] = false;
406 $this->seq[$end] = $ypos;
407 $this->in_seq[$ypos] = 1;
408 //wfProfileOut( $fname );
409 return $end;
410 }
411
412 /* Find LCS of two sequences.
413 *
414 * The results are recorded in the vectors $this->{x,y}changed[], by
415 * storing a 1 in the element for each line that is an insertion
416 * or deletion (ie. is not in the LCS).
417 *
418 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
419 *
420 * Note that XLIM, YLIM are exclusive bounds.
421 * All line numbers are origin-0 and discarded lines are not counted.
422 */
423 public function _compareseq($xoff, $xlim, $yoff, $ylim)
424 {
425 $fname = '_DiffEngine::_compareseq';
426 //wfProfileIn( $fname );
427
428 // Slide down the bottom initial diagonal.
429 while ($xoff < $xlim && $yoff < $ylim
430 && $this->xv[$xoff] == $this->yv[$yoff]) {
431 ++$xoff;
432 ++$yoff;
433 }
434
435 // Slide up the top initial diagonal.
436 while ($xlim > $xoff && $ylim > $yoff
437 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
438 --$xlim;
439 --$ylim;
440 }
441
442 if ($xoff == $xlim || $yoff == $ylim) {
443 $lcs = 0;
444 } else {
445 // This is ad hoc but seems to work well.
446 //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
447 //$nchunks = max(2,min(8,(int)$nchunks));
448 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
449 list($lcs, $seps)
450 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
451 }
452
453 if ($lcs == 0) {
454 // X and Y sequences have no common subsequence:
455 // mark all changed.
456 while ($yoff < $ylim) {
457 $this->ychanged[$this->yind[$yoff++]] = 1;
458 }
459 while ($xoff < $xlim) {
460 $this->xchanged[$this->xind[$xoff++]] = 1;
461 }
462 } else {
463 // Use the partitions to split this problem into subproblems.
464 reset($seps);
465 $pt1 = $seps[0];
466 while ($pt2 = next($seps)) {
467 $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
468 $pt1 = $pt2;
469 }
470 }
471 //wfProfileOut( $fname );
472 }
473
474 /* Adjust inserts/deletes of identical lines to join changes
475 * as much as possible.
476 *
477 * We do something when a run of changed lines include a
478 * line at one end and has an excluded, identical line at the other.
479 * We are free to choose which identical line is included.
480 * `compareseq' usually chooses the one at the beginning,
481 * but usually it is cleaner to consider the following identical line
482 * to be the "change".
483 *
484 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
485 */
486 public function _shift_boundaries($lines, &$changed, $other_changed)
487 {
488 $fname = '_DiffEngine::_shift_boundaries';
489 //wfProfileIn( $fname );
490 $i = 0;
491 $j = 0;
492
493 USE_ASSERTS && assert(sizeof($lines) == sizeof($changed));
494 $len = sizeof($lines);
495 $other_len = sizeof($other_changed);
496
497 while (1) {
498 /*
499 * Scan forwards to find beginning of another run of changes.
500 * Also keep track of the corresponding point in the other file.
501 *
502 * Throughout this code, $i and $j are adjusted together so that
503 * the first $i elements of $changed and the first $j elements
504 * of $other_changed both contain the same number of zeros
505 * (unchanged lines).
506 * Furthermore, $j is always kept so that $j == $other_len or
507 * $other_changed[$j] == false.
508 */
509 while ($j < $other_len && $other_changed[$j]) {
510 $j++;
511 }
512
513 while ($i < $len && !$changed[$i]) {
514 USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
515 $i++;
516 $j++;
517 while ($j < $other_len && $other_changed[$j]) {
518 $j++;
519 }
520 }
521
522 if ($i == $len) {
523 break;
524 }
525
526 $start = $i;
527
528 // Find the end of this run of changes.
529 while (++$i < $len && $changed[$i]) {
530 }
531
532 do {
533 /*
534 * Record the length of this run of changes, so that
535 * we can later determine whether the run has grown.
536 */
537 $runlength = $i - $start;
538
539 /*
540 * Move the changed region back, so long as the
541 * previous unchanged line matches the last changed one.
542 * This merges with previous changed regions.
543 */
544 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
545 $changed[--$start] = 1;
546 $changed[--$i] = false;
547 while ($start > 0 && $changed[$start - 1]) {
548 $start--;
549 }
550 USE_ASSERTS && assert($j > 0);
551 while ($other_changed[--$j]) {
552 }
553 USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
554 }
555
556 /*
557 * Set CORRESPONDING to the end of the changed run, at the last
558 * point where it corresponds to a changed run in the other file.
559 * CORRESPONDING == LEN means no such point has been found.
560 */
561 $corresponding = $j < $other_len ? $i : $len;
562
563 /*
564 * Move the changed region forward, so long as the
565 * first changed line matches the following unchanged one.
566 * This merges with following changed regions.
567 * Do this second, so that if there are no merges,
568 * the changed region is moved forward as far as possible.
569 */
570 while ($i < $len && $lines[$start] == $lines[$i]) {
571 $changed[$start++] = false;
572 $changed[$i++] = 1;
573 while ($i < $len && $changed[$i]) {
574 $i++;
575 }
576
577 USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
578 $j++;
579 if ($j < $other_len && $other_changed[$j]) {
580 $corresponding = $i;
581 while ($j < $other_len && $other_changed[$j]) {
582 $j++;
583 }
584 }
585 }
586 } while ($runlength != $i - $start);
587
588 /*
589 * If possible, move the fully-merged run of changes
590 * back to a corresponding run in the other file.
591 */
592 while ($corresponding < $i) {
593 $changed[--$start] = 1;
594 $changed[--$i] = 0;
595 USE_ASSERTS && assert($j > 0);
596 while ($other_changed[--$j]) {
597 }
598 USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
599 }
600 }
601 //wfProfileOut( $fname );
602 }
603}
604
611class Diff
612{
613 public $edits;
614
623 public function __construct($from_lines, $to_lines)
624 {
625 $eng = new _DiffEngine();
626 $this->edits = $eng->diff($from_lines, $to_lines);
627 //$this->_check($from_lines, $to_lines);
628 }
629
640 public function reverse()
641 {
642 $rev = $this;
643 $rev->edits = array();
644 foreach ($this->edits as $edit) {
645 $rev->edits[] = $edit->reverse();
646 }
647 return $rev;
648 }
649
655 public function isEmpty()
656 {
657 foreach ($this->edits as $edit) {
658 if ($edit->type != 'copy') {
659 return false;
660 }
661 }
662 return true;
663 }
664
672 public function lcs()
673 {
674 $lcs = 0;
675 foreach ($this->edits as $edit) {
676 if ($edit->type == 'copy') {
677 $lcs += sizeof($edit->orig);
678 }
679 }
680 return $lcs;
681 }
682
691 public function orig()
692 {
693 $lines = array();
694
695 foreach ($this->edits as $edit) {
696 if ($edit->orig) {
697 array_splice($lines, sizeof($lines), 0, $edit->orig);
698 }
699 }
700 return $lines;
701 }
702
711 public function closing()
712 {
713 $lines = array();
714
715 foreach ($this->edits as $edit) {
716 if ($edit->closing) {
717 array_splice($lines, sizeof($lines), 0, $edit->closing);
718 }
719 }
720 return $lines;
721 }
722
728 public function _check($from_lines, $to_lines)
729 {
730 $fname = 'Diff::_check';
731 //wfProfileIn( $fname );
732 if (serialize($from_lines) != serialize($this->orig())) {
733 trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
734 }
735 if (serialize($to_lines) != serialize($this->closing())) {
736 trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
737 }
738
739 $rev = $this->reverse();
740 if (serialize($to_lines) != serialize($rev->orig())) {
741 trigger_error("Reversed original doesn't match", E_USER_ERROR);
742 }
743 if (serialize($from_lines) != serialize($rev->closing())) {
744 trigger_error("Reversed closing doesn't match", E_USER_ERROR);
745 }
746
747
748 $prevtype = 'none';
749 foreach ($this->edits as $edit) {
750 if ($prevtype == $edit->type) {
751 trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
752 }
753 $prevtype = $edit->type;
754 }
755
756 $lcs = $this->lcs();
757 trigger_error('Diff okay: LCS = ' . $lcs, E_USER_NOTICE);
758 //wfProfileOut( $fname );
759 }
760}
761
767class MappedDiff extends Diff
768{
792 public function __construct(
793 $from_lines,
794 $to_lines,
795 $mapped_from_lines,
796 $mapped_to_lines
797 ) {
798 $fname = 'MappedDiff::MappedDiff';
799 //wfProfileIn( $fname );
800
801 assert(sizeof($from_lines) == sizeof($mapped_from_lines));
802 assert(sizeof($to_lines) == sizeof($mapped_to_lines));
803
804 parent::__construct($mapped_from_lines, $mapped_to_lines);
805
806 $xi = $yi = 0;
807 for ($i = 0; $i < sizeof($this->edits); $i++) {
808 $orig = &$this->edits[$i]->orig;
809 if (is_array($orig)) {
810 $orig = array_slice($from_lines, $xi, sizeof($orig));
811 $xi += sizeof($orig);
812 }
813
814 $closing = &$this->edits[$i]->closing;
815 if (is_array($closing)) {
816 $closing = array_slice($to_lines, $yi, sizeof($closing));
817 $yi += sizeof($closing);
818 }
819 }
820 //wfProfileOut( $fname );
821 }
822}
823
835{
843
851
858 public function format($diff)
859 {
860 $fname = 'DiffFormatter::format';
861 //wfProfileIn( $fname );
862
863 $xi = $yi = 1;
864 $block = false;
865 $context = array();
866
869
870 $this->_start_diff();
871
872 foreach ($diff->edits as $edit) {
873 if ($edit->type == 'copy') {
874 if (is_array($block)) {
875 if (sizeof($edit->orig) <= $nlead + $ntrail) {
876 $block[] = $edit;
877 } else {
878 if ($ntrail) {
879 $context = array_slice($edit->orig, 0, $ntrail);
880 $block[] = new _DiffOp_Copy($context);
881 }
882 $this->_block(
883 $x0,
884 $ntrail + $xi - $x0,
885 $y0,
886 $ntrail + $yi - $y0,
887 $block
888 );
889 $block = false;
890 }
891 }
892 $context = $edit->orig;
893 } else {
894 if (!is_array($block)) {
895 $context = array_slice($context, sizeof($context) - $nlead);
896 $x0 = $xi - sizeof($context);
897 $y0 = $yi - sizeof($context);
898 $block = array();
899 if ($context) {
900 $block[] = new _DiffOp_Copy($context);
901 }
902 }
903 $block[] = $edit;
904 }
905
906 if ($edit->orig) {
907 $xi += sizeof($edit->orig);
908 }
909 if ($edit->closing) {
910 $yi += sizeof($edit->closing);
911 }
912 }
913
914 if (is_array($block)) {
915 $this->_block(
916 $x0,
917 $xi - $x0,
918 $y0,
919 $yi - $y0,
920 $block
921 );
922 }
923
924 $end = $this->_end_diff();
925 //wfProfileOut( $fname );
926 return $end;
927 }
928
929 public function _block($xbeg, $xlen, $ybeg, $ylen, $edits)
930 {
931 $fname = 'DiffFormatter::_block';
932 //wfProfileIn( $fname );
933 $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
934 foreach ($edits as $edit) {
935 if ($edit->type == 'copy') {
936 $this->_context($edit->orig);
937 } elseif ($edit->type == 'add') {
938 $this->_added($edit->closing);
939 } elseif ($edit->type == 'delete') {
940 $this->_deleted($edit->orig);
941 } elseif ($edit->type == 'change') {
942 $this->_changed($edit->orig, $edit->closing);
943 } else {
944 trigger_error('Unknown edit type', E_USER_ERROR);
945 }
946 }
947 $this->_end_block();
948 //wfProfileOut( $fname );
949 }
950
951 public function _start_diff()
952 {
953 ob_start();
954 }
955
956 public function _end_diff()
957 {
958 $val = ob_get_contents();
959 ob_end_clean();
960 return $val;
961 }
962
963 public function _block_header($xbeg, $xlen, $ybeg, $ylen)
964 {
965 if ($xlen > 1) {
966 $xbeg .= "," . ($xbeg + $xlen - 1);
967 }
968 if ($ylen > 1) {
969 $ybeg .= "," . ($ybeg + $ylen - 1);
970 }
971
972 return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
973 }
974
975 public function _start_block($header)
976 {
977 echo $header;
978 }
979
980 public function _end_block()
981 {
982 }
983
984 public function _lines($lines, $prefix = ' ')
985 {
986 foreach ($lines as $line) {
987 echo "$prefix $line\n";
988 }
989 }
990
991 public function _context($lines)
992 {
993 $this->_lines($lines);
994 }
995
996 public function _added($lines)
997 {
998 $this->_lines($lines, '>');
999 }
1000 public function _deleted($lines)
1001 {
1002 $this->_lines($lines, '<');
1003 }
1004
1005 public function _changed($orig, $closing)
1006 {
1007 $this->_deleted($orig);
1008 echo "---\n";
1009 $this->_added($closing);
1010 }
1011}
1012
1013
1019define('NBSP', '&#160;'); // iso-8859-x non-breaking space.
1020
1027{
1028 private array $_lines;
1029 private string $_line;
1030 private string $_group;
1031 private string $_tag;
1032
1033 public function __construct()
1034 {
1035 $this->_lines = array();
1036 $this->_line = '';
1037 $this->_group = '';
1038 $this->_tag = '';
1039 }
1040
1041 public function _flushGroup($new_tag)
1042 {
1043 if ($this->_group !== '') {
1044 if ($this->_tag == 'ins') {
1045 $this->_line .= '[ilDiffInsStart]' .
1046 htmlspecialchars($this->_group) . '[ilDiffInsEnd]';
1047 } elseif ($this->_tag == 'del') {
1048 $this->_line .= '[ilDiffDelStart]' .
1049 htmlspecialchars($this->_group) . '[ilDiffDelEnd]';
1050 } else {
1051 $this->_line .= htmlspecialchars($this->_group);
1052 }
1053 }
1054 $this->_group = '';
1055 $this->_tag = $new_tag;
1056 }
1057
1058 public function _flushLine($new_tag)
1059 {
1060 $this->_flushGroup($new_tag);
1061 if ($this->_line != '') {
1062 array_push($this->_lines, $this->_line);
1063 } else {
1064 # make empty lines visible by inserting an NBSP
1065 array_push($this->_lines, NBSP);
1066 }
1067 $this->_line = '';
1068 }
1069
1070 public function addWords($words, $tag = '')
1071 {
1072 if ($tag != $this->_tag) {
1073 $this->_flushGroup($tag);
1074 }
1075
1076 foreach ($words as $word) {
1077 // new-line should only come as first char of word.
1078 if ($word == '') {
1079 continue;
1080 }
1081 if ($word[0] == "\n") {
1082 $this->_flushLine($tag);
1083 $word = substr($word, 1);
1084 }
1085 assert(!strstr($word, "\n"));
1086 $this->_group .= $word;
1087 }
1088 }
1089
1090 public function getLines()
1091 {
1092 $this->_flushLine('~done');
1093 return $this->_lines;
1094 }
1095}
1096
1103{
1104 public const MAX_LINE_LENGTH = 10000;
1105
1106 public function __construct($orig_lines, $closing_lines)
1107 {
1108 $fname = 'WordLevelDiff::WordLevelDiff';
1109 //wfProfileIn( $fname );
1110
1111 list($orig_words, $orig_stripped) = $this->_split($orig_lines);
1112 list($closing_words, $closing_stripped) = $this->_split($closing_lines);
1113
1115 $orig_words,
1116 $closing_words,
1117 $orig_stripped,
1118 $closing_stripped
1119 );
1120 //wfProfileOut( $fname );
1121 }
1122
1123 public function _split($lines)
1124 {
1125 $fname = 'WordLevelDiff::_split';
1126 //wfProfileIn( $fname );
1127
1128 $words = array();
1129 $stripped = array();
1130 $first = true;
1131 foreach ($lines as $line) {
1132 # If the line is too long, just pretend the entire line is one big word
1133 # This prevents resource exhaustion problems
1134 if ($first) {
1135 $first = false;
1136 } else {
1137 $words[] = "\n";
1138 $stripped[] = "\n";
1139 }
1140 if (strlen($line) > self::MAX_LINE_LENGTH) {
1141 $words[] = $line;
1142 $stripped[] = $line;
1143 } else {
1144 $m = array();
1145 if (preg_match_all(
1146 '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1147 $line,
1148 $m
1149 )) {
1150 $words = array_merge($words, $m[0]);
1151 $stripped = array_merge($stripped, $m[1]);
1152 }
1153 }
1154 }
1155 //wfProfileOut( $fname );
1156 return array($words, $stripped);
1157 }
1158
1159 public function orig()
1160 {
1161 $fname = 'WordLevelDiff::orig';
1162 //wfProfileIn( $fname );
1163 $orig = new _HWLDF_WordAccumulator();
1164
1165 foreach ($this->edits as $edit) {
1166 if ($edit->type == 'copy') {
1167 $orig->addWords($edit->orig);
1168 } elseif ($edit->orig) {
1169 $orig->addWords($edit->orig, 'del');
1170 }
1171 }
1172 $lines = $orig->getLines();
1173 //wfProfileOut( $fname );
1174 return $lines;
1175 }
1176
1177 public function closing()
1178 {
1179 $fname = 'WordLevelDiff::closing';
1180 //wfProfileIn( $fname );
1181 $closing = new _HWLDF_WordAccumulator();
1182
1183 foreach ($this->edits as $edit) {
1184 if ($edit->type == 'copy') {
1185 $closing->addWords($edit->closing);
1186 } elseif ($edit->closing) {
1187 $closing->addWords($edit->closing, 'ins');
1188 }
1189 }
1190 $lines = $closing->getLines();
1191 //wfProfileOut( $fname );
1192 return $lines;
1193 }
1194}
_block_header($xbeg, $xlen, $ybeg, $ylen)
_block($xbeg, $xlen, $ybeg, $ylen, $edits)
_changed($orig, $closing)
_lines($lines, $prefix=' ')
$leading_context_lines
Number of leading context "lines" to preserve.
format($diff)
Format a diff.
$trailing_context_lines
Number of trailing context "lines" to preserve.
reverse()
Compute reversed Diff.
orig()
Get the original set of lines.
__construct($from_lines, $to_lines)
Constructor.
isEmpty()
Check for empty diff.
lcs()
Compute the length of the Longest Common Subsequence (LCS).
closing()
Get the closing set of lines.
_check($from_lines, $to_lines)
Check a Diff for validity.
__construct( $from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines)
Constructor.
__construct($orig_lines, $closing_lines)
Constructor.
closing()
Get the closing set of lines.
orig()
Get the original set of lines.
const NBSP
Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3.
const USE_ASSERTS
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
diff($from_lines, $to_lines)
_shift_boundaries($lines, &$changed, $other_changed)
_line_hash($line)
Returns the whole line if it's small enough, or the MD5 hash otherwise.
_compareseq($xoff, $xlim, $yoff, $ylim)
__construct($orig, $closing)
__construct($orig, $closing=false)
__construct(Container $dic, ilPlugin $plugin)
@inheritDoc
$context
Definition: webdav.php:31