ILIAS  trunk Revision v11.0_alpha-1689-g66c127b4ae8
All Data Structures Namespaces Files Functions Variables Enumerations Enumerator Modules Pages
class.WordLevelDiff.php
Go to the documentation of this file.
1 <?php
2 
3 // Copyright holded by MediaWiki contributers, Licensed under GPL version 2 or later
4 
5 
13 // A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
14 //
15 // Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
16 // You may copy this code freely under the conditions of the GPL.
17 //
18 
19 define('USE_ASSERTS', function_exists('assert'));
20 
26 class _DiffOp
27 {
28  public $type;
29  public $orig;
30  public $closing;
31 
32  public function reverse()
33  {
34  trigger_error('pure virtual', E_USER_ERROR);
35  }
36 }
37 
43 class _DiffOp_Copy extends _DiffOp
44 {
45  public $type = 'copy';
46 
47  public function __construct($orig, $closing = false)
48  {
49  if (!is_array($closing)) {
50  $closing = $orig;
51  }
52  $this->orig = $orig;
53  $this->closing = $closing;
54  }
55 
56  public function reverse()
57  {
58  return new _DiffOp_Copy($this->closing, $this->orig);
59  }
60 }
61 
67 class _DiffOp_Delete extends _DiffOp
68 {
69  public $type = 'delete';
70 
71  public function __construct($lines)
72  {
73  $this->orig = $lines;
74  $this->closing = false;
75  }
76 
77  public function reverse()
78  {
79  return new _DiffOp_Add($this->orig);
80  }
81 }
82 
88 class _DiffOp_Add extends _DiffOp
89 {
90  public $type = 'add';
91 
92  public function __construct($lines)
93  {
94  $this->closing = $lines;
95  $this->orig = false;
96  }
97 
98  public function reverse()
99  {
100  return new _DiffOp_Delete($this->closing);
101  }
102 }
103 
109 class _DiffOp_Change extends _DiffOp
110 {
111  public $type = 'change';
112 
113  public function __construct($orig, $closing)
114  {
115  $this->orig = $orig;
116  $this->closing = $closing;
117  }
118 
119  public function reverse()
120  {
121  return new _DiffOp_Change($this->closing, $this->orig);
122  }
123 }
124 
125 
150 {
151  public const MAX_XREF_LENGTH = 10000;
152  private array $xchanged;
153  private array $xv;
154  private array $xind;
155  private array $ychanged;
156  private array $yv;
157  private array $yind;
158  private int $lcs;
159  private array $in_seq;
160  private array $seq = [];
161 
162  public function diff($from_lines, $to_lines)
163  {
164  $fname = '_DiffEngine::diff';
165  //wfProfileIn( $fname );
166 
167  $n_from = sizeof($from_lines);
168  $n_to = sizeof($to_lines);
169 
170  $this->xchanged = $this->ychanged = array();
171  $this->xv = $this->yv = array();
172  $this->xind = $this->yind = array();
173  unset($this->seq);
174  unset($this->in_seq);
175  unset($this->lcs);
176 
177  // Skip leading common lines.
178  for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
179  if ($from_lines[$skip] !== $to_lines[$skip]) {
180  break;
181  }
182  $this->xchanged[$skip] = $this->ychanged[$skip] = false;
183  }
184  // Skip trailing common lines.
185  $xi = $n_from;
186  $yi = $n_to;
187  for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
188  if ($from_lines[$xi] !== $to_lines[$yi]) {
189  break;
190  }
191  $this->xchanged[$xi] = $this->ychanged[$yi] = false;
192  }
193 
194  // Ignore lines which do not exist in both files.
195  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
196  $xhash[$this->_line_hash($from_lines[$xi])] = 1;
197  }
198 
199  for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
200  $line = $to_lines[$yi];
201  if (($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)]))) {
202  continue;
203  }
204  $yhash[$this->_line_hash($line)] = 1;
205  $this->yv[] = $line;
206  $this->yind[] = $yi;
207  }
208  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
209  $line = $from_lines[$xi];
210  if (($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)]))) {
211  continue;
212  }
213  $this->xv[] = $line;
214  $this->xind[] = $xi;
215  }
216 
217  // Find the LCS.
218  $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
219 
220  // Merge edits when possible
221  $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
222  $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
223 
224  // Compute the edit operations.
225  $edits = array();
226  $xi = $yi = 0;
227  while ($xi < $n_from || $yi < $n_to) {
228  USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
229  USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
230 
231  // Skip matching "snake".
232  $copy = array();
233  while ($xi < $n_from && $yi < $n_to
234  && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
235  $copy[] = $from_lines[$xi++];
236  ++$yi;
237  }
238  if ($copy) {
239  $edits[] = new _DiffOp_Copy($copy);
240  }
241 
242  // Find deletes & adds.
243  $delete = array();
244  while ($xi < $n_from && $this->xchanged[$xi]) {
245  $delete[] = $from_lines[$xi++];
246  }
247 
248  $add = array();
249  while ($yi < $n_to && $this->ychanged[$yi]) {
250  $add[] = $to_lines[$yi++];
251  }
252 
253  if ($delete && $add) {
254  $edits[] = new _DiffOp_Change($delete, $add);
255  } elseif ($delete) {
256  $edits[] = new _DiffOp_Delete($delete);
257  } elseif ($add) {
258  $edits[] = new _DiffOp_Add($add);
259  }
260  }
261  //wfProfileOut( $fname );
262  return $edits;
263  }
264 
268  public function _line_hash($line)
269  {
270  if (strlen($line) > self::MAX_XREF_LENGTH) {
271  return md5($line);
272  } else {
273  return $line;
274  }
275  }
276 
277 
278  /* Divide the Largest Common Subsequence (LCS) of the sequences
279  * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
280  * sized segments.
281  *
282  * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
283  * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
284  * sub sequences. The first sub-sequence is contained in [X0, X1),
285  * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
286  * that (X0, Y0) == (XOFF, YOFF) and
287  * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
288  *
289  * This function assumes that the first lines of the specified portions
290  * of the two files do not match, and likewise that the last lines do not
291  * match. The caller must trim matching lines from the beginning and end
292  * of the portions it is going to specify.
293  */
294  public function _diag($xoff, $xlim, $yoff, $ylim, $nchunks)
295  {
296  $fname = '_DiffEngine::_diag';
297  //wfProfileIn( $fname );
298  $flip = false;
299 
300  if ($xlim - $xoff > $ylim - $yoff) {
301  // Things seems faster (I'm not sure I understand why)
302  // when the shortest sequence in X.
303  $flip = true;
304  list($xoff, $xlim, $yoff, $ylim)
305  = array( $yoff, $ylim, $xoff, $xlim);
306  }
307 
308  if ($flip) {
309  for ($i = $ylim - 1; $i >= $yoff; $i--) {
310  $ymatches[$this->xv[$i]][] = $i;
311  }
312  } else {
313  for ($i = $ylim - 1; $i >= $yoff; $i--) {
314  $ymatches[$this->yv[$i]][] = $i;
315  }
316  }
317 
318  $this->lcs = 0;
319  $this->seq[0] = $yoff - 1;
320  $this->in_seq = array();
321  $ymids[0] = array();
322 
323  $numer = $xlim - $xoff + $nchunks - 1;
324  $x = $xoff;
325  for ($chunk = 0; $chunk < $nchunks; $chunk++) {
326  //wfProfileIn( "$fname-chunk" );
327  if ($chunk > 0) {
328  for ($i = 0; $i <= $this->lcs; $i++) {
329  $ymids[$i][$chunk - 1] = $this->seq[$i];
330  }
331  }
332 
333  $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $chunk) / $nchunks);
334  for (; $x < $x1; $x++) {
335  $line = $flip ? $this->yv[$x] : $this->xv[$x];
336  if (empty($ymatches[$line])) {
337  continue;
338  }
339  $matches = $ymatches[$line];
340  reset($matches);
341  foreach ($matches as $junk => $y) {
342  if (empty($this->in_seq[$y])) {
343  $k = $this->_lcs_pos($y);
344  USE_ASSERTS && assert($k > 0);
345  $ymids[$k] = $ymids[$k - 1];
346  break;
347  }
348  }
349  foreach ($matches as $y) {
350  if ($y > $this->seq[$k - 1]) {
351  USE_ASSERTS && assert($y < $this->seq[$k]);
352  // Optimization: this is a common case:
353  // next match is just replacing previous match.
354  $this->in_seq[$this->seq[$k]] = false;
355  $this->seq[$k] = $y;
356  $this->in_seq[$y] = 1;
357  } elseif (empty($this->in_seq[$y])) {
358  $k = $this->_lcs_pos($y);
359  USE_ASSERTS && assert($k > 0);
360  $ymids[$k] = $ymids[$k - 1];
361  }
362  }
363  }
364  //wfProfileOut( "$fname-chunk" );
365  }
366 
367  $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
368  $ymid = $ymids[$this->lcs];
369  for ($n = 0; $n < $nchunks - 1; $n++) {
370  $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $n) / $nchunks);
371  $y1 = $ymid[$n] + 1;
372  $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
373  }
374  $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
375 
376  //wfProfileOut( $fname );
377  return array($this->lcs, $seps);
378  }
379 
380  public function _lcs_pos($ypos)
381  {
382  $fname = '_DiffEngine::_lcs_pos';
383  //wfProfileIn( $fname );
384 
385  $end = $this->lcs;
386  if ($end == 0 || $ypos > $this->seq[$end]) {
387  $this->seq[++$this->lcs] = $ypos;
388  $this->in_seq[$ypos] = 1;
389  //wfProfileOut( $fname );
390  return $this->lcs;
391  }
392 
393  $beg = 1;
394  while ($beg < $end) {
395  $mid = (int) (($beg + $end) / 2);
396  if ($ypos > $this->seq[$mid]) {
397  $beg = $mid + 1;
398  } else {
399  $end = $mid;
400  }
401  }
402 
403  USE_ASSERTS && assert($ypos != $this->seq[$end]);
404 
405  $this->in_seq[$this->seq[$end]] = false;
406  $this->seq[$end] = $ypos;
407  $this->in_seq[$ypos] = 1;
408  //wfProfileOut( $fname );
409  return $end;
410  }
411 
412  /* Find LCS of two sequences.
413  *
414  * The results are recorded in the vectors $this->{x,y}changed[], by
415  * storing a 1 in the element for each line that is an insertion
416  * or deletion (ie. is not in the LCS).
417  *
418  * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
419  *
420  * Note that XLIM, YLIM are exclusive bounds.
421  * All line numbers are origin-0 and discarded lines are not counted.
422  */
423  public function _compareseq($xoff, $xlim, $yoff, $ylim)
424  {
425  $fname = '_DiffEngine::_compareseq';
426  //wfProfileIn( $fname );
427 
428  // Slide down the bottom initial diagonal.
429  while ($xoff < $xlim && $yoff < $ylim
430  && $this->xv[$xoff] == $this->yv[$yoff]) {
431  ++$xoff;
432  ++$yoff;
433  }
434 
435  // Slide up the top initial diagonal.
436  while ($xlim > $xoff && $ylim > $yoff
437  && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
438  --$xlim;
439  --$ylim;
440  }
441 
442  if ($xoff == $xlim || $yoff == $ylim) {
443  $lcs = 0;
444  } else {
445  // This is ad hoc but seems to work well.
446  //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
447  //$nchunks = max(2,min(8,(int)$nchunks));
448  $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
449  list($lcs, $seps)
450  = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
451  }
452 
453  if ($lcs == 0) {
454  // X and Y sequences have no common subsequence:
455  // mark all changed.
456  while ($yoff < $ylim) {
457  $this->ychanged[$this->yind[$yoff++]] = 1;
458  }
459  while ($xoff < $xlim) {
460  $this->xchanged[$this->xind[$xoff++]] = 1;
461  }
462  } else {
463  // Use the partitions to split this problem into subproblems.
464  reset($seps);
465  $pt1 = $seps[0];
466  while ($pt2 = next($seps)) {
467  $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
468  $pt1 = $pt2;
469  }
470  }
471  //wfProfileOut( $fname );
472  }
473 
474  /* Adjust inserts/deletes of identical lines to join changes
475  * as much as possible.
476  *
477  * We do something when a run of changed lines include a
478  * line at one end and has an excluded, identical line at the other.
479  * We are free to choose which identical line is included.
480  * `compareseq' usually chooses the one at the beginning,
481  * but usually it is cleaner to consider the following identical line
482  * to be the "change".
483  *
484  * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
485  */
486  public function _shift_boundaries($lines, &$changed, $other_changed)
487  {
488  $fname = '_DiffEngine::_shift_boundaries';
489  //wfProfileIn( $fname );
490  $i = 0;
491  $j = 0;
492 
493  USE_ASSERTS && assert(sizeof($lines) == sizeof($changed));
494  $len = sizeof($lines);
495  $other_len = sizeof($other_changed);
496 
497  while (1) {
498  /*
499  * Scan forwards to find beginning of another run of changes.
500  * Also keep track of the corresponding point in the other file.
501  *
502  * Throughout this code, $i and $j are adjusted together so that
503  * the first $i elements of $changed and the first $j elements
504  * of $other_changed both contain the same number of zeros
505  * (unchanged lines).
506  * Furthermore, $j is always kept so that $j == $other_len or
507  * $other_changed[$j] == false.
508  */
509  while ($j < $other_len && $other_changed[$j]) {
510  $j++;
511  }
512 
513  while ($i < $len && !$changed[$i]) {
514  USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
515  $i++;
516  $j++;
517  while ($j < $other_len && $other_changed[$j]) {
518  $j++;
519  }
520  }
521 
522  if ($i == $len) {
523  break;
524  }
525 
526  $start = $i;
527 
528  // Find the end of this run of changes.
529  while (++$i < $len && $changed[$i]) {
530  }
531 
532  do {
533  /*
534  * Record the length of this run of changes, so that
535  * we can later determine whether the run has grown.
536  */
537  $runlength = $i - $start;
538 
539  /*
540  * Move the changed region back, so long as the
541  * previous unchanged line matches the last changed one.
542  * This merges with previous changed regions.
543  */
544  while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
545  $changed[--$start] = 1;
546  $changed[--$i] = false;
547  while ($start > 0 && $changed[$start - 1]) {
548  $start--;
549  }
550  USE_ASSERTS && assert($j > 0);
551  while ($other_changed[--$j]) {
552  }
553  USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
554  }
555 
556  /*
557  * Set CORRESPONDING to the end of the changed run, at the last
558  * point where it corresponds to a changed run in the other file.
559  * CORRESPONDING == LEN means no such point has been found.
560  */
561  $corresponding = $j < $other_len ? $i : $len;
562 
563  /*
564  * Move the changed region forward, so long as the
565  * first changed line matches the following unchanged one.
566  * This merges with following changed regions.
567  * Do this second, so that if there are no merges,
568  * the changed region is moved forward as far as possible.
569  */
570  while ($i < $len && $lines[$start] == $lines[$i]) {
571  $changed[$start++] = false;
572  $changed[$i++] = 1;
573  while ($i < $len && $changed[$i]) {
574  $i++;
575  }
576 
577  USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
578  $j++;
579  if ($j < $other_len && $other_changed[$j]) {
580  $corresponding = $i;
581  while ($j < $other_len && $other_changed[$j]) {
582  $j++;
583  }
584  }
585  }
586  } while ($runlength != $i - $start);
587 
588  /*
589  * If possible, move the fully-merged run of changes
590  * back to a corresponding run in the other file.
591  */
592  while ($corresponding < $i) {
593  $changed[--$start] = 1;
594  $changed[--$i] = 0;
595  USE_ASSERTS && assert($j > 0);
596  while ($other_changed[--$j]) {
597  }
598  USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
599  }
600  }
601  //wfProfileOut( $fname );
602  }
603 }
604 
611 class Diff
612 {
613  public $edits;
614 
623  public function __construct($from_lines, $to_lines)
624  {
625  $eng = new _DiffEngine();
626  $this->edits = $eng->diff($from_lines, $to_lines);
627  //$this->_check($from_lines, $to_lines);
628  }
629 
640  public function reverse()
641  {
642  $rev = $this;
643  $rev->edits = array();
644  foreach ($this->edits as $edit) {
645  $rev->edits[] = $edit->reverse();
646  }
647  return $rev;
648  }
649 
655  public function isEmpty()
656  {
657  foreach ($this->edits as $edit) {
658  if ($edit->type != 'copy') {
659  return false;
660  }
661  }
662  return true;
663  }
664 
672  public function lcs()
673  {
674  $lcs = 0;
675  foreach ($this->edits as $edit) {
676  if ($edit->type == 'copy') {
677  $lcs += sizeof($edit->orig);
678  }
679  }
680  return $lcs;
681  }
682 
691  public function orig()
692  {
693  $lines = array();
694 
695  foreach ($this->edits as $edit) {
696  if ($edit->orig) {
697  array_splice($lines, sizeof($lines), 0, $edit->orig);
698  }
699  }
700  return $lines;
701  }
702 
711  public function closing()
712  {
713  $lines = array();
714 
715  foreach ($this->edits as $edit) {
716  if ($edit->closing) {
717  array_splice($lines, sizeof($lines), 0, $edit->closing);
718  }
719  }
720  return $lines;
721  }
722 
728  public function _check($from_lines, $to_lines)
729  {
730  $fname = 'Diff::_check';
731  //wfProfileIn( $fname );
732  if (serialize($from_lines) != serialize($this->orig())) {
733  trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
734  }
735  if (serialize($to_lines) != serialize($this->closing())) {
736  trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
737  }
738 
739  $rev = $this->reverse();
740  if (serialize($to_lines) != serialize($rev->orig())) {
741  trigger_error("Reversed original doesn't match", E_USER_ERROR);
742  }
743  if (serialize($from_lines) != serialize($rev->closing())) {
744  trigger_error("Reversed closing doesn't match", E_USER_ERROR);
745  }
746 
747 
748  $prevtype = 'none';
749  foreach ($this->edits as $edit) {
750  if ($prevtype == $edit->type) {
751  trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
752  }
753  $prevtype = $edit->type;
754  }
755 
756  $lcs = $this->lcs();
757  trigger_error('Diff okay: LCS = ' . $lcs, E_USER_NOTICE);
758  //wfProfileOut( $fname );
759  }
760 }
761 
767 class MappedDiff extends Diff
768 {
792  public function __construct(
793  $from_lines,
794  $to_lines,
795  $mapped_from_lines,
796  $mapped_to_lines
797  ) {
798  $fname = 'MappedDiff::MappedDiff';
799  //wfProfileIn( $fname );
800 
801  assert(sizeof($from_lines) == sizeof($mapped_from_lines));
802  assert(sizeof($to_lines) == sizeof($mapped_to_lines));
803 
804  parent::__construct($mapped_from_lines, $mapped_to_lines);
805 
806  $xi = $yi = 0;
807  for ($i = 0; $i < sizeof($this->edits); $i++) {
808  $orig = &$this->edits[$i]->orig;
809  if (is_array($orig)) {
810  $orig = array_slice($from_lines, $xi, sizeof($orig));
811  $xi += sizeof($orig);
812  }
813 
814  $closing = &$this->edits[$i]->closing;
815  if (is_array($closing)) {
816  $closing = array_slice($to_lines, $yi, sizeof($closing));
817  $yi += sizeof($closing);
818  }
819  }
820  //wfProfileOut( $fname );
821  }
822 }
823 
835 {
842  public $leading_context_lines = 0;
843 
850  public $trailing_context_lines = 0;
851 
858  public function format($diff)
859  {
860  $fname = 'DiffFormatter::format';
861  //wfProfileIn( $fname );
862 
863  $xi = $yi = 1;
864  $block = false;
865  $context = array();
866 
867  $nlead = $this->leading_context_lines;
868  $ntrail = $this->trailing_context_lines;
869 
870  $this->_start_diff();
871 
872  foreach ($diff->edits as $edit) {
873  if ($edit->type == 'copy') {
874  if (is_array($block)) {
875  if (sizeof($edit->orig) <= $nlead + $ntrail) {
876  $block[] = $edit;
877  } else {
878  if ($ntrail) {
879  $context = array_slice($edit->orig, 0, $ntrail);
880  $block[] = new _DiffOp_Copy($context);
881  }
882  $this->_block(
883  $x0,
884  $ntrail + $xi - $x0,
885  $y0,
886  $ntrail + $yi - $y0,
887  $block
888  );
889  $block = false;
890  }
891  }
892  $context = $edit->orig;
893  } else {
894  if (!is_array($block)) {
895  $context = array_slice($context, sizeof($context) - $nlead);
896  $x0 = $xi - sizeof($context);
897  $y0 = $yi - sizeof($context);
898  $block = array();
899  if ($context) {
900  $block[] = new _DiffOp_Copy($context);
901  }
902  }
903  $block[] = $edit;
904  }
905 
906  if ($edit->orig) {
907  $xi += sizeof($edit->orig);
908  }
909  if ($edit->closing) {
910  $yi += sizeof($edit->closing);
911  }
912  }
913 
914  if (is_array($block)) {
915  $this->_block(
916  $x0,
917  $xi - $x0,
918  $y0,
919  $yi - $y0,
920  $block
921  );
922  }
923 
924  $end = $this->_end_diff();
925  //wfProfileOut( $fname );
926  return $end;
927  }
928 
929  public function _block($xbeg, $xlen, $ybeg, $ylen, $edits)
930  {
931  $fname = 'DiffFormatter::_block';
932  //wfProfileIn( $fname );
933  $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
934  foreach ($edits as $edit) {
935  if ($edit->type == 'copy') {
936  $this->_context($edit->orig);
937  } elseif ($edit->type == 'add') {
938  $this->_added($edit->closing);
939  } elseif ($edit->type == 'delete') {
940  $this->_deleted($edit->orig);
941  } elseif ($edit->type == 'change') {
942  $this->_changed($edit->orig, $edit->closing);
943  } else {
944  trigger_error('Unknown edit type', E_USER_ERROR);
945  }
946  }
947  $this->_end_block();
948  //wfProfileOut( $fname );
949  }
950 
951  public function _start_diff()
952  {
953  ob_start();
954  }
955 
956  public function _end_diff()
957  {
958  $val = ob_get_contents();
959  ob_end_clean();
960  return $val;
961  }
962 
963  public function _block_header($xbeg, $xlen, $ybeg, $ylen)
964  {
965  if ($xlen > 1) {
966  $xbeg .= "," . ($xbeg + $xlen - 1);
967  }
968  if ($ylen > 1) {
969  $ybeg .= "," . ($ybeg + $ylen - 1);
970  }
971 
972  return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
973  }
974 
975  public function _start_block($header)
976  {
977  echo $header;
978  }
979 
980  public function _end_block()
981  {
982  }
983 
984  public function _lines($lines, $prefix = ' ')
985  {
986  foreach ($lines as $line) {
987  echo "$prefix $line\n";
988  }
989  }
990 
991  public function _context($lines)
992  {
993  $this->_lines($lines);
994  }
995 
996  public function _added($lines)
997  {
998  $this->_lines($lines, '>');
999  }
1000  public function _deleted($lines)
1001  {
1002  $this->_lines($lines, '<');
1003  }
1004 
1005  public function _changed($orig, $closing)
1006  {
1007  $this->_deleted($orig);
1008  echo "---\n";
1009  $this->_added($closing);
1010  }
1011 }
1012 
1013 
1019 define('NBSP', '&#160;'); // iso-8859-x non-breaking space.
1020 
1027 {
1028  private array $_lines;
1029  private string $_line;
1030  private string $_group;
1031  private string $_tag;
1032 
1033  public function __construct()
1034  {
1035  $this->_lines = array();
1036  $this->_line = '';
1037  $this->_group = '';
1038  $this->_tag = '';
1039  }
1040 
1041  public function _flushGroup($new_tag)
1042  {
1043  if ($this->_group !== '') {
1044  if ($this->_tag == 'ins') {
1045  $this->_line .= '[ilDiffInsStart]' .
1046  htmlspecialchars($this->_group) . '[ilDiffInsEnd]';
1047  } elseif ($this->_tag == 'del') {
1048  $this->_line .= '[ilDiffDelStart]' .
1049  htmlspecialchars($this->_group) . '[ilDiffDelEnd]';
1050  } else {
1051  $this->_line .= htmlspecialchars($this->_group);
1052  }
1053  }
1054  $this->_group = '';
1055  $this->_tag = $new_tag;
1056  }
1057 
1058  public function _flushLine($new_tag)
1059  {
1060  $this->_flushGroup($new_tag);
1061  if ($this->_line != '') {
1062  array_push($this->_lines, $this->_line);
1063  } else {
1064  # make empty lines visible by inserting an NBSP
1065  array_push($this->_lines, NBSP);
1066  }
1067  $this->_line = '';
1068  }
1069 
1070  public function addWords($words, $tag = '')
1071  {
1072  if ($tag != $this->_tag) {
1073  $this->_flushGroup($tag);
1074  }
1075 
1076  foreach ($words as $word) {
1077  // new-line should only come as first char of word.
1078  if ($word == '') {
1079  continue;
1080  }
1081  if ($word[0] == "\n") {
1082  $this->_flushLine($tag);
1083  $word = substr($word, 1);
1084  }
1085  assert(!strstr($word, "\n"));
1086  $this->_group .= $word;
1087  }
1088  }
1089 
1090  public function getLines()
1091  {
1092  $this->_flushLine('~done');
1093  return $this->_lines;
1094  }
1095 }
1096 
1103 {
1104  public const MAX_LINE_LENGTH = 10000;
1105 
1106  public function __construct($orig_lines, $closing_lines)
1107  {
1108  $fname = 'WordLevelDiff::WordLevelDiff';
1109  //wfProfileIn( $fname );
1110 
1111  list($orig_words, $orig_stripped) = $this->_split($orig_lines);
1112  list($closing_words, $closing_stripped) = $this->_split($closing_lines);
1113 
1115  $orig_words,
1116  $closing_words,
1117  $orig_stripped,
1118  $closing_stripped
1119  );
1120  //wfProfileOut( $fname );
1121  }
1122 
1123  public function _split($lines)
1124  {
1125  $fname = 'WordLevelDiff::_split';
1126  //wfProfileIn( $fname );
1127 
1128  $words = array();
1129  $stripped = array();
1130  $first = true;
1131  foreach ($lines as $line) {
1132  # If the line is too long, just pretend the entire line is one big word
1133  # This prevents resource exhaustion problems
1134  if ($first) {
1135  $first = false;
1136  } else {
1137  $words[] = "\n";
1138  $stripped[] = "\n";
1139  }
1140  if (strlen($line) > self::MAX_LINE_LENGTH) {
1141  $words[] = $line;
1142  $stripped[] = $line;
1143  } else {
1144  $m = array();
1145  if (preg_match_all(
1146  '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1147  $line,
1148  $m
1149  )) {
1150  $words = array_merge($words, $m[0]);
1151  $stripped = array_merge($stripped, $m[1]);
1152  }
1153  }
1154  }
1155  //wfProfileOut( $fname );
1156  return array($words, $stripped);
1157  }
1158 
1159  public function orig()
1160  {
1161  $fname = 'WordLevelDiff::orig';
1162  //wfProfileIn( $fname );
1163  $orig = new _HWLDF_WordAccumulator();
1164 
1165  foreach ($this->edits as $edit) {
1166  if ($edit->type == 'copy') {
1167  $orig->addWords($edit->orig);
1168  } elseif ($edit->orig) {
1169  $orig->addWords($edit->orig, 'del');
1170  }
1171  }
1172  $lines = $orig->getLines();
1173  //wfProfileOut( $fname );
1174  return $lines;
1175  }
1176 
1177  public function closing()
1178  {
1179  $fname = 'WordLevelDiff::closing';
1180  //wfProfileIn( $fname );
1182 
1183  foreach ($this->edits as $edit) {
1184  if ($edit->type == 'copy') {
1185  $closing->addWords($edit->closing);
1186  } elseif ($edit->closing) {
1187  $closing->addWords($edit->closing, 'ins');
1188  }
1189  }
1190  $lines = $closing->getLines();
1191  //wfProfileOut( $fname );
1192  return $lines;
1193  }
1194 }
_line_hash($line)
Returns the whole line if it&#39;s small enough, or the MD5 hash otherwise.
diff($from_lines, $to_lines)
closing()
Get the closing set of lines.
$context
Definition: webdav.php:31
__construct( $from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines)
Constructor.
_block_header($xbeg, $xlen, $ybeg, $ylen)
__construct($from_lines, $to_lines)
Constructor.
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
reverse()
Compute reversed Diff.
lcs()
Compute the length of the Longest Common Subsequence (LCS).
orig()
Get the original set of lines.
_compareseq($xoff, $xlim, $yoff, $ylim)
isEmpty()
Check for empty diff.
const USE_ASSERTS
_shift_boundaries($lines, &$changed, $other_changed)
_lines($lines, $prefix=' ')
_block($xbeg, $xlen, $ybeg, $ylen, $edits)
__construct($orig_lines, $closing_lines)
__construct(Container $dic, ilPlugin $plugin)
_changed($orig, $closing)
format($diff)
Format a diff.
_check($from_lines, $to_lines)
Check a Diff for validity.
const NBSP
Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3.
__construct($orig, $closing)
__construct($orig, $closing=false)