ILIAS  trunk Revision v5.2.0beta1-34115-g3a2438be29
_DiffEngine Class Reference
+ Collaboration diagram for _DiffEngine:

Public Member Functions

 diff ($from_lines, $to_lines)
 
 _line_hash ($line)
 Returns the whole line if it's small enough, or the MD5 hash otherwise. More...
 
 _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
 
 _lcs_pos ($ypos)
 
 _compareseq ($xoff, $xlim, $yoff, $ylim)
 
 _shift_boundaries ($lines, &$changed, $other_changed)
 

Data Fields

const MAX_XREF_LENGTH = 10000
 

Private Attributes

array $xchanged
 
array $xv
 
array $xind
 
array $ychanged
 
array $yv
 
array $yind
 
int $lcs
 
array $in_seq
 
array $seq = []
 

Detailed Description

Definition at line 149 of file class.WordLevelDiff.php.

Member Function Documentation

◆ _compareseq()

_DiffEngine::_compareseq (   $xoff,
  $xlim,
  $yoff,
  $ylim 
)

Definition at line 423 of file class.WordLevelDiff.php.

424  {
425  $fname = '_DiffEngine::_compareseq';
426  //wfProfileIn( $fname );
427 
428  // Slide down the bottom initial diagonal.
429  while ($xoff < $xlim && $yoff < $ylim
430  && $this->xv[$xoff] == $this->yv[$yoff]) {
431  ++$xoff;
432  ++$yoff;
433  }
434 
435  // Slide up the top initial diagonal.
436  while ($xlim > $xoff && $ylim > $yoff
437  && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
438  --$xlim;
439  --$ylim;
440  }
441 
442  if ($xoff == $xlim || $yoff == $ylim) {
443  $lcs = 0;
444  } else {
445  // This is ad hoc but seems to work well.
446  //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
447  //$nchunks = max(2,min(8,(int)$nchunks));
448  $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
449  list($lcs, $seps)
450  = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
451  }
452 
453  if ($lcs == 0) {
454  // X and Y sequences have no common subsequence:
455  // mark all changed.
456  while ($yoff < $ylim) {
457  $this->ychanged[$this->yind[$yoff++]] = 1;
458  }
459  while ($xoff < $xlim) {
460  $this->xchanged[$this->xind[$xoff++]] = 1;
461  }
462  } else {
463  // Use the partitions to split this problem into subproblems.
464  reset($seps);
465  $pt1 = $seps[0];
466  while ($pt2 = next($seps)) {
467  $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
468  $pt1 = $pt2;
469  }
470  }
471  //wfProfileOut( $fname );
472  }
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
_compareseq($xoff, $xlim, $yoff, $ylim)

◆ _diag()

_DiffEngine::_diag (   $xoff,
  $xlim,
  $yoff,
  $ylim,
  $nchunks 
)

Definition at line 294 of file class.WordLevelDiff.php.

References $i, ILIAS\Repository\int(), and USE_ASSERTS.

295  {
296  $fname = '_DiffEngine::_diag';
297  //wfProfileIn( $fname );
298  $flip = false;
299 
300  if ($xlim - $xoff > $ylim - $yoff) {
301  // Things seems faster (I'm not sure I understand why)
302  // when the shortest sequence in X.
303  $flip = true;
304  list($xoff, $xlim, $yoff, $ylim)
305  = array( $yoff, $ylim, $xoff, $xlim);
306  }
307 
308  if ($flip) {
309  for ($i = $ylim - 1; $i >= $yoff; $i--) {
310  $ymatches[$this->xv[$i]][] = $i;
311  }
312  } else {
313  for ($i = $ylim - 1; $i >= $yoff; $i--) {
314  $ymatches[$this->yv[$i]][] = $i;
315  }
316  }
317 
318  $this->lcs = 0;
319  $this->seq[0] = $yoff - 1;
320  $this->in_seq = array();
321  $ymids[0] = array();
322 
323  $numer = $xlim - $xoff + $nchunks - 1;
324  $x = $xoff;
325  for ($chunk = 0; $chunk < $nchunks; $chunk++) {
326  //wfProfileIn( "$fname-chunk" );
327  if ($chunk > 0) {
328  for ($i = 0; $i <= $this->lcs; $i++) {
329  $ymids[$i][$chunk - 1] = $this->seq[$i];
330  }
331  }
332 
333  $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $chunk) / $nchunks);
334  for (; $x < $x1; $x++) {
335  $line = $flip ? $this->yv[$x] : $this->xv[$x];
336  if (empty($ymatches[$line])) {
337  continue;
338  }
339  $matches = $ymatches[$line];
340  reset($matches);
341  foreach ($matches as $junk => $y) {
342  if (empty($this->in_seq[$y])) {
343  $k = $this->_lcs_pos($y);
344  USE_ASSERTS && assert($k > 0);
345  $ymids[$k] = $ymids[$k - 1];
346  break;
347  }
348  }
349  foreach ($matches as $y) {
350  if ($y > $this->seq[$k - 1]) {
351  USE_ASSERTS && assert($y < $this->seq[$k]);
352  // Optimization: this is a common case:
353  // next match is just replacing previous match.
354  $this->in_seq[$this->seq[$k]] = false;
355  $this->seq[$k] = $y;
356  $this->in_seq[$y] = 1;
357  } elseif (empty($this->in_seq[$y])) {
358  $k = $this->_lcs_pos($y);
359  USE_ASSERTS && assert($k > 0);
360  $ymids[$k] = $ymids[$k - 1];
361  }
362  }
363  }
364  //wfProfileOut( "$fname-chunk" );
365  }
366 
367  $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
368  $ymid = $ymids[$this->lcs];
369  for ($n = 0; $n < $nchunks - 1; $n++) {
370  $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $n) / $nchunks);
371  $y1 = $ymid[$n] + 1;
372  $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
373  }
374  $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
375 
376  //wfProfileOut( $fname );
377  return array($this->lcs, $seps);
378  }
const USE_ASSERTS
$i
Definition: saml1-acs.php:28
+ Here is the call graph for this function:

◆ _lcs_pos()

_DiffEngine::_lcs_pos (   $ypos)

Definition at line 380 of file class.WordLevelDiff.php.

References ILIAS\Repository\int(), and USE_ASSERTS.

381  {
382  $fname = '_DiffEngine::_lcs_pos';
383  //wfProfileIn( $fname );
384 
385  $end = $this->lcs;
386  if ($end == 0 || $ypos > $this->seq[$end]) {
387  $this->seq[++$this->lcs] = $ypos;
388  $this->in_seq[$ypos] = 1;
389  //wfProfileOut( $fname );
390  return $this->lcs;
391  }
392 
393  $beg = 1;
394  while ($beg < $end) {
395  $mid = (int) (($beg + $end) / 2);
396  if ($ypos > $this->seq[$mid]) {
397  $beg = $mid + 1;
398  } else {
399  $end = $mid;
400  }
401  }
402 
403  USE_ASSERTS && assert($ypos != $this->seq[$end]);
404 
405  $this->in_seq[$this->seq[$end]] = false;
406  $this->seq[$end] = $ypos;
407  $this->in_seq[$ypos] = 1;
408  //wfProfileOut( $fname );
409  return $end;
410  }
const USE_ASSERTS
+ Here is the call graph for this function:

◆ _line_hash()

_DiffEngine::_line_hash (   $line)

Returns the whole line if it's small enough, or the MD5 hash otherwise.

Definition at line 268 of file class.WordLevelDiff.php.

269  {
270  if (strlen($line) > self::MAX_XREF_LENGTH) {
271  return md5($line);
272  } else {
273  return $line;
274  }
275  }

◆ _shift_boundaries()

_DiffEngine::_shift_boundaries (   $lines,
$changed,
  $other_changed 
)

Definition at line 486 of file class.WordLevelDiff.php.

References $i, and USE_ASSERTS.

487  {
488  $fname = '_DiffEngine::_shift_boundaries';
489  //wfProfileIn( $fname );
490  $i = 0;
491  $j = 0;
492 
493  USE_ASSERTS && assert(sizeof($lines) == sizeof($changed));
494  $len = sizeof($lines);
495  $other_len = sizeof($other_changed);
496 
497  while (1) {
498  /*
499  * Scan forwards to find beginning of another run of changes.
500  * Also keep track of the corresponding point in the other file.
501  *
502  * Throughout this code, $i and $j are adjusted together so that
503  * the first $i elements of $changed and the first $j elements
504  * of $other_changed both contain the same number of zeros
505  * (unchanged lines).
506  * Furthermore, $j is always kept so that $j == $other_len or
507  * $other_changed[$j] == false.
508  */
509  while ($j < $other_len && $other_changed[$j]) {
510  $j++;
511  }
512 
513  while ($i < $len && !$changed[$i]) {
514  USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
515  $i++;
516  $j++;
517  while ($j < $other_len && $other_changed[$j]) {
518  $j++;
519  }
520  }
521 
522  if ($i == $len) {
523  break;
524  }
525 
526  $start = $i;
527 
528  // Find the end of this run of changes.
529  while (++$i < $len && $changed[$i]) {
530  }
531 
532  do {
533  /*
534  * Record the length of this run of changes, so that
535  * we can later determine whether the run has grown.
536  */
537  $runlength = $i - $start;
538 
539  /*
540  * Move the changed region back, so long as the
541  * previous unchanged line matches the last changed one.
542  * This merges with previous changed regions.
543  */
544  while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
545  $changed[--$start] = 1;
546  $changed[--$i] = false;
547  while ($start > 0 && $changed[$start - 1]) {
548  $start--;
549  }
550  USE_ASSERTS && assert($j > 0);
551  while ($other_changed[--$j]) {
552  }
553  USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
554  }
555 
556  /*
557  * Set CORRESPONDING to the end of the changed run, at the last
558  * point where it corresponds to a changed run in the other file.
559  * CORRESPONDING == LEN means no such point has been found.
560  */
561  $corresponding = $j < $other_len ? $i : $len;
562 
563  /*
564  * Move the changed region forward, so long as the
565  * first changed line matches the following unchanged one.
566  * This merges with following changed regions.
567  * Do this second, so that if there are no merges,
568  * the changed region is moved forward as far as possible.
569  */
570  while ($i < $len && $lines[$start] == $lines[$i]) {
571  $changed[$start++] = false;
572  $changed[$i++] = 1;
573  while ($i < $len && $changed[$i]) {
574  $i++;
575  }
576 
577  USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
578  $j++;
579  if ($j < $other_len && $other_changed[$j]) {
580  $corresponding = $i;
581  while ($j < $other_len && $other_changed[$j]) {
582  $j++;
583  }
584  }
585  }
586  } while ($runlength != $i - $start);
587 
588  /*
589  * If possible, move the fully-merged run of changes
590  * back to a corresponding run in the other file.
591  */
592  while ($corresponding < $i) {
593  $changed[--$start] = 1;
594  $changed[--$i] = 0;
595  USE_ASSERTS && assert($j > 0);
596  while ($other_changed[--$j]) {
597  }
598  USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
599  }
600  }
601  //wfProfileOut( $fname );
602  }
const USE_ASSERTS
$i
Definition: saml1-acs.php:28

◆ diff()

_DiffEngine::diff (   $from_lines,
  $to_lines 
)

Definition at line 162 of file class.WordLevelDiff.php.

References USE_ASSERTS.

163  {
164  $fname = '_DiffEngine::diff';
165  //wfProfileIn( $fname );
166 
167  $n_from = sizeof($from_lines);
168  $n_to = sizeof($to_lines);
169 
170  $this->xchanged = $this->ychanged = array();
171  $this->xv = $this->yv = array();
172  $this->xind = $this->yind = array();
173  unset($this->seq);
174  unset($this->in_seq);
175  unset($this->lcs);
176 
177  // Skip leading common lines.
178  for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
179  if ($from_lines[$skip] !== $to_lines[$skip]) {
180  break;
181  }
182  $this->xchanged[$skip] = $this->ychanged[$skip] = false;
183  }
184  // Skip trailing common lines.
185  $xi = $n_from;
186  $yi = $n_to;
187  for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
188  if ($from_lines[$xi] !== $to_lines[$yi]) {
189  break;
190  }
191  $this->xchanged[$xi] = $this->ychanged[$yi] = false;
192  }
193 
194  // Ignore lines which do not exist in both files.
195  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
196  $xhash[$this->_line_hash($from_lines[$xi])] = 1;
197  }
198 
199  for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
200  $line = $to_lines[$yi];
201  if (($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)]))) {
202  continue;
203  }
204  $yhash[$this->_line_hash($line)] = 1;
205  $this->yv[] = $line;
206  $this->yind[] = $yi;
207  }
208  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
209  $line = $from_lines[$xi];
210  if (($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)]))) {
211  continue;
212  }
213  $this->xv[] = $line;
214  $this->xind[] = $xi;
215  }
216 
217  // Find the LCS.
218  $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
219 
220  // Merge edits when possible
221  $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
222  $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
223 
224  // Compute the edit operations.
225  $edits = array();
226  $xi = $yi = 0;
227  while ($xi < $n_from || $yi < $n_to) {
228  USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
229  USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
230 
231  // Skip matching "snake".
232  $copy = array();
233  while ($xi < $n_from && $yi < $n_to
234  && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
235  $copy[] = $from_lines[$xi++];
236  ++$yi;
237  }
238  if ($copy) {
239  $edits[] = new _DiffOp_Copy($copy);
240  }
241 
242  // Find deletes & adds.
243  $delete = array();
244  while ($xi < $n_from && $this->xchanged[$xi]) {
245  $delete[] = $from_lines[$xi++];
246  }
247 
248  $add = array();
249  while ($yi < $n_to && $this->ychanged[$yi]) {
250  $add[] = $to_lines[$yi++];
251  }
252 
253  if ($delete && $add) {
254  $edits[] = new _DiffOp_Change($delete, $add);
255  } elseif ($delete) {
256  $edits[] = new _DiffOp_Delete($delete);
257  } elseif ($add) {
258  $edits[] = new _DiffOp_Add($add);
259  }
260  }
261  //wfProfileOut( $fname );
262  return $edits;
263  }
_line_hash($line)
Returns the whole line if it&#39;s small enough, or the MD5 hash otherwise.
_compareseq($xoff, $xlim, $yoff, $ylim)
const USE_ASSERTS
_shift_boundaries($lines, &$changed, $other_changed)

Field Documentation

◆ $in_seq

array _DiffEngine::$in_seq
private

Definition at line 159 of file class.WordLevelDiff.php.

◆ $lcs

int _DiffEngine::$lcs
private

Definition at line 158 of file class.WordLevelDiff.php.

◆ $seq

array _DiffEngine::$seq = []
private

Definition at line 160 of file class.WordLevelDiff.php.

◆ $xchanged

array _DiffEngine::$xchanged
private

Definition at line 152 of file class.WordLevelDiff.php.

◆ $xind

array _DiffEngine::$xind
private

Definition at line 154 of file class.WordLevelDiff.php.

◆ $xv

array _DiffEngine::$xv
private

Definition at line 153 of file class.WordLevelDiff.php.

◆ $ychanged

array _DiffEngine::$ychanged
private

Definition at line 155 of file class.WordLevelDiff.php.

◆ $yind

array _DiffEngine::$yind
private

Definition at line 157 of file class.WordLevelDiff.php.

◆ $yv

array _DiffEngine::$yv
private

Definition at line 156 of file class.WordLevelDiff.php.

◆ MAX_XREF_LENGTH

const _DiffEngine::MAX_XREF_LENGTH = 10000

Definition at line 151 of file class.WordLevelDiff.php.


The documentation for this class was generated from the following file: