ILIAS  release_5-1 Revision 5.0.0-5477-g43f3e3fab5f
_DiffEngine Class Reference
+ Collaboration diagram for _DiffEngine:

Public Member Functions

 diff ($from_lines, $to_lines)
 
 _line_hash ( $line)
 Returns the whole line if it's small enough, or the MD5 hash otherwise. More...
 
 _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
 
 _lcs_pos ($ypos)
 
 _compareseq ($xoff, $xlim, $yoff, $ylim)
 
 _shift_boundaries ($lines, &$changed, $other_changed)
 

Data Fields

const MAX_XREF_LENGTH = 10000
 

Detailed Description

Definition at line 781 of file class.WordLevelDiff.php.

Member Function Documentation

◆ _compareseq()

_DiffEngine::_compareseq (   $xoff,
  $xlim,
  $yoff,
  $ylim 
)

Definition at line 1025 of file class.WordLevelDiff.php.

1025 {
1026 $fname = '_DiffEngine::_compareseq';
1027 //wfProfileIn( $fname );
1028
1029 // Slide down the bottom initial diagonal.
1030 while ($xoff < $xlim && $yoff < $ylim
1031 && $this->xv[$xoff] == $this->yv[$yoff]) {
1032 ++$xoff;
1033 ++$yoff;
1034 }
1035
1036 // Slide up the top initial diagonal.
1037 while ($xlim > $xoff && $ylim > $yoff
1038 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
1039 --$xlim;
1040 --$ylim;
1041 }
1042
1043 if ($xoff == $xlim || $yoff == $ylim)
1044 $lcs = 0;
1045 else {
1046 // This is ad hoc but seems to work well.
1047 //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
1048 //$nchunks = max(2,min(8,(int)$nchunks));
1049 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
1050 list ($lcs, $seps)
1051 = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
1052 }
1053
1054 if ($lcs == 0) {
1055 // X and Y sequences have no common subsequence:
1056 // mark all changed.
1057 while ($yoff < $ylim)
1058 $this->ychanged[$this->yind[$yoff++]] = 1;
1059 while ($xoff < $xlim)
1060 $this->xchanged[$this->xind[$xoff++]] = 1;
1061 } else {
1062 // Use the partitions to split this problem into subproblems.
1063 reset($seps);
1064 $pt1 = $seps[0];
1065 while ($pt2 = next($seps)) {
1066 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
1067 $pt1 = $pt2;
1068 }
1069 }
1070 //wfProfileOut( $fname );
1071 }
_diag($xoff, $xlim, $yoff, $ylim, $nchunks)
_compareseq($xoff, $xlim, $yoff, $ylim)

References _compareseq(), and _diag().

Referenced by _compareseq(), and diff().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ _diag()

_DiffEngine::_diag (   $xoff,
  $xlim,
  $yoff,
  $ylim,
  $nchunks 
)

Definition at line 906 of file class.WordLevelDiff.php.

906 {
907 $fname = '_DiffEngine::_diag';
908 //wfProfileIn( $fname );
909 $flip = false;
910
911 if ($xlim - $xoff > $ylim - $yoff) {
912 // Things seems faster (I'm not sure I understand why)
913 // when the shortest sequence in X.
914 $flip = true;
915 list ($xoff, $xlim, $yoff, $ylim)
916 = array( $yoff, $ylim, $xoff, $xlim);
917 }
918
919 if ($flip)
920 for ($i = $ylim - 1; $i >= $yoff; $i--)
921 $ymatches[$this->xv[$i]][] = $i;
922 else
923 for ($i = $ylim - 1; $i >= $yoff; $i--)
924 $ymatches[$this->yv[$i]][] = $i;
925
926 $this->lcs = 0;
927 $this->seq[0]= $yoff - 1;
928 $this->in_seq = array();
929 $ymids[0] = array();
930
931 $numer = $xlim - $xoff + $nchunks - 1;
932 $x = $xoff;
933 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
934 //wfProfileIn( "$fname-chunk" );
935 if ($chunk > 0)
936 for ($i = 0; $i <= $this->lcs; $i++)
937 $ymids[$i][$chunk-1] = $this->seq[$i];
938
939 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
940 for ( ; $x < $x1; $x++) {
941 $line = $flip ? $this->yv[$x] : $this->xv[$x];
942 if (empty($ymatches[$line]))
943 continue;
944 $matches = $ymatches[$line];
945 reset($matches);
946 while (list ($junk, $y) = each($matches))
947 if (empty($this->in_seq[$y])) {
948 $k = $this->_lcs_pos($y);
949 USE_ASSERTS && assert($k > 0);
950 $ymids[$k] = $ymids[$k-1];
951 break;
952 }
953 while (list ( /* $junk */, $y) = each($matches)) {
954 if ($y > $this->seq[$k-1]) {
955 USE_ASSERTS && assert($y < $this->seq[$k]);
956 // Optimization: this is a common case:
957 // next match is just replacing previous match.
958 $this->in_seq[$this->seq[$k]] = false;
959 $this->seq[$k] = $y;
960 $this->in_seq[$y] = 1;
961 } else if (empty($this->in_seq[$y])) {
962 $k = $this->_lcs_pos($y);
963 USE_ASSERTS && assert($k > 0);
964 $ymids[$k] = $ymids[$k-1];
965 }
966 }
967 }
968 //wfProfileOut( "$fname-chunk" );
969 }
970
971 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
972 $ymid = $ymids[$this->lcs];
973 for ($n = 0; $n < $nchunks - 1; $n++) {
974 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
975 $y1 = $ymid[$n] + 1;
976 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
977 }
978 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
979
980 //wfProfileOut( $fname );
981 return array($this->lcs, $seps);
982 }
$n
Definition: RandomTest.php:80
const USE_ASSERTS
$y
Definition: example_007.php:83
$x
Definition: example_009.php:98

References $n, $x, $y, _lcs_pos(), and USE_ASSERTS.

Referenced by _compareseq().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ _lcs_pos()

_DiffEngine::_lcs_pos (   $ypos)

Definition at line 984 of file class.WordLevelDiff.php.

984 {
985 $fname = '_DiffEngine::_lcs_pos';
986 //wfProfileIn( $fname );
987
988 $end = $this->lcs;
989 if ($end == 0 || $ypos > $this->seq[$end]) {
990 $this->seq[++$this->lcs] = $ypos;
991 $this->in_seq[$ypos] = 1;
992 //wfProfileOut( $fname );
993 return $this->lcs;
994 }
995
996 $beg = 1;
997 while ($beg < $end) {
998 $mid = (int)(($beg + $end) / 2);
999 if ( $ypos > $this->seq[$mid] )
1000 $beg = $mid + 1;
1001 else
1002 $end = $mid;
1003 }
1004
1005 USE_ASSERTS && assert($ypos != $this->seq[$end]);
1006
1007 $this->in_seq[$this->seq[$end]] = false;
1008 $this->seq[$end] = $ypos;
1009 $this->in_seq[$ypos] = 1;
1010 //wfProfileOut( $fname );
1011 return $end;
1012 }

References USE_ASSERTS.

Referenced by _diag().

+ Here is the caller graph for this function:

◆ _line_hash()

_DiffEngine::_line_hash (   $line)

Returns the whole line if it's small enough, or the MD5 hash otherwise.

Definition at line 881 of file class.WordLevelDiff.php.

881 {
882 if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
883 return md5( $line );
884 } else {
885 return $line;
886 }
887 }

Referenced by diff().

+ Here is the caller graph for this function:

◆ _shift_boundaries()

_DiffEngine::_shift_boundaries (   $lines,
$changed,
  $other_changed 
)

Definition at line 1085 of file class.WordLevelDiff.php.

1085 {
1086 $fname = '_DiffEngine::_shift_boundaries';
1087 //wfProfileIn( $fname );
1088 $i = 0;
1089 $j = 0;
1090
1091 USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
1092 $len = sizeof($lines);
1093 $other_len = sizeof($other_changed);
1094
1095 while (1) {
1096 /*
1097 * Scan forwards to find beginning of another run of changes.
1098 * Also keep track of the corresponding point in the other file.
1099 *
1100 * Throughout this code, $i and $j are adjusted together so that
1101 * the first $i elements of $changed and the first $j elements
1102 * of $other_changed both contain the same number of zeros
1103 * (unchanged lines).
1104 * Furthermore, $j is always kept so that $j == $other_len or
1105 * $other_changed[$j] == false.
1106 */
1107 while ($j < $other_len && $other_changed[$j])
1108 $j++;
1109
1110 while ($i < $len && ! $changed[$i]) {
1111 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
1112 $i++; $j++;
1113 while ($j < $other_len && $other_changed[$j])
1114 $j++;
1115 }
1116
1117 if ($i == $len)
1118 break;
1119
1120 $start = $i;
1121
1122 // Find the end of this run of changes.
1123 while (++$i < $len && $changed[$i])
1124 continue;
1125
1126 do {
1127 /*
1128 * Record the length of this run of changes, so that
1129 * we can later determine whether the run has grown.
1130 */
1131 $runlength = $i - $start;
1132
1133 /*
1134 * Move the changed region back, so long as the
1135 * previous unchanged line matches the last changed one.
1136 * This merges with previous changed regions.
1137 */
1138 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
1139 $changed[--$start] = 1;
1140 $changed[--$i] = false;
1141 while ($start > 0 && $changed[$start - 1])
1142 $start--;
1143 USE_ASSERTS && assert('$j > 0');
1144 while ($other_changed[--$j])
1145 continue;
1146 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
1147 }
1148
1149 /*
1150 * Set CORRESPONDING to the end of the changed run, at the last
1151 * point where it corresponds to a changed run in the other file.
1152 * CORRESPONDING == LEN means no such point has been found.
1153 */
1154 $corresponding = $j < $other_len ? $i : $len;
1155
1156 /*
1157 * Move the changed region forward, so long as the
1158 * first changed line matches the following unchanged one.
1159 * This merges with following changed regions.
1160 * Do this second, so that if there are no merges,
1161 * the changed region is moved forward as far as possible.
1162 */
1163 while ($i < $len && $lines[$start] == $lines[$i]) {
1164 $changed[$start++] = false;
1165 $changed[$i++] = 1;
1166 while ($i < $len && $changed[$i])
1167 $i++;
1168
1169 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
1170 $j++;
1171 if ($j < $other_len && $other_changed[$j]) {
1172 $corresponding = $i;
1173 while ($j < $other_len && $other_changed[$j])
1174 $j++;
1175 }
1176 }
1177 } while ($runlength != $i - $start);
1178
1179 /*
1180 * If possible, move the fully-merged run of changes
1181 * back to a corresponding run in the other file.
1182 */
1183 while ($corresponding < $i) {
1184 $changed[--$start] = 1;
1185 $changed[--$i] = 0;
1186 USE_ASSERTS && assert('$j > 0');
1187 while ($other_changed[--$j])
1188 continue;
1189 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
1190 }
1191 }
1192 //wfProfileOut( $fname );
1193 }

References $changed, and USE_ASSERTS.

Referenced by diff().

+ Here is the caller graph for this function:

◆ diff()

_DiffEngine::diff (   $from_lines,
  $to_lines 
)

Definition at line 785 of file class.WordLevelDiff.php.

785 {
786 $fname = '_DiffEngine::diff';
787 //wfProfileIn( $fname );
788
789 $n_from = sizeof($from_lines);
790 $n_to = sizeof($to_lines);
791
792 $this->xchanged = $this->ychanged = array();
793 $this->xv = $this->yv = array();
794 $this->xind = $this->yind = array();
795 unset($this->seq);
796 unset($this->in_seq);
797 unset($this->lcs);
798
799 // Skip leading common lines.
800 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
801 if ($from_lines[$skip] !== $to_lines[$skip])
802 break;
803 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
804 }
805 // Skip trailing common lines.
806 $xi = $n_from; $yi = $n_to;
807 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
808 if ($from_lines[$xi] !== $to_lines[$yi])
809 break;
810 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
811 }
812
813 // Ignore lines which do not exist in both files.
814 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
815 $xhash[$this->_line_hash($from_lines[$xi])] = 1;
816 }
817
818 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
819 $line = $to_lines[$yi];
820 if ( ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) )
821 continue;
822 $yhash[$this->_line_hash($line)] = 1;
823 $this->yv[] = $line;
824 $this->yind[] = $yi;
825 }
826 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
827 $line = $from_lines[$xi];
828 if ( ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) )
829 continue;
830 $this->xv[] = $line;
831 $this->xind[] = $xi;
832 }
833
834 // Find the LCS.
835 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
836
837 // Merge edits when possible
838 $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
839 $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
840
841 // Compute the edit operations.
842 $edits = array();
843 $xi = $yi = 0;
844 while ($xi < $n_from || $yi < $n_to) {
845 USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
846 USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
847
848 // Skip matching "snake".
849 $copy = array();
850 while ( $xi < $n_from && $yi < $n_to
851 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
852 $copy[] = $from_lines[$xi++];
853 ++$yi;
854 }
855 if ($copy)
856 $edits[] = new _DiffOp_Copy($copy);
857
858 // Find deletes & adds.
859 $delete = array();
860 while ($xi < $n_from && $this->xchanged[$xi])
861 $delete[] = $from_lines[$xi++];
862
863 $add = array();
864 while ($yi < $n_to && $this->ychanged[$yi])
865 $add[] = $to_lines[$yi++];
866
867 if ($delete && $add)
868 $edits[] = new _DiffOp_Change($delete, $add);
869 elseif ($delete)
870 $edits[] = new _DiffOp_Delete($delete);
871 elseif ($add)
872 $edits[] = new _DiffOp_Add($add);
873 }
874 //wfProfileOut( $fname );
875 return $edits;
876 }
_line_hash( $line)
Returns the whole line if it's small enough, or the MD5 hash otherwise.
_shift_boundaries($lines, &$changed, $other_changed)

References _compareseq(), _line_hash(), _shift_boundaries(), and USE_ASSERTS.

Referenced by Diff\Diff().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Field Documentation

◆ MAX_XREF_LENGTH

const _DiffEngine::MAX_XREF_LENGTH = 10000

Definition at line 783 of file class.WordLevelDiff.php.


The documentation for this class was generated from the following file: