|
| 1 | +package myersdiff |
| 2 | + |
| 3 | +// This implementation of the Myers Diff Algorithm was written using http://simplygenius.net/Article/DiffTutorial1, |
| 4 | +// and https://github.com/cj1128/myers-diff as references. |
| 5 | + |
| 6 | +import ( |
| 7 | + "bytes" |
| 8 | +) |
| 9 | + |
// opType identifies a single step of an edit script.
type opType int

// Edit-script operations. Every constant is declared as an opType (via the
// implicit repetition of the iota expression) so that each of them carries the
// String method; the numeric values are 1, 2 and 3 respectively.
const (
	// ADD emits the next line of the destination.
	ADD opType = iota + 1
	// DELETE drops the next line of the source.
	DELETE
	// NOOP keeps a line that is present in both source and destination.
	NOOP
)

// String returns a three-letter label for the operation, or the empty string
// when op is not one of ADD, DELETE or NOOP.
func (op opType) String() string {
	switch op {
	case ADD:
		return "ADD"
	case DELETE:
		return "DEL"
	case NOOP:
		return "NOP"
	}
	return ""
}
| 29 | + |
// DiffOptions changes the behavior of how the diff is run or output.
type DiffOptions struct {
	// PrintAdd controls whether lines added in the destination are written.
	PrintAdd bool
	// PrintNoOp controls whether lines common to both inputs are written.
	PrintNoOp bool
	// PrintDelete controls whether lines removed from the source are written.
	PrintDelete bool
}
| 36 | + |
// trace is the history built while searching for the shortest edit script.
type trace struct {
	// v holds one map per edit distance d, in order; each map goes from a
	// k-line to the x position stored for it at that distance.
	v []map[int]int
	// srcLen and dstLen are the lengths of the two inputs being compared.
	srcLen int
	dstLen int
}

// length reports how many per-distance maps have been recorded so far.
func (tr *trace) length() int {
	return len(tr.v)
}

// append records frontier as the map for the next edit distance.
func (tr *trace) append(frontier map[int]int) {
	tr.v = append(tr.v, frontier)
}
| 50 | + |
| 51 | +// GenerateDiff provides the diff of two string slices. To diff two files, split them by lines and provide each as a |
| 52 | +// slice. |
| 53 | +func GenerateDiff(src, dst []string, opts *DiffOptions) *bytes.Buffer { |
| 54 | + script := shortestEditScript(src, dst) |
| 55 | + return writeDiff(src, dst, script, opts) |
| 56 | +} |
| 57 | + |
| 58 | +// NewOptions returns a default set of options. |
| 59 | +func NewOptions() *DiffOptions { |
| 60 | + return &DiffOptions{ |
| 61 | + PrintAdd: true, |
| 62 | + PrintNoOp: true, |
| 63 | + PrintDelete: true, |
| 64 | + } |
| 65 | +} |
| 66 | + |
| 67 | +func writeDiff(src, dst []string, script []opType, opts *DiffOptions) *bytes.Buffer { |
| 68 | + buffer := bytes.Buffer{} |
| 69 | + srcIndex, dstIndex := 0, 0 |
| 70 | + for _, op := range script { |
| 71 | + switch op { |
| 72 | + case ADD: |
| 73 | + if opts.PrintAdd { |
| 74 | + buffer.Write([]byte("+ ")) |
| 75 | + buffer.Write([]byte(dst[dstIndex])) |
| 76 | + buffer.Write([]byte("\n")) |
| 77 | + } |
| 78 | + dstIndex += 1 |
| 79 | + |
| 80 | + case NOOP: |
| 81 | + if opts.PrintNoOp { |
| 82 | + buffer.Write([]byte(" ")) |
| 83 | + buffer.Write([]byte(src[srcIndex])) |
| 84 | + buffer.Write([]byte("\n")) |
| 85 | + } |
| 86 | + srcIndex += 1 |
| 87 | + dstIndex += 1 |
| 88 | + |
| 89 | + case DELETE: |
| 90 | + if opts.PrintDelete { |
| 91 | + buffer.Write([]byte("- ")) |
| 92 | + buffer.Write([]byte(src[srcIndex])) |
| 93 | + buffer.Write([]byte("\n")) |
| 94 | + } |
| 95 | + srcIndex += 1 |
| 96 | + } |
| 97 | + } |
| 98 | + return &buffer |
| 99 | +} |
| 100 | + |
| 101 | +func createTrace(src, dst []string) *trace { |
| 102 | + srcLen := len(src) |
| 103 | + dstLen := len(dst) |
| 104 | + maxLen := srcLen + dstLen |
| 105 | + var x, y int |
| 106 | + trace := trace{ |
| 107 | + srcLen: len(src), |
| 108 | + dstLen: len(dst), |
| 109 | + } |
| 110 | + |
| 111 | + for d := 0; d <= maxLen; d++ { |
| 112 | + v := make(map[int]int, d+2) |
| 113 | + trace.append(v) |
| 114 | + |
| 115 | + // Find the first difference |
| 116 | + if d == 0 { |
| 117 | + firstDiff := 0 |
| 118 | + // Keep looking until the files differ |
| 119 | + for len(src) > firstDiff && len(dst) > firstDiff && src[firstDiff] == dst[firstDiff] { |
| 120 | + firstDiff++ |
| 121 | + } |
| 122 | + // The first diff is at line firstDiff |
| 123 | + v[0] = firstDiff |
| 124 | + |
| 125 | + // If firstDiff is the end of the file, there is no diff |
| 126 | + if firstDiff == len(src) && firstDiff == len(dst) { |
| 127 | + return &trace |
| 128 | + } |
| 129 | + continue |
| 130 | + } |
| 131 | + |
| 132 | + lastV := trace.v[d-1] |
| 133 | + |
| 134 | + for k := -d; k <= d; k += 2 { |
| 135 | + switch { |
| 136 | + // Go down (insert dest) if at the lowest k-line |
| 137 | + case k == -d: |
| 138 | + x = lastV[k+1] |
| 139 | + // Go down (insert dest) lower k-line x is behind the higher k-line x. This comparison can't be made at the |
| 140 | + // highest k-line (k==d). |
| 141 | + case k != d && lastV[k-1] < lastV[k+1]: |
| 142 | + x = lastV[k+1] |
| 143 | + // Move right (del source) if at the highest k-line (k==d) or if the lower k-line x is further along. |
| 144 | + default: |
| 145 | + x = lastV[k-1] + 1 |
| 146 | + } |
| 147 | + |
| 148 | + // Get y using the slope function |
| 149 | + y = x - k |
| 150 | + |
| 151 | + // Look for next diff along the diagonal (snake) |
| 152 | + for x < srcLen && y < dstLen && src[x] == dst[y] { |
| 153 | + x, y = x+1, y+1 |
| 154 | + } |
| 155 | + |
| 156 | + // Set the k-line/x intercept |
| 157 | + v[k] = x |
| 158 | + |
| 159 | + // Reached the end of the source or dest. |
| 160 | + if x == srcLen && y == dstLen { |
| 161 | + return &trace |
| 162 | + } |
| 163 | + } |
| 164 | + } |
| 165 | + return &trace |
| 166 | +} |
| 167 | + |
| 168 | +func createScript(trace *trace) []opType { |
| 169 | + var x, y int |
| 170 | + var script []opType |
| 171 | + |
| 172 | + x = trace.srcLen |
| 173 | + y = trace.dstLen |
| 174 | + var k, prevK, prevX, prevY int |
| 175 | + |
| 176 | + for d := trace.length() - 1; d > 0; d-- { |
| 177 | + k = x - y |
| 178 | + lastV := trace.v[d-1] |
| 179 | + |
| 180 | + switch { |
| 181 | + case k == -d: |
| 182 | + prevK = k + 1 |
| 183 | + case k != d && lastV[k-1] < lastV[k+1]: |
| 184 | + prevK = k + 1 |
| 185 | + default: |
| 186 | + prevK = k - 1 |
| 187 | + } |
| 188 | + |
| 189 | + prevX = lastV[prevK] |
| 190 | + prevY = prevX - prevK |
| 191 | + |
| 192 | + for x > prevX && y > prevY { |
| 193 | + script = append(script, NOOP) |
| 194 | + x -= 1 |
| 195 | + y -= 1 |
| 196 | + } |
| 197 | + |
| 198 | + if x == prevX { |
| 199 | + script = append(script, ADD) |
| 200 | + } else { |
| 201 | + script = append(script, DELETE) |
| 202 | + } |
| 203 | + x, y = prevX, prevY |
| 204 | + } |
| 205 | + |
| 206 | + if trace.v[0][0] != 0 { |
| 207 | + for i := 0; i < trace.v[0][0]; i++ { |
| 208 | + script = append(script, NOOP) |
| 209 | + } |
| 210 | + } |
| 211 | + |
| 212 | + return reverse(script) |
| 213 | +} |
| 214 | + |
| 215 | +func shortestEditScript(src, dst []string) []opType { |
| 216 | + trace := createTrace(src, dst) |
| 217 | + return createScript(trace) |
| 218 | +} |
| 219 | + |
| 220 | +func reverse(s []opType) []opType { |
| 221 | + result := make([]opType, len(s)) |
| 222 | + end := len(s) - 1 |
| 223 | + |
| 224 | + for i, v := range s { |
| 225 | + result[end-i] = v |
| 226 | + } |
| 227 | + |
| 228 | + return result |
| 229 | +} |
0 commit comments