Skip to content

Commit 4652fbb

Browse files
committed
First implementation
1 parent 277106a commit 4652fbb

File tree

5 files changed

+275
-0
lines changed

5 files changed

+275
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.idea

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Go-MyersDiff
2+
This implementation of the Myers Diff Algorithm was written for when the diff of many files is needed fast in Go. It
3+
includes options for what output is needed (additions, deletions, or similarities).
4+
5+
## Usage
6+
```go
7+
import "github.com/bill-rich/go-myersdiff"
8+
9+
func main() {
10+
a := []string {
11+
"line 1",
12+
"line 2a",
13+
"line 3",
14+
}
15+
b := []string {
16+
"line 1",
17+
"line 2b",
18+
"line 3",
19+
}
20+
fmt.Println(myersdiff.GenerateDiff(a, b, myersdiff.NewOptions()))
21+
}
22+
```

go.mod

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
module github.com/bill-rich/myersdiff
2+
3+
go 1.17
4+
5+
require github.com/sergi/go-diff v1.2.0

go.sum

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
3+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4+
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
5+
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
6+
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
7+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
8+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
9+
github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ=
10+
github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
11+
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
12+
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
13+
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
14+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
15+
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
16+
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
17+
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
18+
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

myersdiff.go

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
package myersdiff
2+
3+
// This implementation of the Myers Diff Algorithm was written using http://simplygenius.net/Article/DiffTutorial1,
4+
// and https://github.com/cj1128/myers-diff as references.
5+
6+
import (
7+
"bytes"
8+
)
9+
10+
// opType identifies one operation of an edit script.
type opType int

// Edit-script operations.
//
// Every constant is explicitly typed. A constant written as `DELETE = 2`
// inside a const block is an untyped integer constant, not an opType, so it
// would not carry the String method and would print as a bare number.
const (
	// ADD marks a line that is taken from the destination.
	ADD opType = 1
	// DELETE marks a line that is dropped from the source.
	DELETE opType = 2
	// NOOP marks a line that source and destination share.
	NOOP opType = 3
)

// String returns a three-letter label for the operation ("ADD", "DEL" or
// "NOP"), or the empty string for a value that is not one of the declared
// operations.
func (op opType) String() string {
	switch op {
	case ADD:
		return "ADD"
	case DELETE:
		return "DEL"
	case NOOP:
		return "NOP"
	}
	return ""
}
29+
30+
// DiffOptions selects which kinds of line appear in the rendered diff. A
// disabled kind is still part of the computed edit script; it is only left out
// of the output.
type DiffOptions struct {
	// PrintAdd enables output of added lines (prefixed with "+ ").
	PrintAdd bool
	// PrintNoOp enables output of unchanged lines.
	PrintNoOp bool
	// PrintDelete enables output of deleted lines (prefixed with "- ").
	PrintDelete bool
}
36+
37+
// trace records the search history of the diff: v holds one map per edit
// distance d, keyed by diagonal k, and srcLen/dstLen are the lengths of the two
// inputs the trace was built from.
type trace struct {
	v      []map[int]int
	srcLen int
	dstLen int
}

// length reports how many per-distance maps have been recorded so far.
func (tr *trace) length() int {
	return len(tr.v)
}

// append records step as the map for the next edit distance.
func (tr *trace) append(step map[int]int) {
	tr.v = append(tr.v, step)
}
50+
51+
// GenerateDiff provides the diff of two string slices. To diff two files, split them by lines and provide each as a
52+
// slice.
53+
func GenerateDiff(src, dst []string, opts *DiffOptions) *bytes.Buffer {
54+
script := shortestEditScript(src, dst)
55+
return writeDiff(src, dst, script, opts)
56+
}
57+
58+
// NewOptions returns a default set of options.
59+
func NewOptions() *DiffOptions {
60+
return &DiffOptions{
61+
PrintAdd: true,
62+
PrintNoOp: true,
63+
PrintDelete: true,
64+
}
65+
}
66+
67+
func writeDiff(src, dst []string, script []opType, opts *DiffOptions) *bytes.Buffer {
68+
buffer := bytes.Buffer{}
69+
srcIndex, dstIndex := 0, 0
70+
for _, op := range script {
71+
switch op {
72+
case ADD:
73+
if opts.PrintAdd {
74+
buffer.Write([]byte("+ "))
75+
buffer.Write([]byte(dst[dstIndex]))
76+
buffer.Write([]byte("\n"))
77+
}
78+
dstIndex += 1
79+
80+
case NOOP:
81+
if opts.PrintNoOp {
82+
buffer.Write([]byte(" "))
83+
buffer.Write([]byte(src[srcIndex]))
84+
buffer.Write([]byte("\n"))
85+
}
86+
srcIndex += 1
87+
dstIndex += 1
88+
89+
case DELETE:
90+
if opts.PrintDelete {
91+
buffer.Write([]byte("- "))
92+
buffer.Write([]byte(src[srcIndex]))
93+
buffer.Write([]byte("\n"))
94+
}
95+
srcIndex += 1
96+
}
97+
}
98+
return &buffer
99+
}
100+
101+
func createTrace(src, dst []string) *trace {
102+
srcLen := len(src)
103+
dstLen := len(dst)
104+
maxLen := srcLen + dstLen
105+
var x, y int
106+
trace := trace{
107+
srcLen: len(src),
108+
dstLen: len(dst),
109+
}
110+
111+
for d := 0; d <= maxLen; d++ {
112+
v := make(map[int]int, d+2)
113+
trace.append(v)
114+
115+
// Find the first difference
116+
if d == 0 {
117+
firstDiff := 0
118+
// Keep looking until the files differ
119+
for len(src) > firstDiff && len(dst) > firstDiff && src[firstDiff] == dst[firstDiff] {
120+
firstDiff++
121+
}
122+
// The first diff is at line firstDiff
123+
v[0] = firstDiff
124+
125+
// If firstDiff is the end of the file, there is no diff
126+
if firstDiff == len(src) && firstDiff == len(dst) {
127+
return &trace
128+
}
129+
continue
130+
}
131+
132+
lastV := trace.v[d-1]
133+
134+
for k := -d; k <= d; k += 2 {
135+
switch {
136+
// Go down (insert dest) if at the lowest k-line
137+
case k == -d:
138+
x = lastV[k+1]
139+
// Go down (insert dest) lower k-line x is behind the higher k-line x. This comparison can't be made at the
140+
// highest k-line (k==d).
141+
case k != d && lastV[k-1] < lastV[k+1]:
142+
x = lastV[k+1]
143+
// Move right (del source) if at the highest k-line (k==d) or if the lower k-line x is further along.
144+
default:
145+
x = lastV[k-1] + 1
146+
}
147+
148+
// Get y using the slope function
149+
y = x - k
150+
151+
// Look for next diff along the diagonal (snake)
152+
for x < srcLen && y < dstLen && src[x] == dst[y] {
153+
x, y = x+1, y+1
154+
}
155+
156+
// Set the k-line/x intercept
157+
v[k] = x
158+
159+
// Reached the end of the source or dest.
160+
if x == srcLen && y == dstLen {
161+
return &trace
162+
}
163+
}
164+
}
165+
return &trace
166+
}
167+
168+
func createScript(trace *trace) []opType {
169+
var x, y int
170+
var script []opType
171+
172+
x = trace.srcLen
173+
y = trace.dstLen
174+
var k, prevK, prevX, prevY int
175+
176+
for d := trace.length() - 1; d > 0; d-- {
177+
k = x - y
178+
lastV := trace.v[d-1]
179+
180+
switch {
181+
case k == -d:
182+
prevK = k + 1
183+
case k != d && lastV[k-1] < lastV[k+1]:
184+
prevK = k + 1
185+
default:
186+
prevK = k - 1
187+
}
188+
189+
prevX = lastV[prevK]
190+
prevY = prevX - prevK
191+
192+
for x > prevX && y > prevY {
193+
script = append(script, NOOP)
194+
x -= 1
195+
y -= 1
196+
}
197+
198+
if x == prevX {
199+
script = append(script, ADD)
200+
} else {
201+
script = append(script, DELETE)
202+
}
203+
x, y = prevX, prevY
204+
}
205+
206+
if trace.v[0][0] != 0 {
207+
for i := 0; i < trace.v[0][0]; i++ {
208+
script = append(script, NOOP)
209+
}
210+
}
211+
212+
return reverse(script)
213+
}
214+
215+
func shortestEditScript(src, dst []string) []opType {
216+
trace := createTrace(src, dst)
217+
return createScript(trace)
218+
}
219+
220+
func reverse(s []opType) []opType {
221+
result := make([]opType, len(s))
222+
end := len(s) - 1
223+
224+
for i, v := range s {
225+
result[end-i] = v
226+
}
227+
228+
return result
229+
}

0 commit comments

Comments
 (0)