Skip to content

Commit a7e4009

Browse files
committed
Improve commit decode
1 parent 8cb3ec9 commit a7e4009

File tree

6 files changed

+715
-111
lines changed

6 files changed

+715
-111
lines changed

modules/git/commit.go

Lines changed: 31 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -131,52 +131,47 @@ func (c *Commit) Decode(hash string, reader io.Reader, size int64) error {
131131
finishedHeaders = true
132132
continue
133133
}
134-
if fields := strings.Split(text, " "); !finishedHeaders {
135-
if len(fields) == 0 {
136-
// Executing in this block means that we got a
137-
// whitespace-only line, while parsing a header.
138-
//
139-
// Append it to the last-parsed header, and
140-
// continue.
141-
c.ExtraHeaders[len(c.ExtraHeaders)-1].V += "\n" + text[1:]
134+
if !finishedHeaders {
135+
// Check if this is a continuation line (starts with space)
136+
// Do this before strings.Cut to avoid unnecessary parsing
137+
if len(text) > 0 && text[0] == ' ' && len(c.ExtraHeaders) != 0 {
138+
last := c.ExtraHeaders[len(c.ExtraHeaders)-1]
139+
last.V += "\n" + text[1:]
142140
continue
143141
}
144-
if len(fields) < 2 {
145-
continue
146-
}
147-
switch fields[0] {
142+
143+
key, value, ok := strings.Cut(text, " ")
144+
switch key {
148145
case "tree":
149-
if len(fields) != 2 {
150-
return fmt.Errorf("error parsing tree: %s", text)
146+
if !ok || len(value) == 0 {
147+
continue
151148
}
152-
c.Tree = fields[1]
149+
c.Tree = value
153150
case "parent":
154-
if len(fields) != 2 {
155-
return fmt.Errorf("error parsing parent: %s", text)
151+
if !ok || len(value) == 0 {
152+
continue
156153
}
157-
c.Parents = append(c.Parents, fields[1])
154+
c.Parents = append(c.Parents, value)
158155
case "author":
159-
c.Author.Decode([]byte(text[7:]))
156+
if !ok || len(value) == 0 {
157+
continue
158+
}
159+
c.Author.Decode([]byte(value))
160160
case "committer":
161-
c.Committer.Decode([]byte(text[10:]))
161+
if !ok || len(value) == 0 {
162+
continue
163+
}
164+
c.Committer.Decode([]byte(value))
162165
default:
163-
if strings.HasPrefix(text, " ") && len(c.ExtraHeaders) != 0 {
164-
idx := len(c.ExtraHeaders) - 1
165-
hdr := c.ExtraHeaders[idx]
166-
167-
// Append the line of text (removing the
168-
// leading space) to the last header
169-
// that we parsed, adding a newline
170-
// between the two.
171-
hdr.V = strings.Join(append(
172-
[]string{hdr.V}, text[1:],
173-
), "\n")
174-
} else {
175-
c.ExtraHeaders = append(c.ExtraHeaders, &ExtraHeader{
176-
K: fields[0],
177-
V: strings.Join(fields[1:], " "),
178-
})
166+
// Skip malformed header lines (no space separator) or empty key
167+
if !ok || len(key) == 0 {
168+
continue
179169
}
170+
// New header
171+
c.ExtraHeaders = append(c.ExtraHeaders, &ExtraHeader{
172+
K: key,
173+
V: value,
174+
})
180175
}
181176
} else {
182177
_, _ = message.WriteString(line)

modules/git/commit_test.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package git
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
"github.com/stretchr/testify/assert"
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
// TestCommitDecodeWithMultipleParents tests decoding with multiple parents
12+
func TestCommitDecodeWithMultipleParents(t *testing.T) {
13+
input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb
14+
parent a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2
15+
parent b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3
16+
parent c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4
17+
author Pat Doe <pdoe@example.org> 1337892984 -0700
18+
committer Pat Doe <pdoe@example.org> 1337892984 -0700
19+
20+
test message`
21+
commit := new(Commit)
22+
err := commit.Decode("test", strings.NewReader(input), int64(len(input)))
23+
require.NoError(t, err)
24+
assert.Equal(t, 3, len(commit.Parents))
25+
}
26+
27+
// TestCommitDecodeWithSpecialCharacters tests decoding with special characters
28+
func TestCommitDecodeWithSpecialCharacters(t *testing.T) {
29+
input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb
30+
author 张三 <zhangsan@example.com> 1337892984 +0800
31+
committer 张三 <zhangsan@example.com> 1337892984 +0800
32+
custom value with spaces & special!@#$%^&*()_+-=[]{}|;':",./<>?
33+
34+
test message with 中文 and 日本語`
35+
36+
commit := new(Commit)
37+
err := commit.Decode("test", strings.NewReader(input), int64(len(input)))
38+
require.NoError(t, err)
39+
assert.Contains(t, commit.Author.String(), "张三")
40+
assert.Equal(t, 1, len(commit.ExtraHeaders))
41+
assert.Equal(t, "custom", commit.ExtraHeaders[0].K)
42+
assert.Equal(t, "value with spaces & special!@#$%^&*()_+-=[]{}|;':\",./<>?", commit.ExtraHeaders[0].V)
43+
assert.Contains(t, commit.Message, "中文")
44+
assert.Contains(t, commit.Message, "日本語")
45+
}
46+
47+
// TestCommitDecodeWithExtraHeaderBeforeStandard tests extra header before standard headers
48+
func TestCommitDecodeWithExtraHeaderBeforeStandard(t *testing.T) {
49+
input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb
50+
custom extra header before standard
51+
author Pat Doe <pdoe@example.org> 1337892984 -0700
52+
committer Pat Doe <pdoe@example.org> 1337892984 -0700
53+
54+
test message`
55+
commit := new(Commit)
56+
err := commit.Decode("test", strings.NewReader(input), int64(len(input)))
57+
require.NoError(t, err)
58+
assert.Equal(t, 1, len(commit.ExtraHeaders))
59+
assert.Equal(t, "custom", commit.ExtraHeaders[0].K)
60+
assert.Equal(t, "extra header before standard", commit.ExtraHeaders[0].V)
61+
}
62+
63+
// TestCommitDecodeWithComplexHeaders tests complex multi-line headers
64+
func TestCommitDecodeWithComplexHeaders(t *testing.T) {
65+
input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb
66+
parent b343c8beec664ef6f0e9964d3001c7c7966331ae
67+
author Pat Doe <pdoe@example.org> 1337892984 -0700
68+
committer Pat Doe <pdoe@example.org> 1337892984 -0700
69+
mergetag object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd
70+
type commit
71+
tag random
72+
tagger J. Roe <jroe@example.ca> 1337889148 -0600
73+
74+
Random changes`
75+
76+
commit := new(Commit)
77+
err := commit.Decode("test", strings.NewReader(input), int64(len(input)))
78+
require.NoError(t, err)
79+
80+
// Verify ExtraHeaders
81+
require.Equal(t, 1, len(commit.ExtraHeaders))
82+
require.Equal(t, "mergetag", commit.ExtraHeaders[0].K)
83+
require.Contains(t, commit.ExtraHeaders[0].V, "object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd")
84+
require.Contains(t, commit.ExtraHeaders[0].V, "type commit")
85+
require.Contains(t, commit.ExtraHeaders[0].V, "tag random")
86+
require.Contains(t, commit.ExtraHeaders[0].V, "tagger J. Roe <jroe@example.ca> 1337889148 -0600")
87+
}

modules/git/gitobj/commit.go

Lines changed: 29 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -109,65 +109,55 @@ func (c *Commit) Decode(hash hash.Hash, from io.Reader, size int64) (n int, err
109109
continue
110110
}
111111

112-
if fields := strings.Split(text, " "); !finishedHeaders {
113-
if len(fields) == 0 {
114-
// Executing in this block means that we got a
115-
// whitespace-only line, while parsing a header.
116-
//
117-
// Append it to the last-parsed header, and
118-
// continue.
119-
c.ExtraHeaders[len(c.ExtraHeaders)-1].V += "\n" + text[1:]
112+
if !finishedHeaders {
113+
// Check if this is a continuation line (starts with space)
114+
// Do this before strings.Cut to avoid unnecessary parsing
115+
if len(text) > 0 && text[0] == ' ' && len(c.ExtraHeaders) != 0 {
116+
last := c.ExtraHeaders[len(c.ExtraHeaders)-1]
117+
last.V += "\n" + text[1:]
120118
continue
121119
}
122-
switch fields[0] {
120+
121+
key, value, ok := strings.Cut(text, " ")
122+
switch key {
123123
case "tree":
124-
if len(fields) != 2 {
125-
return n, fmt.Errorf("error parsing tree: %s", text)
124+
if !ok || len(value) == 0 {
125+
continue
126126
}
127-
id, err := hex.DecodeString(fields[1])
127+
id, err := hex.DecodeString(value)
128128
if err != nil {
129129
return n, fmt.Errorf("error parsing tree: %s", err)
130130
}
131131
c.TreeID = id
132132
case "parent":
133-
if len(fields) != 2 {
134-
return n, fmt.Errorf("error parsing parent: %s", text)
133+
if !ok || len(value) == 0 {
134+
continue
135135
}
136-
id, err := hex.DecodeString(fields[1])
136+
id, err := hex.DecodeString(value)
137137
if err != nil {
138138
return n, fmt.Errorf("error parsing parent: %s", err)
139139
}
140140
c.ParentIDs = append(c.ParentIDs, id)
141141
case "author":
142-
if len(text) >= 7 {
143-
c.Author = text[7:]
144-
} else {
145-
c.Author = ""
142+
if !ok || len(value) == 0 {
143+
continue
146144
}
145+
c.Author = value
147146
case "committer":
148-
if len(text) >= 10 {
149-
c.Committer = text[10:]
150-
} else {
151-
c.Committer = ""
147+
if !ok || len(value) == 0 {
148+
continue
152149
}
150+
c.Committer = value
153151
default:
154-
if strings.HasPrefix(text, " ") && len(c.ExtraHeaders) != 0 {
155-
idx := len(c.ExtraHeaders) - 1
156-
hdr := c.ExtraHeaders[idx]
157-
158-
// Append the line of text (removing the
159-
// leading space) to the last header
160-
// that we parsed, adding a newline
161-
// between the two.
162-
hdr.V = strings.Join(append(
163-
[]string{hdr.V}, text[1:],
164-
), "\n")
165-
} else {
166-
c.ExtraHeaders = append(c.ExtraHeaders, &ExtraHeader{
167-
K: fields[0],
168-
V: strings.Join(fields[1:], " "),
169-
})
152+
// Skip malformed header lines (no space separator) or empty key
153+
if !ok || len(key) == 0 {
154+
continue
170155
}
156+
// New header
157+
c.ExtraHeaders = append(c.ExtraHeaders, &ExtraHeader{
158+
K: key,
159+
V: value,
160+
})
171161
}
172162
} else {
173163
_, _ = message.WriteString(line)

0 commit comments

Comments
 (0)