Skip to content

Commit 9f75ce3

Browse files
phil-blaingitster
authored andcommitted
ref-filter: handle CRLF at end-of-line more gracefully
The ref-filter code does not correctly handle commit or tag messages that use CRLF as the line terminator. Such messages can be created with the `--cleanup=verbatim` option of `git commit` and `git tag`, or by using `git commit-tree` directly. The function `find_subpos` in ref-filter.c looks for two consecutive LFs to find the end of the subject line, a sequence which is absent in messages using CRLF. This results in the whole message being parsed as the subject line (`%(contents:subject)`), and the body of the message (`%(contents:body)`) being empty. Moreover, in `copy_subject`, which wants to return the subject as a single line, '\n' is replaced by space, but '\r' is untouched. This impacts the output of `git branch`, `git tag` and `git for-each-ref`. This behaviour is a regression for `git branch --verbose`, which bisects down to 949af06 (branch: use ref-filter printing APIs, 2017-01-10). Adjust the ref-filter code to be more lenient by hardening the logic in `copy_subject` and `find_subpos` to correctly parse messages containing CRLF. Add a new test script, 't3920-crlf-messages.sh', to test the behaviour of commands using either the ref-filter or the pretty APIs with messages using CRLF line endings. The function `test_crlf_subject_body_and_contents` can be used to test that the `--format` option of `branch`, `tag`, `for-each-ref`, `log` and `show` correctly displays the subject, body and raw content of commit and tag messages using CRLF. Test the output of `branch`, `tag` and `for-each-ref` with such commits. Helped-by: Junio C Hamano <[email protected]> Helped-by: Eric Sunshine <[email protected]> Signed-off-by: Philippe Blain <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 69986e1 commit 9f75ce3

File tree

2 files changed

+130
-14
lines changed

2 files changed

+130
-14
lines changed

ref-filter.c

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,14 +1097,19 @@ static const char *copy_email(const char *buf, struct used_atom *atom)
10971097

10981098
static char *copy_subject(const char *buf, unsigned long len)
10991099
{
1100-
char *r = xmemdupz(buf, len);
1100+
struct strbuf sb = STRBUF_INIT;
11011101
int i;
11021102

1103-
for (i = 0; i < len; i++)
1104-
if (r[i] == '\n')
1105-
r[i] = ' ';
1103+
for (i = 0; i < len; i++) {
1104+
if (buf[i] == '\r' && i + 1 < len && buf[i + 1] == '\n')
1105+
continue; /* ignore CR in CRLF */
11061106

1107-
return r;
1107+
if (buf[i] == '\n')
1108+
strbuf_addch(&sb, ' ');
1109+
else
1110+
strbuf_addch(&sb, buf[i]);
1111+
}
1112+
return strbuf_detach(&sb, NULL);
11081113
}
11091114

11101115
static void grab_date(const char *buf, struct atom_value *v, const char *atomname)
@@ -1228,20 +1233,23 @@ static void find_subpos(const char *buf,
12281233

12291234
/* subject is first non-empty line */
12301235
*sub = buf;
1231-
/* subject goes to first empty line */
1232-
while (buf < *sig && *buf && *buf != '\n') {
1233-
eol = strchrnul(buf, '\n');
1234-
if (*eol)
1235-
eol++;
1236-
buf = eol;
1237-
}
1236+
/* subject goes to first empty line before signature begins */
1237+
if ((eol = strstr(*sub, "\n\n"))) {
1238+
eol = eol < *sig ? eol : *sig;
1239+
/* check if message uses CRLF */
1240+
} else if (! (eol = strstr(*sub, "\r\n\r\n"))) {
1241+
/* treat whole message as subject */
1242+
eol = strrchr(*sub, '\0');
1243+
}
1244+
buf = eol;
12381245
*sublen = buf - *sub;
12391246
/* drop trailing newline, if present */
1240-
if (*sublen && (*sub)[*sublen - 1] == '\n')
1247+
while (*sublen && ((*sub)[*sublen - 1] == '\n' ||
1248+
(*sub)[*sublen - 1] == '\r'))
12411249
*sublen -= 1;
12421250

12431251
/* skip any empty lines */
1244-
while (*buf == '\n')
1252+
while (*buf == '\n' || *buf == '\r')
12451253
buf++;
12461254
*body = buf;
12471255
*bodylen = strlen(buf);

t/t3920-crlf-messages.sh

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#!/bin/sh
2+
3+
test_description='Test ref-filter and pretty APIs for commit and tag messages using CRLF'
4+
. ./test-lib.sh
5+
6+
LIB_CRLF_BRANCHES=""
7+
8+
create_crlf_ref () {
9+
branch="$1" &&
10+
cat >.crlf-orig-$branch.txt &&
11+
cat .crlf-orig-$branch.txt | append_cr >.crlf-message-$branch.txt &&
12+
grep 'Subject' .crlf-orig-$branch.txt | tr '\n' ' ' | sed 's/[ ]*$//' | tr -d '\n' >.crlf-subject-$branch.txt &&
13+
grep 'Body' .crlf-message-$branch.txt >.crlf-body-$branch.txt || true &&
14+
LIB_CRLF_BRANCHES="${LIB_CRLF_BRANCHES} ${branch}" &&
15+
test_tick &&
16+
hash=$(git commit-tree HEAD^{tree} -p HEAD -F .crlf-message-${branch}.txt) &&
17+
git branch ${branch} ${hash} &&
18+
git tag tag-${branch} ${branch} -F .crlf-message-${branch}.txt --cleanup=verbatim
19+
}
20+
21+
create_crlf_refs () {
22+
create_crlf_ref crlf <<-\EOF &&
23+
Subject first line
24+
25+
Body first line
26+
Body second line
27+
EOF
28+
create_crlf_ref crlf-empty-lines-after-subject <<-\EOF &&
29+
Subject first line
30+
31+
32+
Body first line
33+
Body second line
34+
EOF
35+
create_crlf_ref crlf-two-line-subject <<-\EOF &&
36+
Subject first line
37+
Subject second line
38+
39+
Body first line
40+
Body second line
41+
EOF
42+
create_crlf_ref crlf-two-line-subject-no-body <<-\EOF &&
43+
Subject first line
44+
Subject second line
45+
EOF
46+
create_crlf_ref crlf-two-line-subject-no-body-trailing-newline <<-\EOF
47+
Subject first line
48+
Subject second line
49+
50+
EOF
51+
}
52+
53+
test_crlf_subject_body_and_contents() {
54+
command_and_args="$@" &&
55+
command=$1 &&
56+
if test ${command} = "branch" || test ${command} = "for-each-ref" || test ${command} = "tag"
57+
then
58+
atoms="(contents:subject) (contents:body) (contents)"
59+
elif test ${command} = "log" || test ${command} = "show"
60+
then
61+
atoms="s b B"
62+
fi &&
63+
files="subject body message" &&
64+
while test -n "${atoms}"
65+
do
66+
set ${atoms} && atom=$1 && shift && atoms="$*" &&
67+
set ${files} && file=$1 && shift && files="$*" &&
68+
test_expect_success "${command}: --format='%${atom}' works with messages using CRLF" "
69+
rm -f expect &&
70+
for ref in ${LIB_CRLF_BRANCHES}
71+
do
72+
cat .crlf-${file}-\"\${ref}\".txt >>expect &&
73+
printf \"\n\" >>expect
74+
done &&
75+
git $command_and_args --format=\"%${atom}\" >actual &&
76+
test_cmp expect actual
77+
"
78+
done
79+
}
80+
81+
82+
test_expect_success 'Setup refs with commit and tag messages using CRLF' '
83+
test_commit inital &&
84+
create_crlf_refs
85+
'
86+
87+
test_expect_success 'branch: --verbose works with messages using CRLF' '
88+
rm -f expect &&
89+
for branch in $LIB_CRLF_BRANCHES
90+
do
91+
printf " " >>expect &&
92+
cat .crlf-subject-${branch}.txt >>expect &&
93+
printf "\n" >>expect
94+
done &&
95+
git branch -v >tmp &&
96+
# Remove first two columns, and the line for the currently checked out branch
97+
current=$(git branch --show-current) &&
98+
grep -v $current <tmp | awk "{\$1=\$2=\"\"}1" >actual &&
99+
test_cmp expect actual
100+
'
101+
102+
test_crlf_subject_body_and_contents branch --list crlf*
103+
104+
test_crlf_subject_body_and_contents tag --list tag-crlf*
105+
106+
test_crlf_subject_body_and_contents for-each-ref refs/heads/crlf*
107+
108+
test_done

0 commit comments

Comments
 (0)