Skip to content

Commit b9905fe

Browse files
committed
diffcore-delta.c: Ignore CR in CRLF for text files
This ignores the CR byte in a CRLF sequence in text files when computing the similarity of two blobs. Usually this should not matter, as nobody sane would check a file with CRLF line endings into the repository (they would use autocrlf so that the repository copy has LF line endings). Signed-off-by: Junio C Hamano <[email protected]>
1 parent af3abef commit b9905fe

File tree

2 files changed

+44
-3
lines changed

2 files changed

+44
-3
lines changed

diffcore-delta.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,14 @@ static struct spanhash_top *add_spanhash(struct spanhash_top *top,
122122
}
123123
}
124124

125-
static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz)
125+
static struct spanhash_top *hash_chars(struct diff_filespec *one)
126126
{
127127
int i, n;
128128
unsigned int accum1, accum2, hashval;
129129
struct spanhash_top *hash;
130+
unsigned char *buf = one->data;
131+
unsigned int sz = one->size;
132+
int is_text = !one->is_binary;
130133

131134
i = INITIAL_HASH_SIZE;
132135
hash = xmalloc(sizeof(*hash) + sizeof(struct spanhash) * (1<<i));
@@ -140,6 +143,11 @@ static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz)
140143
unsigned int c = *buf++;
141144
unsigned int old_1 = accum1;
142145
sz--;
146+
147+
/* Ignore CR in CRLF sequence if text */
148+
if (is_text && c == '\r' && sz && *buf == '\n')
149+
continue;
150+
143151
accum1 = (accum1 << 7) ^ (accum2 >> 25);
144152
accum2 = (accum2 << 7) ^ (old_1 >> 25);
145153
accum1 += c;
@@ -169,14 +177,14 @@ int diffcore_count_changes(struct diff_filespec *src,
169177
if (src_count_p)
170178
src_count = *src_count_p;
171179
if (!src_count) {
172-
src_count = hash_chars(src->data, src->size);
180+
src_count = hash_chars(src);
173181
if (src_count_p)
174182
*src_count_p = src_count;
175183
}
176184
if (dst_count_p)
177185
dst_count = *dst_count_p;
178186
if (!dst_count) {
179-
dst_count = hash_chars(dst->data, dst->size);
187+
dst_count = hash_chars(dst);
180188
if (dst_count_p)
181189
*dst_count_p = dst_count;
182190
}

t/t0022-crlf-rename.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/sh
2+
3+
test_description='ignore CR in CRLF sequence while computing similarity'
4+
5+
. ./test-lib.sh
6+
7+
test_expect_success setup '
8+
9+
cat ../t0022-crlf-rename.sh >sample &&
10+
git add sample &&
11+
12+
test_tick &&
13+
git commit -m Initial &&
14+
15+
sed -e "s/\$/\r/" ../t0022-crlf-rename.sh >elpmas &&
16+
git add elpmas &&
17+
rm -f sample &&
18+
19+
test_tick &&
20+
git commit -a -m Second
21+
22+
'
23+
24+
test_expect_success 'diff -M' '
25+
26+
git diff-tree -M -r --name-status HEAD^ HEAD |
27+
sed -e "s/R[0-9]*/RNUM/" >actual &&
28+
echo "RNUM sample elpmas" >expect &&
29+
diff -u expect actual
30+
31+
'
32+
33+
test_done

0 commit comments

Comments
 (0)