Skip to content

Commit 395609a

Browse files
committed
xdiff: use Rust's version of whitespace processing
Delete xdl_hash_record() and xdl_recmatch() in favor of xdl_line_hash() and xdl_line_equal().

Signed-off-by: Ezekiel Newren <[email protected]>
1 parent f7829c5 commit 395609a

File tree

6 files changed

+15
-194
lines changed

6 files changed

+15
-194
lines changed

rust/xdiff/src/lib.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,6 @@ pub const XDF_WHITESPACE_FLAGS: u64 = XDF_IGNORE_WHITESPACE |
1414
XDF_IGNORE_CR_AT_EOL;
1515

1616

17-
#[no_mangle]
18-
unsafe extern "C" fn xxh3_64(ptr: *const u8, size: usize) -> u64 {
19-
let slice = std::slice::from_raw_parts(ptr, size);
20-
xxhash_rust::xxh3::xxh3_64(slice)
21-
}
22-
2317
#[no_mangle]
2418
unsafe extern "C" fn xdl_line_hash(ptr: *const u8, size: usize, flags: u64) -> u64 {
2519
let line = std::slice::from_raw_parts(ptr, size);

xdiff-interface.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,13 +299,13 @@ void xdiff_clear_find_func(xdemitconf_t *xecfg)
299299

300300
unsigned long xdiff_hash_string(const char *s, size_t len, long flags)
301301
{
302-
return xdl_hash_record(&s, s + len, flags);
302+
return xdl_line_hash((u8 const*) s, len, flags);
303303
}
304304

305305
int xdiff_compare_lines(const char *l1, long s1,
306306
const char *l2, long s2, long flags)
307307
{
308-
return xdl_recmatch(l1, s1, l2, s2, flags);
308+
return xdl_line_equal((u8 const*) l1, s1, (u8 const*) l2, s2, flags);
309309
}
310310

311311
int parse_conflict_style_name(const char *value)

xdiff/xmerge.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2,
101101
xrecord_t **rec2 = xe2->xdf2.recs + i2;
102102

103103
for (i = 0; i < line_count; i++) {
104-
int result = xdl_recmatch((const char*) rec1[i]->ptr, rec1[i]->size,
105-
(const char*) rec2[i]->ptr, rec2[i]->size, flags);
104+
bool result = xdl_line_equal(rec1[i]->ptr, rec1[i]->size,
105+
rec2[i]->ptr, rec2[i]->size, flags);
106106
if (!result)
107107
return -1;
108108
}
@@ -324,8 +324,8 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1,
324324

325325
static int recmatch(xrecord_t *rec1, xrecord_t *rec2, unsigned long flags)
326326
{
327-
return xdl_recmatch((char const*) rec1->ptr, rec1->size,
328-
(char const*) rec2->ptr, rec2->size, flags);
327+
return xdl_line_equal(rec1->ptr, rec1->size,
328+
rec2->ptr, rec2->size, flags);
329329
}
330330

331331
/*

xdiff/xprepare.c

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@
3333
typedef struct s_xdlclass {
3434
struct s_xdlclass *next;
3535
u64 ha;
36-
char const *line;
37-
long size;
36+
u8 const *line;
37+
usize size;
3838
long idx;
3939
long len1, len2;
4040
} xdlclass_t;
@@ -93,15 +93,15 @@ static void xdl_free_classifier(xdlclassifier_t *cf) {
9393

9494
static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec) {
9595
long hi;
96-
char const *line;
96+
u8 const *line;
9797
xdlclass_t *rcrec;
9898

99-
line = (char const*) rec->ptr;
99+
line = rec->ptr;
100100
hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
101101
for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
102102
if (rcrec->ha == rec->ha &&
103-
xdl_recmatch(rcrec->line, rcrec->size,
104-
(const char*) rec->ptr, rec->size, cf->flags))
103+
xdl_line_equal(rcrec->line, rcrec->size,
104+
rec->ptr, rec->size, cf->flags))
105105
break;
106106

107107
if (!rcrec) {
@@ -160,9 +160,6 @@ static void xdl_parse_lines(mmfile_t *mf, long narec, xdfile_t *xdf) {
160160
}
161161

162162

163-
extern u64 xxh3_64(u8 const* ptr, usize size);
164-
165-
166163
static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
167164
xdlclassifier_t *cf, xdfile_t *xdf) {
168165
unsigned long *ha;
@@ -178,21 +175,9 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
178175

179176
xdl_parse_lines(mf, narec, xdf);
180177

181-
if ((xpp->flags & XDF_WHITESPACE_FLAGS) == 0) {
182-
for (usize i = 0; i < (usize) xdf->nrec; i++) {
183-
xrecord_t *rec = xdf->recs[i];
184-
rec->ha = xxh3_64(rec->ptr, rec->size);
185-
}
186-
} else {
187-
for (usize i = 0; i < (usize) xdf->nrec; i++) {
188-
xrecord_t *rec = xdf->recs[i];
189-
char const* dump = (char const*) rec->ptr;
190-
rec->ha = xdl_hash_record(&dump, (char const*) (rec->ptr + rec->size), xpp->flags);
191-
}
192-
}
193-
194178
for (usize i = 0; i < (usize) xdf->nrec; i++) {
195179
xrecord_t *rec = xdf->recs[i];
180+
rec->ha = xdl_line_hash(rec->ptr, rec->size, xpp->flags);
196181
xdl_classify_record(pass, cf, rec);
197182
}
198183

xdiff/xutils.c

Lines changed: 0 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -152,164 +152,6 @@ int xdl_blankline(const char *line, long size, long flags)
152152
return (i == size);
153153
}
154154

155-
/*
156-
* Have we eaten everything on the line, except for an optional
157-
* CR at the very end?
158-
*/
159-
static int ends_with_optional_cr(const char *l, long s, long i)
160-
{
161-
int complete = s && l[s-1] == '\n';
162-
163-
if (complete)
164-
s--;
165-
if (s == i)
166-
return 1;
167-
/* do not ignore CR at the end of an incomplete line */
168-
if (complete && s == i + 1 && l[i] == '\r')
169-
return 1;
170-
return 0;
171-
}
172-
173-
int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
174-
{
175-
int i1, i2;
176-
177-
if (s1 == s2 && !memcmp(l1, l2, s1))
178-
return 1;
179-
if (!(flags & XDF_WHITESPACE_FLAGS))
180-
return 0;
181-
182-
i1 = 0;
183-
i2 = 0;
184-
185-
/*
186-
* -w matches everything that matches with -b, and -b in turn
187-
* matches everything that matches with --ignore-space-at-eol,
188-
* which in turn matches everything that matches with --ignore-cr-at-eol.
189-
*
190-
* Each flavor of ignoring needs different logic to skip whitespaces
191-
* while we have both sides to compare.
192-
*/
193-
if (flags & XDF_IGNORE_WHITESPACE) {
194-
goto skip_ws;
195-
while (i1 < s1 && i2 < s2) {
196-
if (l1[i1++] != l2[i2++])
197-
return 0;
198-
skip_ws:
199-
while (i1 < s1 && XDL_ISSPACE(l1[i1]))
200-
i1++;
201-
while (i2 < s2 && XDL_ISSPACE(l2[i2]))
202-
i2++;
203-
}
204-
} else if (flags & XDF_IGNORE_WHITESPACE_CHANGE) {
205-
while (i1 < s1 && i2 < s2) {
206-
if (XDL_ISSPACE(l1[i1]) && XDL_ISSPACE(l2[i2])) {
207-
/* Skip matching spaces and try again */
208-
while (i1 < s1 && XDL_ISSPACE(l1[i1]))
209-
i1++;
210-
while (i2 < s2 && XDL_ISSPACE(l2[i2]))
211-
i2++;
212-
continue;
213-
}
214-
if (l1[i1++] != l2[i2++])
215-
return 0;
216-
}
217-
} else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) {
218-
while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) {
219-
i1++;
220-
i2++;
221-
}
222-
} else if (flags & XDF_IGNORE_CR_AT_EOL) {
223-
/* Find the first difference and see how the line ends */
224-
while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) {
225-
i1++;
226-
i2++;
227-
}
228-
return (ends_with_optional_cr(l1, s1, i1) &&
229-
ends_with_optional_cr(l2, s2, i2));
230-
}
231-
232-
/*
233-
* After running out of one side, the remaining side must have
234-
* nothing but whitespace for the lines to match. Note that
235-
* ignore-whitespace-at-eol case may break out of the loop
236-
* while there still are characters remaining on both lines.
237-
*/
238-
if (i1 < s1) {
239-
while (i1 < s1 && XDL_ISSPACE(l1[i1]))
240-
i1++;
241-
if (s1 != i1)
242-
return 0;
243-
}
244-
if (i2 < s2) {
245-
while (i2 < s2 && XDL_ISSPACE(l2[i2]))
246-
i2++;
247-
return (s2 == i2);
248-
}
249-
return 1;
250-
}
251-
252-
static unsigned long xdl_hash_record_with_whitespace(char const **data,
253-
char const *top, long flags) {
254-
unsigned long ha = 5381;
255-
char const *ptr = *data;
256-
int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL;
257-
258-
for (; ptr < top && *ptr != '\n'; ptr++) {
259-
if (cr_at_eol_only) {
260-
/* do not ignore CR at the end of an incomplete line */
261-
if (*ptr == '\r' &&
262-
(ptr + 1 < top && ptr[1] == '\n'))
263-
continue;
264-
}
265-
else if (XDL_ISSPACE(*ptr)) {
266-
const char *ptr2 = ptr;
267-
int at_eol;
268-
while (ptr + 1 < top && XDL_ISSPACE(ptr[1])
269-
&& ptr[1] != '\n')
270-
ptr++;
271-
at_eol = (top <= ptr + 1 || ptr[1] == '\n');
272-
if (flags & XDF_IGNORE_WHITESPACE)
273-
; /* already handled */
274-
else if (flags & XDF_IGNORE_WHITESPACE_CHANGE
275-
&& !at_eol) {
276-
ha += (ha << 5);
277-
ha ^= (unsigned long) ' ';
278-
}
279-
else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL
280-
&& !at_eol) {
281-
while (ptr2 != ptr + 1) {
282-
ha += (ha << 5);
283-
ha ^= (unsigned long) *ptr2;
284-
ptr2++;
285-
}
286-
}
287-
continue;
288-
}
289-
ha += (ha << 5);
290-
ha ^= (unsigned long) *ptr;
291-
}
292-
*data = ptr < top ? ptr + 1: ptr;
293-
294-
return ha;
295-
}
296-
297-
unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
298-
unsigned long ha = 5381;
299-
char const *ptr = *data;
300-
301-
if (flags & XDF_WHITESPACE_FLAGS)
302-
return xdl_hash_record_with_whitespace(data, top, flags);
303-
304-
for (; ptr < top && *ptr != '\n'; ptr++) {
305-
ha += (ha << 5);
306-
ha ^= (unsigned long) *ptr;
307-
}
308-
*data = ptr < top ? ptr + 1: ptr;
309-
310-
return ha;
311-
}
312-
313155
unsigned int xdl_hashbits(unsigned int size) {
314156
unsigned int val = 1, bits = 0;
315157

xdiff/xutils.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ void xdl_cha_free(chastore_t *cha);
3333
void *xdl_cha_alloc(chastore_t *cha);
3434
long xdl_guess_lines(mmfile_t *mf, long sample);
3535
int xdl_blankline(const char *line, long size, long flags);
36-
int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
37-
unsigned long xdl_hash_record(char const **data, char const *top, long flags);
36+
u64 xdl_line_hash(u8 const* ptr, usize size, u64 flags);
37+
bool xdl_line_equal(u8 const* lhs, usize lhs_len, u8 const* rhs, usize rhs_len, u64 flags);
3838
unsigned int xdl_hashbits(unsigned int size);
3939
int xdl_num_out(char *out, long val);
4040
int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,

0 commit comments

Comments
 (0)