Skip to content

Commit 9e3d0bd

Browse files
committed
Merge branch 'am/xdiff-hash-tweak'
Inspired by Ezekiel's recent effort to showcase a Rust interface, the hash function implementation used to hash lines has been updated to the one used for ELF symbol lookup by Glibc. * am/xdiff-hash-tweak: xdiff: optimize xdl_hash_record_verbatim xdiff: refactor xdl_hash_record()
2 parents 8d5e429 + a4bbe8a commit 9e3d0bd

File tree

2 files changed

+66
-10
lines changed

2 files changed

+66
-10
lines changed

xdiff/xutils.c

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
249249
return 1;
250250
}
251251

252-
static unsigned long xdl_hash_record_with_whitespace(char const **data,
252+
unsigned long xdl_hash_record_with_whitespace(char const **data,
253253
char const *top, long flags) {
254254
unsigned long ha = 5381;
255255
char const *ptr = *data;
@@ -294,19 +294,67 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
294294
return ha;
295295
}
296296

297-
unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
298-
unsigned long ha = 5381;
297+
/*
298+
* Compiler reassociation barrier: pretend to modify X and Y to disallow
299+
* changing evaluation order with respect to following uses of X and Y.
300+
*/
301+
#ifdef __GNUC__
302+
#define REASSOC_FENCE(x, y) __asm__("" : "+r"(x), "+r"(y))
303+
#else
304+
#define REASSOC_FENCE(x, y)
305+
#endif
306+
307+
unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
308+
unsigned long ha = 5381, c0, c1;
299309
char const *ptr = *data;
300-
301-
if (flags & XDF_WHITESPACE_FLAGS)
302-
return xdl_hash_record_with_whitespace(data, top, flags);
303-
310+
#if 0
311+
/*
312+
* The baseline form of the optimized loop below. This is the djb2
313+
* hash (the above function uses a variant with XOR instead of ADD).
314+
*/
304315
for (; ptr < top && *ptr != '\n'; ptr++) {
305316
ha += (ha << 5);
306-
ha ^= (unsigned long) *ptr;
317+
ha += (unsigned long) *ptr;
307318
}
308319
*data = ptr < top ? ptr + 1: ptr;
309-
320+
#else
321+
/* Process two characters per iteration. */
322+
if (top - ptr >= 2) do {
323+
if ((c0 = ptr[0]) == '\n') {
324+
*data = ptr + 1;
325+
return ha;
326+
}
327+
if ((c1 = ptr[1]) == '\n') {
328+
*data = ptr + 2;
329+
c0 += ha;
330+
REASSOC_FENCE(c0, ha);
331+
ha = ha * 32 + c0;
332+
return ha;
333+
}
334+
/*
335+
* Combine characters C0 and C1 into the hash HA. We have
336+
* HA = (HA * 33 + C0) * 33 + C1, and we want to ensure
337+
* that the dependency chain over HA is just one multiplication
338+
* and one addition, i.e. we want to evaluate this as
339+
* HA = HA * 33 * 33 + (C0 * 33 + C1), and likewise prefer
340+
* (C0 * 32 + (C0 + C1)) for the expression in parentheses.
341+
*/
342+
ha *= 33 * 33;
343+
c1 += c0;
344+
REASSOC_FENCE(c1, c0);
345+
c1 += c0 * 32;
346+
REASSOC_FENCE(c1, ha);
347+
ha += c1;
348+
349+
ptr += 2;
350+
} while (ptr < top - 1);
351+
*data = top;
352+
if (ptr < top && (c0 = ptr[0]) != '\n') {
353+
c0 += ha;
354+
REASSOC_FENCE(c0, ha);
355+
ha = ha * 32 + c0;
356+
}
357+
#endif
310358
return ha;
311359
}
312360

xdiff/xutils.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,15 @@ void *xdl_cha_alloc(chastore_t *cha);
3434
long xdl_guess_lines(mmfile_t *mf, long sample);
3535
int xdl_blankline(const char *line, long size, long flags);
3636
int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
37-
unsigned long xdl_hash_record(char const **data, char const *top, long flags);
37+
unsigned long xdl_hash_record_verbatim(char const **data, char const *top);
38+
unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags);
39+
static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags)
40+
{
41+
if (flags & XDF_WHITESPACE_FLAGS)
42+
return xdl_hash_record_with_whitespace(data, top, flags);
43+
else
44+
return xdl_hash_record_verbatim(data, top);
45+
}
3846
unsigned int xdl_hashbits(unsigned int size);
3947
int xdl_num_out(char *out, long val);
4048
int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,

0 commit comments

Comments
 (0)