Skip to content

Commit 60a6138

Browse files
committed
Merge branch 'am/xdiff-hash-tweak' into seen
Inspired by Ezekiel's recent effort to showcase a Rust interface, the hash function implementation used to hash lines has been updated to the one used for ELF symbol lookup by Glibc.

Comments?

* am/xdiff-hash-tweak:
  fixup! xdiff: optimize xdl_hash_record_verbatim
  xdiff: optimize xdl_hash_record_verbatim
  xdiff: refactor xdl_hash_record()
2 parents d605126 + 67d684a commit 60a6138

File tree

2 files changed

+66
-10
lines changed

2 files changed

+66
-10
lines changed

xdiff/xutils.c

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
249249
return 1;
250250
}
251251

252-
static unsigned long xdl_hash_record_with_whitespace(char const **data,
252+
unsigned long xdl_hash_record_with_whitespace(char const **data,
253253
char const *top, long flags) {
254254
unsigned long ha = 5381;
255255
char const *ptr = *data;
@@ -294,19 +294,67 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
294294
return ha;
295295
}
296296

297-
unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
298-
unsigned long ha = 5381;
297+
/*
298+
* Compiler reassociation barrier: pretend to modify X and Y to disallow
299+
* changing evaluation order with respect to following uses of X and Y.
300+
*/
301+
#ifdef __GNUC__
302+
#define REASSOC_FENCE(x, y) __asm__("" : "+r"(x), "+r"(y))
303+
#else
304+
#define REASSOC_FENCE(x, y)
305+
#endif
306+
307+
unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
308+
unsigned long ha = 5381, c0, c1;
299309
char const *ptr = *data;
300-
301-
if (flags & XDF_WHITESPACE_FLAGS)
302-
return xdl_hash_record_with_whitespace(data, top, flags);
303-
310+
#if 0
311+
/*
312+
* The baseline form of the optimized loop below. This is the djb2
313+
* hash (the above function uses a variant with XOR instead of ADD).
314+
*/
304315
for (; ptr < top && *ptr != '\n'; ptr++) {
305316
ha += (ha << 5);
306-
ha ^= (unsigned long) *ptr;
317+
ha += (unsigned long) *ptr;
307318
}
308319
*data = ptr < top ? ptr + 1: ptr;
309-
320+
#else
321+
/* Process two characters per iteration. */
322+
if (top - ptr >= 2) do {
323+
if ((c0 = ptr[0]) == '\n') {
324+
*data = ptr + 1;
325+
return ha;
326+
}
327+
if ((c1 = ptr[1]) == '\n') {
328+
*data = ptr + 2;
329+
c0 += ha;
330+
REASSOC_FENCE(c0, ha);
331+
ha = ha * 32 + c0;
332+
return ha;
333+
}
334+
/*
335+
* Combine characters C0 and C1 into the hash HA. We have
336+
* HA = (HA * 33 + C0) * 33 + C1, and we want to ensure
337+
* that the dependency chain over HA is just one multiplication
338+
* and one addition, i.e. we want to evaluate this as
339+
* HA = HA * 33 * 33 + (C0 * 33 + C1), and likewise prefer
340+
* (C0 * 32 + (C0 + C1)) for the expression in parentheses.
341+
*/
342+
ha *= 33 * 33;
343+
c1 += c0;
344+
REASSOC_FENCE(c1, c0);
345+
c1 += c0 * 32;
346+
REASSOC_FENCE(c1, ha);
347+
ha += c1;
348+
349+
ptr += 2;
350+
} while (ptr < top - 1);
351+
*data = top;
352+
if (ptr < top && (c0 = ptr[0]) != '\n') {
353+
c0 += ha;
354+
REASSOC_FENCE(c0, ha);
355+
ha = ha * 32 + c0;
356+
}
357+
#endif
310358
return ha;
311359
}
312360

xdiff/xutils.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,15 @@ void *xdl_cha_alloc(chastore_t *cha);
3434
long xdl_guess_lines(mmfile_t *mf, long sample);
3535
int xdl_blankline(const char *line, long size, long flags);
3636
int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
37-
unsigned long xdl_hash_record(char const **data, char const *top, long flags);
37+
unsigned long xdl_hash_record_verbatim(char const **data, char const *top);
38+
unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags);
39+
static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags)
40+
{
41+
if (flags & XDF_WHITESPACE_FLAGS)
42+
return xdl_hash_record_with_whitespace(data, top, flags);
43+
else
44+
return xdl_hash_record_verbatim(data, top);
45+
}
3846
unsigned int xdl_hashbits(unsigned int size);
3947
int xdl_num_out(char *out, long val);
4048
int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,

0 commit comments

Comments
 (0)