Skip to content

Commit 62e6cc5

Browse files
amonakov authored and gitster committed
xdiff: optimize xdl_hash_record_verbatim
xdl_hash_record_verbatim uses a modified djb2 hash with XOR instead of ADD for combining. The ADD-based variant is used as the basis of the modern ("GNU") symbol lookup scheme in ELF. The Glibc dynamic loader received an optimized version of this hash function thanks to Noah Goldstein [1].

Switch xdl_hash_record_verbatim to additive hashing and implement an optimized loop following the scheme suggested by Noah.

Timing 'git log --oneline --shortstat v2.0.0..v2.5.0' under perf, I got

version | cycles, bn | instructions, bn
---------------------------------------
A       | 6.38       | 11.3
B       | 6.21       | 10.89
C       | 5.80       | 9.95
D       | 5.83       | 8.74
---------------------------------------

A: baseline (git master at e4ef048)
B: plus 'xdiff: refactor xdl_hash_record()'
C: and plus this patch
D: with 'xdiff: use xxhash' by Phillip Wood

The resulting speedup for xdl_hash_record_verbatim itself is about 1.5x.

[1] https://inbox.sourceware.org/libc-alpha/[email protected]/

Signed-off-by: Alexander Monakov <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 41d9783 commit 62e6cc5

File tree

1 file changed

+55
-4
lines changed

1 file changed

+55
-4
lines changed

xdiff/xutils.c

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -294,16 +294,67 @@ unsigned long xdl_hash_record_with_whitespace(char const **data,
294294
return ha;
295295
}
296296

297+
/*
 * Compiler reassociation barrier: pretend to modify X and Y to disallow
 * changing evaluation order with respect to following uses of X and Y.
 *
 * Spelled __asm__ rather than asm so that it also compiles in strict ISO
 * modes (e.g. -std=c99 or -std=c11), where the plain asm keyword is not
 * recognized. Without GNU extensions the fence is a no-op expression.
 */
#ifdef __GNUC__
#define REASSOC_FENCE(x, y) __asm__("" : "+r"(x), "+r"(y))
#else
#define REASSOC_FENCE(x, y) ((void)0)
#endif

/*
 * Hash the bytes in [*data, top) up to, but not including, the first '\n'
 * using the additive djb2 recurrence HA = HA * 33 + C, seeded with 5381.
 *
 * On return *data points just past the consumed '\n', or equals top when
 * no '\n' was found before top. The hash value is returned.
 */
unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
	unsigned long ha = 5381, c0, c1;
	char const *ptr = *data;
#if 0
	/*
	 * The baseline form of the optimized loop below. This is the djb2
	 * hash (the above function uses a variant with XOR instead of ADD).
	 */
	for (; ptr < top && *ptr != '\n'; ptr++) {
		ha += (ha << 5);
		ha += (unsigned long) *ptr;
	}
	*data = ptr < top ? ptr + 1: ptr;
#else
	/* Process two characters per iteration. */
	if (top - ptr >= 2) {
		do {
			if ((c0 = ptr[0]) == '\n') {
				/* Line ends here; nothing more to mix in. */
				*data = ptr + 1;
				return ha;
			}
			if ((c1 = ptr[1]) == '\n') {
				/* Only C0 belongs to the line: HA = HA * 33 + C0. */
				*data = ptr + 2;
				c0 += ha;
				REASSOC_FENCE(c0, ha);
				ha = ha * 32 + c0;
				return ha;
			}
			/*
			 * Combine characters C0 and C1 into the hash HA. We have
			 * HA = (HA * 33 + C0) * 33 + C1, and we want to ensure
			 * that dependency chain over HA is just one multiplication
			 * and one addition, i.e. we want to evaluate this as
			 * HA = HA * 33 * 33 + (C0 * 33 + C1), and likewise prefer
			 * (C0 * 32 + (C0 + C1)) for the expression in parenthesis.
			 */
			ha *= 33 * 33;
			c1 += c0;
			REASSOC_FENCE(c1, c0);
			c1 += c0 * 32;
			REASSOC_FENCE(c1, ha);
			ha += c1;

			ptr += 2;
		} while (ptr < top - 1);
	}
	/* No '\n' was seen in the pairs above: at most one byte remains. */
	*data = top;
	if (ptr < top && (c0 = ptr[0]) != '\n') {
		/* Mix in the trailing odd character: HA = HA * 33 + C0. */
		c0 += ha;
		REASSOC_FENCE(c0, ha);
		ha = ha * 32 + c0;
	}
#endif
	return ha;
}
309360

0 commit comments

Comments
 (0)