Skip to content

Commit 48b1942

Browse files
committed
remove xor folding
1 parent 1d88ad4 commit 48b1942

File tree

2 files changed

+13
-14
lines changed

2 files changed

+13
-14
lines changed

src/chmatch.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,12 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
101101
free(counts);
102102
free(map);
103103
} else if (chin) {
104-
#pragma omp parallel for if(xlen > 100000) schedule(static) num_threads(getDTthreads(xlen, false))
104+
#pragma omp parallel for num_threads(getDTthreads(xlen, true))
105105
for (int i=0; i<xlen; i++) {
106106
ansd[i] = hash_lookup(marks,xd[i],0)<0;
107107
}
108108
} else {
109-
#pragma omp parallel for if(xlen > 100000) schedule(static) num_threads(getDTthreads(xlen, false))
109+
#pragma omp parallel for num_threads(getDTthreads(xlen, true))
110110
for (int i=0; i<xlen; i++) {
111111
const int m = hash_lookup(marks,xd[i],0);
112112
ansd[i] = (m<0) ? -m : nomatch;

src/hash.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -65,22 +65,21 @@ static hashtab * hash_create_(size_t n, double load_factor) {
6565

6666
hashtab * hash_create(size_t n) { return hash_create_(n, .5); }
6767

68-
// Fast hash mixing using XOR-shift and integer multiplication
68+
// double hashing
6969
static R_INLINE size_t hash_index1(SEXP key, uintptr_t multiplier) {
70-
uintptr_t h = (uintptr_t)key >> 4;
71-
// XOR folding to mix high bits into low bits
72-
h ^= h >> 16;
73-
h *= multiplier;
74-
h ^= h >> 13;
75-
return h;
70+
// The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size.
71+
// Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees,
72+
// which are at least 4 bytes on 32-bit platforms and most likely more than 8 bytes.
73+
return ((((uintptr_t)key) >> 4) & 0x0fffffff) * multiplier;
7674
}
7775

7876
static R_INLINE size_t hash_index2(SEXP key, uintptr_t multiplier) {
79-
uintptr_t h = (uintptr_t)key >> 6;
80-
h ^= h >> 18;
81-
h *= multiplier;
82-
h ^= h >> 15;
83-
return h;
77+
// For double hashing, we need a second, different hash whose value is coprime with the table size.
78+
// We use higher-order bits that hash_index1 mostly ignores, and ensure
79+
// the result is always odd (coprime with power-of-2 table sizes).
80+
uintptr_t ptr = (uintptr_t)key;
81+
ptr = (ptr >> 12) | (ptr << (sizeof(uintptr_t) * 8 - 12));
82+
return ((ptr & 0x0fffffff) * multiplier) | 1;
8483
}
8584

8685
void hash_rehash(hashtab *h) {

0 commit comments

Comments
 (0)