Skip to content

Commit c2b5c67

Browse files
committed
Minor hash improvements
Use only 28 bits of the pointer: take the lower 32 bits and discard the lowest 4. Inline the linear search by advancing the cell pointer (wrapping around at the end of the table) instead of recomputing the hash value and reducing it modulo the table size on every probe. Average improvement of 10%.
1 parent d7a9a17 commit c2b5c67

File tree

1 file changed

+14
-11
lines changed

1 file changed

+14
-11
lines changed

src/hash.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,16 @@ hashtab * hash_create(size_t n) { return hash_create_(n, .5); }
5454

5555
// Hashing for an open addressing hash table. See Cormen et al., Introduction to Algorithms, 3rd ed., section 11.4.
5656
// This is far from perfect. Make size a prime or a power of two and you'll be able to use double hashing.
57-
static R_INLINE size_t hash_index(SEXP key, uintptr_t multiplier, size_t offset, size_t size) {
57+
static R_INLINE size_t hash_index(SEXP key, uintptr_t multiplier) {
5858
// The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size.
5959
// Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees,
6060
// which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes.
61-
return ((((uintptr_t)key) >> 4) * multiplier + offset) % size;
61+
return ((((uintptr_t)key) >> 4) & 0x0fffffff) * multiplier;
6262
}
6363

6464
void hash_set(hashtab * h, SEXP key, R_xlen_t value) {
65-
for (size_t i = 0; i < h->size; ++i) {
66-
struct hash_pair * cell = h->tb + hash_index(key, h->multiplier, i, h->size);
65+
struct hash_pair *cell = h->tb + hash_index(key, h->multiplier) % h->size, *end = h->tb + h->size - 1;
66+
for (size_t i = 0; i < h->size; ++i, cell = cell == end ? h->tb : cell+1) {
6767
if (cell->key == key) {
6868
cell->value = value;
6969
return;
@@ -83,8 +83,8 @@ void hash_set(hashtab * h, SEXP key, R_xlen_t value) {
8383
}
8484

8585
R_xlen_t hash_lookup(const hashtab * h, SEXP key, R_xlen_t ifnotfound) {
86-
for (size_t i = 0; i < h->size; ++i) {
87-
const struct hash_pair * cell = h->tb + hash_index(key, h->multiplier, i, h->size);
86+
const struct hash_pair * cell = h->tb + hash_index(key, h->multiplier) % h->size, *end = h->tb + h->size - 1;
87+
for (size_t i = 0; i < h->size; ++i, cell = cell == end ? h->tb : cell+1) {
8888
if (cell->key == key) {
8989
return cell->value;
9090
} else if (!cell->key) {
@@ -152,7 +152,7 @@ static void dhash_enlarge(dhashtab_ * self) {
152152
uintptr_t new_multiplier = new_size * hash_multiplier;
153153
for (size_t i = 0; i < self->size; ++i) {
154154
for (size_t j = 0; j < new_size; ++j) {
155-
size_t ii = hash_index(self->table[i].key, new_multiplier, j, new_size);
155+
size_t ii = (hash_index(self->table[i].key, new_multiplier) + j) % new_size;
156156
if (!new[ii].key) {
157157
new[ii] = (struct hash_pair){
158158
.key = self->table[i].key,
@@ -183,9 +183,11 @@ static void dhash_enlarge(dhashtab_ * self) {
183183

184184
void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) {
185185
dhashtab_ * self = (dhashtab_ *)h;
186+
struct hash_pair *cell, *end;
186187
again:
187-
for (size_t i = 0; i < self->size; ++i) {
188-
struct hash_pair * cell = self->table + hash_index(key, self->multiplier, i, self->size);
188+
cell = self->table + hash_index(key, self->multiplier) % self->size;
189+
end = self->table + self->size - 1;
190+
for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) {
189191
if (cell->key == key) {
190192
cell->value = value;
191193
return;
@@ -209,8 +211,9 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) {
209211
#pragma omp flush // no locking or atomic access! this is bad
210212
dhashtab_ self = *(dhashtab_ *)h;
211213
R_xlen_t ret = ifnotfound;
212-
for (size_t i = 0; i < self.size; ++i) {
213-
const struct hash_pair * cell = self.table + hash_index(key, self.multiplier, i, self.size);
214+
const struct hash_pair * cell = self.table + hash_index(key, self.multiplier) % self.size;
215+
const struct hash_pair * end = self.table + self.size - 1;
216+
for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) {
214217
if (cell->key == key) {
215218
ret = cell->value;
216219
goto done;

0 commit comments

Comments
 (0)