Skip to content

Commit f4943ae

Browse files
author
icgmilk
committed
Switch hashmap implementation to open addressing
Previously, the chaining implementation incurred pointer chasing and per-node allocations, leading to poor cache locality and long chains under collisions. Open addressing (with linear probing) stores all entries in a single contiguous array and removes the per-node allocations. We cap the load factor at 50% to keep probe sequences short and reduce clustering. This change trades ~320 kB of additional memory for ~30 ms faster execution, primarily due to better cache locality. Note that the current implementation has no "deleted" (tombstone) state, because there is no in-place delete use case.
1 parent f265042 commit f4943ae

File tree

2 files changed

+62
-74
lines changed

2 files changed

+62
-74
lines changed

src/defs.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,13 @@ typedef struct {
100100
typedef struct hashmap_node {
101101
char *key;
102102
void *val;
103-
struct hashmap_node *next;
103+
bool occupied;
104104
} hashmap_node_t;
105105

106106
typedef struct {
107107
int size;
108108
int cap;
109-
hashmap_node_t **buckets;
109+
hashmap_node_t *table;
110110
} hashmap_t;
111111

112112
/* lexer tokens */

src/globals.c

Lines changed: 60 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,9 @@ void arena_free(arena_t *arena)
373373
*/
374374
int hashmap_hash_index(int size, char *key)
375375
{
376+
if (!key)
377+
return 0;
378+
376379
int hash = 0x811c9dc5;
377380

378381
for (; *key; key++) {
@@ -414,90 +417,61 @@ hashmap_t *hashmap_create(int cap)
414417

415418
map->size = 0;
416419
map->cap = round_up_pow2(cap);
417-
map->buckets = calloc(map->cap, sizeof(hashmap_node_t *));
420+
map->table = calloc(map->cap, sizeof(hashmap_node_t));
418421

419-
if (!map->buckets) {
420-
printf("Failed to allocate buckets in hashmap_t\n");
422+
if (!map->table) {
423+
printf("Failed to allocate table in hashmap_t\n");
421424
free(map);
422425
return NULL;
423426
}
424427

425428
return map;
426429
}
427430

428-
/* Create a hashmap node on heap.
429-
* @key: The key of node. Must not be NULL.
430-
* @val: The value of node. Could be NULL.
431-
*
432-
* Return: The pointer of created node.
433-
*/
434-
hashmap_node_t *hashmap_node_new(char *key, void *val)
435-
{
436-
if (!key)
437-
return NULL;
438-
439-
const int len = strlen(key);
440-
hashmap_node_t *node = arena_alloc(HASHMAP_ARENA, sizeof(hashmap_node_t));
441-
442-
443-
if (!node) {
444-
printf("Failed to allocate hashmap_node_t\n");
445-
return NULL;
446-
}
447-
448-
node->key = arena_alloc(HASHMAP_ARENA, len + 1);
449-
if (!node->key) {
450-
printf("Failed to allocate hashmap_node_t key with size %d\n", len + 1);
451-
return NULL;
452-
}
453-
454-
strcpy(node->key, key);
455-
node->val = val;
456-
node->next = NULL;
457-
return node;
458-
}
459431

460432
void hashmap_rehash(hashmap_t *map)
461433
{
462434
if (!map)
463435
return;
464436

465437
int old_cap = map->cap;
466-
hashmap_node_t **old_buckets = map->buckets;
438+
hashmap_node_t *old_table = map->table;
467439

468440
map->cap <<= 1;
469-
map->buckets = calloc(map->cap, sizeof(hashmap_node_t *));
441+
map->table = calloc(map->cap, sizeof(hashmap_node_t));
470442

471-
if (!map->buckets) {
472-
printf("Failed to allocate new buckets in hashmap_t\n");
473-
map->buckets = old_buckets;
443+
if (!map->table) {
444+
printf("Failed to allocate new table in hashmap_t\n");
445+
map->table = old_table;
474446
map->cap = old_cap;
475447
return;
476448
}
477449

450+
map->size = 0;
451+
478452
for (int i = 0; i < old_cap; i++) {
479-
hashmap_node_t *cur = old_buckets[i];
480-
hashmap_node_t *next;
481-
hashmap_node_t *target_cur;
482-
483-
while (cur) {
484-
next = cur->next;
485-
cur->next = NULL;
486-
int index = hashmap_hash_index(map->cap, cur->key);
487-
target_cur = map->buckets[index];
488-
489-
if (!target_cur) {
490-
map->buckets[index] = cur;
491-
} else {
492-
cur->next = target_cur;
493-
map->buckets[index] = cur;
453+
if (old_table[i].occupied) {
454+
char *key = old_table[i].key;
455+
void *val = old_table[i].val;
456+
457+
int index = hashmap_hash_index(map->cap, key);
458+
int start = index;
459+
460+
while (map->table[index].occupied) {
461+
index = (index + 1) & (map->cap - 1);
462+
if (index == start) {
463+
printf("Error: New table is full during rehash\n");
464+
abort();
465+
}
494466
}
495467

496-
cur = next;
468+
map->table[index].key = key;
469+
map->table[index].val = val;
470+
map->table[index].occupied = true;
471+
map->size++;
497472
}
498473
}
499-
500-
free(old_buckets);
474+
free(old_table);
501475
}
502476

503477
/* Put a key-value pair into given hashmap.
@@ -513,22 +487,30 @@ void hashmap_put(hashmap_t *map, char *key, void *val)
513487
if (!map)
514488
return;
515489

490+
/* Check if size of map exceeds load factor 50% (or 1/2 of capacity) */
491+
if ((map->cap >> 1) <= map->size)
492+
hashmap_rehash(map);
493+
516494
int index = hashmap_hash_index(map->cap, key);
517-
hashmap_node_t *cur = map->buckets[index],
518-
*new_node = hashmap_node_new(key, val);
495+
int start = index;
519496

520-
if (!cur) {
521-
map->buckets[index] = new_node;
522-
} else {
523-
while (cur->next)
524-
cur = cur->next;
525-
cur->next = new_node;
497+
while (map->table[index].occupied) {
498+
if (!strcmp(map->table[index].key, key)) {
499+
map->table[index].val = val;
500+
return;
501+
}
502+
503+
index = (index + 1) & (map->cap - 1);
504+
if (index == start) {
505+
printf("Error: Hashmap is full\n");
506+
abort();
507+
}
526508
}
527509

510+
map->table[index].key = arena_strdup(HASHMAP_ARENA, key);
511+
map->table[index].val = val;
512+
map->table[index].occupied = true;
528513
map->size++;
529-
/* Check if size of map exceeds load factor 75% (or 3/4 of capacity) */
530-
if ((map->cap >> 2) + (map->cap >> 1) <= map->size)
531-
hashmap_rehash(map);
532514
}
533515

534516
/* Get key-value pair node from hashmap from given key.
@@ -544,10 +526,16 @@ hashmap_node_t *hashmap_get_node(hashmap_t *map, char *key)
544526
return NULL;
545527

546528
int index = hashmap_hash_index(map->cap, key);
529+
int start = index;
530+
531+
while (map->table[index].occupied) {
532+
if (!strcmp(map->table[index].key, key))
533+
return &map->table[index];
547534

548-
for (hashmap_node_t *cur = map->buckets[index]; cur; cur = cur->next)
549-
if (!strcmp(cur->key, key))
550-
return cur;
535+
index = (index + 1) & (map->cap - 1);
536+
if (index == start)
537+
return NULL;
538+
}
551539

552540
return NULL;
553541
}
@@ -585,7 +573,7 @@ void hashmap_free(hashmap_t *map)
585573
if (!map)
586574
return;
587575

588-
free(map->buckets);
576+
free(map->table);
589577
free(map);
590578
}
591579

0 commit comments

Comments
 (0)