@@ -192,19 +192,24 @@ int main() {
192192
193193# Generic Hash Map
194194
195- Copy of the code on [Compiler Explorer](https://godbolt.org/z/d176e16eb):
195+ Although Clang manages to completely optimize the hash map away, GCC doesn't, even when passed these extra flags:
196+ - `-finline-limit=999999`
197+ - `--param max-inline-insns-single=999999`
198+ - `--param max-inline-insns-auto=999999`
199+
200+ Copy of the code on [Compiler Explorer](https://godbolt.org/z/eecK3rK7z):
196201
197202```c
198- #include <assert.h>
199203#include <stdbool.h>
200204#include <math.h>
201205#include <stddef.h>
202206#include <stdio.h>
207+ #include <stdlib.h>
203208#include <string.h>
204209
205210typedef struct {
206211 bool occupied; // slot-in-use flag; a zero-filled buffer therefore reads as all slots empty
207- unsigned char key_value[]; // C99 flexible array member
212+ // key and value are stored inline in memory after `occupied`: key_size key bytes at offset sizeof(bool), then value_size value bytes
208213} entry; // NOTE(review): the added code reaches slots through bool*/unsigned char* casts and sizeof(bool); confirm whether this typedef is still used anywhere
209214
210215typedef struct {
@@ -213,109 +218,101 @@ typedef struct {
213218} hashmap;
214219
215220__attribute__((always_inline)) // hashmap_init: binds caller-provided storage `buf` to map `m` for ks-byte keys and vs-byte values, and marks every slot empty
216- static inline void hashmap_init(hashmap *m, void *buf, size_t ks, size_t vs, size_t cap ) {
221+ static inline void hashmap_init(hashmap *m, void *buf, size_t ks, size_t vs, size_t capacity ) {
217222 m->entries = buf; // storage is borrowed, not allocated here; buf must span capacity * entry_size bytes (the memset below writes that many)
218- m->capacity = cap ;
223+ m->capacity = capacity ; // NOTE(review): capacity == 0 is not rejected here, and the insert/get macros compute hash % capacity
219224 m->key_size = ks;
220225 m->value_size = vs;
221- m->entry_size = sizeof(entry ) + ks + vs;
226+ m->entry_size = sizeof(bool ) + ks + vs; // per-slot stride: occupied flag, key bytes, value bytes, packed back to back
222- memset(buf, 0, cap * m->entry_size);
227+ memset(buf, 0, capacity * m->entry_size); // zero fill clears every occupied flag
223228}
224229
225230// Naive hashing: maps a key of `size` bytes to a size_t; the insert/get code reduces it with % capacity to pick the first probe slot
226231__attribute__((always_inline))
227232static inline size_t hash(const void *key, size_t size) {
228- size_t h = 0;
229- for (size_t i = 0; i < size; i++)
230- h = h * 31 + ((unsigned char*)key)[i];
231- return h;
233+ return 42; // constant result: key and size are ignored, so every key starts probing at slot 42 % capacity
232234}
233235
234- __attribute__((always_inline))
235- static inline void hashmap_insert(hashmap *m, const void *key, const void *val) {
236- size_t idx = hash(key, m->key_size) % m->capacity;
237- for (size_t i = 0; i < m->capacity; i++) {
238- entry *e = (entry*)((unsigned char*)m->entries + ((idx + i) % m->capacity) * m->entry_size);
239- if (!e->occupied || memcmp(e->key_value, key, m->key_size) == 0) {
240- e->occupied = true;
241- memcpy(e->key_value, key, m->key_size);
242- memcpy(e->key_value + m->key_size, val, m->value_size);
243- return;
244- }
245- }
246- }
236+ // Macro versions (no memcpy, no memcmp): byte loops do the comparing and copying. HASHMAP_INSERT linear-probes from hash % capacity and stores into the first slot that is empty or holds an equal key; a table full of other keys is left unchanged with no error. key_val/value_val must be lvalues (their address is taken) and must not be named like the macro's own locals (idx, i, j, kv, ...).
237+ #define HASHMAP_INSERT(m, key_val, value_val) \
238+ do { \
239+ size_t idx = hash(&(key_val), (m)->key_size) % (m)->capacity; \
240+ for (size_t i = 0; i < (m)->capacity; i++) { \
241+ unsigned char *e_ptr = (unsigned char*)(m)->entries + \
242+ ((idx + i) % (m)->capacity) * (m)->entry_size; \
243+ bool *occupied = (bool*)e_ptr; \
244+ unsigned char *kv = e_ptr + sizeof(bool); \
245+ bool keys_match = true; \
246+ if (*occupied) { \
247+ const unsigned char *key_ptr = (unsigned char*)&(key_val); \
248+ for (size_t j = 0; j < (m)->key_size; j++) { \
249+ if (kv[j] != key_ptr[j]) { \
250+ keys_match = false; \
251+ break; \
252+ } \
253+ } \
254+ } \
255+ if (!*occupied || keys_match) { \
256+ *occupied = true; \
257+ const unsigned char *src_key = (unsigned char*)&(key_val); \
258+ const unsigned char *src_val = (unsigned char*)&(value_val); \
259+ for (size_t j = 0; j < (m)->key_size; j++) kv[j] = src_key[j]; \
260+ for (size_t j = 0; j < (m)->value_size; j++) \
261+ kv[(m)->key_size + j] = src_val[j]; \
262+ break; \
263+ } \
264+ } \
265+ } while (0)
247266// HASHMAP_GET: probes the same slot sequence as HASHMAP_INSERT and copies the stored value bytes into out_lvalue when key_val is found. It stops at the first empty slot or after capacity slots; on a miss out_lvalue is left untouched and, unlike the removed hashmap_get, no found/not-found result is produced.
248- __attribute__((always_inline))
249- static inline bool hashmap_get(hashmap *m, const void *key, void *out) {
250- size_t idx = hash(key, m->key_size) % m->capacity;
251- for (size_t i = 0; i < m->capacity; i++) {
252- entry *e = (entry*)((unsigned char*)m->entries + ((idx + i) % m->capacity) * m->entry_size);
253- if (!e->occupied) return false;
254- if (memcmp(e->key_value, key, m->key_size) == 0) {
255- memcpy(out, e->key_value + m->key_size, m->value_size);
256- return true;
257- }
258- }
259- return false;
260- }
267+ #define HASHMAP_GET(m, key_val, out_lvalue) \
268+ do { \
269+ size_t idx = hash(&(key_val), (m)->key_size) % (m)->capacity; \
270+ for (size_t i = 0; i < (m)->capacity; i++) { \
271+ unsigned char *e_ptr = (unsigned char*)(m)->entries + \
272+ ((idx + i) % (m)->capacity) * (m)->entry_size; \
273+ bool *occupied = (bool*)e_ptr; \
274+ unsigned char *kv = e_ptr + sizeof(bool); \
275+ if (!*occupied) break; \
276+ bool keys_match = true; \
277+ const unsigned char *key_ptr = (unsigned char*)&(key_val); \
278+ for (size_t j = 0; j < (m)->key_size; j++) { \
279+ if (kv[j] != key_ptr[j]) { \
280+ keys_match = false; \
281+ break; \
282+ } \
283+ } \
284+ if (keys_match) { \
285+ unsigned char *dst = (unsigned char*)&(out_lvalue); \
286+ for (size_t j = 0; j < (m)->value_size; j++) \
287+ dst[j] = kv[(m)->key_size + j]; \
288+ break; \
289+ } \
290+ } \
291+ } while (0)
261292
262- // Entry of (int, char*) map
263- typedef struct {
264- bool occupied;
265- int key;
266- char *value;
267- } IntStrMap_entry;
293+ __attribute__((always_inline)) // macro_version: smoke test of the macro API on a heap-backed (int -> char*) map: one insert, one lookup
294+ static inline void macro_version(size_t capacity) {
295+ // (int, char*) map
296+ size_t entry_size1 = sizeof(bool) + sizeof(int) + sizeof(char*); // must equal the entry_size hashmap_init derives from the same key/value sizes
268297
269- // Entry of (int, double) map
270- typedef struct {
271- bool occupied;
272- int key;
273- double value;
274- } IntDblMap_entry;
298+ unsigned char *buf1 = calloc(capacity, entry_size1); // heap storage for `capacity` slots; hashmap_init zero-fills it again
299+ if (buf1 == NULL) __builtin_unreachable(); // tells the optimizer calloc succeeded; NOTE(review): an actual NULL here is undefined behavior, not a handled error
275300
276- int main(void) {
277- // Test (int, char*) map
278- IntStrMap_entry buf1[8] = {0};
279301 hashmap m1;
280- hashmap_init(&m1, buf1, sizeof(int), sizeof(char*), 8);
281-
282- int k1 = 42;
283- char *v1 = "foo";
284- hashmap_insert(&m1, &k1, &v1);
285-
286- int k2 = 7;
287- char *v2 = "bar";
288- hashmap_insert(&m1, &k2, &v2);
289-
290- char *r1;
291- hashmap_get(&m1, &k1, &r1);
292- char *r2;
293- hashmap_get(&m1, &k2, &r2);
294-
295- assert(strcmp(r1, "foo") == 0);
296- assert(strcmp(r2, "bar") == 0);
297-
298- // Test (int, double) map
299- IntDblMap_entry buf2[8] = {0};
300- hashmap m2;
301- hashmap_init(&m2, buf2, sizeof(int), sizeof(double), 8);
302-
303- int k3 = 10;
304- double v3 = 3.14159;
305- hashmap_insert(&m2, &k3, &v3);
306-
307- int k4 = 20;
308- double v4 = 2.71828;
309- hashmap_insert(&m2, &k4, &v4);
310-
311- double d1;
312- hashmap_get(&m2, &k3, &d1);
313- double d2;
314- hashmap_get(&m2, &k4, &d2);
315-
316- assert(fabs(d1 - 3.14159) < 0.00001);
317- assert(fabs(d2 - 2.71828) < 0.00001);
318-
319- printf("Hash map test passed.\n");
302+ hashmap_init(&m1, buf1, sizeof(int), sizeof(char*), capacity);
303+
304+ int k1 = 42; char *v1 = "foo";
305+ HASHMAP_INSERT(&m1, k1, v1); // stores the pointer v1 itself (sizeof(char*) bytes), not the characters it points to
306+
307+ char *r1; HASHMAP_GET(&m1, k1, r1); // r1 has no initial value and is written only if k1 is found
308+ if (strcmp(r1, "foo") != 0) __builtin_unreachable(); // stands in for the removed assert(): a mismatch is declared unreachable rather than reported
309+
310+ free(buf1);
311+ }
312+
313+ int main() { // entry point: runs the single macro-based test; NOTE(review): the removed version spelled this int main(void)
314+ size_t capacity = 2; // table size handed to macro_version, which inserts one key
315+ macro_version(capacity);
316+ printf("All tests passed.\n"); // printed unconditionally once macro_version returns
320317}
321318```
0 commit comments