diff --git a/src/breakpoint.c b/src/breakpoint.c index da47534bf..72bc68fb9 100644 --- a/src/breakpoint.c +++ b/src/breakpoint.c @@ -12,7 +12,9 @@ static inline int cmp(const void *arg0, const void *arg1) { riscv_word_t *a = (riscv_word_t *) arg0, *b = (riscv_word_t *) arg1; - return (*a < *b) ? _CMP_LESS : (*a > *b) ? _CMP_GREATER : _CMP_EQUAL; + return (*a < *b) ? MAP_CMP_LESS + : (*a > *b) ? MAP_CMP_GREATER + : MAP_CMP_EQUAL; } breakpoint_map_t breakpoint_map_new() diff --git a/src/map.c b/src/map.c index dc7eaa537..8b76f6924 100644 --- a/src/map.c +++ b/src/map.c @@ -10,23 +10,23 @@ * Therefore, credit and sincere thanks are extended to jemalloc for their * invaluable work. * Reference: - * https://github.com/jemalloc/jemalloc/blob/dev/include/jemalloc/internal/rb.h + * https://github.com/jemalloc/jemalloc/blob/dev/include/ \ + * jemalloc/internal/rb.h */ #include +#include #include #include #include #include "map.h" -/* TODO: Avoid relying on key_size and data_size */ struct map_internal { - map_node_t *root; - - /* properties */ - size_t key_size, data_size; - + map_node_t *root; /* Tree root */ + size_t key_size, data_size; /* Size of key/value type */ + size_t size; /* Number of nodes */ + /* Key comparison function */ map_cmp_t (*comparator)(const void *, const void *); }; @@ -42,7 +42,24 @@ struct map_internal { */ #define RB_MAX_DEPTH (sizeof(void *) << 4) -typedef enum { RB_BLACK = 0, RB_RED } map_color_t; +/* Color/pointer manipulation macros */ +#define RB_COLOR_MASK 1UL +#define RB_PTR_MASK (~RB_COLOR_MASK) + +typedef enum { RB_BLACK = 0, RB_RED = 1 } map_color_t; + +/* Helper macros for cleaner access patterns */ +#define RB_IS_RED(node) (rb_node_get_color(node) == RB_RED) +#define RB_IS_BLACK(node) (rb_node_get_color(node) == RB_BLACK) + +/* Prefetch hints for better cache utilization */ +#ifdef __builtin_prefetch +#define PREFETCH_READ(addr) __builtin_prefetch((addr), 0, 1) +#define PREFETCH_WRITE(addr) __builtin_prefetch((addr), 1, 1) +#else 
+#define PREFETCH_READ(addr) ((void) 0) +#define PREFETCH_WRITE(addr) ((void) 0) +#endif /* Left accessors */ static inline map_node_t *rb_node_get_left(const map_node_t *node) @@ -55,47 +72,48 @@ static inline void rb_node_set_left(map_node_t *node, map_node_t *left) node->left = left; } -/* Right accessors */ +/* Right accessors - using consistent masking */ static inline map_node_t *rb_node_get_right(const map_node_t *node) { - return (map_node_t *) (((uintptr_t) node->right_red) & ~3); + return (map_node_t *) (((uintptr_t) node->right_red) & ~RB_COLOR_MASK); } static inline void rb_node_set_right(map_node_t *node, map_node_t *right) { - node->right_red = (map_node_t *) (((uintptr_t) right) | - (((uintptr_t) node->right_red) & 1)); + node->right_red = + (map_node_t *) (((uintptr_t) right) | + (((uintptr_t) node->right_red) & RB_COLOR_MASK)); } /* Color accessors */ static inline map_color_t rb_node_get_color(const map_node_t *node) { - return ((uintptr_t) node->right_red) & 1; + return ((uintptr_t) node->right_red) & RB_COLOR_MASK; } static inline void rb_node_set_color(map_node_t *node, map_color_t color) { node->right_red = - (map_node_t *) (((uintptr_t) node->right_red & ~3) | color); + (map_node_t *) (((uintptr_t) node->right_red & ~RB_COLOR_MASK) | color); } static inline void rb_node_set_red(map_node_t *node) { - node->right_red = (map_node_t *) (((uintptr_t) node->right_red) | 1); + node->right_red = (map_node_t *) (((uintptr_t) node->right_red) | RB_RED); } static inline void rb_node_set_black(map_node_t *node) { - node->right_red = (map_node_t *) (((uintptr_t) node->right_red) & ~3); + node->right_red = + (map_node_t *) (((uintptr_t) node->right_red) & ~RB_COLOR_MASK); } /* Node initializer */ static inline void rb_node_init(map_node_t *node) { - assert((((uintptr_t) node) & (0x1)) == 0); /* a pointer without marker */ - rb_node_set_left(node, NULL); - rb_node_set_right(node, NULL); - rb_node_set_red(node); + assert((((uintptr_t) node) & RB_COLOR_MASK) 
== 0); /* properly aligned */ + node->left = NULL; + node->right_red = (map_node_t *) RB_RED; /* NULL with red color */ } /* Internal helper macros */ @@ -118,102 +136,6 @@ typedef struct { map_cmp_t cmp; } rb_path_entry_t; -static inline map_node_t *rb_search(map_t rb, const map_node_t *node) -{ - map_node_t *ret = rb->root; - while (ret) { - map_cmp_t cmp = (rb->comparator)(node->key, ret->key); - switch (cmp) { - case _CMP_EQUAL: - return ret; - case _CMP_LESS: - ret = rb_node_get_left(ret); - break; - case _CMP_GREATER: - ret = rb_node_get_right(ret); - break; - default: - __UNREACHABLE; - break; - } - } - return ret; -} - -static void rb_insert(map_t rb, map_node_t *node) -{ - rb_path_entry_t path[RB_MAX_DEPTH]; - rb_path_entry_t *pathp; - rb_node_init(node); - - /* Traverse through red-black tree node and find the search target node. */ - path->node = rb->root; - for (pathp = path; pathp->node; pathp++) { - map_cmp_t cmp = pathp->cmp = - (rb->comparator)(node->key, pathp->node->key); - switch (cmp) { - case _CMP_LESS: - pathp[1].node = rb_node_get_left(pathp->node); - break; - case _CMP_GREATER: - pathp[1].node = rb_node_get_right(pathp->node); - break; - default: - /* ignore duplicate key */ - __UNREACHABLE; - break; - } - } - pathp->node = node; - - assert(!rb_node_get_left(node)); - assert(!rb_node_get_right(node)); - - /* Go from target node back to root node and fix color accordingly */ - for (pathp--; (uintptr_t) pathp >= (uintptr_t) path; pathp--) { - map_node_t *cnode = pathp->node; - if (pathp->cmp == _CMP_LESS) { - map_node_t *left = pathp[1].node; - rb_node_set_left(cnode, left); - if (rb_node_get_color(left) == RB_BLACK) - return; - map_node_t *leftleft = rb_node_get_left(left); - if (leftleft && (rb_node_get_color(leftleft) == RB_RED)) { - /* fix up 4-node */ - map_node_t *tnode; - rb_node_set_black(leftleft); - rb_node_rotate_right(cnode, tnode); - cnode = tnode; - } - } else { - map_node_t *right = pathp[1].node; - rb_node_set_right(cnode, 
right); - if (rb_node_get_color(right) == RB_BLACK) - return; - map_node_t *left = rb_node_get_left(cnode); - if (left && (rb_node_get_color(left) == RB_RED)) { - /* split 4-node */ - rb_node_set_black(left); - rb_node_set_black(right); - rb_node_set_red(cnode); - } else { - /* lean left */ - map_node_t *tnode; - map_color_t tcolor = rb_node_get_color(cnode); - rb_node_rotate_left(cnode, tnode); - rb_node_set_color(tnode, tcolor); - rb_node_set_red(cnode); - cnode = tnode; - } - } - pathp->node = cnode; - } - - /* set root, and make it black */ - rb->root = path->node; - rb_node_set_black(rb->root); -} - static void rb_remove(map_t rb, map_node_t *node) { rb_path_entry_t path[RB_MAX_DEPTH]; @@ -225,16 +147,16 @@ static void rb_remove(map_t rb, map_node_t *node) while (pathp->node) { map_cmp_t cmp = pathp->cmp = (rb->comparator)(node->key, pathp->node->key); - if (cmp == _CMP_LESS) { + if (cmp == MAP_CMP_LESS) { pathp[1].node = rb_node_get_left(pathp->node); } else { pathp[1].node = rb_node_get_right(pathp->node); - if (cmp == _CMP_EQUAL) { + if (cmp == MAP_CMP_EQUAL) { /* find node's successor, in preparation for swap */ - pathp->cmp = _CMP_GREATER; + pathp->cmp = MAP_CMP_GREATER; nodep = pathp; for (pathp++; pathp->node; pathp++) { - pathp->cmp = _CMP_LESS; + pathp->cmp = MAP_CMP_LESS; pathp[1].node = rb_node_get_left(pathp->node); } break; @@ -267,7 +189,7 @@ static void rb_remove(map_t rb, map_node_t *node) if (nodep == path) { rb->root = nodep->node; } else { - if (nodep[-1].cmp == _CMP_LESS) + if (nodep[-1].cmp == MAP_CMP_LESS) rb_node_set_left(nodep[-1].node, nodep->node); else rb_node_set_right(nodep[-1].node, nodep->node); @@ -278,8 +200,8 @@ static void rb_remove(map_t rb, map_node_t *node) /* node has no successor, but it has a left child. * Splice node out, without losing the left child. 
*/ - assert(rb_node_get_color(node) == RB_BLACK); - assert(rb_node_get_color(left) == RB_RED); + assert(RB_IS_BLACK(node)); + assert(RB_IS_RED(left)); rb_node_set_black(left); if (pathp == path) { /* the subtree rooted at the node's left child has not @@ -287,7 +209,7 @@ static void rb_remove(map_t rb, map_node_t *node) */ rb->root = left; } else { - if (pathp[-1].cmp == _CMP_LESS) + if (pathp[-1].cmp == MAP_CMP_LESS) rb_node_set_left(pathp[-1].node, left); else rb_node_set_right(pathp[-1].node, left); @@ -306,9 +228,9 @@ static void rb_remove(map_t rb, map_node_t *node) * swapped with its successor). Furthermore, the only nodes with * out-of-date summaries exist in path[0], path[1], ..., pathp[-1]. */ - if (rb_node_get_color(pathp->node) == RB_RED) { + if (RB_IS_RED(pathp->node)) { /* prune red node, which requires no fixup */ - assert(pathp[-1].cmp == _CMP_LESS); + assert(pathp[-1].cmp == MAP_CMP_LESS); rb_node_set_left(pathp[-1].node, NULL); return; } @@ -316,14 +238,14 @@ static void rb_remove(map_t rb, map_node_t *node) /* The node to be pruned is black, so unwind until balance is restored. */ pathp->node = NULL; for (pathp--; (uintptr_t) pathp >= (uintptr_t) path; pathp--) { - assert(pathp->cmp != _CMP_EQUAL); - if (pathp->cmp == _CMP_LESS) { + assert(pathp->cmp != MAP_CMP_EQUAL); + if (pathp->cmp == MAP_CMP_LESS) { rb_node_set_left(pathp->node, pathp[1].node); - if (rb_node_get_color(pathp->node) == RB_RED) { + if (RB_IS_RED(pathp->node)) { map_node_t *right = rb_node_get_right(pathp->node); map_node_t *rightleft = rb_node_get_left(right); map_node_t *tnode; - if (rightleft && (rb_node_get_color(rightleft) == RB_RED)) { + if (rightleft && RB_IS_RED(rightleft)) { /* In the following diagrams, ||, //, and \\ * indicate the path to the removed node. * @@ -351,7 +273,7 @@ static void rb_remove(map_t rb, map_node_t *node) /* Balance restored, but rotation modified subtree root. 
*/ assert((uintptr_t) pathp > (uintptr_t) path); - if (pathp[-1].cmp == _CMP_LESS) + if (pathp[-1].cmp == MAP_CMP_LESS) rb_node_set_left(pathp[-1].node, tnode); else rb_node_set_right(pathp[-1].node, tnode); @@ -359,7 +281,7 @@ static void rb_remove(map_t rb, map_node_t *node) } else { map_node_t *right = rb_node_get_right(pathp->node); map_node_t *rightleft = rb_node_get_left(right); - if (rightleft && (rb_node_get_color(rightleft) == RB_RED)) { + if (rightleft && RB_IS_RED(rightleft)) { /* || * pathp(b) * // \ @@ -379,7 +301,7 @@ static void rb_remove(map_t rb, map_node_t *node) /* set root */ rb->root = tnode; } else { - if (pathp[-1].cmp == _CMP_LESS) + if (pathp[-1].cmp == MAP_CMP_LESS) rb_node_set_left(pathp[-1].node, tnode); else rb_node_set_right(pathp[-1].node, tnode); @@ -402,12 +324,11 @@ static void rb_remove(map_t rb, map_node_t *node) } else { rb_node_set_right(pathp->node, pathp[1].node); map_node_t *left = rb_node_get_left(pathp->node); - if (rb_node_get_color(left) == RB_RED) { + if (RB_IS_RED(left)) { map_node_t *tnode; map_node_t *leftright = rb_node_get_right(left); map_node_t *leftrightleft = rb_node_get_left(leftright); - if (leftrightleft && - (rb_node_get_color(leftrightleft) == RB_RED)) { + if (leftrightleft && RB_IS_RED(leftrightleft)) { /* || * pathp(b) * / \\ @@ -446,15 +367,15 @@ static void rb_remove(map_t rb, map_node_t *node) /* set root */ rb->root = tnode; } else { - if (pathp[-1].cmp == _CMP_LESS) + if (pathp[-1].cmp == MAP_CMP_LESS) rb_node_set_left(pathp[-1].node, tnode); else rb_node_set_right(pathp[-1].node, tnode); } return; - } else if (rb_node_get_color(pathp->node) == RB_RED) { + } else if (RB_IS_RED(pathp->node)) { map_node_t *leftleft = rb_node_get_left(left); - if (leftleft && (rb_node_get_color(leftleft) == RB_RED)) { + if (leftleft && RB_IS_RED(leftleft)) { /* || * pathp(r) * / \\ @@ -469,7 +390,7 @@ static void rb_remove(map_t rb, map_node_t *node) rb_node_rotate_right(pathp->node, tnode); /* Balance restored, but 
rotation modified subtree root. */ assert((uintptr_t) pathp > (uintptr_t) path); - if (pathp[-1].cmp == _CMP_LESS) + if (pathp[-1].cmp == MAP_CMP_LESS) rb_node_set_left(pathp[-1].node, tnode); else rb_node_set_right(pathp[-1].node, tnode); @@ -489,7 +410,7 @@ static void rb_remove(map_t rb, map_node_t *node) } } else { map_node_t *leftleft = rb_node_get_left(left); - if (leftleft && (rb_node_get_color(leftleft) == RB_RED)) { + if (leftleft && RB_IS_RED(leftleft)) { /* || * pathp(b) * / \\ @@ -507,7 +428,7 @@ static void rb_remove(map_t rb, map_node_t *node) /* set root */ rb->root = tnode; } else { - if (pathp[-1].cmp == _CMP_LESS) + if (pathp[-1].cmp == MAP_CMP_LESS) rb_node_set_left(pathp[-1].node, tnode); else rb_node_set_right(pathp[-1].node, tnode); @@ -529,7 +450,7 @@ static void rb_remove(map_t rb, map_node_t *node) /* set root */ rb->root = path->node; - assert(rb_node_get_color(rb->root) == RB_BLACK); + assert(RB_IS_BLACK(rb->root)); } static void rb_destroy_recurse(map_t rb, map_node_t *node) @@ -541,103 +462,382 @@ static void rb_destroy_recurse(map_t rb, map_node_t *node) rb_node_set_left((node), NULL); rb_destroy_recurse(rb, rb_node_get_right(node)); rb_node_set_right((node), NULL); - free(node->key); - free(node->data); + /* Single free for entire block (node + key + data) */ free(node); } -static map_node_t *map_create_node(void *key, - void *value, +/* Create node with single allocation */ +static map_node_t *map_create_node(const void *key, + const void *value, size_t ksize, size_t vsize) { - map_node_t *node = calloc(1, sizeof(map_node_t)); - assert(node); + /* Calculate aligned offsets more efficiently */ + const size_t align_mask = sizeof(void *) - 1; + size_t key_offset = (sizeof(map_node_t) + align_mask) & ~align_mask; + size_t data_offset = (key_offset + ksize + align_mask) & ~align_mask; + size_t total_size = data_offset + vsize; - /* allocate memory for the keys and data */ - node->key = malloc(ksize), node->data = malloc(vsize); - 
assert(node->key); - assert(node->data); + /* Check for overflow */ + if (unlikely(total_size < vsize || total_size < ksize)) + return NULL; - /* copy over the key and values. - * If the parameter passed in is NULL, make the element blank instead of - * a segmentation fault. - */ - if (!key) - memset(node->key, 0, ksize); - else - memcpy(node->key, key, ksize); + char *mem = malloc(total_size); + if (unlikely(!mem)) + return NULL; - if (!value) - memset(node->data, 0, vsize); + map_node_t *node = (map_node_t *) mem; + node->key = mem + key_offset; + node->data = mem + data_offset; + + /* Initialize node linkage */ + rb_node_init(node); + + /* Copy key and value data efficiently */ + if (key) + memcpy(node->key, key, ksize); else + memset(node->key, 0, ksize); + + if (value) memcpy(node->data, value, vsize); + else + memset(node->data, 0, vsize); return node; } -/* Constructor */ -map_t map_new(size_t s1, - size_t s2, +/* Constructor - creates a new map instance */ +map_t map_new(size_t key_size, + size_t data_size, map_cmp_t (*cmp)(const void *, const void *)) { + /* Validate sizes to prevent integer overflow in allocation */ + if (key_size == 0 || data_size == 0 || !cmp) + return NULL; + + /* Prevent overflow: ensure total allocation size is reasonable */ + size_t max_size = SIZE_MAX / 4; /* Conservative limit */ + if (key_size > max_size || data_size > max_size || + (key_size + data_size) > max_size - sizeof(map_node_t)) + return NULL; + map_t tree = malloc(sizeof(struct map_internal)); - assert(tree); + if (!tree) + return NULL; - tree->key_size = s1, tree->data_size = s2; + tree->key_size = key_size; + tree->data_size = data_size; tree->comparator = cmp; tree->root = NULL; + tree->size = 0; return tree; } -/* Add function */ -bool map_insert(map_t obj, void *key, void *val) +/* Insert with single traversal - hot path */ +static inline const map_node_t *rb_insert_unique(map_t rb, + const void *key, + rb_path_entry_t *path, + rb_path_entry_t **pathp_out) +{ + 
rb_path_entry_t *pathp; + + /* Single traversal to find insertion point or existing key */ + path->node = rb->root; + size_t depth = 0; + for (pathp = path; pathp->node && depth < RB_MAX_DEPTH - 1; + pathp++, depth++) { + map_cmp_t cmp = pathp->cmp = (rb->comparator)(key, pathp->node->key); + if (cmp == MAP_CMP_LESS) { + pathp[1].node = rb_node_get_left(pathp->node); + } else if (cmp == MAP_CMP_GREATER) { + pathp[1].node = rb_node_get_right(pathp->node); + } else { + /* Key already exists */ + return pathp->node; + } + } + + /* Key doesn't exist, return NULL and set pathp for insertion */ + if (depth >= RB_MAX_DEPTH - 1) + return (const map_node_t *) -1; /* Tree too deep */ + *pathp_out = pathp; + return NULL; +} + +/* Insert a key-value pair into the map */ +bool map_insert(map_t obj, const void *key, const void *val) { + if (!obj || !key) + return false; + + rb_path_entry_t path[RB_MAX_DEPTH]; + rb_path_entry_t *pathp; + + /* Single traversal to check existence and get insertion point */ + const map_node_t *existing = rb_insert_unique(obj, key, path, &pathp); + if (existing == (const map_node_t *) -1) + return false; /* Tree too deep */ + if (existing) + return false; /* Key already exists */ + + /* Create and insert new node */ map_node_t *node = map_create_node(key, val, obj->key_size, obj->data_size); - rb_insert(obj, node); + if (!node) + return false; + + /* Node already initialized in map_create_node, just set in path */ + pathp->node = node; + + /* Fix up red-black tree properties */ + for (pathp--; (uintptr_t) pathp >= (uintptr_t) path; pathp--) { + map_node_t *cnode = pathp->node; + if (pathp->cmp == MAP_CMP_LESS) { + map_node_t *left = pathp[1].node; + rb_node_set_left(cnode, left); + if (RB_IS_BLACK(left)) + break; + map_node_t *leftleft = rb_node_get_left(left); + if (leftleft && RB_IS_RED(leftleft)) { + /* fix up 4-node */ + map_node_t *tnode; + rb_node_set_black(leftleft); + rb_node_rotate_right(cnode, tnode); + cnode = tnode; + } + } else { + 
map_node_t *right = pathp[1].node; + rb_node_set_right(cnode, right); + if (RB_IS_BLACK(right)) + break; + map_node_t *left = rb_node_get_left(cnode); + if (left && RB_IS_RED(left)) { + /* split 4-node */ + rb_node_set_black(left); + rb_node_set_black(right); + rb_node_set_red(cnode); + } else { + /* lean left */ + map_node_t *tnode; + map_color_t tcolor = rb_node_get_color(cnode); + rb_node_rotate_left(cnode, tnode); + rb_node_set_color(tnode, tcolor); + rb_node_set_red(cnode); + cnode = tnode; + } + } + pathp->node = cnode; + } + + /* Set root and make it black */ + obj->root = path->node; + rb_node_set_black(obj->root); + obj->size++; return true; } -/* Get functions */ -void map_find(map_t obj, map_iter_t *it, void *key) +/* Get functions, avoiding stack allocation */ +void map_find(map_t obj, map_iter_t *it, const void *key) { - map_node_t tmp_node = {.key = key}; - it->node = rb_search(obj, &tmp_node); + if (unlikely(!obj || !it)) { + if (it) + it->node = NULL; + return; + } + + map_node_t *node = obj->root; + + /* Prefetch for large trees */ + if (node && obj->size > 10000) { + PREFETCH_READ(node->left); + PREFETCH_READ(rb_node_get_right(node)); + } + + while (node) { + map_cmp_t cmp = obj->comparator(key, node->key); + if (cmp == MAP_CMP_EQUAL) { + it->node = node; + return; + } + node = (cmp == MAP_CMP_LESS) ? 
node->left : rb_node_get_right(node); + } + it->node = NULL; } bool map_empty(map_t obj) { - return !obj->root; + return unlikely(!obj) || !obj->root; } /* Iteration */ -bool map_at_end(map_t m UNUSED, map_iter_t *it) +bool map_at_end(map_t m, const map_iter_t *it) { + (void) m; /* Suppress unused parameter warning */ return !(it->node); } /* Remove functions */ void map_erase(map_t obj, map_iter_t *it) { - if (!it->node) + if (!obj || !it || !it->node) + return; + + /* An empty map cannot contain the node; nothing to remove */ + if (obj->size == 0) return; rb_remove(obj, it->node); - free(it->node->key); - free(it->node->data); + /* Single free for entire block (node + key + data) */ free(it->node); + it->node = NULL; + + /* Prevent underflow */ + if (obj->size > 0) + obj->size--; } /* Empty map */ void map_clear(map_t obj) { + if (!obj) + return; rb_destroy_recurse(obj, obj->root); obj->root = NULL; + obj->size = 0; } -/* Destructor */ +/* Destroy map and free all resources */ void map_delete(map_t obj) { + if (!obj) + return; map_clear(obj); free(obj); } + +/* Get number of elements in map */ +size_t map_size(map_t obj) +{ + return likely(obj) ? 
obj->size : 0; +} + +/* Iterator traversal functions */ + +void map_first(map_t map, map_iter_t *it) +{ + if (unlikely(!map || !it)) { + if (it) + it->node = NULL; + return; + } + + map_node_t *node = map->root; + if (likely(node)) { + while (node->left) + node = node->left; + } + + it->node = node; + it->prev = NULL; + it->count = 0; +} + +void map_last(map_t map, map_iter_t *it) +{ + if (unlikely(!map || !it)) { + if (it) + it->node = NULL; + return; + } + + map_node_t *node = map->root; + if (likely(node)) { + map_node_t *right; + while ((right = rb_node_get_right(node))) + node = right; + } + + it->node = node; + it->prev = NULL; + it->count = 0; +} + +void map_next(map_t map, map_iter_t *it) +{ + if (unlikely(!map || !it || !it->node)) { + if (it) + it->node = NULL; + return; + } + + map_node_t *node = it->node; + map_node_t *right = rb_node_get_right(node); + + /* If right subtree exists, find leftmost node in right subtree */ + if (right) { + while (right->left) + right = right->left; + it->node = right; + return; + } + + /* Find successor by searching from root */ + map_node_t *succ = NULL; + map_node_t *curr = map->root; + + while (curr) { + map_cmp_t cmp = map->comparator(it->node->key, curr->key); + if (cmp == MAP_CMP_LESS) { + succ = curr; + curr = curr->left; + } else if (cmp == MAP_CMP_GREATER) { + curr = rb_node_get_right(curr); + } else { + break; + } + } + + it->node = succ; +} + +void map_prev(map_t map, map_iter_t *it) +{ + if (!map || !it || !it->node) { + if (it) + it->node = NULL; + return; + } + + map_node_t *node = it->node; + + /* If left subtree exists, find rightmost node in left subtree */ + if (node->left) { + node = node->left; + while (rb_node_get_right(node)) + node = rb_node_get_right(node); + it->node = node; + return; + } + + /* Otherwise, find the first ancestor that is a right child */ + /* We need to traverse up, but we don't have parent pointers */ + /* So we need to find the predecessor by searching from root */ + + 
map_node_t *pred = NULL; + map_node_t *curr = map->root; + + while (curr) { + map_cmp_t cmp = map->comparator(it->node->key, curr->key); + if (cmp == MAP_CMP_GREATER) { + pred = curr; + curr = rb_node_get_right(curr); + } else if (cmp == MAP_CMP_LESS) { + curr = curr->left; + } else { + /* Found the node, predecessor is already set or NULL */ + break; + } + } + + it->node = pred; +} diff --git a/src/map.h b/src/map.h index 13a589be7..91e844f77 100644 --- a/src/map.h +++ b/src/map.h @@ -3,81 +3,169 @@ * "LICENSE" for information on usage and redistribution of this file. */ -/* C Implementation for C++ std::map using red-black tree. +/* Memory-efficient red-black tree map implementation. * - * Any data type can be stored in a map, just like std::map. - * A map instance requires the specification of two file types: - * 1. the key; - * 2. what data type the tree node will store; + * This implementation is optimized for minimal memory overhead while providing + * O(log n) insertion, deletion, and lookup operations. The design is inspired + * by the Linux kernel's intrusive data structures and jemalloc's rb.h. * - * It will also require a comparison function to sort the tree. + * Key features: + * - Color bit stored in LSB of the right pointer (2 link pointers per node) + * - No parent pointer - uses stack-based traversal instead + * - Support for any data type through generic key/value storage */ #pragma once #include #include +#include -/* Store the key, data, and values of each element in the tree. - * This is the main basis of the entire tree aside from the root struct. +/* Red-black tree node structure. 
* - * @left: pointer to the left child in the tree - * @right_red: combination of a pointer to right child and @color (lowest - * bit) + * Memory layout optimized to reduce overhead: + * - Color bit encoded in LSB of right_red pointer + * - No parent pointer (uses traversal stack instead) + * - Key and data stored as flexible pointers + * - Single allocation strategy for node+key+data improves cache locality * - * The red-black tree consists of a root and nodes attached to this root. + * This design keeps the footprint small (4 pointers + payload per node) + * while providing O(log n) operations with excellent cache performance. */ typedef struct map_node { - void *key, *data; - struct map_node *left, *right_red; /* red-black tree */ + void *key, *data; /* Pointer to key/value data */ + struct map_node *left; /* Left child */ + struct map_node *right_red; /* Right child + color bit in LSB */ } map_node_t; -typedef enum { _CMP_LESS = -1, _CMP_EQUAL = 0, _CMP_GREATER = 1 } map_cmp_t; - +/* Verify pointer alignment for color bit storage */ +#ifdef __STDC_VERSION__ +#if __STDC_VERSION__ >= 201112L +_Static_assert(_Alignof(void *) >= 2, + "Pointer alignment insufficient for color bit storage"); +#endif +#endif + +/* Comparison result enumeration */ +typedef enum { + MAP_CMP_LESS = -1, + MAP_CMP_EQUAL = 0, + MAP_CMP_GREATER = 1 +} map_cmp_t; + +/* Opaque map handle */ typedef struct map_internal *map_t; +/* Iterator for tree traversal */ typedef struct { - map_node_t *prev, *node; - size_t count; + map_node_t *node; /* Current node */ + map_node_t *prev; /* Previous node (for deletion safety) */ + size_t count; /* Iteration count */ } map_iter_t; #define map_iter_value(it, type) (*(type *) (it)->node->data) +#define map_iter_key(it, type) (*(type *) (it)->node->key) -/* Integer comparison */ -static inline map_cmp_t map_cmp_int(const void *arg0, const void *arg1) +/* Integer comparison - optimized branchless version */ +FORCE_INLINE map_cmp_t map_cmp_int(const void *arg0, 
const void *arg1) { - int *a = (int *) arg0; - int *b = (int *) arg1; - return (*a < *b) ? _CMP_LESS : (*a > *b) ? _CMP_GREATER : _CMP_EQUAL; + const int a = *(const int *) arg0; + const int b = *(const int *) arg1; + return (map_cmp_t) ((a > b) - (a < b)); } -/* Unsigned integer comparison */ -static inline map_cmp_t map_cmp_uint(const void *arg0, const void *arg1) +/* Unsigned integer comparison - optimized branchless version */ +FORCE_INLINE map_cmp_t map_cmp_uint(const void *arg0, const void *arg1) { - unsigned int *a = (unsigned int *) arg0; - unsigned int *b = (unsigned int *) arg1; - return (*a < *b) ? _CMP_LESS : (*a > *b) ? _CMP_GREATER : _CMP_EQUAL; + const unsigned int a = *(const unsigned int *) arg0; + const unsigned int b = *(const unsigned int *) arg1; + return (map_cmp_t) ((a > b) - (a < b)); } -/* Constructor */ -map_t map_new(size_t, size_t, map_cmp_t (*cmp)(const void *, const void *)); +/* Constructor - creates a new map instance + * @param key_size: Size of key type in bytes + * @param data_size: Size of value type in bytes + * @param cmp: Comparison function for ordering keys + * @return: New map instance, or NULL on invalid arguments or failed malloc + */ +map_t map_new(size_t key_size, + size_t data_size, + map_cmp_t (*cmp)(const void *, const void *)); + +/* Insert a key-value pair into the map + * @param obj: Map instance + * @param key: Pointer to key data + * @param val: Pointer to value data + * @return: true if inserted; false on duplicate key, NULL obj/key, or failure + */ +bool map_insert(map_t obj, const void *key, const void *val); + +/* Find a key in the map + * @param obj: Map instance + * @param it: Iterator to store result + * @param key: Key to search for + */ +void map_find(map_t obj, map_iter_t *it, const void *key); + +/* Check if map is empty + * @param obj: Map instance + * @return: true if empty, false otherwise + */ +bool map_empty(map_t obj); + +/* Check if iterator is at end + * @param m: Map instance + * @param it: Iterator to check + * @return: true if 
at end, false if valid + */ +bool map_at_end(map_t m, const map_iter_t *it); + +/* Remove node at iterator position + * @param obj: Map instance + * @param it: Iterator pointing to node to remove + */ +void map_erase(map_t obj, map_iter_t *it); + +/* Remove all nodes from map + * @param obj: Map instance + */ +void map_clear(map_t obj); + +/* Destroy map and free all resources + * @param obj: Map instance to destroy + */ +void map_delete(map_t obj); -/* Add function */ -bool map_insert(map_t, void *, void *); +/* Convenience macro for map initialization with type safety */ +#define map_init(key_type, element_type, cmp_func) \ + map_new(sizeof(key_type), sizeof(element_type), cmp_func) -/* Get functions */ -void map_find(map_t, map_iter_t *, void *); -bool map_empty(map_t); +/* Get size of map (number of elements) + * @param obj: Map instance + * @return: Number of elements in map + */ +size_t map_size(map_t obj); -/* Iteration */ -bool map_at_end(map_t, map_iter_t *); +/* Get iterator to first element (smallest key) + * @param map: Map instance + * @param it: Iterator to initialize + */ +void map_first(map_t map, map_iter_t *it); -/* Remove functions */ -void map_erase(map_t, map_iter_t *); -void map_clear(map_t); +/* Get iterator to last element (largest key) + * @param map: Map instance + * @param it: Iterator to initialize + */ +void map_last(map_t map, map_iter_t *it); -/* Destructor */ -void map_delete(map_t); +/* Move iterator to next element (in-order traversal) + * @param map: Map instance + * @param it: Iterator to advance + */ +void map_next(map_t map, map_iter_t *it); -#define map_init(key_type, element_type, __func) \ - map_new(sizeof(key_type), sizeof(element_type), __func) +/* Move iterator to previous element (reverse in-order traversal) + * @param map: Map instance + * @param it: Iterator to move back + */ +void map_prev(map_t map, map_iter_t *it); diff --git a/tests/map/test-map.c b/tests/map/test-map.c index 593a955dd..f7783c9eb 100644 --- 
a/tests/map/test-map.c
+++ b/tests/map/test-map.c
@@ -1,12 +1,25 @@
+/*
+ * rv32emu is freely redistributable under the MIT License. See the file
+ * "LICENSE" for information on usage and redistribution of this file.
+ */
+
 #include
+#include
 #include
 #include
 #include
+#include
+#include
 #include
 
 #include "map.h"
 #include "mt19937.h"
 
+/* ANSI color codes */
+#define COLOR_GREEN "\033[32m"
+#define COLOR_RESET "\033[0m"
+
+/* Helper function for original test */
 static void swap(int *x, int *y)
 {
     int tmp = *x;
@@ -16,7 +29,7 @@ static void swap(int *x, int *y)
 
 enum { N_NODES = 10000 };
 
-/* return 0 on success; non-zero values on failure */
+/* Mixed operations test: returns 0 on success, non-zero on failure */
 static int test_map_mixed_operations(void)
 {
     int ret = 0;
@@ -24,9 +37,7 @@ static int test_map_mixed_operations(void)
 
     int key[N_NODES], val[N_NODES];
 
-    /*
-     * Generate data for insertion
-     */
+    /* Generate data for insertion */
     for (int i = 0; i < N_NODES; i++) {
         key[i] = i;
         val[i] = mt19937_extract();
@@ -71,25 +82,25 @@ static int test_map_mixed_operations(void)
         map_insert(tree, key + i, val + i);
         map_find(tree, &my_it, key + i);
         if (!my_it.node) {
-            ret = 1; /* test fail */
+            ret = 1;
             goto free_tree;
         }
         assert((*(int *) (my_it.node->data)) == val[i]);
     }
 
     /* remove 2nd quarter of items */
     for (int i = N_NODES / 4 + 1; i < N_NODES / 2; i++) {
         map_iter_t my_it;
         map_find(tree, &my_it, key + i);
         if (map_at_end(tree, &my_it)) {
-            ret = 1; /* test fail */
+            ret = 1;
             goto free_tree;
         }
         map_erase(tree, &my_it);
         map_find(tree, &my_it, key + i);
         if (my_it.node) {
-            ret = 1; /* test fail */
+            ret = 1;
             goto free_tree;
         }
     }
@@ -100,6 +111,401 @@ static int test_map_mixed_operations(void)
     return ret;
 }
 
+/*
+ * Get root node from map for validation.
+ *
+ * map_t is opaque to the tests, so this mirrors the leading member of
+ * struct map_internal in src/map.c. The root pointer must remain the first
+ * member there for this cast to stay valid.
+ */
+static map_node_t *get_root(map_t obj)
+{
+    struct map_internal {
+        map_node_t *root;
+    };
+    struct map_internal *m = (struct map_internal *) obj;
+    return m ? m->root : NULL;
+}
+
+/*
+ * Validate red-black tree properties of the subtree rooted at node.
+ *
+ * Checks that no red node has a red child and that the left and right
+ * subtrees have equal black heights. On success, returns 1 and stores the
+ * black height (a NULL leaf counts as 1) in *black_height. On a violation,
+ * prints a diagnostic to stderr and returns 0.
+ */
+static int validate_rb_properties(map_node_t *node, int *black_height)
+{
+    if (!node) {
+        *black_height = 1;
+        return 1;
+    }
+
+    /* Bit 0 of right_red holds the color; mask it off to get the pointer */
+    int is_red = (uintptr_t) node->right_red & 1;
+    map_node_t *left = node->left;
+    map_node_t *right =
+        (map_node_t *) ((uintptr_t) node->right_red & ~(uintptr_t) 1);
+
+    if (is_red) {
+        if (left && ((uintptr_t) left->right_red & 1)) {
+            fprintf(stderr, "Red-red violation: red node has red left child\n");
+            return 0;
+        }
+        if (right && ((uintptr_t) right->right_red & 1)) {
+            fprintf(stderr,
+                    "Red-red violation: red node has red right child\n");
+            return 0;
+        }
+    }
+
+    int left_height, right_height;
+    if (!validate_rb_properties(left, &left_height))
+        return 0;
+    if (!validate_rb_properties(right, &right_height))
+        return 0;
+
+    if (left_height != right_height) {
+        fprintf(stderr, "Black height mismatch: left=%d, right=%d\n",
+                left_height, right_height);
+        return 0;
+    }
+
+    *black_height = left_height + (is_red ? 0 : 1);
+    return 1;
+}
+
+/*
+ * Insert keys 1..100 in ascending order, then check that the root is black
+ * and that the whole tree satisfies the red-black invariants.
+ * Returns 0 on success, 1 on failure.
+ */
+static int test_rb_properties(void)
+{
+    printf(" Testing red-black tree properties...");
+
+    map_t m = map_init(int, int, map_cmp_int);
+    if (!m) {
+        fprintf(stderr, "Failed to create map\n");
+        return 1;
+    }
+
+    for (int i = 1; i <= 100; i++) {
+        int val = i * 10;
+        if (!map_insert(m, &i, &val)) {
+            fprintf(stderr, "Failed to insert %d\n", i);
+            map_delete(m);
+            return 1;
+        }
+    }
+
+    map_node_t *root = get_root(m);
+    if (!root) {
+        fprintf(stderr, "Root is NULL after insertions\n");
+        map_delete(m);
+        return 1;
+    }
+
+    if ((uintptr_t) root->right_red & 1) {
+        fprintf(stderr, "Root is not black\n");
+        map_delete(m);
+        return 1;
+    }
+
+    int black_height;
+    if (!validate_rb_properties(root, &black_height)) {
+        map_delete(m);
+        return 1;
+    }
+
+    printf(" " COLOR_GREEN "[OK]" COLOR_RESET " (black height: %d)\n",
+           black_height);
+
+    map_delete(m);
+    return 0;
+}
+
+/* Stress tests */
+
+#define STRESS_SIZE 100000
+
+/*
+ * Peak resident set size of this process in kilobytes, or 0 if it cannot be
+ * queried. getrusage() reports ru_maxrss in kilobytes on Linux but in bytes
+ * on macOS, so only the latter needs scaling.
+ */
+static long get_memory_usage(void)
+{
+    struct rusage usage;
+    if (getrusage(RUSAGE_SELF, &usage) != 0)
+        return 0;
+#if defined(__APPLE__)
+    return usage.ru_maxrss / 1024;
+#else
+    return usage.ru_maxrss;
+#endif
+}
+
+/* Monotonic timestamp in microseconds, for measuring elapsed intervals */
+static long get_time_us(void)
+{
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return ts.tv_sec * 1000000L + ts.tv_nsec / 1000;
+}
+
+/*
+ * Insert STRESS_SIZE sequential keys, verify every value, erase the even
+ * keys and confirm the odd keys survive. Also reports the growth in peak RSS.
+ * Returns 0 on success, 1 on failure.
+ */
+static int test_memory_stress(void)
+{
+    printf(" Testing memory stress (100K elements)...");
+
+    long mem_start = get_memory_usage();
+
+    map_t m = map_init(int, int, map_cmp_int);
+    if (!m) {
+        fprintf(stderr, "Failed to create map\n");
+        return 1;
+    }
+
+    for (int i = 0; i < STRESS_SIZE; i++) {
+        int val = i * 7;
+        if (!map_insert(m, &i, &val)) {
+            fprintf(stderr, "Failed to insert %d\n", i);
+            map_delete(m);
+            return 1;
+        }
+    }
+
+    long mem_after_insert = get_memory_usage();
+
+    for (int i = 0; i < STRESS_SIZE; i++) {
+        map_iter_t it;
+        map_find(m, &it, &i);
+        if (map_at_end(m, &it) || map_iter_value(&it, int) != i * 7) {
+            fprintf(stderr, "Value mismatch at %d\n", i);
+            map_delete(m);
+            return 1;
+        }
+    }
+
+    for (int i = 0; i < STRESS_SIZE; i += 2) {
+        map_iter_t it;
+        map_find(m, &it, &i);
+        if (!map_at_end(m, &it))
+            map_erase(m, &it);
+    }
+
+    for (int i = 1; i < STRESS_SIZE; i += 2) {
+        map_iter_t it;
+        map_find(m, &it, &i);
+        if (map_at_end(m, &it)) {
+            fprintf(stderr, "Element %d missing after deletion\n", i);
+            map_delete(m);
+            return 1;
+        }
+    }
+
+    map_delete(m);
+
+    printf(" " COLOR_GREEN "[OK]" COLOR_RESET
+           " (peak: %ld KB, ~%ld bytes/element)\n",
+           mem_after_insert,
+           (mem_after_insert - mem_start) * 1024 / STRESS_SIZE);
+
+    return 0;
+}
+
+/*
+ * Time bulk insertion and lookup of sequential keys for several map sizes
+ * and print the throughput. Timings are informational only; the test fails
+ * only when a map cannot be created or an insertion is rejected.
+ * Returns 0 on success, 1 on failure.
+ */
+static int test_performance_scaling(void)
+{
+    printf(" Testing performance scaling...");
+
+    const int sizes[] = {1000, 10000, 100000};
+    const size_t n_sizes = sizeof(sizes) / sizeof(sizes[0]);
+
+    for (size_t s = 0; s < n_sizes; s++) {
+        int size = sizes[s];
+        map_t m = map_init(int, int, map_cmp_int);
+        if (!m) {
+            fprintf(stderr, "Failed to create map\n");
+            return 1;
+        }
+
+        long start = get_time_us();
+        for (int i = 0; i < size; i++) {
+            int val = i;
+            if (!map_insert(m, &i, &val)) {
+                fprintf(stderr, "Failed to insert %d\n", i);
+                map_delete(m);
+                return 1;
+            }
+        }
+        long insert_time = get_time_us() - start;
+
+        start = get_time_us();
+        for (int i = 0; i < size; i++) {
+            map_iter_t it;
+            map_find(m, &it, &i);
+        }
+        long lookup_time = get_time_us() - start;
+
+        printf(
+            "\n Size %6d: insert %.2fms (%.1fM ops/s), lookup %.2fms (%.1fM "
+            "ops/s)",
+            size, insert_time / 1000.0,
+            insert_time > 0 ? (double) size / insert_time : 0,
+            lookup_time / 1000.0,
+            lookup_time > 0 ? (double) size / lookup_time : 0);
+
+        map_delete(m);
+    }
+
+    printf("\n Performance scaling... " COLOR_GREEN "[OK]" COLOR_RESET "\n");
+    return 0;
+}
+
+/* Edge case tests */
+
+typedef struct {
+    char data[1024];
+    int checksum;
+} large_value_t;
+
+/*
+ * Check that a freshly created map reports itself empty, that first/last/find
+ * all yield an end iterator, and that the map is still empty after a clear.
+ * Returns 0 on success, 1 on failure.
+ */
+static int test_empty_map(void)
+{
+    printf(" Testing empty map operations...");
+
+    map_t m = map_init(int, int, map_cmp_int);
+    if (!m) {
+        fprintf(stderr, "Failed to create map\n");
+        return 1;
+    }
+
+    if (!map_empty(m) || map_size(m) != 0) {
+        fprintf(stderr, "New map not empty\n");
+        map_delete(m);
+        return 1;
+    }
+
+    map_iter_t it;
+    map_first(m, &it);
+    if (!map_at_end(m, &it)) {
+        fprintf(stderr, "First iterator not at end for empty map\n");
+        map_delete(m);
+        return 1;
+    }
+
+    map_last(m, &it);
+    if (!map_at_end(m, &it)) {
+        fprintf(stderr, "Last iterator not at end for empty map\n");
+        map_delete(m);
+        return 1;
+    }
+
+    int key = 42;
+    map_find(m, &it, &key);
+    if (!map_at_end(m, &it)) {
+        fprintf(stderr, "Find returned non-end for empty map\n");
+        map_delete(m);
+        return 1;
+    }
+
+    map_clear(m);
+    if (!map_empty(m)) {
+        fprintf(stderr, "Map not empty after clear\n");
+        map_delete(m);
+        return 1;
+    }
+
+    map_delete(m);
+    printf(" " COLOR_GREEN "[OK]" COLOR_RESET "\n");
+    return 0;
+}
+
+/*
+ * Store INT_MIN, INT_MAX and 0 as keys, look each one up, and check that
+ * INT_MIN and INT_MAX come out as the first and last keys respectively.
+ * Returns 0 on success, 1 on failure.
+ */
+static int test_boundary_values(void)
+{
+    printf(" Testing boundary values...");
+
+    map_t m = map_init(int, int, map_cmp_int);
+    if (!m) {
+        fprintf(stderr, "Failed to create map\n");
+        return 1;
+    }
+
+    int min_key = INT_MIN, min_val = -999;
+    int max_key = INT_MAX, max_val = 999;
+    int zero_key = 0, zero_val = 0;
+
+    if (!map_insert(m, &min_key, &min_val) ||
+        !map_insert(m, &max_key, &max_val) ||
+        !map_insert(m, &zero_key, &zero_val)) {
+        fprintf(stderr, "Failed to insert boundary values\n");
+        map_delete(m);
+        return 1;
+    }
+
+    map_iter_t it;
+
+    map_find(m, &it, &min_key);
+    if (map_at_end(m, &it) || map_iter_value(&it, int) != min_val) {
+        fprintf(stderr, "INT_MIN value mismatch\n");
+        map_delete(m);
+        return 1;
+    }
+
+    map_find(m, &it, &max_key);
+    if (map_at_end(m, &it) || map_iter_value(&it, int) != max_val) {
+        fprintf(stderr, "INT_MAX value mismatch\n");
+        map_delete(m);
+        return 1;
+    }
+
+    map_find(m, &it, &zero_key);
+    if (map_at_end(m, &it) || map_iter_value(&it, int) != zero_val) {
+        fprintf(stderr, "Zero value mismatch\n");
+        map_delete(m);
+        return 1;
+    }
+
+    map_first(m, &it);
+    if (map_at_end(m, &it) || *(int *) it.node->key != INT_MIN) {
+        fprintf(stderr, "First key is not INT_MIN\n");
+        map_delete(m);
+        return 1;
+    }
+
+    map_last(m, &it);
+    if (map_at_end(m, &it) || *(int *) it.node->key != INT_MAX) {
+        fprintf(stderr, "Last key is not INT_MAX\n");
+        map_delete(m);
+        return 1;
+    }
+
+    map_delete(m);
+    printf(" " COLOR_GREEN "[OK]" COLOR_RESET "\n");
+    return 0;
+}
+
 int main(int argc, char *argv[])
 {
     (void) argc;
@@ -107,5 +513,36 @@ int main(int argc, char *argv[])
 
     mt19937_init(time(NULL));
 
-    return test_map_mixed_operations();
+    printf("Map tests:\n");
+
+    int failed = 0;
+
+    /* Mixed operations test */
+    printf(" Testing mixed operations...");
+    if (test_map_mixed_operations()) {
+        printf(" FAILED\n");
+        failed = 1;
+    } else {
+        printf(" " COLOR_GREEN "[OK]" COLOR_RESET "\n");
+    }
+
+    /* Validation tests */
+    if (test_rb_properties())
+        failed = 1;
+
+    /* Stress tests */
+    if (test_memory_stress())
+        failed = 1;
+
+    if (test_performance_scaling())
+        failed = 1;
+
+    /* Edge case tests */
+    if (test_empty_map())
+        failed = 1;
+
+    if (test_boundary_values())
+        failed = 1;
+
+    return failed;
 }