@@ -12,18 +12,13 @@ block_list_t BLOCKS;
1212macro_t * MACROS ;
1313int macros_idx = 0 ;
1414
15- /* the first element is reserved for global scope */
16- func_t * FUNCS ;
17- int funcs_idx = 1 ;
18-
19- /* FUNC_TRIES is used to improve the performance of the find_func function.
20- * Instead of searching through all functions and comparing their names, we can
21- * utilize the trie data structure to search for existing functions efficiently.
22- * The index starts from 1 because the first trie node represents an empty input
23- * string, and it is not possible to record a function with an empty name.
15+ /* FUNCS_MAP integrates function storage and speeds up function lookup.
16+ * It currently hashes function names with the FNV-1a hash function. The
17+ * number of buckets defaults to MAX_FUNCS. Ideally, it should be a small
18+ * number, but because rehashing is not implemented yet, a large number of
19+ * buckets has to be created up front to limit collisions.
2420 */
25- trie_t * FUNC_TRIES ;
26- int func_tries_idx = 1 ;
21+ hashmap_t * FUNCS_MAP ;
2722
2823type_t * TYPES ;
2924int types_idx = 0 ;
@@ -72,72 +67,144 @@ char *elf_strtab;
7267char * elf_section ;
7368
/**
 * hash_index() - hash a string with FNV-1a and map it to a bucket index.
 * @size: The number of buckets in the map. Expected to be greater than 0.
 * @key: The key string. NULL is hashed like the empty string.
 *
 * The arithmetic deliberately stays in plain int: the original note on this
 * function states that unsigned integers are not available to this code, so
 * the multiplication relies on two's-complement wraparound.
 *
 * Return: A bucket index in the range [0, size - 1], or 0 when @size is not
 * positive.
 */
int hash_index(int size, char *key)
{
    int hash = 0x811c9dc5; /* FNV-1a 32-bit offset basis */
    int index;

    /* Avoid a division by zero on a malformed map size. */
    if (size <= 0)
        return 0;

    if (key) {
        for (; *key; key++) {
            hash ^= *key;
            hash *= 0x01000193; /* FNV-1a 32-bit prime */
        }
    }

    /* Reduce before dropping the sign. The remainder's magnitude is always
     * smaller than size, so negating it cannot overflow, whereas negating the
     * raw hash overflows for the most negative int and could produce a
     * negative index. For every other hash value the result is identical to
     * taking the absolute value first.
     */
    index = hash % size;
    if (index < 0)
        index = -index;

    return index;
}
91+
92+ /**
93+ * hashmap_create() - creates a hashmap on heap.
94+ * @size: The initial bucket size of hashmap. Must not be 0.
95+ *
96+ * @returns: The pointer of created hashmap.
97+ */
98+ hashmap_t * hashmap_create (int size )
99+ {
100+ hashmap_t * map = malloc (sizeof (hashmap_t ));
101+ map -> size = size ;
102+ map -> buckets = malloc (size * sizeof (hashmap_node_t * ));
103+
104+ for (int i = 0 ; i < map -> size ; i ++ )
105+ map -> buckets [i ] = 0 ;
106+
107+ return map ;
108+ }
109+
110+ /**
111+ * hashmap_node_new() - creates a hashmap node on heap.
112+ * @key: The key of node. Must not be NULL.
113+ * @val: The value of node. Could be NULL.
114+ *
115+ * @returns: The pointer of created node.
116+ */
117+ hashmap_node_t * hashmap_node_new (char * key , void * val )
118+ {
119+ int len = strlen (key );
120+ hashmap_node_t * node = malloc (sizeof (hashmap_node_t ));
121+ node -> key = calloc (len + 1 , sizeof (char ));
122+ strcpy (node -> key , key );
123+ node -> val = val ;
124+ node -> next = NULL ;
125+ return node ;
126+ }
127+
128+ /**
129+ * hashmap_put() - puts a key-value pair into given hashmap.
130+ * If key already contains a value, then replace it with new
131+ * value, the old value will be freed.
132+ * @map: The hashmap to be put into. Must not be NULL.
133+ * @key: The key string. May be NULL.
134+ * @val: The value pointer. May be NULL. This value's lifetime
135+ * is held by hashmap.
136+ */
137+ void hashmap_put (hashmap_t * map , char * key , void * val )
138+ {
139+ int index = hash_index (map -> size , key );
140+ hashmap_node_t * cur = map -> buckets [index ];
141+
142+ if (!cur ) {
143+ map -> buckets [index ] = hashmap_node_new (key , val );
144+ } else {
145+ while (cur -> next )
146+ cur = cur -> next ;
147+ cur -> next = hashmap_node_new (key , val );
113148 }
149+
150+ /* TODO: Rehash if size exceeds size * load factor */
114151}
115152
116153/**
117- * find_trie () - search the index of the function name in the trie
118- * @trie: A pointer to the trie where the name will be searched .
119- * @name : The name to be searched .
154+ * hashmap_get () - gets value from hashmap from given key.
155+ * @map: The hashmap to be looked up. Must no be NULL .
156+ * @key : The key string. May be NULL .
120157 *
121- * Return: The index of the pointer to the func_t.
158+ * @returns: The look up result, if the key-value pair entry
159+ * exists, then returns its value's address, NULL otherwise.
160+ */
161+ void * hashmap_get (hashmap_t * map , char * key )
162+ {
163+ int index = hash_index (map -> size , key );
164+
165+ for (hashmap_node_t * cur = map -> buckets [index ]; cur ; cur = cur -> next )
166+ if (!strcmp (cur -> key , key ))
167+ return cur -> val ;
168+
169+ return NULL ;
170+ }
171+
172+ /**
173+ * hashmap_contains() - checks if the key-value pair entry exists
174+ * from given key.
175+ * @map: The hashmap to be looked up. Must no be NULL.
176+ * @key: The key string. May be NULL.
122177 *
123- * 0 - the name not found.
124- * otherwise - the index of the founded index in the trie array.
178+ * @returns: The look up result, if the key-value pair entry
179+ * exists, then returns true, false otherwise.
180+ */
181+ bool hashmap_contains (hashmap_t * map , char * key )
182+ {
183+ return hashmap_get (map , key ) != NULL ;
184+ }
185+
186+ /**
187+ * hashmap_free() - frees the hashmap, this also frees key-value pair
188+ * entry's value.
189+ * @map: The hashmap to be looked up. Must no be NULL.
125190 */
126- int find_trie (trie_t * trie , char * name )
127- {
128- char first_char ;
129- int fc ;
130-
131- while (1 ) {
132- first_char = * name ;
133- fc = first_char ;
134- if (!fc )
135- return trie -> index ;
136- if (!trie -> next [fc ])
137- return 0 ;
138- trie = & FUNC_TRIES [trie -> next [fc ]];
139- name ++ ;
191+ void hashmap_free (hashmap_t * map )
192+ {
193+ for (int i = 0 ; i < map -> size ; i ++ ) {
194+ for (hashmap_node_t * cur = map -> buckets [i ], * next ; cur ;
195+ cur = cur -> next ) {
196+ next = cur -> next ;
197+ free (cur -> key );
198+ free (cur -> val );
199+ /* FIXME: Remove this if-clause will cause double free error */
200+ if (cur != map -> buckets [0 ])
201+ free (cur );
202+ cur = next ;
203+ }
140204 }
205+
206+ free (map -> buckets );
207+ free (map );
141208}
142209
143210/* options */
@@ -318,12 +385,14 @@ int find_macro_param_src_idx(char *name, block_t *parent)
318385func_t * add_func (char * name )
319386{
320387 func_t * fn ;
321- int index = insert_trie (FUNC_TRIES , name , funcs_idx );
322- if (index == funcs_idx ) {
323- fn = & FUNCS [funcs_idx ++ ];
388+ if (hashmap_contains (FUNCS_MAP , name )) {
389+ fn = hashmap_get (FUNCS_MAP , name );
390+ } else {
391+ fn = malloc (sizeof (func_t ));
392+ hashmap_put (FUNCS_MAP , name , fn );
324393 strcpy (fn -> return_def .var_name , name );
325394 }
326- fn = & FUNCS [ index ];
395+
327396 fn -> stack_size = 4 ; /* starting point of stack */
328397 return fn ;
329398}
@@ -358,10 +427,7 @@ constant_t *find_constant(char alias[])
358427
359428func_t * find_func (char func_name [])
360429{
361- int index = find_trie (FUNC_TRIES , func_name );
362- if (index )
363- return & FUNCS [index ];
364- return NULL ;
430+ return hashmap_get (FUNCS_MAP , func_name );
365431}
366432
367433var_t * find_member (char token [], type_t * type )
@@ -597,8 +663,7 @@ void global_init()
597663 BLOCKS .head = NULL ;
598664 BLOCKS .tail = NULL ;
599665 MACROS = malloc (MAX_ALIASES * sizeof (macro_t ));
600- FUNCS = malloc (MAX_FUNCS * sizeof (func_t ));
601- FUNC_TRIES = malloc (MAX_FUNC_TRIES * sizeof (trie_t ));
666+ FUNCS_MAP = hashmap_create (MAX_FUNCS );
602667 TYPES = malloc (MAX_TYPES * sizeof (type_t ));
603668 GLOBAL_IR = malloc (MAX_GLOBAL_IR * sizeof (ph1_ir_t ));
604669 PH1_IR = malloc (MAX_IR_INSTR * sizeof (ph1_ir_t ));
@@ -616,7 +681,8 @@ void global_init()
616681 elf_section = malloc (MAX_SECTION );
617682
618683 /* set starting point of global stack manually */
619- FUNCS [0 ].stack_size = 4 ;
684+ func_t * global_func = add_func ("" );
685+ global_func -> stack_size = 4 ;
620686}
621687
622688void global_release ()
@@ -627,8 +693,7 @@ void global_release()
627693 BLOCKS .head = next ;
628694 }
629695 free (MACROS );
630- free (FUNCS );
631- free (FUNC_TRIES );
696+ hashmap_free (FUNCS_MAP );
632697 free (TYPES );
633698 free (GLOBAL_IR );
634699 free (PH1_IR );
0 commit comments