@@ -15,18 +15,13 @@ block_list_t BLOCKS;
1515macro_t * MACROS ;
1616int macros_idx = 0 ;
1717
18- /* the first element is reserved for global scope */
19- func_t * FUNCS ;
20- int funcs_idx = 1 ;
21-
22- /* FUNC_TRIES is used to improve the performance of the find_func function.
23- * Instead of searching through all functions and comparing their names, we can
24- * utilize the trie data structure to search for existing functions efficiently.
25- * The index starts from 1 because the first trie node represents an empty input
26- * string, and it is not possible to record a function with an empty name.
/* FUNCS_MAP stores all functions and speeds up lookup by name. Function
 * names are currently hashed with the FNV-1a hash function. The number of
 * buckets defaults to MAX_FUNCS. Ideally it would be a small number, but
 * because rehashing is not implemented yet, a large number of buckets is
 * created up front to keep collisions rare.
2723 */
28- trie_t * FUNC_TRIES ;
29- int func_tries_idx = 1 ;
24+ hashmap_t * FUNCS_MAP ;
3025
3126type_t * TYPES ;
3227int types_idx = 0 ;
@@ -75,72 +70,195 @@ char *elf_strtab;
7570char * elf_section ;
7671
/**
 * hashmap_hash_index() - hashes a string with the FNV-1a hash function and
 *                        converts the result into a usable bucket index.
 * @size: The number of buckets. Must be positive, and must be a power of 2
 *        for the result to be evenly spread, because the hash is reduced
 *        with the bit mask (size - 1) rather than a modulo.
 * @key: The key string. May be NULL, in which case index 0 is returned.
 *
 * The hash is kept in a signed int due to the lack of an unsigned integer
 * implementation, so the multiplication relies on two's-complement
 * wraparound (assumes a 32-bit int). The absolute value of the hash is taken
 * before it is masked.
 *
 * Return: The bucket index, in the range [0, size - 1].
 */
int hashmap_hash_index(int size, char *key)
{
    int hash = 0x811c9dc5, mask;

    /* A NULL key has no characters to hash; map it to the first bucket */
    if (!key)
        return 0;

    for (; *key; key++) {
        hash ^= *key;
        hash *= 0x01000193;
    }

    /* Branch-free absolute value: with an arithmetic right shift, mask is
     * -1 for a negative hash and 0 otherwise.
     */
    mask = hash >> 31;
    return ((hash ^ mask) - mask) & (size - 1);
}
94+
/**
 * round_up_pow2() - rounds a positive integer up to a power of 2.
 * @v: The value to round up.
 *
 * Return: @v itself when it is already a power of 2, otherwise the next
 * greater power of 2 (for results that fit in a 32-bit int).
 */
int round_up_pow2(int v)
{
    int bits = v - 1;

    /* Copy the highest set bit into every lower bit position */
    for (int shift = 1; shift <= 16; shift *= 2)
        bits |= bits >> shift;

    return bits + 1;
}
118106
119107/**
120- * find_trie() - search the index of the function name in the trie
121- * @trie: A pointer to the trie where the name will be searched.
122- * @name: The name to be searched.
108+ * hashmap_create() - creates a hashmap on heap. Notice that
109+ * provided size will always be rounded up to nearest power of 2.
110+ * @size: The initial bucket size of hashmap. Must not be 0 or
111+ * negative.
123112 *
124- * Return: The index of the pointer to the func_t.
113+ * Return: The pointer of created hashmap.
114+ */
115+ hashmap_t * hashmap_create (int size )
116+ {
117+ hashmap_t * map = malloc (sizeof (hashmap_t ));
118+
119+ if (!map ) {
120+ printf ("Failed to allocate hashmap_t with size %d\n" , size );
121+ return NULL ;
122+ }
123+
124+ map -> size = round_up_pow2 (size );
125+ map -> buckets = malloc (map -> size * sizeof (hashmap_node_t * ));
126+
127+ if (!map -> buckets ) {
128+ printf ("Failed to allocate buckets in hashmap_t\n" );
129+ free (map );
130+ return NULL ;
131+ }
132+
133+ for (int i = 0 ; i < map -> size ; i ++ )
134+ map -> buckets [i ] = 0 ;
135+
136+ return map ;
137+ }
138+
139+ /**
140+ * hashmap_node_new() - creates a hashmap node on heap.
141+ * @key: The key of node. Must not be NULL.
142+ * @val: The value of node. Could be NULL.
125143 *
126- * 0 - the name not found.
127- * otherwise - the index of the founded index in the trie array.
144+ * Return: The pointer of created node.
128145 */
129- int find_trie ( trie_t * trie , char * name )
146+ hashmap_node_t * hashmap_node_new ( char * key , void * val )
130147{
131- char first_char ;
132- int fc ;
148+ if (!key )
149+ return NULL ;
150+
151+ int len = strlen (key );
152+ hashmap_node_t * node = malloc (sizeof (hashmap_node_t ));
133153
134- while (1 ) {
135- first_char = * name ;
136- fc = first_char ;
137- if (!fc )
138- return trie -> index ;
139- if (!trie -> next [fc ])
140- return 0 ;
141- trie = & FUNC_TRIES [trie -> next [fc ]];
142- name ++ ;
154+
155+ if (!node ) {
156+ printf ("Failed to allocate hashmap_node_t\n" );
157+ return NULL ;
143158 }
159+
160+ node -> key = calloc (len + 1 , sizeof (char ));
161+
162+ if (!node -> key ) {
163+ printf ("Failed to allocate hashmap_node_t key with size %d\n" );
164+ free (node );
165+ return NULL ;
166+ }
167+
168+ strcpy (node -> key , key );
169+ node -> val = val ;
170+ node -> next = NULL ;
171+ return node ;
172+ }
173+
174+ /**
175+ * hashmap_put() - puts a key-value pair into given hashmap.
176+ * If key already contains a value, then replace it with new
177+ * value, the old value will be freed.
178+ * @map: The hashmap to be put into. Must not be NULL.
179+ * @key: The key string. May be NULL.
180+ * @val: The value pointer. May be NULL. This value's lifetime
181+ * is held by hashmap.
182+ */
183+ void hashmap_put (hashmap_t * map , char * key , void * val )
184+ {
185+ if (!map )
186+ return ;
187+
188+ int index = hashmap_hash_index (map -> size , key );
189+ hashmap_node_t * cur = map -> buckets [index ];
190+
191+ if (!cur ) {
192+ map -> buckets [index ] = hashmap_node_new (key , val );
193+ } else {
194+ while (cur -> next )
195+ cur = cur -> next ;
196+ cur -> next = hashmap_node_new (key , val );
197+ }
198+
199+ /* TODO: Rehash if size exceeds size * load factor */
200+ }
201+
202+ /**
203+ * hashmap_get() - gets value from hashmap from given key.
204+ * @map: The hashmap to be looked up. Must no be NULL.
205+ * @key: The key string. May be NULL.
206+ *
207+ * Return: The look up result, if the key-value pair entry
208+ * exists, then returns its value's address, NULL otherwise.
209+ */
210+ void * hashmap_get (hashmap_t * map , char * key )
211+ {
212+ if (!map )
213+ return NULL ;
214+
215+ int index = hashmap_hash_index (map -> size , key );
216+
217+ for (hashmap_node_t * cur = map -> buckets [index ]; cur ; cur = cur -> next )
218+ if (!strcmp (cur -> key , key ))
219+ return cur -> val ;
220+
221+ return NULL ;
222+ }
223+
224+ /**
225+ * hashmap_contains() - checks if the key-value pair entry exists
226+ * from given key.
227+ * @map: The hashmap to be looked up. Must no be NULL.
228+ * @key: The key string. May be NULL.
229+ *
230+ * Return: The look up result, if the key-value pair entry
231+ * exists, then returns true, false otherwise.
232+ */
233+ bool hashmap_contains (hashmap_t * map , char * key )
234+ {
235+ return hashmap_get (map , key );
236+ }
237+
238+ /**
239+ * hashmap_free() - frees the hashmap, this also frees key-value pair
240+ * entry's value.
241+ * @map: The hashmap to be looked up. Must no be NULL.
242+ */
243+ void hashmap_free (hashmap_t * map )
244+ {
245+ if (!map )
246+ return ;
247+
248+ for (int i = 0 ; i < map -> size ; i ++ ) {
249+ for (hashmap_node_t * cur = map -> buckets [i ], * next ; cur ; cur = next ) {
250+ next = cur -> next ;
251+ free (cur -> key );
252+ free (cur -> val );
253+ /* FIXME: Remove this if-clause will cause double free error */
254+ if (cur != map -> buckets [0 ])
255+ free (cur );
256+ cur = next ;
257+ }
258+ }
259+
260+ free (map -> buckets );
261+ free (map );
144262}
145263
146264/* options */
@@ -321,12 +439,20 @@ int find_macro_param_src_idx(char *name, block_t *parent)
321439func_t * add_func (char * name )
322440{
323441 func_t * fn ;
324- int index = insert_trie (FUNC_TRIES , name , funcs_idx );
325- if (index == funcs_idx ) {
326- fn = & FUNCS [funcs_idx ++ ];
442+ if (hashmap_contains (FUNCS_MAP , name )) {
443+ fn = hashmap_get (FUNCS_MAP , name );
444+ } else {
445+ fn = malloc (sizeof (func_t ));
446+
447+ if (!fn ) {
448+ printf ("Failed to allocate func_t\n" );
449+ return NULL ;
450+ }
451+
452+ hashmap_put (FUNCS_MAP , name , fn );
327453 strcpy (fn -> return_def .var_name , name );
328454 }
329- fn = & FUNCS [ index ];
455+
330456 fn -> stack_size = 4 ; /* starting point of stack */
331457 return fn ;
332458}
@@ -361,10 +487,7 @@ constant_t *find_constant(char alias[])
361487
362488func_t * find_func (char func_name [])
363489{
364- int index = find_trie (FUNC_TRIES , func_name );
365- if (index )
366- return & FUNCS [index ];
367- return NULL ;
490+ return hashmap_get (FUNCS_MAP , func_name );
368491}
369492
370493var_t * find_member (char token [], type_t * type )
@@ -600,8 +723,7 @@ void global_init()
600723 BLOCKS .head = NULL ;
601724 BLOCKS .tail = NULL ;
602725 MACROS = malloc (MAX_ALIASES * sizeof (macro_t ));
603- FUNCS = malloc (MAX_FUNCS * sizeof (func_t ));
604- FUNC_TRIES = malloc (MAX_FUNC_TRIES * sizeof (trie_t ));
726+ FUNCS_MAP = hashmap_create (MAX_FUNCS );
605727 TYPES = malloc (MAX_TYPES * sizeof (type_t ));
606728 GLOBAL_IR = malloc (MAX_GLOBAL_IR * sizeof (ph1_ir_t ));
607729 PH1_IR = malloc (MAX_IR_INSTR * sizeof (ph1_ir_t ));
@@ -619,7 +741,8 @@ void global_init()
619741 elf_section = malloc (MAX_SECTION );
620742
621743 /* set starting point of global stack manually */
622- FUNCS [0 ].stack_size = 4 ;
744+ func_t * global_func = add_func ("" );
745+ global_func -> stack_size = 4 ;
623746}
624747
625748void global_release ()
@@ -630,8 +753,7 @@ void global_release()
630753 BLOCKS .head = next ;
631754 }
632755 free (MACROS );
633- free (FUNCS );
634- free (FUNC_TRIES );
756+ hashmap_free (FUNCS_MAP );
635757 free (TYPES );
636758 free (GLOBAL_IR );
637759 free (PH1_IR );
0 commit comments