|
10 | 10 | #include "defs.h"
|
11 | 11 | #include "globals.c"
|
12 | 12 |
|
| 13 | +/* Hash table constants */ |
| 14 | +#define NUM_DIRECTIVES 11 |
| 15 | +#define NUM_KEYWORDS 15 |
| 16 | + |
| 17 | +/* Preprocessor directive hash table using existing shecc hashmap */ |
| 18 | +hashmap_t *DIRECTIVE_MAP = NULL; |
| 19 | +/* C keywords hash table */ |
| 20 | +hashmap_t *KEYWORD_MAP = NULL; |
| 21 | +/* Token arrays for cleanup */ |
| 22 | +token_t *directive_tokens_storage = NULL; |
| 23 | +token_t *keyword_tokens_storage = NULL; |
| 24 | + |
| 25 | +void lex_init_directives() |
| 26 | +{ |
| 27 | + if (DIRECTIVE_MAP) |
| 28 | + return; |
| 29 | + |
| 30 | + DIRECTIVE_MAP = hashmap_create(16); /* Small capacity for directives */ |
| 31 | + |
| 32 | + /* Initialization using indexed for-loop */ |
| 33 | + directive_tokens_storage = |
| 34 | + arena_alloc(GENERAL_ARENA, NUM_DIRECTIVES * sizeof(token_t)); |
| 35 | + |
| 36 | + char *names[NUM_DIRECTIVES]; |
| 37 | + token_t token_values[NUM_DIRECTIVES]; |
| 38 | + |
| 39 | + /* Populate arrays using index-based assignments for compatibility */ |
| 40 | + names[0] = "#define"; |
| 41 | + token_values[0] = T_cppd_define; |
| 42 | + names[1] = "#elif"; |
| 43 | + token_values[1] = T_cppd_elif; |
| 44 | + names[2] = "#else"; |
| 45 | + token_values[2] = T_cppd_else; |
| 46 | + names[3] = "#endif"; |
| 47 | + token_values[3] = T_cppd_endif; |
| 48 | + names[4] = "#error"; |
| 49 | + token_values[4] = T_cppd_error; |
| 50 | + names[5] = "#if"; |
| 51 | + token_values[5] = T_cppd_if; |
| 52 | + names[6] = "#ifdef"; |
| 53 | + token_values[6] = T_cppd_ifdef; |
| 54 | + names[7] = "#ifndef"; |
| 55 | + token_values[7] = T_cppd_ifndef; |
| 56 | + names[8] = "#include"; |
| 57 | + token_values[8] = T_cppd_include; |
| 58 | + names[9] = "#pragma"; |
| 59 | + token_values[9] = T_cppd_pragma; |
| 60 | + names[10] = "#undef"; |
| 61 | + token_values[10] = T_cppd_undef; |
| 62 | + |
| 63 | + /* hashmap insertion */ |
| 64 | + for (int i = 0; i < NUM_DIRECTIVES; i++) { |
| 65 | + directive_tokens_storage[i] = token_values[i]; |
| 66 | + hashmap_put(DIRECTIVE_MAP, names[i], &directive_tokens_storage[i]); |
| 67 | + } |
| 68 | +} |
| 69 | + |
| 70 | +void lex_init_keywords() |
| 71 | +{ |
| 72 | + if (KEYWORD_MAP) |
| 73 | + return; |
| 74 | + |
| 75 | + KEYWORD_MAP = hashmap_create(32); /* Capacity for keywords */ |
| 76 | + |
| 77 | + /* Initialization using indexed for-loop */ |
| 78 | + keyword_tokens_storage = |
| 79 | + arena_alloc(GENERAL_ARENA, NUM_KEYWORDS * sizeof(token_t)); |
| 80 | + |
| 81 | + char *names[NUM_KEYWORDS]; |
| 82 | + token_t token_values[NUM_KEYWORDS]; |
| 83 | + |
| 84 | + /* Populate arrays using index-based assignments for compatibility */ |
| 85 | + names[0] = "if"; |
| 86 | + token_values[0] = T_if; |
| 87 | + names[1] = "while"; |
| 88 | + token_values[1] = T_while; |
| 89 | + names[2] = "for"; |
| 90 | + token_values[2] = T_for; |
| 91 | + names[3] = "do"; |
| 92 | + token_values[3] = T_do; |
| 93 | + names[4] = "else"; |
| 94 | + token_values[4] = T_else; |
| 95 | + names[5] = "return"; |
| 96 | + token_values[5] = T_return; |
| 97 | + names[6] = "typedef"; |
| 98 | + token_values[6] = T_typedef; |
| 99 | + names[7] = "enum"; |
| 100 | + token_values[7] = T_enum; |
| 101 | + names[8] = "struct"; |
| 102 | + token_values[8] = T_struct; |
| 103 | + names[9] = "sizeof"; |
| 104 | + token_values[9] = T_sizeof; |
| 105 | + names[10] = "switch"; |
| 106 | + token_values[10] = T_switch; |
| 107 | + names[11] = "case"; |
| 108 | + token_values[11] = T_case; |
| 109 | + names[12] = "break"; |
| 110 | + token_values[12] = T_break; |
| 111 | + names[13] = "default"; |
| 112 | + token_values[13] = T_default; |
| 113 | + names[14] = "continue"; |
| 114 | + token_values[14] = T_continue; |
| 115 | + |
| 116 | + /* hashmap insertion */ |
| 117 | + for (int i = 0; i < NUM_KEYWORDS; i++) { |
| 118 | + keyword_tokens_storage[i] = token_values[i]; |
| 119 | + hashmap_put(KEYWORD_MAP, names[i], &keyword_tokens_storage[i]); |
| 120 | + } |
| 121 | +} |
| 122 | + |
| 123 | +/* Hash table lookup for preprocessor directives */ |
| 124 | +token_t lookup_directive(char *token) |
| 125 | +{ |
| 126 | + if (!DIRECTIVE_MAP) |
| 127 | + lex_init_directives(); |
| 128 | + |
| 129 | + token_t *result = hashmap_get(DIRECTIVE_MAP, token); |
| 130 | + if (result) |
| 131 | + return *result; |
| 132 | + |
| 133 | + return T_identifier; |
| 134 | +} |
| 135 | + |
| 136 | +/* Hash table lookup for C keywords */ |
| 137 | +token_t lookup_keyword(char *token) |
| 138 | +{ |
| 139 | + if (!KEYWORD_MAP) |
| 140 | + lex_init_keywords(); |
| 141 | + |
| 142 | + token_t *result = hashmap_get(KEYWORD_MAP, token); |
| 143 | + if (result) |
| 144 | + return *result; |
| 145 | + |
| 146 | + return T_identifier; |
| 147 | +} |
| 148 | + |
| 149 | +/* Cleanup function for lexer hashmaps */ |
| 150 | +void lexer_cleanup() |
| 151 | +{ |
| 152 | + if (DIRECTIVE_MAP) { |
| 153 | + hashmap_free(DIRECTIVE_MAP); |
| 154 | + DIRECTIVE_MAP = NULL; |
| 155 | + } |
| 156 | + |
| 157 | + if (KEYWORD_MAP) { |
| 158 | + hashmap_free(KEYWORD_MAP); |
| 159 | + KEYWORD_MAP = NULL; |
| 160 | + } |
| 161 | + |
| 162 | + /* Token storage arrays are allocated from GENERAL_ARENA and will be |
| 163 | + * automatically freed when the arena is freed in global_release(). |
| 164 | + * No need to explicitly free them here. |
| 165 | + */ |
| 166 | + directive_tokens_storage = NULL; |
| 167 | + keyword_tokens_storage = NULL; |
| 168 | +} |
| 169 | + |
13 | 170 | bool is_whitespace(char c)
|
14 | 171 | {
|
15 | 172 | return c == ' ' || c == '\t';
|
@@ -112,28 +269,9 @@ token_t lex_token_internal(bool aliasing)
|
112 | 269 | token_str[i] = 0;
|
113 | 270 | skip_whitespace();
|
114 | 271 |
|
115 |
| - if (!strcmp(token_str, "#include")) |
116 |
| - return T_cppd_include; |
117 |
| - if (!strcmp(token_str, "#define")) |
118 |
| - return T_cppd_define; |
119 |
| - if (!strcmp(token_str, "#undef")) |
120 |
| - return T_cppd_undef; |
121 |
| - if (!strcmp(token_str, "#error")) |
122 |
| - return T_cppd_error; |
123 |
| - if (!strcmp(token_str, "#if")) |
124 |
| - return T_cppd_if; |
125 |
| - if (!strcmp(token_str, "#elif")) |
126 |
| - return T_cppd_elif; |
127 |
| - if (!strcmp(token_str, "#ifdef")) |
128 |
| - return T_cppd_ifdef; |
129 |
| - if (!strcmp(token_str, "#ifndef")) |
130 |
| - return T_cppd_ifndef; |
131 |
| - if (!strcmp(token_str, "#else")) |
132 |
| - return T_cppd_else; |
133 |
| - if (!strcmp(token_str, "#endif")) |
134 |
| - return T_cppd_endif; |
135 |
| - if (!strcmp(token_str, "#pragma")) |
136 |
| - return T_cppd_pragma; |
| 272 | + token_t directive = lookup_directive(token_str); |
| 273 | + if (directive != T_identifier) |
| 274 | + return directive; |
137 | 275 | error("Unknown directive");
|
138 | 276 | }
|
139 | 277 |
|
@@ -485,36 +623,9 @@ token_t lex_token_internal(bool aliasing)
|
485 | 623 | token_str[i] = 0;
|
486 | 624 | skip_whitespace();
|
487 | 625 |
|
488 |
| - if (!strcmp(token_str, "if")) |
489 |
| - return T_if; |
490 |
| - if (!strcmp(token_str, "while")) |
491 |
| - return T_while; |
492 |
| - if (!strcmp(token_str, "for")) |
493 |
| - return T_for; |
494 |
| - if (!strcmp(token_str, "do")) |
495 |
| - return T_do; |
496 |
| - if (!strcmp(token_str, "else")) |
497 |
| - return T_else; |
498 |
| - if (!strcmp(token_str, "return")) |
499 |
| - return T_return; |
500 |
| - if (!strcmp(token_str, "typedef")) |
501 |
| - return T_typedef; |
502 |
| - if (!strcmp(token_str, "enum")) |
503 |
| - return T_enum; |
504 |
| - if (!strcmp(token_str, "struct")) |
505 |
| - return T_struct; |
506 |
| - if (!strcmp(token_str, "sizeof")) |
507 |
| - return T_sizeof; |
508 |
| - if (!strcmp(token_str, "switch")) |
509 |
| - return T_switch; |
510 |
| - if (!strcmp(token_str, "case")) |
511 |
| - return T_case; |
512 |
| - if (!strcmp(token_str, "break")) |
513 |
| - return T_break; |
514 |
| - if (!strcmp(token_str, "default")) |
515 |
| - return T_default; |
516 |
| - if (!strcmp(token_str, "continue")) |
517 |
| - return T_continue; |
| 626 | + token_t keyword = lookup_keyword(token_str); |
| 627 | + if (keyword != T_identifier) |
| 628 | + return keyword; |
518 | 629 |
|
519 | 630 | if (aliasing) {
|
520 | 631 | alias = find_alias(token_str);
|
|
0 commit comments