Skip to content

Commit f0e6325

Browse files
authored
Merge pull request #235 from sysprog21/lexer-lookup
Replace strcmp chains with hashmap lookups
2 parents 37eee15 + 39bc653 commit f0e6325

File tree

2 files changed

+169
-52
lines changed

2 files changed

+169
-52
lines changed

src/globals.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1113,8 +1113,14 @@ void global_init(void)
11131113
elf_section = strbuf_create(MAX_SECTION);
11141114
}
11151115

1116+
/* Forward declaration for lexer cleanup */
1117+
void lexer_cleanup(void);
1118+
11161119
void global_release(void)
11171120
{
1121+
/* Cleanup lexer hashmaps */
1122+
lexer_cleanup();
1123+
11181124
hashmap_free(MACROS_MAP);
11191125
free(TYPES);
11201126
arena_free(BLOCK_ARENA);

src/lexer.c

Lines changed: 163 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,163 @@
1010
#include "defs.h"
1111
#include "globals.c"
1212

13+
/* Entry counts for the two lexer lookup tables. Each value must equal the
 * number of names registered by lex_init_directives() and
 * lex_init_keywords() respectively, because it sizes both the local name
 * array and the arena-allocated token storage in those functions.
 */
14+
#define NUM_DIRECTIVES 11
15+
#define NUM_KEYWORDS 15
16+
17+
/* Maps a preprocessor directive spelling (e.g. "#define") to a pointer to
 * its token value. NULL until lex_init_directives() has run.
 */
18+
hashmap_t *DIRECTIVE_MAP = NULL;
19+
/* Maps a C keyword spelling (e.g. "while") to a pointer to its token value.
 * NULL until lex_init_keywords() has run.
 */
20+
hashmap_t *KEYWORD_MAP = NULL;
21+
/* Backing storage for the token values the two maps point at. Allocated
 * from GENERAL_ARENA by the init functions; lexer_cleanup() only resets
 * these pointers to NULL and does not free the memory itself.
 */
22+
token_t *directive_tokens_storage = NULL;
23+
token_t *keyword_tokens_storage = NULL;
24+
25+
void lex_init_directives()
26+
{
27+
if (DIRECTIVE_MAP)
28+
return;
29+
30+
DIRECTIVE_MAP = hashmap_create(16); /* Small capacity for directives */
31+
32+
/* Initialization using indexed for-loop */
33+
directive_tokens_storage =
34+
arena_alloc(GENERAL_ARENA, NUM_DIRECTIVES * sizeof(token_t));
35+
36+
char *names[NUM_DIRECTIVES];
37+
token_t token_values[NUM_DIRECTIVES];
38+
39+
/* Populate arrays using index-based assignments for compatibility */
40+
names[0] = "#define";
41+
token_values[0] = T_cppd_define;
42+
names[1] = "#elif";
43+
token_values[1] = T_cppd_elif;
44+
names[2] = "#else";
45+
token_values[2] = T_cppd_else;
46+
names[3] = "#endif";
47+
token_values[3] = T_cppd_endif;
48+
names[4] = "#error";
49+
token_values[4] = T_cppd_error;
50+
names[5] = "#if";
51+
token_values[5] = T_cppd_if;
52+
names[6] = "#ifdef";
53+
token_values[6] = T_cppd_ifdef;
54+
names[7] = "#ifndef";
55+
token_values[7] = T_cppd_ifndef;
56+
names[8] = "#include";
57+
token_values[8] = T_cppd_include;
58+
names[9] = "#pragma";
59+
token_values[9] = T_cppd_pragma;
60+
names[10] = "#undef";
61+
token_values[10] = T_cppd_undef;
62+
63+
/* hashmap insertion */
64+
for (int i = 0; i < NUM_DIRECTIVES; i++) {
65+
directive_tokens_storage[i] = token_values[i];
66+
hashmap_put(DIRECTIVE_MAP, names[i], &directive_tokens_storage[i]);
67+
}
68+
}
69+
70+
void lex_init_keywords()
71+
{
72+
if (KEYWORD_MAP)
73+
return;
74+
75+
KEYWORD_MAP = hashmap_create(32); /* Capacity for keywords */
76+
77+
/* Initialization using indexed for-loop */
78+
keyword_tokens_storage =
79+
arena_alloc(GENERAL_ARENA, NUM_KEYWORDS * sizeof(token_t));
80+
81+
char *names[NUM_KEYWORDS];
82+
token_t token_values[NUM_KEYWORDS];
83+
84+
/* Populate arrays using index-based assignments for compatibility */
85+
names[0] = "if";
86+
token_values[0] = T_if;
87+
names[1] = "while";
88+
token_values[1] = T_while;
89+
names[2] = "for";
90+
token_values[2] = T_for;
91+
names[3] = "do";
92+
token_values[3] = T_do;
93+
names[4] = "else";
94+
token_values[4] = T_else;
95+
names[5] = "return";
96+
token_values[5] = T_return;
97+
names[6] = "typedef";
98+
token_values[6] = T_typedef;
99+
names[7] = "enum";
100+
token_values[7] = T_enum;
101+
names[8] = "struct";
102+
token_values[8] = T_struct;
103+
names[9] = "sizeof";
104+
token_values[9] = T_sizeof;
105+
names[10] = "switch";
106+
token_values[10] = T_switch;
107+
names[11] = "case";
108+
token_values[11] = T_case;
109+
names[12] = "break";
110+
token_values[12] = T_break;
111+
names[13] = "default";
112+
token_values[13] = T_default;
113+
names[14] = "continue";
114+
token_values[14] = T_continue;
115+
116+
/* hashmap insertion */
117+
for (int i = 0; i < NUM_KEYWORDS; i++) {
118+
keyword_tokens_storage[i] = token_values[i];
119+
hashmap_put(KEYWORD_MAP, names[i], &keyword_tokens_storage[i]);
120+
}
121+
}
122+
123+
/* Hash table lookup for preprocessor directives */
124+
token_t lookup_directive(char *token)
125+
{
126+
if (!DIRECTIVE_MAP)
127+
lex_init_directives();
128+
129+
token_t *result = hashmap_get(DIRECTIVE_MAP, token);
130+
if (result)
131+
return *result;
132+
133+
return T_identifier;
134+
}
135+
136+
/* Hash table lookup for C keywords */
137+
token_t lookup_keyword(char *token)
138+
{
139+
if (!KEYWORD_MAP)
140+
lex_init_keywords();
141+
142+
token_t *result = hashmap_get(KEYWORD_MAP, token);
143+
if (result)
144+
return *result;
145+
146+
return T_identifier;
147+
}
148+
149+
/* Cleanup function for lexer hashmaps */
150+
void lexer_cleanup()
151+
{
152+
if (DIRECTIVE_MAP) {
153+
hashmap_free(DIRECTIVE_MAP);
154+
DIRECTIVE_MAP = NULL;
155+
}
156+
157+
if (KEYWORD_MAP) {
158+
hashmap_free(KEYWORD_MAP);
159+
KEYWORD_MAP = NULL;
160+
}
161+
162+
/* Token storage arrays are allocated from GENERAL_ARENA and will be
163+
* automatically freed when the arena is freed in global_release().
164+
* No need to explicitly free them here.
165+
*/
166+
directive_tokens_storage = NULL;
167+
keyword_tokens_storage = NULL;
168+
}
169+
13170
bool is_whitespace(char c)
14171
{
15172
return c == ' ' || c == '\t';
@@ -112,28 +269,9 @@ token_t lex_token_internal(bool aliasing)
112269
token_str[i] = 0;
113270
skip_whitespace();
114271

115-
if (!strcmp(token_str, "#include"))
116-
return T_cppd_include;
117-
if (!strcmp(token_str, "#define"))
118-
return T_cppd_define;
119-
if (!strcmp(token_str, "#undef"))
120-
return T_cppd_undef;
121-
if (!strcmp(token_str, "#error"))
122-
return T_cppd_error;
123-
if (!strcmp(token_str, "#if"))
124-
return T_cppd_if;
125-
if (!strcmp(token_str, "#elif"))
126-
return T_cppd_elif;
127-
if (!strcmp(token_str, "#ifdef"))
128-
return T_cppd_ifdef;
129-
if (!strcmp(token_str, "#ifndef"))
130-
return T_cppd_ifndef;
131-
if (!strcmp(token_str, "#else"))
132-
return T_cppd_else;
133-
if (!strcmp(token_str, "#endif"))
134-
return T_cppd_endif;
135-
if (!strcmp(token_str, "#pragma"))
136-
return T_cppd_pragma;
272+
token_t directive = lookup_directive(token_str);
273+
if (directive != T_identifier)
274+
return directive;
137275
error("Unknown directive");
138276
}
139277

@@ -485,36 +623,9 @@ token_t lex_token_internal(bool aliasing)
485623
token_str[i] = 0;
486624
skip_whitespace();
487625

488-
if (!strcmp(token_str, "if"))
489-
return T_if;
490-
if (!strcmp(token_str, "while"))
491-
return T_while;
492-
if (!strcmp(token_str, "for"))
493-
return T_for;
494-
if (!strcmp(token_str, "do"))
495-
return T_do;
496-
if (!strcmp(token_str, "else"))
497-
return T_else;
498-
if (!strcmp(token_str, "return"))
499-
return T_return;
500-
if (!strcmp(token_str, "typedef"))
501-
return T_typedef;
502-
if (!strcmp(token_str, "enum"))
503-
return T_enum;
504-
if (!strcmp(token_str, "struct"))
505-
return T_struct;
506-
if (!strcmp(token_str, "sizeof"))
507-
return T_sizeof;
508-
if (!strcmp(token_str, "switch"))
509-
return T_switch;
510-
if (!strcmp(token_str, "case"))
511-
return T_case;
512-
if (!strcmp(token_str, "break"))
513-
return T_break;
514-
if (!strcmp(token_str, "default"))
515-
return T_default;
516-
if (!strcmp(token_str, "continue"))
517-
return T_continue;
626+
token_t keyword = lookup_keyword(token_str);
627+
if (keyword != T_identifier)
628+
return keyword;
518629

519630
if (aliasing) {
520631
alias = find_alias(token_str);

0 commit comments

Comments
 (0)