Skip to content

Commit e2d1534

Browse files
committed
Refactor and simplify lexer implementation
This commit removes the lexer's token pool and token buffer management infrastructure, which was not being used.
1 parent 053c693 commit e2d1534

File tree

2 files changed

+18
-204
lines changed

2 files changed

+18
-204
lines changed

src/defs.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,6 @@ typedef struct token_info {
212212
typedef struct {
213213
token_info_t *freelist;
214214
int allocated_count;
215-
int reused_count; /* Statistics for debugging */
216215
} token_pool_t;
217216

218217
/* Token buffer for improved lookahead */

src/lexer.c

Lines changed: 18 additions & 203 deletions
Original file line numberDiff line numberDiff line change
@@ -121,143 +121,8 @@ token_t lookup_keyword(char *token)
121121
return T_identifier;
122122
}
123123

124-
/* Cleanup function for lexer hashmaps */
125-
/* Token Memory Management Functions */
126-
127-
/* Initialize token pool for memory reuse */
128-
void token_pool_init(void)
129-
{
130-
if (TOKEN_POOL)
131-
return;
132-
133-
TOKEN_POOL = arena_alloc(GENERAL_ARENA, sizeof(token_pool_t));
134-
if (TOKEN_POOL) {
135-
TOKEN_POOL->freelist = NULL;
136-
TOKEN_POOL->allocated_count = 0;
137-
TOKEN_POOL->reused_count = 0;
138-
}
139-
}
140-
141-
/* Allocate or reuse a token from the pool */
142-
token_info_t *token_pool_alloc(void)
143-
{
144-
if (!TOKEN_POOL)
145-
token_pool_init();
146-
147-
token_info_t *token;
148-
149-
if (TOKEN_POOL->freelist) {
150-
/* Reuse from freelist */
151-
token = TOKEN_POOL->freelist;
152-
TOKEN_POOL->freelist = token->next;
153-
TOKEN_POOL->reused_count++;
154-
} else {
155-
/* Allocate new token */
156-
token = arena_alloc(GENERAL_ARENA, sizeof(token_info_t));
157-
TOKEN_POOL->allocated_count++;
158-
}
159-
160-
/* Clear token data */
161-
token->type = T_eof;
162-
token->value[0] = '\0';
163-
/* Set location fields individually */
164-
token->location.line = current_location.line;
165-
token->location.column = current_location.column;
166-
token->location.filename = current_location.filename;
167-
token->next = NULL;
168-
169-
return token;
170-
}
171-
172-
/* Return token to freelist for reuse */
173-
void token_pool_free(token_info_t *token)
174-
{
175-
if (!token || !TOKEN_POOL)
176-
return;
177-
178-
token->next = TOKEN_POOL->freelist;
179-
TOKEN_POOL->freelist = token;
180-
}
181-
182-
/* Initialize token buffer for lookahead */
183-
void token_buffer_init(void)
184-
{
185-
if (TOKEN_BUFFER)
186-
return;
187-
188-
TOKEN_BUFFER = arena_alloc(GENERAL_ARENA, sizeof(token_buffer_t));
189-
TOKEN_BUFFER->head = 0;
190-
TOKEN_BUFFER->tail = 0;
191-
TOKEN_BUFFER->count = 0;
192-
193-
for (int i = 0; i < TOKEN_BUFFER_SIZE; i++)
194-
TOKEN_BUFFER->tokens[i] = NULL;
195-
}
196-
197-
/* Add token to buffer */
198-
void token_buffer_push(token_info_t *token)
199-
{
200-
if (!TOKEN_BUFFER)
201-
token_buffer_init();
202-
203-
if (TOKEN_BUFFER->count >= TOKEN_BUFFER_SIZE) {
204-
/* Buffer full, free oldest token */
205-
token_info_t *old = TOKEN_BUFFER->tokens[TOKEN_BUFFER->head];
206-
token_pool_free(old);
207-
TOKEN_BUFFER->head = (TOKEN_BUFFER->head + 1) % TOKEN_BUFFER_SIZE;
208-
TOKEN_BUFFER->count--;
209-
}
210-
211-
TOKEN_BUFFER->tokens[TOKEN_BUFFER->tail] = token;
212-
TOKEN_BUFFER->tail = (TOKEN_BUFFER->tail + 1) % TOKEN_BUFFER_SIZE;
213-
TOKEN_BUFFER->count++;
214-
}
215-
216-
/* Look ahead N tokens without consuming */
217-
token_info_t *token_buffer_peek(int offset)
218-
{
219-
if (!TOKEN_BUFFER || offset >= TOKEN_BUFFER->count)
220-
return NULL;
221-
222-
int idx = (TOKEN_BUFFER->head + offset) % TOKEN_BUFFER_SIZE;
223-
return TOKEN_BUFFER->tokens[idx];
224-
}
225-
226-
/* Update source location tracking */
227-
void update_location(char c)
228-
{
229-
if (c == '\n') {
230-
current_location.line++;
231-
current_location.column = 1;
232-
} else if (c == '\t') {
233-
current_location.column += 4; /* Assume 4-space tabs */
234-
} else {
235-
current_location.column++;
236-
}
237-
}
238-
239-
/* Set current filename for error reporting */
240-
void set_current_filename(char *filename)
241-
{
242-
current_location.filename = filename;
243-
current_location.line = 1;
244-
current_location.column = 1;
245-
}
246-
247-
/* Enhanced error reporting with location */
248-
void error_with_location(char *msg, source_location_t *loc)
249-
{
250-
if (loc && loc->filename) {
251-
printf("%s:%d:%d: error: %s\n", loc->filename, loc->line, loc->column,
252-
msg);
253-
} else if (loc) {
254-
printf("line %d, column %d: error: %s\n", loc->line, loc->column, msg);
255-
} else {
256-
printf("error: %s\n", msg);
257-
}
258-
abort();
259-
}
260124

125+
/* Cleanup function for lexer hashmaps */
261126
void lexer_cleanup()
262127
{
263128
if (DIRECTIVE_MAP) {
@@ -276,11 +141,6 @@ void lexer_cleanup()
276141
*/
277142
directive_tokens_storage = NULL;
278143
keyword_tokens_storage = NULL;
279-
280-
/* Token pool and buffer are also arena-allocated, no explicit free needed
281-
*/
282-
TOKEN_POOL = NULL;
283-
TOKEN_BUFFER = NULL;
284144
}
285145

286146
bool is_whitespace(char c)
@@ -372,7 +232,6 @@ char read_char(bool is_skip_space)
372232
{
373233
SOURCE->size++;
374234
next_char = SOURCE->elements[SOURCE->size];
375-
/* TODO: Re-enable after self-hosting: update_location(next_char); */
376235
if (is_skip_space)
377236
skip_whitespace();
378237
return next_char;
@@ -383,10 +242,10 @@ char peek_char(int offset)
383242
return SOURCE->elements[SOURCE->size + offset];
384243
}
385244

386-
/* Lex next token and returns its token type. Parameter 'aliasing' is used for
387-
* disable preprocessor aliasing on identifier tokens.
245+
/* Lex next token and returns its token type. Parameter 'aliasing' controls
246+
* preprocessor aliasing on identifier tokens (true = enable, false = disable).
388247
*/
389-
token_t lex_token_internal(bool aliasing)
248+
token_t lex_token_impl(bool aliasing)
390249
{
391250
token_str[0] = 0;
392251

@@ -431,15 +290,15 @@ token_t lex_token_internal(bool aliasing)
431290
next_char = SOURCE->elements[pos];
432291
SOURCE->size = pos;
433292
skip_whitespace();
434-
return lex_token_internal(aliasing);
293+
return lex_token_impl(aliasing);
435294
}
436295
}
437296
} while (next_char);
438297

439298
SOURCE->size = pos;
440299
if (!next_char)
441300
error("Unenclosed C-style comment");
442-
return lex_token_internal(aliasing);
301+
return lex_token_impl(aliasing);
443302
}
444303

445304
/* C++-style comments */
@@ -450,7 +309,7 @@ token_t lex_token_internal(bool aliasing)
450309
next_char = SOURCE->elements[pos];
451310
} while (next_char && !is_newline(next_char));
452311
SOURCE->size = pos;
453-
return lex_token_internal(aliasing);
312+
return lex_token_impl(aliasing);
454313
}
455314

456315
if (next_char == '=') {
@@ -485,14 +344,14 @@ token_t lex_token_internal(bool aliasing)
485344
} while (is_hex(read_char(false)));
486345

487346
} else if (token_str[0] == '0' && ((next_char | 32) == 'b')) {
488-
/* Binary: starts with 0b or 0B */
347+
/* Binary literal: 0b or 0B */
489348
if (i >= MAX_TOKEN_LEN - 1)
490349
error("Token too long");
491350
token_str[i++] = next_char;
492351

493352
read_char(false);
494353
if (next_char != '0' && next_char != '1')
495-
error("Invalid binary literal: expected 0 or 1 after 0b");
354+
error("Binary literal expects 0 or 1 after 0b");
496355

497356
do {
498357
if (i >= MAX_TOKEN_LEN - 1)
@@ -1010,7 +869,7 @@ token_t lex_token_internal(bool aliasing)
1010869
next_char = SOURCE->elements[SOURCE->size];
1011870
} else
1012871
next_char = read_char(true);
1013-
return lex_token_internal(aliasing);
872+
return lex_token_impl(aliasing);
1014873
}
1015874

1016875
if (next_char == 0)
@@ -1022,64 +881,20 @@ token_t lex_token_internal(bool aliasing)
1022881
return T_eof;
1023882
}
1024883

1025-
/* Enhanced lex_token that returns a full token_info structure */
1026-
token_info_t *lex_token_enhanced(bool aliasing)
1027-
{
1028-
token_info_t *token = token_pool_alloc();
1029-
1030-
/* Save location at start of token */
1031-
int saved_line = current_location.line;
1032-
int saved_column = current_location.column;
1033-
char *saved_filename = current_location.filename;
1034-
1035-
/* Get the token type using existing logic */
1036-
token->type = lex_token_internal(aliasing);
1037-
1038-
/* Copy token string value */
1039-
strcpy(token->value, token_str);
1040-
1041-
/* Restore saved location fields individually */
1042-
token->location.line = saved_line;
1043-
token->location.column = saved_column;
1044-
token->location.filename = saved_filename;
1045884

1046-
/* Add to buffer for lookahead capability */
1047-
token_buffer_push(token);
1048885

1049-
return token;
1050-
}
1051-
1052-
/* Lex next token and returns its token type. To disable aliasing on next
1053-
* token, use 'lex_token_internal'.
1054-
*/
886+
/* Lex next token with aliasing enabled */
1055887
token_t lex_token(void)
1056888
{
1057-
return lex_token_internal(true);
1058-
}
1059-
1060-
/* Advanced lookahead functions using token buffer */
1061-
bool lex_peek_ahead(int offset, token_t expected_type)
1062-
{
1063-
token_info_t *future_token = token_buffer_peek(offset);
1064-
return future_token && future_token->type == expected_type;
889+
return lex_token_impl(true);
1065890
}
1066891

1067-
/* Check if next N tokens match a pattern */
1068-
bool lex_match_sequence(token_t *pattern, int count)
892+
/* Lex next token with explicit aliasing control - kept for compatibility */
893+
token_t lex_token_internal(bool aliasing)
1069894
{
1070-
for (int i = 0; i < count; i++) {
1071-
if (!lex_peek_ahead(i, pattern[i]))
1072-
return false;
1073-
}
1074-
return true;
895+
return lex_token_impl(aliasing);
1075896
}
1076897

1077-
/* Get token value at offset for lookahead inspection */
1078-
char *lex_peek_value(int offset)
1079-
{
1080-
token_info_t *future_token = token_buffer_peek(offset);
1081-
return future_token ? future_token->value : NULL;
1082-
}
1083898

1084899
/* Skip the content. We only need the index where the macro body begins. */
1085900
void skip_macro_body(void)
@@ -1095,7 +910,7 @@ void skip_macro_body(void)
1095910
bool lex_accept_internal(token_t token, bool aliasing)
1096911
{
1097912
if (next_token == token) {
1098-
next_token = lex_token_internal(aliasing);
913+
next_token = lex_token_impl(aliasing);
1099914
return true;
1100915
}
1101916

@@ -1132,7 +947,7 @@ void lex_ident_internal(token_t token, char *value, bool aliasing)
1132947
if (next_token != token)
1133948
error("Unexpected token");
1134949
strcpy(value, token_str);
1135-
next_token = lex_token_internal(aliasing);
950+
next_token = lex_token_impl(aliasing);
1136951
}
1137952

1138953
/* Strictly match next token with given token type and copy token's literal to
@@ -1148,7 +963,7 @@ void lex_expect_internal(token_t token, bool aliasing)
1148963
{
1149964
if (next_token != token)
1150965
error("Unexpected token");
1151-
next_token = lex_token_internal(aliasing);
966+
next_token = lex_token_impl(aliasing);
1152967
}
1153968

1154969
/* Strictly match next token with given token type. To disable aliasing on next

0 commit comments

Comments
 (0)