Refactor and simplify lexer implementation

jserv · jserv · commit e2d1534ac5c5 · 2025-08-28T16:39:58.000+08:00
This commit removes unused token buffer management infrastructure that
was not being utilized.
diff --git a/src/defs.h b/src/defs.h
@@ -212,7 +212,6 @@ typedef struct token_info {
 typedef struct {
     token_info_t *freelist;
     int allocated_count;
-    int reused_count; /* Statistics for debugging */
 } token_pool_t;
 
 /* Token buffer for improved lookahead */
diff --git a/src/lexer.c b/src/lexer.c
@@ -121,143 +121,8 @@ token_t lookup_keyword(char *token)
     return T_identifier;
 }
 
-/* Cleanup function for lexer hashmaps */
-/* Token Memory Management Functions */
-
-/* Initialize token pool for memory reuse */
-void token_pool_init(void)
-{
-    if (TOKEN_POOL)
-        return;
-
-    TOKEN_POOL = arena_alloc(GENERAL_ARENA, sizeof(token_pool_t));
-    if (TOKEN_POOL) {
-        TOKEN_POOL->freelist = NULL;
-        TOKEN_POOL->allocated_count = 0;
-        TOKEN_POOL->reused_count = 0;
-    }
-}
-
-/* Allocate or reuse a token from the pool */
-token_info_t *token_pool_alloc(void)
-{
-    if (!TOKEN_POOL)
-        token_pool_init();
-
-    token_info_t *token;
-
-    if (TOKEN_POOL->freelist) {
-        /* Reuse from freelist */
-        token = TOKEN_POOL->freelist;
-        TOKEN_POOL->freelist = token->next;
-        TOKEN_POOL->reused_count++;
-    } else {
-        /* Allocate new token */
-        token = arena_alloc(GENERAL_ARENA, sizeof(token_info_t));
-        TOKEN_POOL->allocated_count++;
-    }
-
-    /* Clear token data */
-    token->type = T_eof;
-    token->value[0] = '\0';
-    /* Set location fields individually */
-    token->location.line = current_location.line;
-    token->location.column = current_location.column;
-    token->location.filename = current_location.filename;
-    token->next = NULL;
-
-    return token;
-}
-
-/* Return token to freelist for reuse */
-void token_pool_free(token_info_t *token)
-{
-    if (!token || !TOKEN_POOL)
-        return;
-
-    token->next = TOKEN_POOL->freelist;
-    TOKEN_POOL->freelist = token;
-}
-
-/* Initialize token buffer for lookahead */
-void token_buffer_init(void)
-{
-    if (TOKEN_BUFFER)
-        return;
-
-    TOKEN_BUFFER = arena_alloc(GENERAL_ARENA, sizeof(token_buffer_t));
-    TOKEN_BUFFER->head = 0;
-    TOKEN_BUFFER->tail = 0;
-    TOKEN_BUFFER->count = 0;
-
-    for (int i = 0; i < TOKEN_BUFFER_SIZE; i++)
-        TOKEN_BUFFER->tokens[i] = NULL;
-}
-
-/* Add token to buffer */
-void token_buffer_push(token_info_t *token)
-{
-    if (!TOKEN_BUFFER)
-        token_buffer_init();
-
-    if (TOKEN_BUFFER->count >= TOKEN_BUFFER_SIZE) {
-        /* Buffer full, free oldest token */
-        token_info_t *old = TOKEN_BUFFER->tokens[TOKEN_BUFFER->head];
-        token_pool_free(old);
-        TOKEN_BUFFER->head = (TOKEN_BUFFER->head + 1) % TOKEN_BUFFER_SIZE;
-        TOKEN_BUFFER->count--;
-    }
-
-    TOKEN_BUFFER->tokens[TOKEN_BUFFER->tail] = token;
-    TOKEN_BUFFER->tail = (TOKEN_BUFFER->tail + 1) % TOKEN_BUFFER_SIZE;
-    TOKEN_BUFFER->count++;
-}
-
-/* Look ahead N tokens without consuming */
-token_info_t *token_buffer_peek(int offset)
-{
-    if (!TOKEN_BUFFER || offset >= TOKEN_BUFFER->count)
-        return NULL;
-
-    int idx = (TOKEN_BUFFER->head + offset) % TOKEN_BUFFER_SIZE;
-    return TOKEN_BUFFER->tokens[idx];
-}
-
-/* Update source location tracking */
-void update_location(char c)
-{
-    if (c == '\n') {
-        current_location.line++;
-        current_location.column = 1;
-    } else if (c == '\t') {
-        current_location.column += 4; /* Assume 4-space tabs */
-    } else {
-        current_location.column++;
-    }
-}
-
-/* Set current filename for error reporting */
-void set_current_filename(char *filename)
-{
-    current_location.filename = filename;
-    current_location.line = 1;
-    current_location.column = 1;
-}
-
-/* Enhanced error reporting with location */
-void error_with_location(char *msg, source_location_t *loc)
-{
-    if (loc && loc->filename) {
-        printf("%s:%d:%d: error: %s\n", loc->filename, loc->line, loc->column,
-               msg);
-    } else if (loc) {
-        printf("line %d, column %d: error: %s\n", loc->line, loc->column, msg);
-    } else {
-        printf("error: %s\n", msg);
-    }
-    abort();
-}
 
+/* Cleanup function for lexer hashmaps */
 void lexer_cleanup()
 {
     if (DIRECTIVE_MAP) {
@@ -276,11 +141,6 @@ void lexer_cleanup()
      */
     directive_tokens_storage = NULL;
     keyword_tokens_storage = NULL;
-
-    /* Token pool and buffer are also arena-allocated, no explicit free needed
-     */
-    TOKEN_POOL = NULL;
-    TOKEN_BUFFER = NULL;
 }
 
 bool is_whitespace(char c)
@@ -372,7 +232,6 @@ char read_char(bool is_skip_space)
 {
     SOURCE->size++;
     next_char = SOURCE->elements[SOURCE->size];
-    /* TODO: Re-enable after self-hosting: update_location(next_char); */
     if (is_skip_space)
         skip_whitespace();
     return next_char;
@@ -383,10 +242,10 @@ char peek_char(int offset)
     return SOURCE->elements[SOURCE->size + offset];
 }
 
-/* Lex next token and returns its token type. Parameter 'aliasing' is used for
- * disable preprocessor aliasing on identifier tokens.
+/* Lex next token and returns its token type. Parameter 'aliasing' controls
+ * preprocessor aliasing on identifier tokens (true = enable, false = disable).
  */
-token_t lex_token_internal(bool aliasing)
+token_t lex_token_impl(bool aliasing)
 {
     token_str[0] = 0;
 
@@ -431,15 +290,15 @@ token_t lex_token_internal(bool aliasing)
                         next_char = SOURCE->elements[pos];
                         SOURCE->size = pos;
                         skip_whitespace();
-                        return lex_token_internal(aliasing);
+                        return lex_token_impl(aliasing);
                     }
                 }
             } while (next_char);
 
             SOURCE->size = pos;
             if (!next_char)
                 error("Unenclosed C-style comment");
-            return lex_token_internal(aliasing);
+            return lex_token_impl(aliasing);
         }
 
         /* C++-style comments */
@@ -450,7 +309,7 @@ token_t lex_token_internal(bool aliasing)
                 next_char = SOURCE->elements[pos];
             } while (next_char && !is_newline(next_char));
             SOURCE->size = pos;
-            return lex_token_internal(aliasing);
+            return lex_token_impl(aliasing);
         }
 
         if (next_char == '=') {
@@ -485,14 +344,14 @@ token_t lex_token_internal(bool aliasing)
             } while (is_hex(read_char(false)));
 
         } else if (token_str[0] == '0' && ((next_char | 32) == 'b')) {
-            /* Binary: starts with 0b or 0B */
+            /* Binary literal: 0b or 0B */
             if (i >= MAX_TOKEN_LEN - 1)
                 error("Token too long");
             token_str[i++] = next_char;
 
             read_char(false);
             if (next_char != '0' && next_char != '1')
-                error("Invalid binary literal: expected 0 or 1 after 0b");
+                error("Binary literal expects 0 or 1 after 0b");
 
             do {
                 if (i >= MAX_TOKEN_LEN - 1)
@@ -1010,7 +869,7 @@ token_t lex_token_internal(bool aliasing)
             next_char = SOURCE->elements[SOURCE->size];
         } else
             next_char = read_char(true);
-        return lex_token_internal(aliasing);
+        return lex_token_impl(aliasing);
     }
 
     if (next_char == 0)
@@ -1022,64 +881,20 @@ token_t lex_token_internal(bool aliasing)
     return T_eof;
 }
 
-/* Enhanced lex_token that returns a full token_info structure */
-token_info_t *lex_token_enhanced(bool aliasing)
-{
-    token_info_t *token = token_pool_alloc();
-
-    /* Save location at start of token */
-    int saved_line = current_location.line;
-    int saved_column = current_location.column;
-    char *saved_filename = current_location.filename;
-
-    /* Get the token type using existing logic */
-    token->type = lex_token_internal(aliasing);
-
-    /* Copy token string value */
-    strcpy(token->value, token_str);
-
-    /* Restore saved location fields individually */
-    token->location.line = saved_line;
-    token->location.column = saved_column;
-    token->location.filename = saved_filename;
 
-    /* Add to buffer for lookahead capability */
-    token_buffer_push(token);
 
-    return token;
-}
-
-/* Lex next token and returns its token type. To disable aliasing on next
- * token, use 'lex_token_internal'.
- */
+/* Lex next token with aliasing enabled */
 token_t lex_token(void)
 {
-    return lex_token_internal(true);
-}
-
-/* Advanced lookahead functions using token buffer */
-bool lex_peek_ahead(int offset, token_t expected_type)
-{
-    token_info_t *future_token = token_buffer_peek(offset);
-    return future_token && future_token->type == expected_type;
+    return lex_token_impl(true);
 }
 
-/* Check if next N tokens match a pattern */
-bool lex_match_sequence(token_t *pattern, int count)
+/* Lex next token with explicit aliasing control - kept for compatibility */
+token_t lex_token_internal(bool aliasing)
 {
-    for (int i = 0; i < count; i++) {
-        if (!lex_peek_ahead(i, pattern[i]))
-            return false;
-    }
-    return true;
+    return lex_token_impl(aliasing);
 }
 
-/* Get token value at offset for lookahead inspection */
-char *lex_peek_value(int offset)
-{
-    token_info_t *future_token = token_buffer_peek(offset);
-    return future_token ? future_token->value : NULL;
-}
 
 /* Skip the content. We only need the index where the macro body begins. */
 void skip_macro_body(void)
@@ -1095,7 +910,7 @@ void skip_macro_body(void)
 bool lex_accept_internal(token_t token, bool aliasing)
 {
     if (next_token == token) {
-        next_token = lex_token_internal(aliasing);
+        next_token = lex_token_impl(aliasing);
         return true;
     }
 
@@ -1132,7 +947,7 @@ void lex_ident_internal(token_t token, char *value, bool aliasing)
     if (next_token != token)
         error("Unexpected token");
     strcpy(value, token_str);
-    next_token = lex_token_internal(aliasing);
+    next_token = lex_token_impl(aliasing);
 }
 
 /* Strictly match next token with given token type and copy token's literal to
@@ -1148,7 +963,7 @@ void lex_expect_internal(token_t token, bool aliasing)
 {
     if (next_token != token)
         error("Unexpected token");
-    next_token = lex_token_internal(aliasing);
+    next_token = lex_token_impl(aliasing);
 }
 
 /* Strictly match next token with given token type. To disable aliasing on next