@@ -121,143 +121,8 @@ token_t lookup_keyword(char *token)
121
121
return T_identifier ;
122
122
}
123
123
124
- /* Cleanup function for lexer hashmaps */
125
- /* Token Memory Management Functions */
126
-
127
- /* Initialize token pool for memory reuse */
128
- void token_pool_init (void )
129
- {
130
- if (TOKEN_POOL )
131
- return ;
132
-
133
- TOKEN_POOL = arena_alloc (GENERAL_ARENA , sizeof (token_pool_t ));
134
- if (TOKEN_POOL ) {
135
- TOKEN_POOL -> freelist = NULL ;
136
- TOKEN_POOL -> allocated_count = 0 ;
137
- TOKEN_POOL -> reused_count = 0 ;
138
- }
139
- }
140
-
141
- /* Allocate or reuse a token from the pool */
142
- token_info_t * token_pool_alloc (void )
143
- {
144
- if (!TOKEN_POOL )
145
- token_pool_init ();
146
-
147
- token_info_t * token ;
148
-
149
- if (TOKEN_POOL -> freelist ) {
150
- /* Reuse from freelist */
151
- token = TOKEN_POOL -> freelist ;
152
- TOKEN_POOL -> freelist = token -> next ;
153
- TOKEN_POOL -> reused_count ++ ;
154
- } else {
155
- /* Allocate new token */
156
- token = arena_alloc (GENERAL_ARENA , sizeof (token_info_t ));
157
- TOKEN_POOL -> allocated_count ++ ;
158
- }
159
-
160
- /* Clear token data */
161
- token -> type = T_eof ;
162
- token -> value [0 ] = '\0' ;
163
- /* Set location fields individually */
164
- token -> location .line = current_location .line ;
165
- token -> location .column = current_location .column ;
166
- token -> location .filename = current_location .filename ;
167
- token -> next = NULL ;
168
-
169
- return token ;
170
- }
171
-
172
- /* Return token to freelist for reuse */
173
- void token_pool_free (token_info_t * token )
174
- {
175
- if (!token || !TOKEN_POOL )
176
- return ;
177
-
178
- token -> next = TOKEN_POOL -> freelist ;
179
- TOKEN_POOL -> freelist = token ;
180
- }
181
-
182
- /* Initialize token buffer for lookahead */
183
- void token_buffer_init (void )
184
- {
185
- if (TOKEN_BUFFER )
186
- return ;
187
-
188
- TOKEN_BUFFER = arena_alloc (GENERAL_ARENA , sizeof (token_buffer_t ));
189
- TOKEN_BUFFER -> head = 0 ;
190
- TOKEN_BUFFER -> tail = 0 ;
191
- TOKEN_BUFFER -> count = 0 ;
192
-
193
- for (int i = 0 ; i < TOKEN_BUFFER_SIZE ; i ++ )
194
- TOKEN_BUFFER -> tokens [i ] = NULL ;
195
- }
196
-
197
- /* Add token to buffer */
198
- void token_buffer_push (token_info_t * token )
199
- {
200
- if (!TOKEN_BUFFER )
201
- token_buffer_init ();
202
-
203
- if (TOKEN_BUFFER -> count >= TOKEN_BUFFER_SIZE ) {
204
- /* Buffer full, free oldest token */
205
- token_info_t * old = TOKEN_BUFFER -> tokens [TOKEN_BUFFER -> head ];
206
- token_pool_free (old );
207
- TOKEN_BUFFER -> head = (TOKEN_BUFFER -> head + 1 ) % TOKEN_BUFFER_SIZE ;
208
- TOKEN_BUFFER -> count -- ;
209
- }
210
-
211
- TOKEN_BUFFER -> tokens [TOKEN_BUFFER -> tail ] = token ;
212
- TOKEN_BUFFER -> tail = (TOKEN_BUFFER -> tail + 1 ) % TOKEN_BUFFER_SIZE ;
213
- TOKEN_BUFFER -> count ++ ;
214
- }
215
-
216
- /* Look ahead N tokens without consuming */
217
- token_info_t * token_buffer_peek (int offset )
218
- {
219
- if (!TOKEN_BUFFER || offset >= TOKEN_BUFFER -> count )
220
- return NULL ;
221
-
222
- int idx = (TOKEN_BUFFER -> head + offset ) % TOKEN_BUFFER_SIZE ;
223
- return TOKEN_BUFFER -> tokens [idx ];
224
- }
225
-
226
- /* Update source location tracking */
227
- void update_location (char c )
228
- {
229
- if (c == '\n' ) {
230
- current_location .line ++ ;
231
- current_location .column = 1 ;
232
- } else if (c == '\t' ) {
233
- current_location .column += 4 ; /* Assume 4-space tabs */
234
- } else {
235
- current_location .column ++ ;
236
- }
237
- }
238
-
239
- /* Set current filename for error reporting */
240
- void set_current_filename (char * filename )
241
- {
242
- current_location .filename = filename ;
243
- current_location .line = 1 ;
244
- current_location .column = 1 ;
245
- }
246
-
247
- /* Enhanced error reporting with location */
248
- void error_with_location (char * msg , source_location_t * loc )
249
- {
250
- if (loc && loc -> filename ) {
251
- printf ("%s:%d:%d: error: %s\n" , loc -> filename , loc -> line , loc -> column ,
252
- msg );
253
- } else if (loc ) {
254
- printf ("line %d, column %d: error: %s\n" , loc -> line , loc -> column , msg );
255
- } else {
256
- printf ("error: %s\n" , msg );
257
- }
258
- abort ();
259
- }
260
124
125
+ /* Cleanup function for lexer hashmaps */
261
126
void lexer_cleanup ()
262
127
{
263
128
if (DIRECTIVE_MAP ) {
@@ -276,11 +141,6 @@ void lexer_cleanup()
276
141
*/
277
142
directive_tokens_storage = NULL ;
278
143
keyword_tokens_storage = NULL ;
279
-
280
- /* Token pool and buffer are also arena-allocated, no explicit free needed
281
- */
282
- TOKEN_POOL = NULL ;
283
- TOKEN_BUFFER = NULL ;
284
144
}
285
145
286
146
bool is_whitespace (char c )
@@ -372,7 +232,6 @@ char read_char(bool is_skip_space)
372
232
{
373
233
SOURCE -> size ++ ;
374
234
next_char = SOURCE -> elements [SOURCE -> size ];
375
- /* TODO: Re-enable after self-hosting: update_location(next_char); */
376
235
if (is_skip_space )
377
236
skip_whitespace ();
378
237
return next_char ;
@@ -383,10 +242,10 @@ char peek_char(int offset)
383
242
return SOURCE -> elements [SOURCE -> size + offset ];
384
243
}
385
244
386
- /* Lex next token and returns its token type. Parameter 'aliasing' is used for
387
- * disable preprocessor aliasing on identifier tokens.
245
+ /* Lex the next token and return its token type. Parameter 'aliasing' controls
246
+ * preprocessor aliasing on identifier tokens (true = enable, false = disable).
388
247
*/
389
- token_t lex_token_internal (bool aliasing )
248
+ token_t lex_token_impl (bool aliasing )
390
249
{
391
250
token_str [0 ] = 0 ;
392
251
@@ -431,15 +290,15 @@ token_t lex_token_internal(bool aliasing)
431
290
next_char = SOURCE -> elements [pos ];
432
291
SOURCE -> size = pos ;
433
292
skip_whitespace ();
434
- return lex_token_internal (aliasing );
293
+ return lex_token_impl (aliasing );
435
294
}
436
295
}
437
296
} while (next_char );
438
297
439
298
SOURCE -> size = pos ;
440
299
if (!next_char )
441
300
error ("Unenclosed C-style comment" );
442
- return lex_token_internal (aliasing );
301
+ return lex_token_impl (aliasing );
443
302
}
444
303
445
304
/* C++-style comments */
@@ -450,7 +309,7 @@ token_t lex_token_internal(bool aliasing)
450
309
next_char = SOURCE -> elements [pos ];
451
310
} while (next_char && !is_newline (next_char ));
452
311
SOURCE -> size = pos ;
453
- return lex_token_internal (aliasing );
312
+ return lex_token_impl (aliasing );
454
313
}
455
314
456
315
if (next_char == '=' ) {
@@ -485,14 +344,14 @@ token_t lex_token_internal(bool aliasing)
485
344
} while (is_hex (read_char (false)));
486
345
487
346
} else if (token_str [0 ] == '0' && ((next_char | 32 ) == 'b' )) {
488
- /* Binary: starts with 0b or 0B */
347
+ /* Binary literal: 0b or 0B */
489
348
if (i >= MAX_TOKEN_LEN - 1 )
490
349
error ("Token too long" );
491
350
token_str [i ++ ] = next_char ;
492
351
493
352
read_char (false);
494
353
if (next_char != '0' && next_char != '1' )
495
- error ("Invalid binary literal: expected 0 or 1 after 0b" );
354
+ error ("Binary literal expects 0 or 1 after 0b" );
496
355
497
356
do {
498
357
if (i >= MAX_TOKEN_LEN - 1 )
@@ -1010,7 +869,7 @@ token_t lex_token_internal(bool aliasing)
1010
869
next_char = SOURCE -> elements [SOURCE -> size ];
1011
870
} else
1012
871
next_char = read_char (true);
1013
- return lex_token_internal (aliasing );
872
+ return lex_token_impl (aliasing );
1014
873
}
1015
874
1016
875
if (next_char == 0 )
@@ -1022,64 +881,20 @@ token_t lex_token_internal(bool aliasing)
1022
881
return T_eof ;
1023
882
}
1024
883
1025
- /* Enhanced lex_token that returns a full token_info structure */
1026
- token_info_t * lex_token_enhanced (bool aliasing )
1027
- {
1028
- token_info_t * token = token_pool_alloc ();
1029
-
1030
- /* Save location at start of token */
1031
- int saved_line = current_location .line ;
1032
- int saved_column = current_location .column ;
1033
- char * saved_filename = current_location .filename ;
1034
-
1035
- /* Get the token type using existing logic */
1036
- token -> type = lex_token_internal (aliasing );
1037
-
1038
- /* Copy token string value */
1039
- strcpy (token -> value , token_str );
1040
-
1041
- /* Restore saved location fields individually */
1042
- token -> location .line = saved_line ;
1043
- token -> location .column = saved_column ;
1044
- token -> location .filename = saved_filename ;
1045
884
1046
- /* Add to buffer for lookahead capability */
1047
- token_buffer_push (token );
1048
885
1049
- return token ;
1050
- }
1051
-
1052
- /* Lex next token and returns its token type. To disable aliasing on next
1053
- * token, use 'lex_token_internal'.
1054
- */
886
+ /* Lex next token with aliasing enabled */
1055
887
token_t lex_token (void )
1056
888
{
1057
- return lex_token_internal (true);
1058
- }
1059
-
1060
- /* Advanced lookahead functions using token buffer */
1061
- bool lex_peek_ahead (int offset , token_t expected_type )
1062
- {
1063
- token_info_t * future_token = token_buffer_peek (offset );
1064
- return future_token && future_token -> type == expected_type ;
889
+ return lex_token_impl (true);
1065
890
}
1066
891
1067
- /* Check if next N tokens match a pattern */
1068
- bool lex_match_sequence ( token_t * pattern , int count )
892
+ /* Lex next token with explicit aliasing control - kept for compatibility */
893
+ token_t lex_token_internal ( bool aliasing )
1069
894
{
1070
- for (int i = 0 ; i < count ; i ++ ) {
1071
- if (!lex_peek_ahead (i , pattern [i ]))
1072
- return false;
1073
- }
1074
- return true;
895
+ return lex_token_impl (aliasing );
1075
896
}
1076
897
1077
- /* Get token value at offset for lookahead inspection */
1078
- char * lex_peek_value (int offset )
1079
- {
1080
- token_info_t * future_token = token_buffer_peek (offset );
1081
- return future_token ? future_token -> value : NULL ;
1082
- }
1083
898
1084
899
/* Skip the content. We only need the index where the macro body begins. */
1085
900
void skip_macro_body (void )
@@ -1095,7 +910,7 @@ void skip_macro_body(void)
1095
910
bool lex_accept_internal (token_t token , bool aliasing )
1096
911
{
1097
912
if (next_token == token ) {
1098
- next_token = lex_token_internal (aliasing );
913
+ next_token = lex_token_impl (aliasing );
1099
914
return true;
1100
915
}
1101
916
@@ -1132,7 +947,7 @@ void lex_ident_internal(token_t token, char *value, bool aliasing)
1132
947
if (next_token != token )
1133
948
error ("Unexpected token" );
1134
949
strcpy (value , token_str );
1135
- next_token = lex_token_internal (aliasing );
950
+ next_token = lex_token_impl (aliasing );
1136
951
}
1137
952
1138
953
/* Strictly match next token with given token type and copy token's literal to
@@ -1148,7 +963,7 @@ void lex_expect_internal(token_t token, bool aliasing)
1148
963
{
1149
964
if (next_token != token )
1150
965
error ("Unexpected token" );
1151
- next_token = lex_token_internal (aliasing );
966
+ next_token = lex_token_impl (aliasing );
1152
967
}
1153
968
1154
969
/* Strictly match next token with given token type. To disable aliasing on next
0 commit comments