@@ -122,6 +122,142 @@ token_t lookup_keyword(char *token)
122
122
}
123
123
124
124
/* Cleanup function for lexer hashmaps */
125
+ /* Token Memory Management Functions */
126
+
127
+ /* Initialize token pool for memory reuse */
128
+ void token_pool_init (void )
129
+ {
130
+ if (TOKEN_POOL )
131
+ return ;
132
+
133
+ TOKEN_POOL = arena_alloc (GENERAL_ARENA , sizeof (token_pool_t ));
134
+ if (TOKEN_POOL ) {
135
+ TOKEN_POOL -> freelist = NULL ;
136
+ TOKEN_POOL -> allocated_count = 0 ;
137
+ TOKEN_POOL -> reused_count = 0 ;
138
+ }
139
+ }
140
+
141
+ /* Allocate or reuse a token from the pool */
142
+ token_info_t * token_pool_alloc (void )
143
+ {
144
+ if (!TOKEN_POOL )
145
+ token_pool_init ();
146
+
147
+ token_info_t * token ;
148
+
149
+ if (TOKEN_POOL -> freelist ) {
150
+ /* Reuse from freelist */
151
+ token = TOKEN_POOL -> freelist ;
152
+ TOKEN_POOL -> freelist = token -> next ;
153
+ TOKEN_POOL -> reused_count ++ ;
154
+ } else {
155
+ /* Allocate new token */
156
+ token = arena_alloc (GENERAL_ARENA , sizeof (token_info_t ));
157
+ TOKEN_POOL -> allocated_count ++ ;
158
+ }
159
+
160
+ /* Clear token data */
161
+ token -> type = T_eof ;
162
+ token -> value [0 ] = '\0' ;
163
+ /* Set location fields individually */
164
+ token -> location .line = current_location .line ;
165
+ token -> location .column = current_location .column ;
166
+ token -> location .filename = current_location .filename ;
167
+ token -> next = NULL ;
168
+
169
+ return token ;
170
+ }
171
+
172
+ /* Return token to freelist for reuse */
173
+ void token_pool_free (token_info_t * token )
174
+ {
175
+ if (!token || !TOKEN_POOL )
176
+ return ;
177
+
178
+ token -> next = TOKEN_POOL -> freelist ;
179
+ TOKEN_POOL -> freelist = token ;
180
+ }
181
+
182
+ /* Initialize token buffer for lookahead */
183
+ void token_buffer_init (void )
184
+ {
185
+ if (TOKEN_BUFFER )
186
+ return ;
187
+
188
+ TOKEN_BUFFER = arena_alloc (GENERAL_ARENA , sizeof (token_buffer_t ));
189
+ TOKEN_BUFFER -> head = 0 ;
190
+ TOKEN_BUFFER -> tail = 0 ;
191
+ TOKEN_BUFFER -> count = 0 ;
192
+
193
+ for (int i = 0 ; i < TOKEN_BUFFER_SIZE ; i ++ )
194
+ TOKEN_BUFFER -> tokens [i ] = NULL ;
195
+ }
196
+
197
+ /* Add token to buffer */
198
+ void token_buffer_push (token_info_t * token )
199
+ {
200
+ if (!TOKEN_BUFFER )
201
+ token_buffer_init ();
202
+
203
+ if (TOKEN_BUFFER -> count >= TOKEN_BUFFER_SIZE ) {
204
+ /* Buffer full, free oldest token */
205
+ token_info_t * old = TOKEN_BUFFER -> tokens [TOKEN_BUFFER -> head ];
206
+ token_pool_free (old );
207
+ TOKEN_BUFFER -> head = (TOKEN_BUFFER -> head + 1 ) % TOKEN_BUFFER_SIZE ;
208
+ TOKEN_BUFFER -> count -- ;
209
+ }
210
+
211
+ TOKEN_BUFFER -> tokens [TOKEN_BUFFER -> tail ] = token ;
212
+ TOKEN_BUFFER -> tail = (TOKEN_BUFFER -> tail + 1 ) % TOKEN_BUFFER_SIZE ;
213
+ TOKEN_BUFFER -> count ++ ;
214
+ }
215
+
216
+ /* Look ahead N tokens without consuming */
217
+ token_info_t * token_buffer_peek (int offset )
218
+ {
219
+ if (!TOKEN_BUFFER || offset >= TOKEN_BUFFER -> count )
220
+ return NULL ;
221
+
222
+ int idx = (TOKEN_BUFFER -> head + offset ) % TOKEN_BUFFER_SIZE ;
223
+ return TOKEN_BUFFER -> tokens [idx ];
224
+ }
225
+
226
+ /* Update source location tracking */
227
+ void update_location (char c )
228
+ {
229
+ if (c == '\n' ) {
230
+ current_location .line ++ ;
231
+ current_location .column = 1 ;
232
+ } else if (c == '\t' ) {
233
+ current_location .column += 4 ; /* Assume 4-space tabs */
234
+ } else {
235
+ current_location .column ++ ;
236
+ }
237
+ }
238
+
239
+ /* Set current filename for error reporting */
240
+ void set_current_filename (char * filename )
241
+ {
242
+ current_location .filename = filename ;
243
+ current_location .line = 1 ;
244
+ current_location .column = 1 ;
245
+ }
246
+
247
+ /* Enhanced error reporting with location */
248
+ void error_with_location (char * msg , source_location_t * loc )
249
+ {
250
+ if (loc && loc -> filename ) {
251
+ printf ("%s:%d:%d: error: %s\n" , loc -> filename , loc -> line , loc -> column ,
252
+ msg );
253
+ } else if (loc ) {
254
+ printf ("line %d, column %d: error: %s\n" , loc -> line , loc -> column , msg );
255
+ } else {
256
+ printf ("error: %s\n" , msg );
257
+ }
258
+ abort ();
259
+ }
260
+
125
261
void lexer_cleanup ()
126
262
{
127
263
if (DIRECTIVE_MAP ) {
@@ -140,6 +276,11 @@ void lexer_cleanup()
140
276
*/
141
277
directive_tokens_storage = NULL ;
142
278
keyword_tokens_storage = NULL ;
279
+
280
+ /* Token pool and buffer are also arena-allocated, no explicit free needed
281
+ */
282
+ TOKEN_POOL = NULL ;
283
+ TOKEN_BUFFER = NULL ;
143
284
}
144
285
145
286
bool is_whitespace (char c )
@@ -231,6 +372,7 @@ char read_char(bool is_skip_space)
231
372
{
232
373
SOURCE -> size ++ ;
233
374
next_char = SOURCE -> elements [SOURCE -> size ];
375
+ /* TODO: Re-enable after self-hosting: update_location(next_char); */
234
376
if (is_skip_space )
235
377
skip_whitespace ();
236
378
return next_char ;
@@ -807,6 +949,33 @@ token_t lex_token_internal(bool aliasing)
807
949
return T_eof ;
808
950
}
809
951
952
+ /* Enhanced lex_token that returns a full token_info structure */
953
+ token_info_t * lex_token_enhanced (bool aliasing )
954
+ {
955
+ token_info_t * token = token_pool_alloc ();
956
+
957
+ /* Save location at start of token */
958
+ int saved_line = current_location .line ;
959
+ int saved_column = current_location .column ;
960
+ char * saved_filename = current_location .filename ;
961
+
962
+ /* Get the token type using existing logic */
963
+ token -> type = lex_token_internal (aliasing );
964
+
965
+ /* Copy token string value */
966
+ strcpy (token -> value , token_str );
967
+
968
+ /* Restore saved location fields individually */
969
+ token -> location .line = saved_line ;
970
+ token -> location .column = saved_column ;
971
+ token -> location .filename = saved_filename ;
972
+
973
+ /* Add to buffer for lookahead capability */
974
+ token_buffer_push (token );
975
+
976
+ return token ;
977
+ }
978
+
810
979
/* Lex next token and returns its token type. To disable aliasing on next
811
980
* token, use 'lex_token_internal'.
812
981
*/
@@ -815,6 +984,30 @@ token_t lex_token(void)
815
984
return lex_token_internal (true);
816
985
}
817
986
987
+ /* Advanced lookahead functions using token buffer */
988
+ bool lex_peek_ahead (int offset , token_t expected_type )
989
+ {
990
+ token_info_t * future_token = token_buffer_peek (offset );
991
+ return future_token && future_token -> type == expected_type ;
992
+ }
993
+
994
+ /* Check if next N tokens match a pattern */
995
+ bool lex_match_sequence (token_t * pattern , int count )
996
+ {
997
+ for (int i = 0 ; i < count ; i ++ ) {
998
+ if (!lex_peek_ahead (i , pattern [i ]))
999
+ return false;
1000
+ }
1001
+ return true;
1002
+ }
1003
+
1004
+ /* Get token value at offset for lookahead inspection */
1005
+ char * lex_peek_value (int offset )
1006
+ {
1007
+ token_info_t * future_token = token_buffer_peek (offset );
1008
+ return future_token ? future_token -> value : NULL ;
1009
+ }
1010
+
818
1011
/* Skip the content. We only need the index where the macro body begins. */
819
1012
void skip_macro_body (void )
820
1013
{
0 commit comments