@@ -122,6 +122,142 @@ token_t lookup_keyword(char *token)
122122}
123123
124124/* Cleanup function for lexer hashmaps */
125+ /* Token Memory Management Functions */
126+
127+ /* Initialize token pool for memory reuse */
128+ void token_pool_init (void )
129+ {
130+ if (TOKEN_POOL )
131+ return ;
132+
133+ TOKEN_POOL = arena_alloc (GENERAL_ARENA , sizeof (token_pool_t ));
134+ if (TOKEN_POOL ) {
135+ TOKEN_POOL -> freelist = NULL ;
136+ TOKEN_POOL -> allocated_count = 0 ;
137+ TOKEN_POOL -> reused_count = 0 ;
138+ }
139+ }
140+
141+ /* Allocate or reuse a token from the pool */
142+ token_info_t * token_pool_alloc (void )
143+ {
144+ if (!TOKEN_POOL )
145+ token_pool_init ();
146+
147+ token_info_t * token ;
148+
149+ if (TOKEN_POOL -> freelist ) {
150+ /* Reuse from freelist */
151+ token = TOKEN_POOL -> freelist ;
152+ TOKEN_POOL -> freelist = token -> next ;
153+ TOKEN_POOL -> reused_count ++ ;
154+ } else {
155+ /* Allocate new token */
156+ token = arena_alloc (GENERAL_ARENA , sizeof (token_info_t ));
157+ TOKEN_POOL -> allocated_count ++ ;
158+ }
159+
160+ /* Clear token data */
161+ token -> type = T_eof ;
162+ token -> value [0 ] = '\0' ;
163+ /* Set location fields individually (shecc doesn't support struct assignment) */
164+ token -> location .line = current_location .line ;
165+ token -> location .column = current_location .column ;
166+ token -> location .filename = current_location .filename ;
167+ token -> next = NULL ;
168+
169+ return token ;
170+ }
171+
172+ /* Return token to freelist for reuse */
173+ void token_pool_free (token_info_t * token )
174+ {
175+ if (!token || !TOKEN_POOL )
176+ return ;
177+
178+ token -> next = TOKEN_POOL -> freelist ;
179+ TOKEN_POOL -> freelist = token ;
180+ }
181+
182+ /* Initialize token buffer for lookahead */
183+ void token_buffer_init (void )
184+ {
185+ if (TOKEN_BUFFER )
186+ return ;
187+
188+ TOKEN_BUFFER = arena_alloc (GENERAL_ARENA , sizeof (token_buffer_t ));
189+ TOKEN_BUFFER -> head = 0 ;
190+ TOKEN_BUFFER -> tail = 0 ;
191+ TOKEN_BUFFER -> count = 0 ;
192+
193+ for (int i = 0 ; i < TOKEN_BUFFER_SIZE ; i ++ )
194+ TOKEN_BUFFER -> tokens [i ] = NULL ;
195+ }
196+
197+ /* Add token to buffer */
198+ void token_buffer_push (token_info_t * token )
199+ {
200+ if (!TOKEN_BUFFER )
201+ token_buffer_init ();
202+
203+ if (TOKEN_BUFFER -> count >= TOKEN_BUFFER_SIZE ) {
204+ /* Buffer full, free oldest token */
205+ token_info_t * old = TOKEN_BUFFER -> tokens [TOKEN_BUFFER -> head ];
206+ token_pool_free (old );
207+ TOKEN_BUFFER -> head = (TOKEN_BUFFER -> head + 1 ) % TOKEN_BUFFER_SIZE ;
208+ TOKEN_BUFFER -> count -- ;
209+ }
210+
211+ TOKEN_BUFFER -> tokens [TOKEN_BUFFER -> tail ] = token ;
212+ TOKEN_BUFFER -> tail = (TOKEN_BUFFER -> tail + 1 ) % TOKEN_BUFFER_SIZE ;
213+ TOKEN_BUFFER -> count ++ ;
214+ }
215+
216+ /* Look ahead N tokens without consuming */
217+ token_info_t * token_buffer_peek (int offset )
218+ {
219+ if (!TOKEN_BUFFER || offset >= TOKEN_BUFFER -> count )
220+ return NULL ;
221+
222+ int idx = (TOKEN_BUFFER -> head + offset ) % TOKEN_BUFFER_SIZE ;
223+ return TOKEN_BUFFER -> tokens [idx ];
224+ }
225+
226+ /* Update source location tracking */
227+ void update_location (char c )
228+ {
229+ if (c == '\n' ) {
230+ current_location .line ++ ;
231+ current_location .column = 1 ;
232+ } else if (c == '\t' ) {
233+ current_location .column += 4 ; /* Assume 4-space tabs */
234+ } else {
235+ current_location .column ++ ;
236+ }
237+ }
238+
239+ /* Set current filename for error reporting */
240+ void set_current_filename (char * filename )
241+ {
242+ current_location .filename = filename ;
243+ current_location .line = 1 ;
244+ current_location .column = 1 ;
245+ }
246+
247+ /* Enhanced error reporting with location */
248+ void error_with_location (char * msg , source_location_t * loc )
249+ {
250+ if (loc && loc -> filename ) {
251+ printf ("%s:%d:%d: error: %s\n" , loc -> filename , loc -> line , loc -> column ,
252+ msg );
253+ } else if (loc ) {
254+ printf ("line %d, column %d: error: %s\n" , loc -> line , loc -> column , msg );
255+ } else {
256+ printf ("error: %s\n" , msg );
257+ }
258+ abort ();
259+ }
260+
125261void lexer_cleanup ()
126262{
127263 if (DIRECTIVE_MAP ) {
@@ -140,6 +276,11 @@ void lexer_cleanup()
140276 */
141277 directive_tokens_storage = NULL ;
142278 keyword_tokens_storage = NULL ;
279+
280+ /* Token pool and buffer are also arena-allocated, no explicit free needed
281+ */
282+ TOKEN_POOL = NULL ;
283+ TOKEN_BUFFER = NULL ;
143284}
144285
145286bool is_whitespace (char c )
@@ -231,6 +372,7 @@ char read_char(bool is_skip_space)
231372{
232373 SOURCE -> size ++ ;
233374 next_char = SOURCE -> elements [SOURCE -> size ];
375+ /* TODO: Re-enable after self-hosting: update_location(next_char); */
234376 if (is_skip_space )
235377 skip_whitespace ();
236378 return next_char ;
@@ -807,6 +949,33 @@ token_t lex_token_internal(bool aliasing)
807949 return T_eof ;
808950}
809951
952+ /* Enhanced lex_token that returns a full token_info structure */
953+ token_info_t * lex_token_enhanced (bool aliasing )
954+ {
955+ token_info_t * token = token_pool_alloc ();
956+
957+ /* Save location at start of token (field by field - no struct assignment) */
958+ int saved_line = current_location .line ;
959+ int saved_column = current_location .column ;
960+ char * saved_filename = current_location .filename ;
961+
962+ /* Get the token type using existing logic */
963+ token -> type = lex_token_internal (aliasing );
964+
965+ /* Copy token string value */
966+ strcpy (token -> value , token_str );
967+
968+ /* Restore saved location fields individually */
969+ token -> location .line = saved_line ;
970+ token -> location .column = saved_column ;
971+ token -> location .filename = saved_filename ;
972+
973+ /* Add to buffer for lookahead capability */
974+ token_buffer_push (token );
975+
976+ return token ;
977+ }
978+
810979/* Lex next token and returns its token type. To disable aliasing on next
811980 * token, use 'lex_token_internal'.
812981 */
@@ -815,6 +984,30 @@ token_t lex_token(void)
815984 return lex_token_internal (true);
816985}
817986
987+ /* Advanced lookahead functions using token buffer */
988+ bool lex_peek_ahead (int offset , token_t expected_type )
989+ {
990+ token_info_t * future_token = token_buffer_peek (offset );
991+ return future_token && future_token -> type == expected_type ;
992+ }
993+
994+ /* Check if next N tokens match a pattern */
995+ bool lex_match_sequence (token_t * pattern , int count )
996+ {
997+ for (int i = 0 ; i < count ; i ++ ) {
998+ if (!lex_peek_ahead (i , pattern [i ]))
999+ return false;
1000+ }
1001+ return true;
1002+ }
1003+
1004+ /* Get token value at offset for lookahead inspection */
1005+ char * lex_peek_value (int offset )
1006+ {
1007+ token_info_t * future_token = token_buffer_peek (offset );
1008+ return future_token ? future_token -> value : NULL ;
1009+ }
1010+
8181011/* Skip the content. We only need the index where the macro body begins. */
8191012void skip_macro_body (void )
8201013{
0 commit comments