Skip to content

Commit 48d2b1a

Browse files
authored
Merge pull request #254 from sysprog21/token-mm
Implement token pool and buffer management
2 parents 4231457 + ebe8145 commit 48d2b1a

File tree

3 files changed

+240
-4
lines changed

3 files changed

+240
-4
lines changed

src/defs.h

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
#define MAX_PARAMS 8
2020
#define MAX_LOCALS 1600
2121
#define MAX_FIELDS 64
22-
#define MAX_TYPES 128
23-
#define MAX_IR_INSTR 60000
22+
#define MAX_TYPES 256
23+
#define MAX_IR_INSTR 80000
2424
#define MAX_BB_PRED 128
2525
#define MAX_BB_DOM_SUCC 64
2626
#define MAX_BB_RDOM_SUCC 256
@@ -180,6 +180,37 @@ typedef enum {
180180
T_cppd_pragma
181181
} token_t;
182182

183+
/* Position of a character or token in the input, kept so diagnostics can
 * point at the offending source text.
 */
typedef struct {
    int line;       /* Line number */
    int column;     /* Column number within the line */
    char *filename; /* Name of the file being read; may be NULL */
} source_location_t;
189+
190+
/* Token structure with metadata for enhanced lexing */
191+
typedef struct token_info {
192+
token_t type;
193+
char value[MAX_TOKEN_LEN];
194+
source_location_t location;
195+
struct token_info *next; /* For freelist management */
196+
} token_info_t;
197+
198+
/* Token freelist for memory reuse */
199+
typedef struct {
200+
token_info_t *freelist;
201+
int allocated_count;
202+
int reused_count; /* Statistics for debugging */
203+
} token_pool_t;
204+
205+
/* Token buffer for improved lookahead */
206+
#define TOKEN_BUFFER_SIZE 8
207+
typedef struct {
208+
token_info_t *tokens[TOKEN_BUFFER_SIZE];
209+
int head;
210+
int tail;
211+
int count;
212+
} token_buffer_t;
213+
183214
/* builtin types */
184215
typedef enum {
185216
TYPE_void = 0,

src/globals.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ token_t next_token;
1919
char next_char;
2020
bool skip_newline = true;
2121

22+
/* Token memory management */
23+
token_pool_t *TOKEN_POOL;
24+
token_buffer_t *TOKEN_BUFFER;
25+
source_location_t current_location; /* Will be initialized at runtime */
26+
2227
bool preproc_match;
2328

2429
/* Point to the first character after where the macro has been called. It is
@@ -1120,6 +1125,13 @@ void global_init(void)
11201125
SOURCE = strbuf_create(MAX_SOURCE);
11211126
FUNC_MAP = hashmap_create(DEFAULT_FUNCS_SIZE);
11221127
INCLUSION_MAP = hashmap_create(DEFAULT_INCLUSIONS_SIZE);
1128+
1129+
/* Initialize token management globals */
1130+
current_location.line = 1;
1131+
current_location.column = 1;
1132+
current_location.filename = NULL;
1133+
TOKEN_POOL = NULL;
1134+
TOKEN_BUFFER = NULL;
11231135
ALIASES_MAP = hashmap_create(MAX_ALIASES);
11241136
CONSTANTS_MAP = hashmap_create(MAX_CONSTANTS);
11251137

@@ -1195,8 +1207,8 @@ void error(char *msg)
11951207

11961208
strcpy(diagnostic + i, "^ Error occurs here");
11971209

1198-
/* TODO: figure out the corresponding C source file path and report line
1199-
* number.
1210+
/* TODO: Enhanced error reporting with location tracking will be added
1211+
* once self-hosting is stable with new token management
12001212
*/
12011213
printf("[Error]: %s\nOccurs at source location %d.\n%s\n", msg,
12021214
SOURCE->size, diagnostic);

src/lexer.c

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,142 @@ token_t lookup_keyword(char *token)
122122
}
123123

124124
/* Cleanup function for lexer hashmaps */
125+
/* Token Memory Management Functions */
126+
127+
/* Initialize token pool for memory reuse */
128+
void token_pool_init(void)
129+
{
130+
if (TOKEN_POOL)
131+
return;
132+
133+
TOKEN_POOL = arena_alloc(GENERAL_ARENA, sizeof(token_pool_t));
134+
if (TOKEN_POOL) {
135+
TOKEN_POOL->freelist = NULL;
136+
TOKEN_POOL->allocated_count = 0;
137+
TOKEN_POOL->reused_count = 0;
138+
}
139+
}
140+
141+
/* Allocate or reuse a token from the pool */
142+
token_info_t *token_pool_alloc(void)
143+
{
144+
if (!TOKEN_POOL)
145+
token_pool_init();
146+
147+
token_info_t *token;
148+
149+
if (TOKEN_POOL->freelist) {
150+
/* Reuse from freelist */
151+
token = TOKEN_POOL->freelist;
152+
TOKEN_POOL->freelist = token->next;
153+
TOKEN_POOL->reused_count++;
154+
} else {
155+
/* Allocate new token */
156+
token = arena_alloc(GENERAL_ARENA, sizeof(token_info_t));
157+
TOKEN_POOL->allocated_count++;
158+
}
159+
160+
/* Clear token data */
161+
token->type = T_eof;
162+
token->value[0] = '\0';
163+
/* Set location fields individually */
164+
token->location.line = current_location.line;
165+
token->location.column = current_location.column;
166+
token->location.filename = current_location.filename;
167+
token->next = NULL;
168+
169+
return token;
170+
}
171+
172+
/* Return token to freelist for reuse */
173+
void token_pool_free(token_info_t *token)
174+
{
175+
if (!token || !TOKEN_POOL)
176+
return;
177+
178+
token->next = TOKEN_POOL->freelist;
179+
TOKEN_POOL->freelist = token;
180+
}
181+
182+
/* Initialize token buffer for lookahead */
183+
void token_buffer_init(void)
184+
{
185+
if (TOKEN_BUFFER)
186+
return;
187+
188+
TOKEN_BUFFER = arena_alloc(GENERAL_ARENA, sizeof(token_buffer_t));
189+
TOKEN_BUFFER->head = 0;
190+
TOKEN_BUFFER->tail = 0;
191+
TOKEN_BUFFER->count = 0;
192+
193+
for (int i = 0; i < TOKEN_BUFFER_SIZE; i++)
194+
TOKEN_BUFFER->tokens[i] = NULL;
195+
}
196+
197+
/* Add token to buffer */
198+
void token_buffer_push(token_info_t *token)
199+
{
200+
if (!TOKEN_BUFFER)
201+
token_buffer_init();
202+
203+
if (TOKEN_BUFFER->count >= TOKEN_BUFFER_SIZE) {
204+
/* Buffer full, free oldest token */
205+
token_info_t *old = TOKEN_BUFFER->tokens[TOKEN_BUFFER->head];
206+
token_pool_free(old);
207+
TOKEN_BUFFER->head = (TOKEN_BUFFER->head + 1) % TOKEN_BUFFER_SIZE;
208+
TOKEN_BUFFER->count--;
209+
}
210+
211+
TOKEN_BUFFER->tokens[TOKEN_BUFFER->tail] = token;
212+
TOKEN_BUFFER->tail = (TOKEN_BUFFER->tail + 1) % TOKEN_BUFFER_SIZE;
213+
TOKEN_BUFFER->count++;
214+
}
215+
216+
/* Look ahead N tokens without consuming */
217+
token_info_t *token_buffer_peek(int offset)
218+
{
219+
if (!TOKEN_BUFFER || offset >= TOKEN_BUFFER->count)
220+
return NULL;
221+
222+
int idx = (TOKEN_BUFFER->head + offset) % TOKEN_BUFFER_SIZE;
223+
return TOKEN_BUFFER->tokens[idx];
224+
}
225+
226+
/* Update source location tracking */
227+
void update_location(char c)
228+
{
229+
if (c == '\n') {
230+
current_location.line++;
231+
current_location.column = 1;
232+
} else if (c == '\t') {
233+
current_location.column += 4; /* Assume 4-space tabs */
234+
} else {
235+
current_location.column++;
236+
}
237+
}
238+
239+
/* Set current filename for error reporting */
240+
void set_current_filename(char *filename)
241+
{
242+
current_location.filename = filename;
243+
current_location.line = 1;
244+
current_location.column = 1;
245+
}
246+
247+
/* Enhanced error reporting with location */
248+
void error_with_location(char *msg, source_location_t *loc)
249+
{
250+
if (loc && loc->filename) {
251+
printf("%s:%d:%d: error: %s\n", loc->filename, loc->line, loc->column,
252+
msg);
253+
} else if (loc) {
254+
printf("line %d, column %d: error: %s\n", loc->line, loc->column, msg);
255+
} else {
256+
printf("error: %s\n", msg);
257+
}
258+
abort();
259+
}
260+
125261
void lexer_cleanup()
126262
{
127263
if (DIRECTIVE_MAP) {
@@ -140,6 +276,11 @@ void lexer_cleanup()
140276
*/
141277
directive_tokens_storage = NULL;
142278
keyword_tokens_storage = NULL;
279+
280+
/* Token pool and buffer are also arena-allocated, no explicit free needed
281+
*/
282+
TOKEN_POOL = NULL;
283+
TOKEN_BUFFER = NULL;
143284
}
144285

145286
bool is_whitespace(char c)
@@ -231,6 +372,7 @@ char read_char(bool is_skip_space)
231372
{
232373
SOURCE->size++;
233374
next_char = SOURCE->elements[SOURCE->size];
375+
/* TODO: Re-enable after self-hosting: update_location(next_char); */
234376
if (is_skip_space)
235377
skip_whitespace();
236378
return next_char;
@@ -807,6 +949,33 @@ token_t lex_token_internal(bool aliasing)
807949
return T_eof;
808950
}
809951

952+
/* Enhanced lex_token that returns a full token_info structure */
953+
token_info_t *lex_token_enhanced(bool aliasing)
954+
{
955+
token_info_t *token = token_pool_alloc();
956+
957+
/* Save location at start of token */
958+
int saved_line = current_location.line;
959+
int saved_column = current_location.column;
960+
char *saved_filename = current_location.filename;
961+
962+
/* Get the token type using existing logic */
963+
token->type = lex_token_internal(aliasing);
964+
965+
/* Copy token string value */
966+
strcpy(token->value, token_str);
967+
968+
/* Restore saved location fields individually */
969+
token->location.line = saved_line;
970+
token->location.column = saved_column;
971+
token->location.filename = saved_filename;
972+
973+
/* Add to buffer for lookahead capability */
974+
token_buffer_push(token);
975+
976+
return token;
977+
}
978+
810979
/* Lex next token and returns its token type. To disable aliasing on next
811980
* token, use 'lex_token_internal'.
812981
*/
@@ -815,6 +984,30 @@ token_t lex_token(void)
815984
return lex_token_internal(true);
816985
}
817986

987+
/* Advanced lookahead functions using token buffer */
988+
bool lex_peek_ahead(int offset, token_t expected_type)
989+
{
990+
token_info_t *future_token = token_buffer_peek(offset);
991+
return future_token && future_token->type == expected_type;
992+
}
993+
994+
/* Check if next N tokens match a pattern */
995+
bool lex_match_sequence(token_t *pattern, int count)
996+
{
997+
for (int i = 0; i < count; i++) {
998+
if (!lex_peek_ahead(i, pattern[i]))
999+
return false;
1000+
}
1001+
return true;
1002+
}
1003+
1004+
/* Get token value at offset for lookahead inspection */
1005+
char *lex_peek_value(int offset)
1006+
{
1007+
token_info_t *future_token = token_buffer_peek(offset);
1008+
return future_token ? future_token->value : NULL;
1009+
}
1010+
8181011
/* Skip the content. We only need the index where the macro body begins. */
8191012
void skip_macro_body(void)
8201013
{

0 commit comments

Comments
 (0)