Skip to content

Commit a1488c3

Browse files
committed
Add const qualifier support
This change lets shecc parse and handle const qualifiers in C code, improving type safety and C standard compliance. The parser now also accepts const qualifiers after a pointer asterisk, enabling declarations like 'int * const ptr', where the pointer itself is const but the pointed-to value can change. Note: this is still partial support, as patterns like 'int const *ptr' are not yet handled. Full const qualifier support would require tracking the const-ness of a pointer separately from the const-ness of the data it points to.
1 parent 3a8d1b3 commit a1488c3

File tree

5 files changed

+57
-17
lines changed

5 files changed

+57
-17
lines changed

src/defs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ typedef enum {
179179
T_break,
180180
T_default,
181181
T_continue,
182+
T_const, /* const qualifier */
182183
/* C pre-processor directives */
183184
T_cppd_include,
184185
T_cppd_define,
@@ -353,7 +354,8 @@ struct var {
353354
int ptr_level;
354355
bool is_func;
355356
bool is_global;
356-
bool address_taken; /* true if variable address was taken (&var) */
357+
bool is_const_qualified; /* true if variable has const qualifier */
358+
bool address_taken; /* true if variable address was taken (&var) */
357359
int array_size;
358360
int array_dim1, array_dim2; /* first/second dimension size for 2D arrays */
359361
int offset; /* offset from stack or frame, index 0 is reserved */

src/elf.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
int elf_symbol_index;
1515

16-
void elf_write_str(strbuf_t *elf_array, char *vals)
16+
void elf_write_str(strbuf_t *elf_array, const char *vals)
1717
{
1818
/*
1919
* Note that strbuf_puts() does not push the null character.
@@ -348,7 +348,7 @@ void elf_align(void)
348348
elf_write_byte(elf_strtab, 0);
349349
}
350350

351-
void elf_add_symbol(char *symbol, int pc)
351+
void elf_add_symbol(const char *symbol, int pc)
352352
{
353353
/* Check for null pointers to prevent crashes */
354354
if (!symbol || !elf_symtab || !elf_strtab) {
@@ -366,7 +366,7 @@ void elf_add_symbol(char *symbol, int pc)
366366
elf_symbol_index++;
367367
}
368368

369-
void elf_generate(char *outfile)
369+
void elf_generate(const char *outfile)
370370
{
371371
elf_align();
372372
elf_generate_header();

src/globals.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1159,7 +1159,7 @@ bool strbuf_putc(strbuf_t *src, char value)
11591159
return true;
11601160
}
11611161

1162-
bool strbuf_puts(strbuf_t *src, char *value)
1162+
bool strbuf_puts(strbuf_t *src, const char *value)
11631163
{
11641164
int len = strlen(value);
11651165

src/lexer.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
/* Hash table constants */
1414
#define NUM_DIRECTIVES 11
15-
#define NUM_KEYWORDS 16
15+
#define NUM_KEYWORDS 17
1616

1717
/* Token mapping structure for elegant initialization */
1818
typedef struct {
@@ -86,6 +86,7 @@ void lex_init_keywords()
8686
{"default", T_default},
8787
{"continue", T_continue},
8888
{"union", T_union},
89+
{"const", T_const},
8990
};
9091

9192
/* hashmap insertion */
@@ -787,13 +788,15 @@ token_t lex_token_impl(bool aliasing)
787788
keyword = T_case;
788789
break;
789790

790-
case 5: /* 5-letter keywords: while, break, union */
791+
case 5: /* 5-letter keywords: while, break, union, const */
791792
if (token_str[0] == 'w' && !memcmp(token_str, "while", 5))
792793
keyword = T_while;
793794
else if (token_str[0] == 'b' && !memcmp(token_str, "break", 5))
794795
keyword = T_break;
795796
else if (token_str[0] == 'u' && !memcmp(token_str, "union", 5))
796797
keyword = T_union;
798+
else if (token_str[0] == 'c' && !memcmp(token_str, "const", 5))
799+
keyword = T_const;
797800
break;
798801

799802
case 6: /* 6-letter keywords: return, struct, switch, sizeof */

src/parser.c

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ var_t *opstack_pop(void)
130130

131131
void read_expr(block_t *parent, basic_block_t **bb);
132132

133-
int write_symbol(char *data)
133+
int write_symbol(const char *data)
134134
{
135135
int start_len = elf_data->size;
136136
elf_write_str(elf_data, data);
@@ -1191,11 +1191,19 @@ void parse_array_init(var_t *var,
11911191

11921192
void read_inner_var_decl(var_t *vd, bool anon, bool is_param)
11931193
{
1194-
vd->init_val = 0;
11951194
/* Preserve typedef pointer level - don't reset if already inherited */
1195+
vd->init_val = 0;
11961196

1197-
while (lex_accept(T_asterisk))
1197+
while (lex_accept(T_asterisk)) {
11981198
vd->ptr_level++;
1199+
/* Check for const after asterisk (e.g., int * const ptr).
1200+
* For now, we just consume const qualifiers after pointer.
1201+
* Full support would require tracking const-ness of the pointer
1202+
* itself vs the pointed-to data separately.
1203+
*/
1204+
while (lex_peek(T_const, NULL))
1205+
lex_accept(T_const);
1206+
}
11991207

12001208
/* is it function pointer declaration? */
12011209
if (lex_accept(T_open_bracket)) {
@@ -1325,8 +1333,15 @@ void read_parameter_list_decl(func_t *func, bool anon)
13251333
lex_accept(T_comma);
13261334
}
13271335

1328-
while (lex_peek(T_identifier, NULL) == 1) {
1329-
read_full_var_decl(&func->param_defs[vn++], anon, true);
1336+
while (lex_peek(T_identifier, NULL) == 1 || lex_peek(T_const, NULL)) {
1337+
/* Check for const qualifier */
1338+
bool is_const = false;
1339+
if (lex_accept(T_const))
1340+
is_const = true;
1341+
1342+
read_full_var_decl(&func->param_defs[vn], anon, true);
1343+
func->param_defs[vn].is_const_qualified = is_const;
1344+
vn++;
13301345
lex_accept(T_comma);
13311346
}
13321347
func->num_params = vn;
@@ -3808,6 +3823,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
38083823
type_t *type;
38093824
var_t *vd, *rs1, *rs2, *var;
38103825
opcode_t prefix_op = OP_generic;
3826+
bool is_const = false;
38113827

38123828
if (!bb)
38133829
printf("Warning: unreachable code detected\n");
@@ -4127,6 +4143,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
41274143
type = find_type(token, find_type_flag);
41284144
if (type) {
41294145
var = require_typed_var(parent, type);
4146+
var->is_const_qualified = is_const;
41304147
read_partial_var_decl(var, NULL);
41314148
add_insn(parent, bb, OP_allocat, var, NULL, NULL, 0, NULL);
41324149
add_symbol(bb, var);
@@ -4332,14 +4349,22 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
43324349
error("Unknown struct/union type");
43334350
}
43344351

4352+
/* Handle const qualifier for local variable declarations */
4353+
if (lex_accept(T_const)) {
4354+
is_const = true;
4355+
/* After const, we expect a type */
4356+
if (!lex_peek(T_identifier, token))
4357+
error("Expected type after const");
4358+
}
4359+
43354360
/* statement with prefix */
4336-
if (lex_accept(T_increment))
4361+
if (!is_const && lex_accept(T_increment))
43374362
prefix_op = OP_add;
4338-
else if (lex_accept(T_decrement))
4363+
else if (!is_const && lex_accept(T_decrement))
43394364
prefix_op = OP_sub;
43404365
/* must be an identifier or asterisk (for pointer dereference) */
43414366
bool has_asterisk = lex_peek(T_asterisk, NULL);
4342-
if (!lex_peek(T_identifier, token) && !has_asterisk)
4367+
if (!is_const && !lex_peek(T_identifier, token) && !has_asterisk)
43434368
error("Unexpected token");
43444369

43454370
/* handle macro parameter substitution for statements */
@@ -4411,6 +4436,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
44114436

44124437
if (type) {
44134438
var = require_typed_var(parent, type);
4439+
var->is_const_qualified = is_const;
44144440
read_full_var_decl(var, false, false);
44154441
add_insn(parent, bb, OP_allocat, var, NULL, NULL, 0, NULL);
44164442
add_symbol(bb, var);
@@ -4729,10 +4755,11 @@ void read_func_body(func_t *func)
47294755
}
47304756

47314757
/* if first token is type */
4732-
void read_global_decl(block_t *block)
4758+
void read_global_decl(block_t *block, bool is_const)
47334759
{
47344760
var_t *var = require_var(block);
47354761
var->is_global = true;
4762+
var->is_const_qualified = is_const;
47364763

47374764
/* new function, or variables under parent */
47384765
read_full_var_decl(var, false, false);
@@ -4818,6 +4845,7 @@ void initialize_struct_field(var_t *nv, var_t *v, int offset)
48184845
nv->ptr_level = 0;
48194846
nv->is_func = false;
48204847
nv->is_global = false;
4848+
nv->is_const_qualified = false;
48214849
nv->array_size = 0;
48224850
nv->offset = offset;
48234851
nv->init_val = 0;
@@ -4832,6 +4860,11 @@ void read_global_statement(void)
48324860
{
48334861
char token[MAX_ID_LEN];
48344862
block_t *block = GLOBAL_BLOCK; /* global block */
4863+
bool is_const = false;
4864+
4865+
/* Handle const qualifier */
4866+
if (lex_accept(T_const))
4867+
is_const = true;
48354868

48364869
if (lex_accept(T_struct)) {
48374870
int i = 0, size = 0;
@@ -4847,6 +4880,7 @@ void read_global_statement(void)
48474880
/* one or more declarators */
48484881
var_t *var = require_typed_var(block, decl_type);
48494882
var->is_global = true; /* Global struct variable */
4883+
var->is_const_qualified = is_const;
48504884
read_partial_var_decl(var, NULL);
48514885
add_insn(block, GLOBAL_FUNC->bbs, OP_allocat, var, NULL, NULL, 0,
48524886
NULL);
@@ -5144,7 +5178,7 @@ void read_global_statement(void)
51445178
lex_expect(T_semicolon);
51455179
}
51465180
} else if (lex_peek(T_identifier, NULL)) {
5147-
read_global_decl(block);
5181+
read_global_decl(block, is_const);
51485182
} else
51495183
error("Syntax error in global statement");
51505184
}
@@ -5155,6 +5189,7 @@ void parse_internal(void)
51555189
GLOBAL_FUNC = add_func("", true);
51565190
GLOBAL_FUNC->stack_size = 4;
51575191
GLOBAL_FUNC->bbs = arena_calloc(BB_ARENA, 1, sizeof(basic_block_t));
5192+
GLOBAL_FUNC->bbs->belong_to = GLOBAL_FUNC; /* Prevent nullptr deref in RA */
51585193

51595194
/* built-in types */
51605195
TY_void = add_named_type("void");

0 commit comments

Comments
 (0)