Skip to content

Commit 301a225

Browse files
committed
Add const qualifier support
This change lets shecc parse and handle const qualifiers in C code, improving type safety and C standard compliance. The parser now also accepts const qualifiers after a pointer asterisk, enabling declarations such as 'int * const ptr', where the pointer itself is const but the pointed-to value can still change. Note: support is still partial, because patterns such as 'int const *ptr' are not yet handled. Full const-qualifier support would require tracking the const-ness of a pointer separately from the const-ness of the data it points to.
1 parent 3a8d1b3 commit 301a225

File tree

8 files changed

+73
-33
lines changed

8 files changed

+73
-33
lines changed

src/arm-codegen.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,9 @@ void emit(int code)
188188
void emit_ph2_ir(ph2_ir_t *ph2_ir)
189189
{
190190
func_t *func;
191-
int rd = ph2_ir->dest;
192-
int rn = ph2_ir->src0;
193-
int rm = ph2_ir->src1;
191+
const int rd = ph2_ir->dest;
192+
const int rn = ph2_ir->src0;
193+
const int rm = ph2_ir->src1;
194194
int ofs;
195195

196196
/* Prepare this variable to reuse code for:

src/defs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ typedef enum {
179179
T_break,
180180
T_default,
181181
T_continue,
182+
T_const, /* const qualifier */
182183
/* C pre-processor directives */
183184
T_cppd_include,
184185
T_cppd_define,
@@ -353,7 +354,8 @@ struct var {
353354
int ptr_level;
354355
bool is_func;
355356
bool is_global;
356-
bool address_taken; /* true if variable address was taken (&var) */
357+
bool is_const_qualified; /* true if variable has const qualifier */
358+
bool address_taken; /* true if variable address was taken (&var) */
357359
int array_size;
358360
int array_dim1, array_dim2; /* first/second dimension size for 2D arrays */
359361
int offset; /* offset from stack or frame, index 0 is reserved */

src/elf.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
int elf_symbol_index;
1515

16-
void elf_write_str(strbuf_t *elf_array, char *vals)
16+
void elf_write_str(strbuf_t *elf_array, const char *vals)
1717
{
1818
/*
1919
* Note that strbuf_puts() does not push the null character.
@@ -348,7 +348,7 @@ void elf_align(void)
348348
elf_write_byte(elf_strtab, 0);
349349
}
350350

351-
void elf_add_symbol(char *symbol, int pc)
351+
void elf_add_symbol(const char *symbol, int pc)
352352
{
353353
/* Check for null pointers to prevent crashes */
354354
if (!symbol || !elf_symtab || !elf_strtab) {
@@ -366,7 +366,7 @@ void elf_add_symbol(char *symbol, int pc)
366366
elf_symbol_index++;
367367
}
368368

369-
void elf_generate(char *outfile)
369+
void elf_generate(const char *outfile)
370370
{
371371
elf_align();
372372
elf_generate_header();

src/globals.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -173,16 +173,16 @@ void *arena_alloc(arena_t *arena, int size)
173173
}
174174

175175
/* Align to sizeof(void*) bytes for host compatibility */
176-
int alignment = sizeof(void *);
176+
const int alignment = sizeof(void *);
177177
size = (size + alignment - 1) & ~(alignment - 1);
178178

179179
if (!arena->head || arena->head->offset + size > arena->head->capacity) {
180180
/* Need a new block: choose capacity = max(DEFAULT_ARENA_SIZE,
181181
* arena->block_size, size) */
182-
int base =
182+
const int base =
183183
(arena->block_size > DEFAULT_ARENA_SIZE ? arena->block_size
184184
: DEFAULT_ARENA_SIZE);
185-
int new_capacity = (size > base ? size : base);
185+
const int new_capacity = (size > base ? size : base);
186186
arena_block_t *new_block = arena_block_create(new_capacity);
187187
new_block->next = arena->head;
188188
arena->head = new_block;
@@ -282,7 +282,7 @@ void *arena_realloc(arena_t *arena, char *oldptr, int oldsz, int newsz)
282282
*/
283283
char *arena_strdup(arena_t *arena, char *str)
284284
{
285-
int n = strlen(str);
285+
const int n = strlen(str);
286286
char *dup = arena_alloc(arena, n + 1);
287287
memcpy(dup, str, n);
288288
dup[n] = '\0';
@@ -368,14 +368,14 @@ void arena_free(arena_t *arena)
368368
*/
369369
int hashmap_hash_index(int size, char *key)
370370
{
371-
int hash = 0x811c9dc5, mask;
371+
int hash = 0x811c9dc5;
372372

373373
for (; *key; key++) {
374374
hash ^= *key;
375375
hash *= 0x01000193;
376376
}
377377

378-
mask = hash >> 31;
378+
const int mask = hash >> 31;
379379
return ((hash ^ mask) - mask) & (size - 1);
380380
}
381381

@@ -431,7 +431,7 @@ hashmap_node_t *hashmap_node_new(char *key, void *val)
431431
if (!key)
432432
return NULL;
433433

434-
int len = strlen(key);
434+
const int len = strlen(key);
435435
hashmap_node_t *node = arena_alloc(HASHMAP_ARENA, sizeof(hashmap_node_t));
436436

437437

@@ -1159,7 +1159,7 @@ bool strbuf_putc(strbuf_t *src, char value)
11591159
return true;
11601160
}
11611161

1162-
bool strbuf_puts(strbuf_t *src, char *value)
1162+
bool strbuf_puts(strbuf_t *src, const char *value)
11631163
{
11641164
int len = strlen(value);
11651165

src/lexer.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
/* Hash table constants */
1414
#define NUM_DIRECTIVES 11
15-
#define NUM_KEYWORDS 16
15+
#define NUM_KEYWORDS 17
1616

1717
/* Token mapping structure for elegant initialization */
1818
typedef struct {
@@ -86,6 +86,7 @@ void lex_init_keywords()
8686
{"default", T_default},
8787
{"continue", T_continue},
8888
{"union", T_union},
89+
{"const", T_const},
8990
};
9091

9192
/* hashmap insertion */
@@ -787,13 +788,15 @@ token_t lex_token_impl(bool aliasing)
787788
keyword = T_case;
788789
break;
789790

790-
case 5: /* 5-letter keywords: while, break, union */
791+
case 5: /* 5-letter keywords: while, break, union, const */
791792
if (token_str[0] == 'w' && !memcmp(token_str, "while", 5))
792793
keyword = T_while;
793794
else if (token_str[0] == 'b' && !memcmp(token_str, "break", 5))
794795
keyword = T_break;
795796
else if (token_str[0] == 'u' && !memcmp(token_str, "union", 5))
796797
keyword = T_union;
798+
else if (token_str[0] == 'c' && !memcmp(token_str, "const", 5))
799+
keyword = T_const;
797800
break;
798801

799802
case 6: /* 6-letter keywords: return, struct, switch, sizeof */

src/opt-sccp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ bool simple_sccp(func_t *func)
122122
if (insn->rs1 && insn->rs1->is_const && insn->rs2 &&
123123
insn->rs2->is_const && !insn->rd->is_global) {
124124
int result = 0;
125-
int l = insn->rs1->init_val, r = insn->rs2->init_val;
125+
const int l = insn->rs1->init_val, r = insn->rs2->init_val;
126126

127127
/* Compute result based on operation type */
128128
switch (insn->opcode) {

src/parser.c

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,9 @@ var_t *opstack_pop(void)
130130

131131
void read_expr(block_t *parent, basic_block_t **bb);
132132

133-
int write_symbol(char *data)
133+
int write_symbol(const char *data)
134134
{
135-
int start_len = elf_data->size;
135+
const int start_len = elf_data->size;
136136
elf_write_str(elf_data, data);
137137
elf_write_byte(elf_data, 0);
138138
return start_len;
@@ -1191,11 +1191,19 @@ void parse_array_init(var_t *var,
11911191

11921192
void read_inner_var_decl(var_t *vd, bool anon, bool is_param)
11931193
{
1194-
vd->init_val = 0;
11951194
/* Preserve typedef pointer level - don't reset if already inherited */
1195+
vd->init_val = 0;
11961196

1197-
while (lex_accept(T_asterisk))
1197+
while (lex_accept(T_asterisk)) {
11981198
vd->ptr_level++;
1199+
/* Check for const after asterisk (e.g., int * const ptr).
1200+
* For now, we just consume const qualifiers after pointer.
1201+
* Full support would require tracking const-ness of the pointer
1202+
* itself vs the pointed-to data separately.
1203+
*/
1204+
while (lex_peek(T_const, NULL))
1205+
lex_accept(T_const);
1206+
}
11991207

12001208
/* is it function pointer declaration? */
12011209
if (lex_accept(T_open_bracket)) {
@@ -1325,8 +1333,15 @@ void read_parameter_list_decl(func_t *func, bool anon)
13251333
lex_accept(T_comma);
13261334
}
13271335

1328-
while (lex_peek(T_identifier, NULL) == 1) {
1329-
read_full_var_decl(&func->param_defs[vn++], anon, true);
1336+
while (lex_peek(T_identifier, NULL) == 1 || lex_peek(T_const, NULL)) {
1337+
/* Check for const qualifier */
1338+
bool is_const = false;
1339+
if (lex_accept(T_const))
1340+
is_const = true;
1341+
1342+
read_full_var_decl(&func->param_defs[vn], anon, true);
1343+
func->param_defs[vn].is_const_qualified = is_const;
1344+
vn++;
13301345
lex_accept(T_comma);
13311346
}
13321347
func->num_params = vn;
@@ -1360,7 +1375,7 @@ void read_literal_param(block_t *parent, basic_block_t *bb)
13601375
combined_len += literal_len;
13611376
}
13621377

1363-
int index = write_symbol(combined);
1378+
const int index = write_symbol(combined);
13641379

13651380
var_t *vd = require_typed_ptr_var(parent, TY_char, true);
13661381
gen_name_to(vd->var_name);
@@ -3808,6 +3823,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
38083823
type_t *type;
38093824
var_t *vd, *rs1, *rs2, *var;
38103825
opcode_t prefix_op = OP_generic;
3826+
bool is_const = false;
38113827

38123828
if (!bb)
38133829
printf("Warning: unreachable code detected\n");
@@ -4127,6 +4143,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
41274143
type = find_type(token, find_type_flag);
41284144
if (type) {
41294145
var = require_typed_var(parent, type);
4146+
var->is_const_qualified = is_const;
41304147
read_partial_var_decl(var, NULL);
41314148
add_insn(parent, bb, OP_allocat, var, NULL, NULL, 0, NULL);
41324149
add_symbol(bb, var);
@@ -4332,14 +4349,22 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
43324349
error("Unknown struct/union type");
43334350
}
43344351

4352+
/* Handle const qualifier for local variable declarations */
4353+
if (lex_accept(T_const)) {
4354+
is_const = true;
4355+
/* After const, we expect a type */
4356+
if (!lex_peek(T_identifier, token))
4357+
error("Expected type after const");
4358+
}
4359+
43354360
/* statement with prefix */
4336-
if (lex_accept(T_increment))
4361+
if (!is_const && lex_accept(T_increment))
43374362
prefix_op = OP_add;
4338-
else if (lex_accept(T_decrement))
4363+
else if (!is_const && lex_accept(T_decrement))
43394364
prefix_op = OP_sub;
43404365
/* must be an identifier or asterisk (for pointer dereference) */
43414366
bool has_asterisk = lex_peek(T_asterisk, NULL);
4342-
if (!lex_peek(T_identifier, token) && !has_asterisk)
4367+
if (!is_const && !lex_peek(T_identifier, token) && !has_asterisk)
43434368
error("Unexpected token");
43444369

43454370
/* handle macro parameter substitution for statements */
@@ -4411,6 +4436,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
44114436

44124437
if (type) {
44134438
var = require_typed_var(parent, type);
4439+
var->is_const_qualified = is_const;
44144440
read_full_var_decl(var, false, false);
44154441
add_insn(parent, bb, OP_allocat, var, NULL, NULL, 0, NULL);
44164442
add_symbol(bb, var);
@@ -4729,10 +4755,11 @@ void read_func_body(func_t *func)
47294755
}
47304756

47314757
/* if first token is type */
4732-
void read_global_decl(block_t *block)
4758+
void read_global_decl(block_t *block, bool is_const)
47334759
{
47344760
var_t *var = require_var(block);
47354761
var->is_global = true;
4762+
var->is_const_qualified = is_const;
47364763

47374764
/* new function, or variables under parent */
47384765
read_full_var_decl(var, false, false);
@@ -4818,6 +4845,7 @@ void initialize_struct_field(var_t *nv, var_t *v, int offset)
48184845
nv->ptr_level = 0;
48194846
nv->is_func = false;
48204847
nv->is_global = false;
4848+
nv->is_const_qualified = false;
48214849
nv->array_size = 0;
48224850
nv->offset = offset;
48234851
nv->init_val = 0;
@@ -4832,6 +4860,11 @@ void read_global_statement(void)
48324860
{
48334861
char token[MAX_ID_LEN];
48344862
block_t *block = GLOBAL_BLOCK; /* global block */
4863+
bool is_const = false;
4864+
4865+
/* Handle const qualifier */
4866+
if (lex_accept(T_const))
4867+
is_const = true;
48354868

48364869
if (lex_accept(T_struct)) {
48374870
int i = 0, size = 0;
@@ -4847,6 +4880,7 @@ void read_global_statement(void)
48474880
/* one or more declarators */
48484881
var_t *var = require_typed_var(block, decl_type);
48494882
var->is_global = true; /* Global struct variable */
4883+
var->is_const_qualified = is_const;
48504884
read_partial_var_decl(var, NULL);
48514885
add_insn(block, GLOBAL_FUNC->bbs, OP_allocat, var, NULL, NULL, 0,
48524886
NULL);
@@ -5144,7 +5178,7 @@ void read_global_statement(void)
51445178
lex_expect(T_semicolon);
51455179
}
51465180
} else if (lex_peek(T_identifier, NULL)) {
5147-
read_global_decl(block);
5181+
read_global_decl(block, is_const);
51485182
} else
51495183
error("Syntax error in global statement");
51505184
}
@@ -5155,6 +5189,7 @@ void parse_internal(void)
51555189
GLOBAL_FUNC = add_func("", true);
51565190
GLOBAL_FUNC->stack_size = 4;
51575191
GLOBAL_FUNC->bbs = arena_calloc(BB_ARENA, 1, sizeof(basic_block_t));
5192+
GLOBAL_FUNC->bbs->belong_to = GLOBAL_FUNC; /* Prevent nullptr deref in RA */
51585193

51595194
/* built-in types */
51605195
TY_void = add_named_type("void");

src/reg-alloc.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -887,9 +887,9 @@ void dump_ph2_ir(void)
887887
for (int i = 0; i < ph2_ir_idx; i++) {
888888
ph2_ir_t *ph2_ir = PH2_IR_FLATTEN[i];
889889

890-
int rd = ph2_ir->dest + 48;
891-
int rs1 = ph2_ir->src0 + 48;
892-
int rs2 = ph2_ir->src1 + 48;
890+
const int rd = ph2_ir->dest + 48;
891+
const int rs1 = ph2_ir->src0 + 48;
892+
const int rs2 = ph2_ir->src1 + 48;
893893

894894
switch (ph2_ir->op) {
895895
case OP_define:

0 commit comments

Comments
 (0)