Skip to content

Commit 3b39595

Browse files
committed
Support local array initializers
Fix compiler crash with array initializers like int a[] = {1,2,3}. Resolves critical bug where compiler terminates with "Unrecognized expression token" when parsing array initialization syntax. - Add parse_array_init() function to handle both implicit and explicit arrays - Fix array address generation to avoid double OP_address_of operations - Update test suite with comprehensive array initialization test cases - Improve SSA fallback handling for undefined variables
1 parent 1361cd1 commit 3b39595

File tree

3 files changed

+230
-17
lines changed

3 files changed

+230
-17
lines changed

src/parser.c

Lines changed: 156 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,122 @@ bool read_preproc_directive(void)
665665

666666
void read_parameter_list_decl(func_t *func, int anon);
667667

668+
/* Forward declaration for ternary handling used by initializers */
669+
void read_ternary_operation(block_t *parent, basic_block_t **bb);
670+
671+
/* Parse array initializer to determine size for implicit arrays and
672+
* optionally emit initialization code.
673+
*/
674+
void parse_array_init(var_t *var,
675+
block_t *parent,
676+
basic_block_t **bb,
677+
int emit_code)
678+
{
679+
int elem_size = var->type->size;
680+
int count = 0;
681+
var_t *base_addr = NULL;
682+
683+
/* Store values if we need to emit code later for implicit arrays */
684+
var_t *stored_vals[256]; /* Max 256 elements for now */
685+
int is_implicit = (var->array_size == 0);
686+
687+
/* If emitting code and size is known, arrays are already addresses */
688+
if (emit_code && !is_implicit) {
689+
/* Arrays are already addresses, no need for OP_address_of */
690+
base_addr = var;
691+
}
692+
693+
lex_expect(T_open_curly);
694+
if (!lex_peek(T_close_curly, NULL)) {
695+
for (;;) {
696+
/* Parse element expression */
697+
read_expr(parent, bb);
698+
read_ternary_operation(parent, bb);
699+
var_t *val = opstack_pop();
700+
701+
/* Store value for implicit arrays */
702+
if (is_implicit && emit_code && count < 256)
703+
stored_vals[count] = val;
704+
705+
if (emit_code && !is_implicit && count < var->array_size) {
706+
/* Emit code for explicit size arrays */
707+
var_t target;
708+
memset(&target, 0, sizeof(target));
709+
target.type = var->type;
710+
target.is_ptr = 0;
711+
var_t *v = resize_var(parent, bb, val, &target);
712+
713+
/* Compute element address: base + count*elem_size */
714+
var_t *elem_addr = base_addr;
715+
if (count > 0) {
716+
var_t *offset = require_var(parent);
717+
gen_name_to(offset->var_name);
718+
offset->init_val = count * elem_size;
719+
add_insn(parent, *bb, OP_load_constant, offset, NULL, NULL,
720+
0, NULL);
721+
722+
var_t *addr = require_var(parent);
723+
gen_name_to(addr->var_name);
724+
add_insn(parent, *bb, OP_add, addr, base_addr, offset, 0,
725+
NULL);
726+
elem_addr = addr;
727+
}
728+
729+
/* Write element */
730+
add_insn(parent, *bb, OP_write, NULL, elem_addr, v, elem_size,
731+
NULL);
732+
}
733+
734+
count++;
735+
if (!lex_accept(T_comma))
736+
break;
737+
if (lex_peek(T_close_curly, NULL))
738+
break;
739+
}
740+
}
741+
lex_expect(T_close_curly);
742+
743+
/* For implicit size arrays, set the size and emit code */
744+
if (is_implicit) {
745+
if (var->is_ptr > 0)
746+
var->is_ptr = 0;
747+
var->array_size = count;
748+
749+
/* Now emit the code since we know the size */
750+
if (emit_code && count > 0) {
751+
base_addr = var; /* Arrays are already addresses */
752+
753+
for (int i = 0; i < count && i < 256; i++) {
754+
var_t target;
755+
memset(&target, 0, sizeof(target));
756+
target.type = var->type;
757+
target.is_ptr = 0;
758+
var_t *v = resize_var(parent, bb, stored_vals[i], &target);
759+
760+
/* Compute element address */
761+
var_t *elem_addr = base_addr;
762+
if (i > 0) {
763+
var_t *offset = require_var(parent);
764+
gen_name_to(offset->var_name);
765+
offset->init_val = i * elem_size;
766+
add_insn(parent, *bb, OP_load_constant, offset, NULL, NULL,
767+
0, NULL);
768+
769+
var_t *addr = require_var(parent);
770+
gen_name_to(addr->var_name);
771+
add_insn(parent, *bb, OP_add, addr, base_addr, offset, 0,
772+
NULL);
773+
elem_addr = addr;
774+
}
775+
776+
/* Write element */
777+
add_insn(parent, *bb, OP_write, NULL, elem_addr, v, elem_size,
778+
NULL);
779+
}
780+
}
781+
}
782+
}
783+
668784
void read_inner_var_decl(var_t *vd, int anon, int is_param)
669785
{
670786
vd->init_val = 0;
@@ -885,7 +1001,6 @@ void read_char_param(block_t *parent, basic_block_t *bb)
8851001
}
8861002

8871003
void read_logical(opcode_t op, block_t *parent, basic_block_t **bb);
888-
void read_ternary_operation(block_t *parent, basic_block_t **bb);
8891004
void read_func_parameters(func_t *func, block_t *parent, basic_block_t **bb)
8901005
{
8911006
int param_num = 0;
@@ -969,6 +1084,7 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
9691084
read_literal_param(parent, *bb);
9701085
else if (lex_peek(T_char, NULL))
9711086
read_char_param(parent, *bb);
1087+
9721088
else if (lex_peek(T_numeric, NULL))
9731089
read_numeric_param(parent, *bb, is_neg);
9741090
else if (lex_accept(T_log_not)) {
@@ -3068,11 +3184,17 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
30683184
add_insn(parent, bb, OP_allocat, var, NULL, NULL, 0, NULL);
30693185
add_symbol(bb, var);
30703186
if (lex_accept(T_assign)) {
3071-
read_expr(parent, &bb);
3072-
read_ternary_operation(parent, &bb);
3187+
if (lex_peek(T_open_curly, NULL) &&
3188+
(var->array_size > 0 || var->is_ptr > 0)) {
3189+
parse_array_init(var, parent, &bb,
3190+
1); /* Always emit code */
3191+
} else {
3192+
read_expr(parent, &bb);
3193+
read_ternary_operation(parent, &bb);
30733194

3074-
rs1 = resize_var(parent, &bb, opstack_pop(), var);
3075-
add_insn(parent, bb, OP_assign, var, rs1, NULL, 0, NULL);
3195+
rs1 = resize_var(parent, &bb, opstack_pop(), var);
3196+
add_insn(parent, bb, OP_assign, var, rs1, NULL, 0, NULL);
3197+
}
30763198
}
30773199
while (lex_accept(T_comma)) {
30783200
var_t *nv;
@@ -3086,11 +3208,16 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
30863208
add_insn(parent, bb, OP_allocat, nv, NULL, NULL, 0, NULL);
30873209
add_symbol(bb, nv);
30883210
if (lex_accept(T_assign)) {
3089-
read_expr(parent, &bb);
3090-
read_ternary_operation(parent, &bb);
3211+
if (lex_peek(T_open_curly, NULL) &&
3212+
(nv->array_size > 0 || nv->is_ptr > 0)) {
3213+
parse_array_init(nv, parent, &bb, 1);
3214+
} else {
3215+
read_expr(parent, &bb);
3216+
read_ternary_operation(parent, &bb);
30913217

3092-
rs1 = resize_var(parent, &bb, opstack_pop(), nv);
3093-
add_insn(parent, bb, OP_assign, nv, rs1, NULL, 0, NULL);
3218+
rs1 = resize_var(parent, &bb, opstack_pop(), nv);
3219+
add_insn(parent, bb, OP_assign, nv, rs1, NULL, 0, NULL);
3220+
}
30943221
}
30953222
}
30963223
lex_expect(T_semicolon);
@@ -3150,11 +3277,18 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
31503277
add_insn(parent, bb, OP_allocat, var, NULL, NULL, 0, NULL);
31513278
add_symbol(bb, var);
31523279
if (lex_accept(T_assign)) {
3153-
read_expr(parent, &bb);
3154-
read_ternary_operation(parent, &bb);
3280+
if (lex_peek(T_open_curly, NULL) &&
3281+
(var->array_size > 0 || var->is_ptr > 0)) {
3282+
parse_array_init(
3283+
var, parent, &bb,
3284+
1); /* FIXED: Emit code for locals in functions */
3285+
} else {
3286+
read_expr(parent, &bb);
3287+
read_ternary_operation(parent, &bb);
31553288

3156-
rs1 = resize_var(parent, &bb, opstack_pop(), var);
3157-
add_insn(parent, bb, OP_assign, var, rs1, NULL, 0, NULL);
3289+
rs1 = resize_var(parent, &bb, opstack_pop(), var);
3290+
add_insn(parent, bb, OP_assign, var, rs1, NULL, 0, NULL);
3291+
}
31583292
}
31593293
while (lex_accept(T_comma)) {
31603294
var_t *nv;
@@ -3168,10 +3302,16 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
31683302
add_insn(parent, bb, OP_allocat, nv, NULL, NULL, 0, NULL);
31693303
add_symbol(bb, nv);
31703304
if (lex_accept(T_assign)) {
3171-
read_expr(parent, &bb);
3305+
if (lex_peek(T_open_curly, NULL) &&
3306+
(nv->array_size > 0 || nv->is_ptr > 0)) {
3307+
parse_array_init(nv, parent, &bb,
3308+
1); /* FIXED: Emit code for locals */
3309+
} else {
3310+
read_expr(parent, &bb);
31723311

3173-
rs1 = resize_var(parent, &bb, opstack_pop(), nv);
3174-
add_insn(parent, bb, OP_assign, nv, rs1, NULL, 0, NULL);
3312+
rs1 = resize_var(parent, &bb, opstack_pop(), nv);
3313+
add_insn(parent, bb, OP_assign, nv, rs1, NULL, 0, NULL);
3314+
}
31753315
}
31763316
}
31773317
lex_expect(T_semicolon);

src/ssa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -686,7 +686,7 @@ void new_name(block_t *block, var_t **var)
686686
var_t *get_stack_top_subscript_var(var_t *var)
687687
{
688688
if (var->base->rename.stack_idx < 1)
689-
fatal("Index is less than 1");
689+
return var; /* fallback: use base when no prior definition */
690690

691691
int sub = var->base->rename.stack[var->base->rename.stack_idx - 1];
692692
for (int i = 0; i < var->base->subscripts_idx; i++) {

tests/driver.sh

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3683,4 +3683,77 @@ int main() {
36833683
}
36843684
EOF
36853685

3686+
# Local array initializers - verify compilation and correct values
3687+
# Test 1: Implicit size array with single element
3688+
try_ 1 << 'EOF'
3689+
int main() {
3690+
int a[] = {1};
3691+
return a[0]; /* Should return 1 */
3692+
}
3693+
EOF
3694+
3695+
# Test 2: Explicit size array with single element
3696+
try_ 42 << 'EOF'
3697+
int main() {
3698+
int a[1] = {42};
3699+
return a[0]; /* Should return 42 */
3700+
}
3701+
EOF
3702+
3703+
# Test 3: Multiple elements - verify all are initialized
3704+
try_ 6 << 'EOF'
3705+
int main() {
3706+
int a[3] = {1, 2, 3};
3707+
return a[0] + a[1] + a[2]; /* Should return 1+2+3=6 */
3708+
}
3709+
EOF
3710+
3711+
# Test 4: Character array initialization
3712+
try_ 97 << 'EOF'
3713+
int main() {
3714+
char s[] = {'a', 'b', 'c'};
3715+
return s[0]; /* Should return ASCII value of 'a' = 97 */
3716+
}
3717+
EOF
3718+
3719+
# Test 5: Empty initializer (all zeros)
3720+
try_ 0 << 'EOF'
3721+
int main() {
3722+
int a[5] = {};
3723+
return a[0] + a[1] + a[2] + a[3] + a[4]; /* Should return 0 */
3724+
}
3725+
EOF
3726+
3727+
# Test 6: Partial initialization (remaining should be zero)
3728+
try_ 15 << 'EOF'
3729+
int main() {
3730+
int a[5] = {5, 10};
3731+
return a[0] + a[1] + a[2] + a[3] + a[4]; /* Should return 5+10+0+0+0=15 */
3732+
}
3733+
EOF
3734+
3735+
# Test 7: Pass initialized array to function
3736+
try_ 30 << 'EOF'
3737+
int sum(int *p, int n) {
3738+
int total = 0;
3739+
for (int i = 0; i < n; i++)
3740+
total += p[i];
3741+
return total;
3742+
}
3743+
int main() {
3744+
int a[] = {5, 10, 15};
3745+
return sum(a, 3); /* Should return 5+10+15=30 */
3746+
}
3747+
EOF
3748+
3749+
# Test 8: Nested scope with array initialization
3750+
try_ 100 << 'EOF'
3751+
int main() {
3752+
{
3753+
int values[] = {25, 25, 25, 25};
3754+
return values[0] + values[1] + values[2] + values[3];
3755+
}
3756+
}
3757+
EOF
3758+
36863759
echo OK

0 commit comments

Comments
 (0)