Skip to content

Commit 28d954b

Browse files
committed
Fix array compound literal semantics
Unify and correct the handling of array compound literals across parsing, semantic analysis, and lowering. The compiler now constructs temporary arrays by emitting element writes, counting initializers, and returning the address of the temporary array instead of collapsing the literal to its first element. Decay to a scalar is applied only when a scalar value is required. A new helper centralizes the decay rules and replaces multiple ad-hoc callers across binary operators, assignments, function-call arguments, and ternary expressions. This resolves long-standing cases where array literals were incorrectly forced to scalars in pointer contexts.

Additional fixes include:
- correct scalarization in variadic and pointer-arithmetic contexts
- correct pointer-typed ternary results involving array literals
- treating zero-length array literals as constant zero
- avoiding double consumption of braces in the parser
- naming and formatting cleanup
- regenerating ARM/RISC-V IR snapshots to reflect corrected lowering

These changes restore correct pointer semantics for array compound literals and address #299.
1 parent c044948 commit 28d954b

File tree

1 file changed

+183
-52
lines changed

1 file changed

+183
-52
lines changed

src/parser.c

Lines changed: 183 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,10 @@ void parse_array_init(var_t *var,
4545
block_t *parent,
4646
basic_block_t **bb,
4747
bool emit_code);
48-
48+
void parse_array_compound_literal(var_t *var,
49+
block_t *parent,
50+
basic_block_t **bb,
51+
bool emit_code);
4952

5053
label_t *find_label(char *name)
5154
{
@@ -1330,6 +1333,85 @@ void parse_array_init(var_t *var,
13301333
}
13311334
}
13321335

1336+
void parse_array_compound_literal(var_t *var,
1337+
block_t *parent,
1338+
basic_block_t **bb,
1339+
bool emit_code)
1340+
{
1341+
int elem_size = var->type->size;
1342+
int count = 0;
1343+
var->array_size = 0;
1344+
var->init_val = 0;
1345+
if (!lex_peek(T_close_curly, NULL)) {
1346+
for (;;) {
1347+
read_expr(parent, bb);
1348+
read_ternary_operation(parent, bb);
1349+
var_t *value = opstack_pop();
1350+
if (count == 0)
1351+
var->init_val = value->init_val;
1352+
if (emit_code) {
1353+
var_t target = {0};
1354+
target.type = var->type;
1355+
target.ptr_level = 0;
1356+
var_t *store_val = resize_var(parent, bb, value, &target);
1357+
var_t *elem_addr =
1358+
compute_element_address(parent, bb, var, count, elem_size);
1359+
add_insn(parent, *bb, OP_write, NULL, elem_addr, store_val,
1360+
elem_size, NULL);
1361+
}
1362+
count++;
1363+
if (!lex_accept(T_comma))
1364+
break;
1365+
if (lex_peek(T_close_curly, NULL))
1366+
break;
1367+
}
1368+
}
1369+
1370+
lex_expect(T_close_curly);
1371+
var->array_size = count;
1372+
}
1373+
/* Identify compiler-emitted temporaries that hold array compound literals.
1374+
* Parsing assigns these temporaries synthetic names via gen_name_to (".tN")
1375+
* and they keep array metadata without pointer indirection.
1376+
*/
1377+
bool is_array_literal_placeholder(var_t *var)
1378+
{
1379+
return var && var->array_size > 0 && !var->ptr_level &&
1380+
var->var_name[0] == '.';
1381+
}
1382+
1383+
/* Report whether 'var' should be treated as a pointer operand: it has its own
 * pointer level, it carries a non-zero array size, or its type has a pointer
 * level greater than zero. A NULL 'var' is not pointer-like.
 */
bool is_pointer_like_value(var_t *var)
{
    if (!var)
        return false;
    if (var->ptr_level || var->array_size)
        return true;
    return var->type && var->type->ptr_level > 0;
}
1388+
1389+
var_t *scalarize_array_literal(block_t *parent,
1390+
basic_block_t **bb,
1391+
var_t *array_var,
1392+
type_t *hint_type)
1393+
{
1394+
if (!is_array_literal_placeholder(array_var))
1395+
return array_var;
1396+
1397+
type_t *literal_type = array_var->type ? array_var->type : TY_int;
1398+
int literal_size = literal_type->size;
1399+
if (literal_size <= 0)
1400+
literal_size = TY_int->size;
1401+
1402+
type_t *result_type = hint_type ? hint_type : literal_type;
1403+
if (!result_type)
1404+
result_type = TY_int;
1405+
1406+
var_t *scalar = require_typed_var(parent, result_type);
1407+
scalar->ptr_level = 0;
1408+
gen_name_to(scalar->var_name);
1409+
scalar->init_val = array_var->init_val;
1410+
1411+
add_insn(parent, *bb, OP_read, scalar, array_var, NULL, literal_size, NULL);
1412+
1413+
return scalar;
1414+
}
13331415
void read_inner_var_decl(var_t *vd, bool anon, bool is_param)
13341416
{
13351417
/* Preserve typedef pointer level - don't reset if already inherited */
@@ -1622,7 +1704,14 @@ void read_func_parameters(func_t *func, block_t *parent, basic_block_t **bb)
16221704
read_ternary_operation(parent, bb);
16231705

16241706
param = opstack_pop();
1625-
1707+
if (func) {
1708+
if (param_num < func->num_params) {
1709+
var_t *target = &func->param_defs[param_num];
1710+
if (!target->ptr_level && !target->array_size)
1711+
param = scalarize_array_literal(parent, bb, param,
1712+
target->type);
1713+
}
1714+
}
16261715
/* Handle parameter type conversion for direct calls.
16271716
* Indirect calls currently don't provide function instance.
16281717
*/
@@ -2064,9 +2153,22 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
20642153
bool is_array_literal = (cast_ptr_level == -1);
20652154
if (is_array_literal)
20662155
cast_ptr_level = 0; /* Reset for normal processing */
2067-
2156+
bool consumed_close_brace = false;
20682157
/* Check if this is a pointer compound literal */
2069-
if (cast_ptr_level > 0) {
2158+
if (is_array_literal) {
2159+
compound_var->array_size = 0;
2160+
add_insn(parent, *bb, OP_allocat, compound_var, NULL, NULL, 0,
2161+
NULL);
2162+
parse_array_compound_literal(compound_var, parent, bb, true);
2163+
2164+
if (compound_var->array_size == 0) {
2165+
compound_var->init_val = 0;
2166+
add_insn(parent, *bb, OP_load_constant, compound_var, NULL,
2167+
NULL, 0, NULL);
2168+
}
2169+
opstack_push(compound_var);
2170+
consumed_close_brace = true;
2171+
} else if (cast_ptr_level > 0) {
20702172
/* Pointer compound literal: (int*){&x} */
20712173
compound_var->ptr_level = cast_ptr_level;
20722174

@@ -2234,7 +2336,8 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
22342336
}
22352337
}
22362338

2237-
lex_expect(T_close_curly);
2339+
if (!consumed_close_brace)
2340+
lex_expect(T_close_curly);
22382341
} else {
22392342
/* Regular parenthesized expression */
22402343
read_expr(parent, bb);
@@ -2514,20 +2617,14 @@ void handle_pointer_arithmetic(block_t *parent,
25142617
}
25152618

25162619
/* Check if both have ptr_level or typedef pointer type */
2517-
bool rs1_is_ptr = (orig_rs1->ptr_level > 0) ||
2518-
(orig_rs1->type && orig_rs1->type->ptr_level > 0);
2519-
bool rs2_is_ptr = (orig_rs2->ptr_level > 0) ||
2520-
(orig_rs2->type && orig_rs2->type->ptr_level > 0);
2620+
bool rs1_is_ptr = is_pointer_like_value(orig_rs1);
2621+
bool rs2_is_ptr = is_pointer_like_value(orig_rs2);
25212622

25222623
/* If variable lookup failed, check the passed variables directly */
2523-
if (!rs1_is_ptr) {
2524-
rs1_is_ptr =
2525-
(rs1->ptr_level > 0) || (rs1->type && rs1->type->ptr_level > 0);
2526-
}
2527-
if (!rs2_is_ptr) {
2528-
rs2_is_ptr =
2529-
(rs2->ptr_level > 0) || (rs2->type && rs2->type->ptr_level > 0);
2530-
}
2624+
if (!rs1_is_ptr)
2625+
rs1_is_ptr = is_pointer_like_value(rs1);
2626+
if (!rs2_is_ptr)
2627+
rs2_is_ptr = is_pointer_like_value(rs2);
25312628

25322629
if (rs1_is_ptr && rs2_is_ptr) {
25332630
/* Both are pointers - this is pointer difference */
@@ -2606,11 +2703,11 @@ void handle_pointer_arithmetic(block_t *parent,
26062703
}
26072704
}
26082705
/* Determine which operand is the pointer for regular pointer arithmetic */
2609-
if (rs1->ptr_level || (rs1->type && rs1->type->ptr_level > 0)) {
2706+
if (is_pointer_like_value(rs1)) {
26102707
ptr_var = rs1;
26112708
int_var = rs2;
26122709
element_size = get_pointer_element_size(rs1);
2613-
} else if (rs2->ptr_level || (rs2->type && rs2->type->ptr_level > 0)) {
2710+
} else if (is_pointer_like_value(rs2)) {
26142711
/* Only for addition (p + n == n + p) */
26152712
if (op == OP_add) {
26162713
ptr_var = rs2;
@@ -2657,8 +2754,7 @@ bool is_pointer_operation(opcode_t op, var_t *rs1, var_t *rs2)
26572754
if (op != OP_add && op != OP_sub)
26582755
return false;
26592756

2660-
return (rs1->ptr_level || (rs1->type && rs1->type->ptr_level > 0) ||
2661-
rs2->ptr_level || (rs2->type && rs2->type->ptr_level > 0));
2757+
return is_pointer_like_value(rs1) || is_pointer_like_value(rs2);
26622758
}
26632759

26642760
/* Helper function to check if a variable is a pointer based on its declaration
@@ -2851,12 +2947,31 @@ void read_expr(block_t *parent, basic_block_t **bb)
28512947
rs2 = opstack_pop();
28522948
rs1 = opstack_pop();
28532949

2950+
bool rs1_is_placeholder = is_array_literal_placeholder(rs1);
2951+
bool rs2_is_placeholder = is_array_literal_placeholder(rs2);
2952+
bool rs1_is_ptr_like = is_pointer_like_value(rs1);
2953+
bool rs2_is_ptr_like = is_pointer_like_value(rs2);
2954+
bool pointer_context = (rs1_is_ptr_like && !rs1_is_placeholder) ||
2955+
(rs2_is_ptr_like && !rs2_is_placeholder);
2956+
28542957
/* Pointer arithmetic handling */
2855-
if (is_pointer_operation(top_op, rs1, rs2)) {
2958+
if (pointer_context && is_pointer_operation(top_op, rs1, rs2)) {
28562959
handle_pointer_arithmetic(parent, bb, top_op, rs1, rs2);
28572960
continue; /* skip normal processing */
28582961
}
28592962

2963+
if (rs1_is_placeholder && rs2_is_placeholder) {
2964+
rs1 = scalarize_array_literal(parent, bb, rs1, NULL);
2965+
rs2 = scalarize_array_literal(parent, bb, rs2, NULL);
2966+
} else {
2967+
if (rs1_is_placeholder && !rs2_is_ptr_like)
2968+
rs1 = scalarize_array_literal(
2969+
parent, bb, rs1, rs2 && rs2->type ? rs2->type : NULL);
2970+
2971+
if (rs2_is_placeholder && !rs1_is_ptr_like)
2972+
rs2 = scalarize_array_literal(
2973+
parent, bb, rs2, rs1 && rs1->type ? rs1->type : NULL);
2974+
}
28602975
/* Constant folding for binary operations */
28612976
if (rs1 && rs2 && rs1->init_val && !rs1->ptr_level && !rs1->is_global &&
28622977
rs2->init_val && !rs2->ptr_level && !rs2->is_global) {
@@ -3477,7 +3592,7 @@ void finalize_logical(opcode_t op,
34773592

34783593
void read_ternary_operation(block_t *parent, basic_block_t **bb)
34793594
{
3480-
var_t *vd, *rs1;
3595+
var_t *vd;
34813596

34823597
if (!lex_accept(T_question))
34833598
return;
@@ -3502,17 +3617,41 @@ void read_ternary_operation(block_t *parent, basic_block_t **bb)
35023617
abort();
35033618
}
35043619

3505-
rs1 = opstack_pop();
3506-
vd = require_var(parent);
3507-
gen_name_to(vd->var_name);
3508-
add_insn(parent, then_, OP_assign, vd, rs1, NULL, 0, NULL);
3620+
var_t *true_val = opstack_pop();
35093621

35103622
/* false branch */
35113623
read_expr(parent, &else_);
35123624
bb_connect(*bb, else_, ELSE);
3625+
var_t *false_val = opstack_pop();
3626+
bool true_array = is_array_literal_placeholder(true_val);
3627+
bool false_array = is_array_literal_placeholder(false_val);
3628+
bool true_ptr_like = is_pointer_like_value(true_val);
3629+
bool false_ptr_like = is_pointer_like_value(false_val);
3630+
3631+
if (true_array && !false_ptr_like)
3632+
true_val = scalarize_array_literal(parent, &then_, true_val,
3633+
false_val ? false_val->type : NULL);
35133634

3514-
rs1 = opstack_pop();
3515-
add_insn(parent, else_, OP_assign, vd, rs1, NULL, 0, NULL);
3635+
if (false_array && !true_ptr_like)
3636+
false_val = scalarize_array_literal(parent, &else_, false_val,
3637+
true_val ? true_val->type : NULL);
3638+
3639+
vd = require_var(parent);
3640+
gen_name_to(vd->var_name);
3641+
add_insn(parent, then_, OP_assign, vd, true_val, NULL, 0, NULL);
3642+
add_insn(parent, else_, OP_assign, vd, false_val, NULL, 0, NULL);
3643+
3644+
var_t *array_ref = NULL;
3645+
if (is_array_literal_placeholder(true_val))
3646+
array_ref = true_val;
3647+
else if (is_array_literal_placeholder(false_val))
3648+
array_ref = false_val;
3649+
3650+
if (array_ref) {
3651+
vd->array_size = array_ref->array_size;
3652+
vd->init_val = array_ref->init_val;
3653+
vd->type = array_ref->type;
3654+
}
35163655

35173656
vd->is_ternary_ret = true;
35183657
opstack_push(vd);
@@ -3661,6 +3800,11 @@ bool read_body_assignment(char *token,
36613800

36623801
read_expr(parent, bb);
36633802

3803+
var_t *rhs_val = opstack_pop();
3804+
if (!lvalue.ptr_level && !lvalue.is_reference)
3805+
rhs_val = scalarize_array_literal(parent, bb, rhs_val,
3806+
lvalue.type);
3807+
opstack_push(rhs_val);
36643808
vd = require_var(parent);
36653809
vd->init_val = increment_size;
36663810
gen_name_to(vd->var_name);
@@ -4401,29 +4545,12 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
44014545
read_expr(parent, &bb);
44024546
read_ternary_operation(parent, &bb);
44034547

4404-
var_t *expr_result = opstack_pop();
4548+
var_t *rhs = opstack_pop();
4549+
if (!var->ptr_level && var->array_size == 0)
4550+
rhs = scalarize_array_literal(parent, &bb, rhs,
4551+
var->type);
44054552

4406-
/* Handle array compound literal to scalar assignment.
4407-
* When assigning array compound literals to scalar
4408-
* variables, use the first element value rather than array
4409-
* address.
4410-
*/
4411-
if (expr_result && expr_result->array_size > 0 &&
4412-
!var->ptr_level && var->array_size == 0 && var->type &&
4413-
(var->type->base_type == TYPE_int ||
4414-
var->type->base_type == TYPE_short) &&
4415-
expr_result->var_name[0] == '.') {
4416-
var_t *first_elem = require_var(parent);
4417-
first_elem->type = var->type;
4418-
gen_name_to(first_elem->var_name);
4419-
4420-
/* Extract first element from compound literal array */
4421-
add_insn(parent, bb, OP_read, first_elem, expr_result,
4422-
NULL, var->type->size, NULL);
4423-
expr_result = first_elem;
4424-
}
4425-
4426-
rs1 = resize_var(parent, &bb, expr_result, var);
4553+
rs1 = resize_var(parent, &bb, rhs, var);
44274554
add_insn(parent, bb, OP_assign, var, rs1, NULL, 0, NULL);
44284555
}
44294556
}
@@ -4515,8 +4642,12 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
45154642
} else {
45164643
read_expr(parent, &bb);
45174644
read_ternary_operation(parent, &bb);
4645+
var_t *rhs = opstack_pop();
4646+
if (!nv->ptr_level && nv->array_size == 0)
4647+
rhs = scalarize_array_literal(parent, &bb, rhs,
4648+
nv->type);
45184649

4519-
rs1 = resize_var(parent, &bb, opstack_pop(), nv);
4650+
rs1 = resize_var(parent, &bb, rhs, nv);
45204651
add_insn(parent, bb, OP_assign, nv, rs1, NULL, 0, NULL);
45214652
}
45224653
}

0 commit comments

Comments
 (0)