Skip to content

Commit 79290dd

Browse files
committed
Support complex pointer arithmetic in dereference
Previously, the parser handled only simple identifier dereference (*var) and would crash on complex expressions such as *(ptr + offset). This change extends the dereference operator handling to accept general expressions in parentheses, which enables compilation of previously failing patterns:
- *(p + 4): direct offset
- *(p + i + 2): variable in expression
- *(p + i * 2): arithmetic in expression

It also handles consecutive asterisks ('**pp', '***ppp') by counting dereference levels and applying them iteratively.
1 parent cc0741f commit 79290dd

File tree

1 file changed

+51
-11
lines changed

1 file changed

+51
-11
lines changed

src/parser.c

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -964,23 +964,63 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
964964
}
965965
} else if (lex_accept(T_asterisk)) {
966966
/* dereference */
967-
char token[MAX_VAR_LEN];
968-
lvalue_t lvalue;
969-
970-
int open_bracket = lex_accept(T_open_bracket);
971-
if (open_bracket) {
972-
/* Handle expressions like *(++p) */
967+
if (lex_peek(T_open_bracket, NULL)) {
968+
/* Handle general expression dereference: *(expr) */
969+
lex_expect(T_open_bracket);
973970
read_expr(parent, bb);
974971
lex_expect(T_close_bracket);
972+
975973
rs1 = opstack_pop();
976-
/* Create a temporary variable for the dereferenced result */
977-
vd = require_var(parent);
978-
vd->type = TY_int; /* Default to int type for now */
979-
vd->is_ptr = 0;
974+
/* For pointer dereference, we need to determine the target type and
975+
* size. Since we do not have full type tracking in expressions, use
976+
* defaults
977+
*/
978+
type_t *deref_type = rs1->type ? rs1->type : TY_int;
979+
int deref_ptr = rs1->is_ptr > 0 ? rs1->is_ptr - 1 : 0;
980+
981+
vd = require_deref_var(parent, deref_type, deref_ptr);
982+
if (deref_ptr > 0)
983+
sz = PTR_SIZE;
984+
else
985+
sz = deref_type->size;
980986
gen_name_to(vd->var_name);
981987
opstack_push(vd);
982-
add_insn(parent, *bb, OP_read, vd, rs1, NULL, vd->type->size, NULL);
988+
add_insn(parent, *bb, OP_read, vd, rs1, NULL, sz, NULL);
989+
} else if (lex_peek(T_asterisk, NULL)) {
990+
/* Handle consecutive asterisks for multiple dereference: **pp,
991+
* ***ppp
992+
*/
993+
int deref_count = 1; /* We already consumed one asterisk */
994+
while (lex_accept(T_asterisk))
995+
deref_count++;
996+
997+
/* Now read the base expression (should be an identifier) */
998+
char token[MAX_VAR_LEN];
999+
lvalue_t lvalue;
1000+
1001+
lex_peek(T_identifier, token);
1002+
var_t *var = find_var(token, parent);
1003+
read_lvalue(&lvalue, var, parent, bb, true, OP_generic);
1004+
1005+
/* Apply dereferences one by one */
1006+
for (int i = 0; i < deref_count; i++) {
1007+
rs1 = opstack_pop();
1008+
vd = require_deref_var(
1009+
parent, var->type,
1010+
lvalue.is_ptr > i ? lvalue.is_ptr - i - 1 : 0);
1011+
if (lvalue.is_ptr > i + 1)
1012+
sz = PTR_SIZE;
1013+
else
1014+
sz = lvalue.type->size;
1015+
gen_name_to(vd->var_name);
1016+
opstack_push(vd);
1017+
add_insn(parent, *bb, OP_read, vd, rs1, NULL, sz, NULL);
1018+
}
9831019
} else {
1020+
/* Handle simple identifier dereference: *var */
1021+
char token[MAX_VAR_LEN];
1022+
lvalue_t lvalue;
1023+
9841024
lex_peek(T_identifier, token);
9851025
var_t *var = find_var(token, parent);
9861026
read_lvalue(&lvalue, var, parent, bb, true, OP_generic);

0 commit comments

Comments
 (0)