Skip to content

Commit 8ce5f7c

Browse files
committed
Support complex pointer arithmetic in dereference
Previously, the parser handled only simple identifier dereference (*var) and would crash on complex expressions such as *(ptr + offset). This change extends the dereference operator handling to accept general expressions in parentheses, which enables compilation of these previously failing patterns:
- *(p + 4) - direct offset
- *(p + i + 2) - variable in expression
- *(p + i * 2) - arithmetic in expression

It also handles consecutive asterisks ('**pp', '***ppp') by counting the dereference levels and applying them iteratively.

This commit also fixes a second, related problem. When expressions like arr[0] + arr[1] + arr[2] were parsed, the compiler incorrectly applied pointer arithmetic scaling to the values read from the array elements, resulting in wrong calculations. The issue was in read_lvalue(), which handled the '+' operator after array indexing as if it were pointer arithmetic. After arr[0] we have an integer value, not a pointer, so the '+' should be handled by the expression parser, not by read_lvalue(). The fix adds a check so that pointer arithmetic handling occurs only when we have a pointer/array that has not been dereferenced (i.e., when lvalue->is_reference is false).

Test case that was failing:
int arr[3] = {10, 20, 12};
return arr[0] + arr[1] + arr[2]; // Was returning 26 instead of 42
1 parent f2d99dd commit 8ce5f7c

File tree

2 files changed

+456
-13
lines changed

2 files changed

+456
-13
lines changed

src/parser.c

Lines changed: 87 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -964,23 +964,89 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
964964
}
965965
} else if (lex_accept(T_asterisk)) {
966966
/* dereference */
967-
char token[MAX_VAR_LEN];
968-
lvalue_t lvalue;
969-
970-
int open_bracket = lex_accept(T_open_bracket);
971-
if (open_bracket) {
972-
/* Handle expressions like *(++p) */
967+
if (lex_peek(T_open_bracket, NULL)) {
968+
/* Handle general expression dereference: *(expr) */
969+
lex_expect(T_open_bracket);
973970
read_expr(parent, bb);
974971
lex_expect(T_close_bracket);
972+
975973
rs1 = opstack_pop();
976-
/* Create a temporary variable for the dereferenced result */
977-
vd = require_var(parent);
978-
vd->type = TY_int; /* Default to int type for now */
979-
vd->is_ptr = 0;
974+
/* For pointer dereference, we need to determine the target type and
975+
* size. Since we do not have full type tracking in expressions, use
976+
* defaults
977+
*/
978+
type_t *deref_type = rs1->type ? rs1->type : TY_int;
979+
int deref_ptr = rs1->is_ptr > 0 ? rs1->is_ptr - 1 : 0;
980+
981+
vd = require_deref_var(parent, deref_type, deref_ptr);
982+
if (deref_ptr > 0)
983+
sz = PTR_SIZE;
984+
else
985+
sz = deref_type->size;
980986
gen_name_to(vd->var_name);
981987
opstack_push(vd);
982-
add_insn(parent, *bb, OP_read, vd, rs1, NULL, vd->type->size, NULL);
988+
add_insn(parent, *bb, OP_read, vd, rs1, NULL, sz, NULL);
989+
} else if (lex_peek(T_asterisk, NULL)) {
990+
/* Handle consecutive asterisks for multiple dereference: **pp,
991+
* ***ppp, ***(expr)
992+
*/
993+
int deref_count = 1; /* We already consumed one asterisk */
994+
while (lex_accept(T_asterisk))
995+
deref_count++;
996+
997+
/* Check if we have a parenthesized expression or simple identifier
998+
*/
999+
if (lex_peek(T_open_bracket, NULL)) {
1000+
/* Handle ***(expr) case */
1001+
lex_expect(T_open_bracket);
1002+
read_expr(parent, bb);
1003+
lex_expect(T_close_bracket);
1004+
1005+
/* Apply dereferences one by one */
1006+
for (int i = 0; i < deref_count; i++) {
1007+
rs1 = opstack_pop();
1008+
/* For expression dereference, use default type info */
1009+
type_t *deref_type = rs1->type ? rs1->type : TY_int;
1010+
int deref_ptr = rs1->is_ptr > 0 ? rs1->is_ptr - 1 : 0;
1011+
1012+
vd = require_deref_var(parent, deref_type, deref_ptr);
1013+
if (deref_ptr > 0)
1014+
sz = PTR_SIZE;
1015+
else
1016+
sz = deref_type->size;
1017+
gen_name_to(vd->var_name);
1018+
opstack_push(vd);
1019+
add_insn(parent, *bb, OP_read, vd, rs1, NULL, sz, NULL);
1020+
}
1021+
} else {
1022+
/* Handle **pp, ***ppp case with simple identifier */
1023+
char token[MAX_VAR_LEN];
1024+
lvalue_t lvalue;
1025+
1026+
lex_peek(T_identifier, token);
1027+
var_t *var = find_var(token, parent);
1028+
read_lvalue(&lvalue, var, parent, bb, true, OP_generic);
1029+
1030+
/* Apply dereferences one by one */
1031+
for (int i = 0; i < deref_count; i++) {
1032+
rs1 = opstack_pop();
1033+
vd = require_deref_var(
1034+
parent, var->type,
1035+
lvalue.is_ptr > i ? lvalue.is_ptr - i - 1 : 0);
1036+
if (lvalue.is_ptr > i + 1)
1037+
sz = PTR_SIZE;
1038+
else
1039+
sz = lvalue.type->size;
1040+
gen_name_to(vd->var_name);
1041+
opstack_push(vd);
1042+
add_insn(parent, *bb, OP_read, vd, rs1, NULL, sz, NULL);
1043+
}
1044+
}
9831045
} else {
1046+
/* Handle simple identifier dereference: *var */
1047+
char token[MAX_VAR_LEN];
1048+
lvalue_t lvalue;
1049+
9841050
lex_peek(T_identifier, token);
9851051
var_t *var = find_var(token, parent);
9861052
read_lvalue(&lvalue, var, parent, bb, true, OP_generic);
@@ -1513,7 +1579,12 @@ void read_lvalue(lvalue_t *lvalue,
15131579
if (!eval)
15141580
return;
15151581

1516-
if (lex_peek(T_plus, NULL) && (var->is_ptr || var->array_size)) {
1582+
/* Only handle pointer arithmetic if we have a pointer/array that hasn't
1583+
* been dereferenced. After array indexing like arr[0], we have a value, not
1584+
* a pointer.
1585+
*/
1586+
if (lex_peek(T_plus, NULL) && (var->is_ptr || var->array_size) &&
1587+
!lvalue->is_reference) {
15171588
while (lex_peek(T_plus, NULL) && (var->is_ptr || var->array_size)) {
15181589
lex_expect(T_plus);
15191590
if (lvalue->is_reference) {
@@ -1916,7 +1987,10 @@ bool read_body_assignment(char *token,
19161987
int increment_size = 1;
19171988

19181989
/* if we have a pointer, shift it by element size */
1919-
if (lvalue.is_ptr)
1990+
/* But not if we are operating on a dereferenced value (array
1991+
* indexing)
1992+
*/
1993+
if (lvalue.is_ptr && !lvalue.is_reference)
19201994
increment_size = lvalue.type->size;
19211995

19221996
/* If operand is a reference, read the value and push to stack for

0 commit comments

Comments
 (0)