Skip to content

Commit 345cf8f

Browse files
committed
Fix pointer arithmetic for all pointer types
This commit resolves the long-standing limitation where non-char pointer differences didn't work correctly. The root cause was incorrect ARM instruction encoding for arithmetic right shift, not a parser issue. The fix enables proper pointer differences for all types:
- Integer pointers (int*)
- Struct pointers
- Typedef pointers
- Complex pointer expressions
1 parent c33ef7c commit 345cf8f

File tree

3 files changed

+113
-159
lines changed

3 files changed

+113
-159
lines changed

src/arm.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,12 @@ int __sll_amt(arm_cond_t cond,
230230

231231
int __sra(arm_cond_t cond, arm_reg rd, arm_reg rm, arm_reg rs)
232232
{
233+
/* Arithmetic right shift with register
234+
* Bit 4 = 1 (register-specified shift)
235+
* Bits 5-6 = arith_rs (2) for arithmetic right shift
236+
*/
233237
return arm_encode(cond, 0 + (arm_mov << 1) + (0 << 5), 0, rd,
234-
rm + (5 << 4) + (rs << 8));
238+
rm + (1 << 4) + (arith_rs << 5) + (rs << 8));
235239
}
236240

237241
int __add_i(arm_cond_t cond, arm_reg rd, arm_reg rs, int imm)

src/parser.c

Lines changed: 29 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -2265,7 +2265,6 @@ void handle_pointer_difference(block_t *parent,
22652265
var_t *result = require_var(parent);
22662266
gen_name_to(result->var_name);
22672267
add_insn(parent, *bb, OP_div, result, vd, size_const, 0, NULL);
2268-
22692268
/* Push the result */
22702269
opstack_push(result);
22712270
} else {
@@ -2285,16 +2284,8 @@ void handle_pointer_arithmetic(block_t *parent,
22852284
var_t *int_var = NULL;
22862285
int element_size = 0;
22872286

2288-
/* FIXME: Integer pointer differences are not fully supported.
2289-
* The type information needed to determine element size is lost
2290-
* when pointer variables are loaded for use in expressions.
2291-
* Character pointer differences work because element size is 1.
2292-
*
2293-
* Current workaround: Cast to char* and divide by sizeof(type)
2294-
* Example: ((char*)q - (char*)p) / sizeof(int)
2295-
*
2296-
* Attempted fixes include looking up original variable declarations,
2297-
* but the fundamental issue remains in the compilation pipeline.
2287+
/* Pointer arithmetic: differences (char*, int*, struct*, etc.),
2288+
* addition/increment with scaling, and array indexing.
22982289
*/
22992290

23002291
/* Check if both operands are pointers (pointer difference) */
@@ -2321,6 +2312,16 @@ void handle_pointer_arithmetic(block_t *parent,
23212312
bool rs2_is_ptr = (orig_rs2->ptr_level > 0) ||
23222313
(orig_rs2->type && orig_rs2->type->ptr_level > 0);
23232314

2315+
/* If variable lookup failed, check the passed variables directly */
2316+
if (!rs1_is_ptr) {
2317+
rs1_is_ptr =
2318+
(rs1->ptr_level > 0) || (rs1->type && rs1->type->ptr_level > 0);
2319+
}
2320+
if (!rs2_is_ptr) {
2321+
rs2_is_ptr =
2322+
(rs2->ptr_level > 0) || (rs2->type && rs2->type->ptr_level > 0);
2323+
}
2324+
23242325
if (rs1_is_ptr && rs2_is_ptr) {
23252326
/* Both are pointers - this is pointer difference */
23262327
/* Determine element size */
@@ -2427,6 +2428,11 @@ void handle_pointer_arithmetic(block_t *parent,
24272428

24282429
/* Perform the operation */
24292430
var_t *vd = require_var(parent);
2431+
/* Preserve pointer type metadata on results of pointer arithmetic */
2432+
if (ptr_var) {
2433+
vd->type = ptr_var->type;
2434+
vd->ptr_level = ptr_var->ptr_level;
2435+
}
24302436
gen_name_to(vd->var_name);
24312437
opstack_push(vd);
24322438
add_insn(parent, *bb, op, vd, rs1, rs2, 0, NULL);
@@ -2463,41 +2469,6 @@ bool is_pointer_var(var_t *v, block_t *parent)
24632469
return false;
24642470
}
24652471

2466-
/* Helper function to check if it's a pointer difference operation */
2467-
bool is_pointer_difference(opcode_t op, var_t *rs1, var_t *rs2)
2468-
{
2469-
if (op != OP_sub)
2470-
return false;
2471-
2472-
/* Check if both operands are pointers or have pointer types */
2473-
bool rs1_is_ptr =
2474-
rs1->ptr_level > 0 || (rs1->type && rs1->type->ptr_level > 0);
2475-
bool rs2_is_ptr =
2476-
rs2->ptr_level > 0 || (rs2->type && rs2->type->ptr_level > 0);
2477-
2478-
/* If both explicitly marked as pointers, it's pointer difference */
2479-
if (rs1_is_ptr && rs2_is_ptr)
2480-
return true;
2481-
2482-
/* If both variables have the same type and that type has base_type set
2483-
* (indicating they're related to typed data), assume it is pointer
2484-
* subtraction.
2485-
*/
2486-
if (rs1->type && rs2->type) {
2487-
/* If they have the same type object or same base type, and the
2488-
* base type is not void, treat as pointer difference.
2489-
*/
2490-
if ((rs1->type == rs2->type ||
2491-
rs1->type->base_type == rs2->type->base_type) &&
2492-
rs1->type->base_type != TYPE_void &&
2493-
rs1->type->base_type != TYPE_struct) {
2494-
return true;
2495-
}
2496-
}
2497-
2498-
return false;
2499-
}
2500-
25012472
void read_expr(block_t *parent, basic_block_t **bb)
25022473
{
25032474
var_t *vd, *rs1, *rs2;
@@ -2544,14 +2515,10 @@ void read_expr(block_t *parent, basic_block_t **bb)
25442515

25452516
/* Handle pointer arithmetic for addition and subtraction */
25462517
if (is_pointer_operation(top_op, rs1, rs2)) {
2547-
if (is_pointer_difference(top_op, rs1, rs2)) {
2548-
/* Special case: pointer - pointer difference */
2549-
handle_pointer_difference(parent, bb, rs1, rs2);
2550-
} else {
2551-
/* Regular pointer arithmetic with scaling */
2552-
handle_pointer_arithmetic(parent, bb, top_op, rs1,
2553-
rs2);
2554-
}
2518+
/* handle_pointer_arithmetic handles both pointer
2519+
* differences and regular pointer arithmetic internally
2520+
*/
2521+
handle_pointer_arithmetic(parent, bb, top_op, rs1, rs2);
25552522
oper_stack_idx--;
25562523
continue;
25572524
}
@@ -2671,90 +2638,10 @@ void read_expr(block_t *parent, basic_block_t **bb)
26712638
rs2 = opstack_pop();
26722639
rs1 = opstack_pop();
26732640

2674-
/* Handle pointer arithmetic for addition and subtraction */
2675-
if ((top_op == OP_add || top_op == OP_sub) &&
2676-
(rs1->ptr_level || (rs1->type && rs1->type->ptr_level > 0) ||
2677-
rs2->ptr_level || (rs2->type && rs2->type->ptr_level > 0))) {
2678-
var_t *ptr_var = NULL;
2679-
var_t *int_var = NULL;
2680-
int element_size = 0;
2681-
2682-
/* Determine which operand is the pointer */
2683-
if (rs1->ptr_level || (rs1->type && rs1->type->ptr_level > 0)) {
2684-
ptr_var = rs1;
2685-
int_var = rs2;
2686-
2687-
/* Calculate element size */
2688-
if (rs1->ptr_level && rs1->type) {
2689-
element_size = rs1->type->size;
2690-
} else if (rs1->type && rs1->type->ptr_level > 0) {
2691-
/* Typedef pointer */
2692-
switch (rs1->type->base_type) {
2693-
case TYPE_char:
2694-
element_size = TY_char->size;
2695-
break;
2696-
case TYPE_int:
2697-
element_size = TY_int->size;
2698-
break;
2699-
case TYPE_void:
2700-
element_size = 1;
2701-
break;
2702-
default:
2703-
element_size = rs1->type ? rs1->type->size : PTR_SIZE;
2704-
break;
2705-
}
2706-
}
2707-
} else if (rs2->ptr_level ||
2708-
(rs2->type && rs2->type->ptr_level > 0)) {
2709-
/* Only for addition (p + n == n + p) */
2710-
if (top_op == OP_add) {
2711-
ptr_var = rs2;
2712-
int_var = rs1;
2713-
2714-
/* Calculate element size */
2715-
if (rs2->ptr_level && rs2->type) {
2716-
element_size = rs2->type->size;
2717-
} else if (rs2->type && rs2->type->ptr_level > 0) {
2718-
/* Typedef pointer */
2719-
switch (rs2->type->base_type) {
2720-
case TYPE_char:
2721-
element_size = TY_char->size;
2722-
break;
2723-
case TYPE_int:
2724-
element_size = TY_int->size;
2725-
break;
2726-
case TYPE_void:
2727-
element_size = 1;
2728-
break;
2729-
default:
2730-
element_size =
2731-
rs2->type ? rs2->type->size : PTR_SIZE;
2732-
break;
2733-
}
2734-
}
2735-
/* Swap operands so pointer is rs1 */
2736-
rs1 = ptr_var;
2737-
rs2 = int_var;
2738-
}
2739-
}
2740-
2741-
/* If we need to scale the integer operand */
2742-
if (ptr_var && element_size > 1) {
2743-
/* Create multiplication by element size */
2744-
var_t *size_const = require_var(parent);
2745-
gen_name_to(size_const->var_name);
2746-
size_const->init_val = element_size;
2747-
add_insn(parent, *bb, OP_load_constant, size_const, NULL, NULL,
2748-
0, NULL);
2749-
2750-
var_t *scaled = require_var(parent);
2751-
gen_name_to(scaled->var_name);
2752-
add_insn(parent, *bb, OP_mul, scaled, int_var, size_const, 0,
2753-
NULL);
2754-
2755-
/* Use scaled value as rs2 */
2756-
rs2 = scaled;
2757-
}
2641+
/* Pointer arithmetic handling */
2642+
if (is_pointer_operation(top_op, rs1, rs2)) {
2643+
handle_pointer_arithmetic(parent, bb, top_op, rs1, rs2);
2644+
continue; /* skip normal processing */
27582645
}
27592646

27602647
/* Constant folding for binary operations */
@@ -4496,9 +4383,8 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
44964383
if (lex_accept(T_assign)) {
44974384
if (lex_peek(T_open_curly, NULL) &&
44984385
(var->array_size > 0 || var->ptr_level > 0)) {
4499-
parse_array_init(
4500-
var, parent, &bb,
4501-
1); /* FIXED: Emit code for locals in functions */
4386+
/* Emit code for locals in functions */
4387+
parse_array_init(var, parent, &bb, 1);
45024388
} else if (lex_peek(T_open_curly, NULL) &&
45034389
(var->type->base_type == TYPE_struct ||
45044390
var->type->base_type == TYPE_typedef)) {
@@ -4609,8 +4495,8 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
46094495
if (lex_accept(T_assign)) {
46104496
if (lex_peek(T_open_curly, NULL) &&
46114497
(nv->array_size > 0 || nv->ptr_level > 0)) {
4612-
parse_array_init(nv, parent, &bb,
4613-
1); /* FIXED: Emit code for locals */
4498+
/* Emit code for locals */
4499+
parse_array_init(nv, parent, &bb, 1);
46144500
} else if (lex_peek(T_open_curly, NULL) &&
46154501
(nv->type->base_type == TYPE_struct ||
46164502
nv->type->base_type == TYPE_typedef)) {

tests/driver.sh

Lines changed: 79 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1311,20 +1311,15 @@ int main() {
13111311
}
13121312
EOF
13131313

1314-
# Character pointer differences work correctly because element size is 1.
1315-
# Integer and other typed pointer differences face challenges due to type
1316-
# information loss during the compilation pipeline.
1317-
#
1318-
# FIXME: when pointer variables are used in expressions, they become temporaries
1319-
# without sufficient type information for proper scaling.
1320-
# workaround: For integer pointer differences, cast to char* and divide manually
1314+
# Pointer arithmetic tests
1315+
1316+
# Basic integer pointer difference
13211317
try_ 7 << EOF
13221318
int main() {
13231319
int arr[10];
13241320
int *p = arr;
13251321
int *q = arr + 7;
1326-
/* Workaround: cast to char* and divide by sizeof(int) */
1327-
return ((char*)q - (char*)p) / sizeof(int); /* Returns 7 */
1322+
return q - p;
13281323
}
13291324
EOF
13301325

@@ -1334,7 +1329,7 @@ int main() {
13341329
char text[50];
13351330
char *start = text;
13361331
char *end = text + 10;
1337-
return end - start; /* char pointers work correctly */
1332+
return end - start;
13381333
}
13391334
EOF
13401335

@@ -1367,24 +1362,93 @@ int main() {
13671362
}
13681363
EOF
13691364

1370-
# Additional integer pointer
1365+
# Integer pointer with array indexing
13711366
try_ 3 << EOF
13721367
int main() {
13731368
int nums[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
13741369
int *first = &nums[2];
13751370
int *second = &nums[5];
1376-
/* Workaround using char* cast */
1377-
return ((char*)second - (char*)first) / sizeof(int); /* (5-2) = 3 */
1371+
return second - first; /* Direct subtraction: (5-2) = 3 */
13781372
}
13791373
EOF
13801374

1375+
# Larger integer pointer difference
13811376
try_ 10 << EOF
13821377
int main() {
13831378
int values[20];
13841379
int *p = values;
13851380
int *q = values + 10;
1386-
/* Another workaround approach */
1387-
return ((char*)q - (char*)p) / sizeof(int);
1381+
return q - p; /* Direct pointer arithmetic */
1382+
}
1383+
EOF
1384+
1385+
# Negative pointer difference
1386+
try_ 251 << EOF
1387+
int main() {
1388+
int arr[10];
1389+
int *p = arr + 8;
1390+
int *q = arr + 3;
1391+
return q - p; /* 3 - 8 = -5, wraps to 251 in exit code */
1392+
}
1393+
EOF
1394+
1395+
# Zero pointer difference
1396+
try_ 0 << EOF
1397+
int main() {
1398+
int data[10];
1399+
int *p1 = data + 5;
1400+
int *p2 = data + 5;
1401+
return p2 - p1; /* Same position = 0 */
1402+
}
1403+
EOF
1404+
1405+
# Struct pointer arithmetic
1406+
try_ 4 << EOF
1407+
struct point {
1408+
int x;
1409+
int y;
1410+
int z;
1411+
};
1412+
1413+
int main() {
1414+
struct point pts[10];
1415+
struct point *p1 = pts;
1416+
struct point *p2 = pts + 4;
1417+
return p2 - p1; /* Struct pointer difference */
1418+
}
1419+
EOF
1420+
1421+
# Mixed pointer arithmetic operations
1422+
try_ 16 << EOF
1423+
int main() {
1424+
int arr[20];
1425+
int *start = arr;
1426+
int *mid = arr + 10;
1427+
int *end = arr + 18;
1428+
return (end - mid) + (mid - start) - 2; /* (18-10) + (10-0) - 2 = 8 + 10 - 2 = 16 */
1429+
}
1430+
EOF
1431+
1432+
# Pointer arithmetic with typedef
1433+
try_ 6 << EOF
1434+
typedef int* int_ptr;
1435+
int main() {
1436+
int data[15];
1437+
int_ptr p1 = data + 2;
1438+
int_ptr p2 = data + 8;
1439+
return p2 - p1; /* Typedef pointer difference: 8 - 2 = 6 */
1440+
}
1441+
EOF
1442+
1443+
# Complex expression with pointer differences
1444+
try_ 13 << EOF
1445+
int main() {
1446+
int vals[30];
1447+
int *a = vals;
1448+
int *b = vals + 5;
1449+
int *c = vals + 9;
1450+
int *d = vals + 15;
1451+
return (d - a) - (c - b) + 2; /* (15-0) - (9-5) + 2 = 15 - 4 + 2 = 13 */
13881452
}
13891453
EOF
13901454

0 commit comments

Comments
 (0)