Skip to content

Commit 08b039f

Browse files
committed
Improve ABI compliance
In the previous implementation, shecc lacked the consideration of calling convention, so the function arguments were loaded into registers directly when encountering a function call, regardless of the target architecture. For the Arm architecture, if the number of arguments is greater than 4, the additional arguments were still loaded into registers instead of being passed to stack, causing dynamically linked programs to execute incorrectly. To ensure that dynamically linked programs execute correctly, these changes introduce the following strategy: - Each function allocates an additional 20 bytes of space using stack. - 16 bytes are used for the extra arguments. - 4 bytes are used to save and restore the global pointer stored in register r12 (ip). - Because the external call may change register r12, the compiled program could lose the address of the global stack. Therefore, 4 bytes are allocated to preserve its value. - If any internal function calls an external function with more than 4 arguments, the extra arguments are stored directly on stack. - If the callee is an internal function, all arguments are still loaded into registers. Therefore, this commit makes the dynamically linked shecc more strictly compliant with ABI, and ensure that dynamically linked programs compiled by shecc also follow ABI.
1 parent 43066fb commit 08b039f

File tree

5 files changed

+96
-6
lines changed

5 files changed

+96
-6
lines changed

mk/arm.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@ ARCH_DEFS = \
1111
\#define PLT_FIXUP_SIZE 20\n$\
1212
\#define PLT_ENT_SIZE 12\n$\
1313
\#define R_ARCH_JUMP_SLOT 0x16\n$\
14+
\#define MAX_ARGS_IN_REG 4\n$\
1415
"
1516
RUNNER_LD_PREFIX=-L /usr/arm-linux-gnueabihf/

mk/riscv.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ ARCH_DEFS = \
1212
\#define PLT_FIXUP_SIZE 20\n$\
1313
\#define PLT_ENT_SIZE 12\n$\
1414
\#define R_ARCH_JUMP_SLOT 0x5\n$\
15+
\#define MAX_ARGS_IN_REG 8\n$\
1516
"
1617

1718
# TODO: Set this variable for RISC-V architecture

src/arm-codegen.c

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
void update_elf_offset(ph2_ir_t *ph2_ir)
1515
{
16+
func_t *func;
1617
switch (ph2_ir->op) {
1718
case OP_load_constant:
1819
/* ARMv7 uses 12 bits to encode immediate value, but the higher 4 bits
@@ -70,7 +71,6 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
7071
case OP_read:
7172
case OP_write:
7273
case OP_jump:
73-
case OP_call:
7474
case OP_load_func:
7575
case OP_indirect:
7676
case OP_add:
@@ -85,6 +85,17 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
8585
case OP_bit_not:
8686
elf_offset += 4;
8787
return;
88+
case OP_call:
89+
func = find_func(ph2_ir->func_name);
90+
if (func->bbs)
91+
elf_offset += 4;
92+
else if (dynlink)
93+
elf_offset += 12;
94+
else {
95+
printf("The '%s' function is not implemented\n", ph2_ir->func_name);
96+
abort();
97+
}
98+
return;
8899
case OP_div:
89100
case OP_mod:
90101
if (hard_mul_div) {
@@ -211,6 +222,7 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
211222
const int rn = ph2_ir->src0;
212223
int rm = ph2_ir->src1; /* Not const because OP_trunc modifies it */
213224
int ofs;
225+
bool is_external_call = false;
214226

215227
/* Prepare this variable to reuse code for:
216228
* 1. division and modulo operations
@@ -307,14 +319,23 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
307319
func = find_func(ph2_ir->func_name);
308320
if (func->bbs)
309321
ofs = func->bbs->elf_offset - elf_code->size;
310-
else if (dynlink)
322+
else if (dynlink) {
311323
ofs = (dynamic_sections.elf_plt_start + func->plt_offset) -
312-
(elf_code_start + elf_code->size);
313-
else {
324+
(elf_code_start + elf_code->size + 4);
325+
is_external_call = true;
326+
} else {
314327
printf("The '%s' function is not implemented\n", ph2_ir->func_name);
315328
abort();
316329
}
330+
331+
/* If the callee is external, save __r12 at [sp + 16] and
332+
* restore it after the function returns.
333+
*/
334+
if (is_external_call)
335+
emit(__sw(__AL, __r12, __sp, 16));
317336
emit(__bl(__AL, ofs));
337+
if (is_external_call)
338+
emit(__lw(__AL, __r12, __sp, 16));
318339
return;
319340
case OP_load_data_address:
320341
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));

src/globals.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -935,7 +935,31 @@ func_t *add_func(char *func_name, bool synthesize)
935935
hashmap_put(FUNC_MAP, func_name, func);
936936
/* Use interned string for function name */
937937
strcpy(func->return_def.var_name, intern_string(func_name));
938-
func->stack_size = 4;
938+
/* Prepare space for function arguments and global pointer.
939+
*
940+
* For Arm architecture, the first four arguments are passed to r0 ~ r3,
941+
* and any additional arguments are passed to the stack.
942+
*
943+
* +-------------+ <-- sp + 20
944+
* | (val of ip) |
945+
* +-------------+ <-- sp + 16
946+
* | arg 8 |
947+
* +-------------+ <-- sp + 12
948+
* | arg 7 |
949+
* +-------------+ <-- sp + 8
950+
* | arg 6 |
951+
* +-------------+ <-- sp + 4
952+
* | arg 5 |
953+
* +-------------+ <-- sp
954+
*
955+
* However, register r12 (ip) holds the global pointer that points to
956+
* global stack, but this register may be changed after an external call.
957+
*
958+
* Therefore, the current strategy is preparing additional space for ip,
959+
* saves its original value to the stack before an external call, and
960+
* restores it from the stack after the external function returns.
961+
*/
962+
func->stack_size = 20;
939963

940964
if (synthesize)
941965
return func;

src/reg-alloc.c

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,39 @@ void extend_liveness(basic_block_t *bb, insn_t *insn, var_t *var, int offset)
388388
var->consumed = insn->idx + offset;
389389
}
390390

391+
/* Return whether extra arguments are pushed onto stack. */
392+
bool abi_lower_call_args(basic_block_t *bb, insn_t *insn)
393+
{
394+
int num_of_args = 0;
395+
int stack_args = 0;
396+
while (insn && insn->opcode == OP_push) {
397+
num_of_args += 1;
398+
insn = insn->next;
399+
}
400+
401+
if (insn && insn->opcode == OP_call) {
402+
func_t *func = find_func(insn->str);
403+
if (func->bbs)
404+
return false;
405+
}
406+
407+
if (num_of_args <= MAX_ARGS_IN_REG)
408+
return false;
409+
410+
insn = insn->prev;
411+
stack_args = num_of_args - MAX_ARGS_IN_REG;
412+
while (stack_args) {
413+
load_var(bb, insn->rs1, MAX_ARGS_IN_REG - 1);
414+
ph2_ir_t *ir = bb_add_ph2_ir(bb, OP_store);
415+
ir->src0 = MAX_ARGS_IN_REG - 1;
416+
ir->src1 = (stack_args - 1) * 4;
417+
stack_args -= 1;
418+
insn = insn->prev;
419+
}
420+
REGS[MAX_ARGS_IN_REG - 1].var = NULL;
421+
return true;
422+
}
423+
391424
void reg_alloc(void)
392425
{
393426
/* TODO: Add proper .bss and .data section support for uninitialized /
@@ -556,7 +589,8 @@ void reg_alloc(void)
556589
}
557590

558591
for (basic_block_t *bb = func->bbs; bb; bb = bb->rpo_next) {
559-
bool is_pushing_args = false;
592+
bool is_pushing_args = false, handle_abi = false,
593+
args_on_stack = false;
560594
int args = 0;
561595

562596
bb->visited++;
@@ -757,6 +791,13 @@ void reg_alloc(void)
757791
spill_alive(bb, insn);
758792
is_pushing_args = true;
759793
}
794+
if (dynlink && !handle_abi) {
795+
args_on_stack = abi_lower_call_args(bb, insn);
796+
handle_abi = true;
797+
}
798+
799+
if (dynlink && args_on_stack && args >= MAX_ARGS_IN_REG)
800+
break;
760801

761802
src0 = prepare_operand(bb, insn->rs1, -1);
762803
ir = bb_add_ph2_ir(bb, OP_assign);
@@ -778,6 +819,7 @@ void reg_alloc(void)
778819

779820
is_pushing_args = false;
780821
args = 0;
822+
handle_abi = false;
781823

782824
for (int i = 0; i < REG_CNT; i++)
783825
REGS[i].var = NULL;
@@ -795,6 +837,7 @@ void reg_alloc(void)
795837

796838
is_pushing_args = false;
797839
args = 0;
840+
handle_abi = false;
798841
break;
799842
case OP_func_ret:
800843
dest = prepare_dest(bb, insn->rd, -1, -1);

0 commit comments

Comments
 (0)