Skip to content

Commit d79b244

Browse files
committed
Support Arm dynamic linking
1 parent f265042 commit d79b244

File tree

15 files changed

+844
-182
lines changed

15 files changed

+844
-182
lines changed

Makefile

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,19 @@ HOST_ARCH = $(shell arch 2>/dev/null)
4343
SRCDIR := $(shell find src -type d)
4444
LIBDIR := $(shell find lib -type d)
4545

46+
# Dynamic-linking build knobs (enabled with `make DYNLINK=1`).
#   BUILTIN_LIBC - file under $(LIBDIR) that is normalized and inlined into
#                  $(OUT)/libc.inc: the full implementation (c.c) by default,
#                  the declarations-only header (c.h) when DYNLINK=1.
#   STAGE0_FLAGS - options passed to the stage 0 compiler.
#   STAGE1_FLAGS - options passed to the stage 1 compiler.
BUILTIN_LIBC ?= c.c
STAGE0_FLAGS ?= --dump-ir
STAGE1_FLAGS ?=
# $(strip ...) tolerates stray whitespace around the values being compared.
ifeq ($(strip $(DYNLINK)),1)
  ifeq ($(strip $(ARCH)),riscv)
    # TODO: implement dynamic linking for RISC-V.
    # No quotes around the message: make prints the text of $(error) verbatim.
    $(error Dynamic linking mode is not implemented for RISC-V)
  endif
  BUILTIN_LIBC := c.h
  STAGE0_FLAGS += --dynlink
  STAGE1_FLAGS += --dynlink
endif
58+
4659
SRCS := $(wildcard $(patsubst %,%/main.c, $(SRCDIR)))
4760
OBJS := $(SRCS:%.c=$(OUT)/%.o)
4861
deps := $(OBJS:%.o=%.o.d)
@@ -72,7 +85,7 @@ config:
7285

7386
$(OUT)/tests/%.elf: tests/%.c $(OUT)/$(STAGE0)
7487
$(VECHO) " SHECC\t$@\n"
75-
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $< > $(basename $@).log ; \
88+
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $< > $(basename $@).log ; \
7689
chmod +x $@ ; $(PRINTF) "Running $@ ...\n"
7790
$(Q)$(TARGET_EXEC) $@ && $(call pass)
7891

@@ -122,9 +135,9 @@ $(OUT)/norm-lf: tools/norm-lf.c
122135
$(VECHO) " CC+LD\t$@\n"
123136
$(Q)$(CC) $(CFLAGS) -o $@ $^
124137

125-
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/c.c
138+
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC)
126139
$(VECHO) " GEN\t$@\n"
127-
$(Q)$(OUT)/norm-lf $(LIBDIR)/c.c $(OUT)/c.normalized.c
140+
$(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC) $(OUT)/c.normalized.c
128141
$(Q)$(OUT)/inliner $(OUT)/c.normalized.c $@
129142
$(Q)$(RM) $(OUT)/c.normalized.c
130143

@@ -143,12 +156,12 @@ $(OUT)/$(STAGE0)-sanitizer: $(OUT)/libc.inc $(OBJS)
143156
$(OUT)/$(STAGE1): $(OUT)/$(STAGE0)
144157
$(Q)$(STAGE1_CHECK_CMD)
145158
$(VECHO) " SHECC\t$@\n"
146-
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
159+
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
147160
$(Q)chmod a+x $@
148161

149162
$(OUT)/$(STAGE2): $(OUT)/$(STAGE1)
150163
$(VECHO) " SHECC\t$@\n"
151-
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) -o $@ $(SRCDIR)/main.c
164+
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) $(STAGE1_FLAGS) -o $@ $(SRCDIR)/main.c
152165

153166
bootstrap: $(OUT)/$(STAGE2)
154167
$(Q)chmod 775 $(OUT)/$(STAGE2)

lib/c.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* shecc - Self-Hosting and Educational C Compiler.
3+
*
4+
* shecc is freely redistributable under the BSD 2 clause license. See the
5+
* file "LICENSE" for information on usage and redistribution of this file.
6+
*/
7+
8+
#pragma once
9+
/* Declarations of C standard library functions */
10+
11+
#define NULL 0
12+
13+
#define bool _Bool
14+
#define true 1
15+
#define false 0
16+
17+
/* va_list support for variadic functions */
18+
typedef int *va_list;
19+
20+
/* File I/O */
21+
typedef int FILE;
22+
FILE *fopen(char *filename, char *mode);
23+
int fclose(FILE *stream);
24+
int fgetc(FILE *stream);
25+
char *fgets(char *str, int n, FILE *stream);
26+
int fputc(int c, FILE *stream);
27+
28+
/* String and memory functions */
29+
int strlen(char *str);
30+
int strcmp(char *s1, char *s2);
31+
int strncmp(char *s1, char *s2, int len);
32+
char *strcpy(char *dest, char *src);
33+
char *strncpy(char *dest, char *src, int len);
34+
char *memcpy(char *dest, char *src, int count);
35+
int memcmp(void *s1, void *s2, int n);
36+
void *memset(void *s, int c, int n);
37+
38+
/* Formatted output functions */
39+
int printf(char *str, ...);
40+
int sprintf(char *buffer, char *str, ...);
41+
int snprintf(char *buffer, int n, char *str, ...);
42+
43+
/* Program termination */
44+
void exit(int exit_code);
45+
void abort(void);
46+
47+
/* Dynamic memory allocation/deallocation functions */
48+
void *malloc(int size);
49+
void *calloc(int n, int size);
50+
void free(void *ptr);

mk/arm.mk

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,10 @@ ARCH_DEFS = \
66
\#define ARCH_PREDEFINED \"__arm__\" /* defined by GNU C and RealView */\n$\
77
\#define ELF_MACHINE 0x28 /* up to ARMv7/Aarch32 */\n$\
88
\#define ELF_FLAGS 0x5000200\n$\
9+
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
10+
\#define LIBC_SO \"libc.so.6\"\n$\
11+
\#define PLT_FIXUP_SIZE 20\n$\
12+
\#define PLT_ENT_SIZE 12\n$\
13+
\#define R_ARCH_JUMP_SLOT 0x16\n$\
914
"
15+
RUNNER_LD_PREFIX=-L /usr/arm-linux-gnueabi/

mk/common.mk

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ ifneq ($(HOST_ARCH),$(ARCH_NAME))
3636

3737
# Generate the path to the architecture-specific qemu
3838
TARGET_EXEC = $(shell which $(ARCH_RUNNER))
39+
ifeq ($(DYNLINK),1)
40+
TARGET_EXEC += $(RUNNER_LD_PREFIX)
41+
endif
3942
endif
4043
export TARGET_EXEC
4144

mk/riscv.mk

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,12 @@ ARCH_DEFS = \
77
\#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\
88
\#define ELF_MACHINE 0xf3\n$\
99
\#define ELF_FLAGS 0\n$\
10+
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
11+
\#define LIBC_SO \"libc.so.6\"\n$\
12+
\#define PLT_FIXUP_SIZE 20\n$\
13+
\#define PLT_ENT_SIZE 12\n$\
14+
\#define R_ARCH_JUMP_SLOT 0x5\n$\
1015
"
16+
17+
# TODO: Set RUNNER_LD_PREFIX (extra arguments appended to the emulator command when DYNLINK=1) once dynamic linking is supported on RISC-V.
18+
RUNNER_LD_PREFIX=

src/arm-codegen.c

Lines changed: 133 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,18 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
136136

137137
void cfg_flatten(void)
138138
{
139-
func_t *func = find_func("__syscall");
140-
func->bbs->elf_offset = 48; /* offset of start + branch + exit in codegen */
139+
func_t *func;
140+
141+
if (dynlink)
142+
elf_offset =
143+
84; /* offset of __libc_start_main + main_wrapper in codegen */
144+
else {
145+
func = find_func("__syscall");
146+
func->bbs->elf_offset = 48; /* offset of start + exit in codegen */
147+
elf_offset =
148+
84; /* offset of start + branch + exit + syscall in codegen */
149+
}
141150

142-
elf_offset = 84; /* offset of start + branch + exit + syscall in codegen */
143151
GLOBAL_FUNC->bbs->elf_offset = elf_offset;
144152

145153
for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -148,7 +156,10 @@ void cfg_flatten(void)
148156
}
149157

150158
/* prepare 'argc' and 'argv', then proceed to 'main' function */
151-
elf_offset += 32; /* 6 insns for main call + 2 for exit */
159+
if (dynlink)
160+
elf_offset += 12;
161+
else
162+
elf_offset += 32; /* 6 insns for main call + 2 for exit */
152163

153164
for (func = FUNC_LIST.head; func; func = func->next) {
154165
/* Skip function declarations without bodies */
@@ -287,7 +298,16 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
287298
return;
288299
case OP_call:
289300
func = find_func(ph2_ir->func_name);
290-
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size));
301+
if (func->bbs)
302+
ofs = func->bbs->elf_offset - elf_code->size;
303+
else if (dynlink)
304+
ofs = (elf_plt_start + func->plt_offset) -
305+
(elf_code_start + elf_code->size);
306+
else {
307+
printf("The '%s' function is not implemented\n", ph2_ir->func_name);
308+
abort();
309+
}
310+
emit(__bl(__AL, ofs));
291311
return;
292312
case OP_load_data_address:
293313
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
@@ -299,7 +319,14 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
299319
return;
300320
case OP_address_of_func:
301321
func = find_func(ph2_ir->func_name);
302-
ofs = elf_code_start + func->bbs->elf_offset;
322+
if (func->bbs)
323+
ofs = elf_code_start + func->bbs->elf_offset;
324+
else if (dynlink)
325+
ofs = elf_plt_start + func->plt_offset;
326+
else {
327+
printf("The '%s' function is not implemented\n", ph2_ir->func_name);
328+
abort();
329+
}
303330
emit(__movw(__AL, __r8, ofs));
304331
emit(__movt(__AL, __r8, ofs));
305332
emit(__sw(__AL, __r8, rn, 0));
@@ -456,39 +483,74 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
456483
}
457484
}
458485

486+
void plt_generate(void);
459487
void code_generate(void)
460488
{
461-
elf_data_start = elf_code_start + elf_offset;
462-
elf_rodata_start = elf_data_start + elf_data->size;
463-
elf_bss_start = elf_rodata_start + elf_rodata->size;
489+
if (dynlink) {
490+
plt_generate();
491+
/* Call __libc_start_main() */
492+
emit(__mov_i(__AL, __r11, 0));
493+
emit(__mov_i(__AL, __lr, 0));
494+
emit(__pop_word(__AL, __r1));
495+
emit(__mov_r(__AL, __r2, __sp));
496+
emit(__push_reg(__AL, __r2));
497+
emit(__push_reg(__AL, __r0));
498+
emit(__mov_i(__AL, __r12, 0));
499+
emit(__push_reg(__AL, __r12));
464500

465-
/* start */
501+
int main_wrapper_offset = elf_code->size + 28;
502+
emit(__movw(__AL, __r0, elf_code_start + main_wrapper_offset));
503+
emit(__movt(__AL, __r0, elf_code_start + main_wrapper_offset));
504+
emit(__mov_i(__AL, __r3, 0));
505+
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) -
506+
(elf_code_start + elf_code->size)));
507+
/* Call '_exit' (syscall) to terminate the program if __libc_start_main
508+
* returns. */
509+
emit(__mov_i(__AL, __r0, 127));
510+
emit(__mov_i(__AL, __r7, 1));
511+
emit(__svc());
512+
513+
/* If the compiled program is dynamically linked, the starting
514+
* point of 'main_wrapper' is located here.
515+
*
516+
* Preserve 'argc' and 'argv' for the 'main' function.
517+
* */
518+
emit(__mov_r(__AL, __r9, __r0));
519+
emit(__mov_r(__AL, __r10, __r1));
520+
}
521+
/* For both static and dynamic linking, we need to set up the stack
522+
* and call the main function.
523+
* */
466524
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
467525
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
468526
emit(__sub_r(__AL, __sp, __sp, __r8));
469527
emit(__mov_r(__AL, __r12, __sp));
470-
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));
471-
/* After global init, jump to main preparation */
472-
emit(__b(__AL, 56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */
473528

474-
/* exit */
475-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
476-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
477-
emit(__add_r(__AL, __sp, __sp, __r8));
478-
emit(__mov_r(__AL, __r0, __r0));
479-
emit(__mov_i(__AL, __r7, 1));
480-
emit(__svc());
481-
482-
/* syscall */
483-
emit(__mov_r(__AL, __r7, __r0));
484-
emit(__mov_r(__AL, __r0, __r1));
485-
emit(__mov_r(__AL, __r1, __r2));
486-
emit(__mov_r(__AL, __r2, __r3));
487-
emit(__mov_r(__AL, __r3, __r4));
488-
emit(__mov_r(__AL, __r4, __r5));
489-
emit(__mov_r(__AL, __r5, __r6));
490-
emit(__svc());
491-
emit(__bx(__AL, __lr));
529+
if (!dynlink) {
530+
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));
531+
/* After global init, jump to main preparation */
532+
emit(__b(__AL,
533+
56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */
534+
535+
/* exit - only for static linking */
536+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
537+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
538+
emit(__add_r(__AL, __sp, __sp, __r8));
539+
emit(__mov_r(__AL, __r0, __r0));
540+
emit(__mov_i(__AL, __r7, 1));
541+
emit(__svc());
542+
543+
/* syscall */
544+
emit(__mov_r(__AL, __r7, __r0));
545+
emit(__mov_r(__AL, __r0, __r1));
546+
emit(__mov_r(__AL, __r1, __r2));
547+
emit(__mov_r(__AL, __r2, __r3));
548+
emit(__mov_r(__AL, __r3, __r4));
549+
emit(__mov_r(__AL, __r4, __r5));
550+
emit(__mov_r(__AL, __r5, __r6));
551+
emit(__svc());
552+
emit(__bx(__AL, __lr));
553+
}
492554

493555
ph2_ir_t *ph2_ir;
494556
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -497,20 +559,51 @@ void code_generate(void)
497559

498560
/* prepare 'argc' and 'argv', then proceed to 'main' function */
499561
if (MAIN_BB) {
500-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
501-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
502-
emit(__add_r(__AL, __r8, __r12, __r8));
503-
emit(__lw(__AL, __r0, __r8, 0));
504-
emit(__add_i(__AL, __r1, __r8, 4));
505-
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
562+
if (dynlink) {
563+
emit(__mov_r(__AL, __r0, __r9));
564+
emit(__mov_r(__AL, __r1, __r10));
565+
/* Jump directly to the main function.
566+
*
567+
* When the main function returns, the program
568+
* will return to __libc_start_main. */
569+
emit(__b(__AL, MAIN_BB->elf_offset - elf_code->size));
570+
} else {
571+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
572+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
573+
emit(__add_r(__AL, __r8, __r12, __r8));
574+
emit(__lw(__AL, __r0, __r8, 0));
575+
emit(__add_i(__AL, __r1, __r8, 4));
506576

507-
/* exit with main's return value - r0 already has the return value */
508-
emit(__mov_i(__AL, __r7, 1));
509-
emit(__svc());
577+
/* Call main function, and call '_exit' syscall to
578+
* terminate the program. */
579+
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
580+
581+
/* exit with main's return value - r0 already has the
582+
* return value */
583+
emit(__mov_i(__AL, __r7, 1));
584+
emit(__svc());
585+
}
510586
}
511587

512588
for (int i = 0; i < ph2_ir_idx; i++) {
513589
ph2_ir = PH2_IR_FLATTEN[i];
514590
emit_ph2_ir(ph2_ir);
515591
}
516592
}
593+
594+
void plt_generate(void)
595+
{
596+
int addr_of_got = elf_got_start + PTR_SIZE * 2;
597+
int end = plt_size - PLT_FIXUP_SIZE;
598+
elf_write_int(elf_plt, __push_reg(__AL, __lr));
599+
elf_write_int(elf_plt, __movw(__AL, __r10, addr_of_got));
600+
elf_write_int(elf_plt, __movt(__AL, __r10, addr_of_got));
601+
elf_write_int(elf_plt, __mov_r(__AL, __lr, __r10));
602+
elf_write_int(elf_plt, __lw(__AL, __pc, __lr, 0));
603+
for (int i = 0; i * PLT_ENT_SIZE < end; i++) {
604+
addr_of_got = elf_got_start + PTR_SIZE * (i + 3);
605+
elf_write_int(elf_plt, __movw(__AL, __r12, addr_of_got));
606+
elf_write_int(elf_plt, __movt(__AL, __r12, addr_of_got));
607+
elf_write_int(elf_plt, __lw(__AL, __pc, __r12, 0));
608+
}
609+
}

src/arm.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,16 @@ int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
312312
return arm_encode(cond, arm_ldm + (0x2 << 6) + (w << 1), rn, 0, reg_list);
313313
}
314314

315+
/* Build the instruction word that pushes register 'rt', executed under
 * condition 'cond'.
 *
 * The word is assembled by arm_encode() from opcode 0x52, register 13 and
 * an immediate of 4.
 * NOTE(review): presumably this is `str rt, [sp, #-4]!` (pre-decrement store
 * with write-back) - confirm against arm_encode()'s field layout.
 */
int __push_reg(arm_cond_t cond, arm_reg rt)
{
    int opcode = 0x52;
    int base = 13;
    int imm = 4;

    return arm_encode(cond, opcode, base, rt, imm);
}
319+
320+
/* Build the instruction word that pops one word into register 'rt',
 * executed under condition 'cond'.
 *
 * The word is assembled by arm_encode() from opcode 0x49, register 13 and
 * an immediate of 4.
 * NOTE(review): presumably this is `ldr rt, [sp], #4` (post-increment load) -
 * confirm against arm_encode()'s field layout.
 */
int __pop_word(arm_cond_t cond, arm_reg rt)
{
    int opcode = 0x49;
    int base = 13;
    int imm = 4;

    return arm_encode(cond, opcode, base, rt, imm);
}
324+
315325
int __b(arm_cond_t cond, int ofs)
316326
{
317327
int o = (ofs - 8) >> 2;

0 commit comments

Comments
 (0)