Skip to content

Commit 7f6b1f8

Browse files
committed
Support Arm dynamic linking
This commit implements dynamic linking for the Arm target so that the compiler can generate dynamically linked programs that use an external C library such as glibc. It includes the following changes: - Modify arm.mk to make the compiled executable use the hard-float ABI. - Add a new header named 'c.h' that only includes declarations of C standard functions. In dynamic linking mode, the compiler uses this file to generate the C functions for the compiled program. - Generate a dynamically linked program by adding the following data: - Dynamic sections such as '.interp', '.plt', '.got' and so on. - Additional program headers and section headers. - Make the entire compilation process able to compile functions without an implementation, which are considered to be external functions. - Add a new input argument "--dynlink" to enable the dynamic linking mode. - Improve the build system to build the compiler under the dynamic linking mode with the command "make LINK_MODE=dynamic". - Stop the build process if the target architecture is RISC-V and the mode is dynamic linking, because this commit only implements dynamic linking for the Arm architecture.
1 parent f265042 commit 7f6b1f8

File tree

15 files changed

+864
-181
lines changed

15 files changed

+864
-181
lines changed

Makefile

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,19 @@ HOST_ARCH = $(shell arch 2>/dev/null)
4343
SRCDIR := $(shell find src -type d)
4444
LIBDIR := $(shell find lib -type d)
4545

46+
BUILTIN_LIBC ?= c.c
47+
STAGE0_FLAGS ?= --dump-ir
48+
STAGE1_FLAGS ?=
49+
ifeq ($(LINK_MODE),dynamic)
50+
ifeq ($(ARCH),riscv)
51+
# TODO: implement dynamic linking for RISC-V.
52+
$(error "Dynamic linking mode is not implemented for RISC-V")
53+
endif
54+
BUILTIN_LIBC := c.h
55+
STAGE0_FLAGS += --dynlink
56+
STAGE1_FLAGS += --dynlink
57+
endif
58+
4659
SRCS := $(wildcard $(patsubst %,%/main.c, $(SRCDIR)))
4760
OBJS := $(SRCS:%.c=$(OUT)/%.o)
4861
deps := $(OBJS:%.o=%.o.d)
@@ -72,7 +85,7 @@ config:
7285

7386
$(OUT)/tests/%.elf: tests/%.c $(OUT)/$(STAGE0)
7487
$(VECHO) " SHECC\t$@\n"
75-
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $< > $(basename $@).log ; \
88+
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $< > $(basename $@).log ; \
7689
chmod +x $@ ; $(PRINTF) "Running $@ ...\n"
7790
$(Q)$(TARGET_EXEC) $@ && $(call pass)
7891

@@ -122,9 +135,9 @@ $(OUT)/norm-lf: tools/norm-lf.c
122135
$(VECHO) " CC+LD\t$@\n"
123136
$(Q)$(CC) $(CFLAGS) -o $@ $^
124137

125-
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/c.c
138+
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC)
126139
$(VECHO) " GEN\t$@\n"
127-
$(Q)$(OUT)/norm-lf $(LIBDIR)/c.c $(OUT)/c.normalized.c
140+
$(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC) $(OUT)/c.normalized.c
128141
$(Q)$(OUT)/inliner $(OUT)/c.normalized.c $@
129142
$(Q)$(RM) $(OUT)/c.normalized.c
130143

@@ -143,12 +156,12 @@ $(OUT)/$(STAGE0)-sanitizer: $(OUT)/libc.inc $(OBJS)
143156
$(OUT)/$(STAGE1): $(OUT)/$(STAGE0)
144157
$(Q)$(STAGE1_CHECK_CMD)
145158
$(VECHO) " SHECC\t$@\n"
146-
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
159+
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
147160
$(Q)chmod a+x $@
148161

149162
$(OUT)/$(STAGE2): $(OUT)/$(STAGE1)
150163
$(VECHO) " SHECC\t$@\n"
151-
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) -o $@ $(SRCDIR)/main.c
164+
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) $(STAGE1_FLAGS) -o $@ $(SRCDIR)/main.c
152165

153166
bootstrap: $(OUT)/$(STAGE2)
154167
$(Q)chmod 775 $(OUT)/$(STAGE2)

lib/c.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* shecc - Self-Hosting and Educational C Compiler.
3+
*
4+
* shecc is freely redistributable under the BSD 2 clause license. See the
5+
* file "LICENSE" for information on usage and redistribution of this file.
6+
*/
7+
8+
#pragma once
9+
/* Declarations of C standard library functions */
10+
11+
#define NULL 0
12+
13+
#define bool _Bool
14+
#define true 1
15+
#define false 0
16+
17+
/* va_list support for variadic functions */
18+
typedef int *va_list;
19+
20+
/* File I/O */
21+
typedef int FILE;
22+
FILE *fopen(char *filename, char *mode);
23+
int fclose(FILE *stream);
24+
int fgetc(FILE *stream);
25+
char *fgets(char *str, int n, FILE *stream);
26+
int fputc(int c, FILE *stream);
27+
28+
/* string-related functions */
29+
int strlen(char *str);
30+
int strcmp(char *s1, char *s2);
31+
int strncmp(char *s1, char *s2, int len);
32+
char *strcpy(char *dest, char *src);
33+
char *strncpy(char *dest, char *src, int len);
34+
char *memcpy(char *dest, char *src, int count);
35+
int memcmp(void *s1, void *s2, int n);
36+
void *memset(void *s, int c, int n);
37+
38+
/* Formatted output functions */
39+
int printf(char *str, ...);
40+
int sprintf(char *buffer, char *str, ...);
41+
int snprintf(char *buffer, int n, char *str, ...);
42+
43+
/* Terminating program */
44+
void exit(int exit_code);
45+
void abort(void);
46+
47+
/* Dynamic memory allocation/deallocation functions */
48+
void *malloc(int size);
49+
void *calloc(int n, int size);
50+
void free(void *ptr);

mk/arm.mk

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,11 @@ ARCH_DEFS = \
55
\#pragma once\n$\
66
\#define ARCH_PREDEFINED \"__arm__\" /* defined by GNU C and RealView */\n$\
77
\#define ELF_MACHINE 0x28 /* up to ARMv7/Aarch32 */\n$\
8-
\#define ELF_FLAGS 0x5000200\n$\
8+
\#define ELF_FLAGS 0x5000400\n$\
9+
\#define DYN_LINKER \"/lib/ld-linux-armhf.so.3\"\n$\
10+
\#define LIBC_SO \"libc.so.6\"\n$\
11+
\#define PLT_FIXUP_SIZE 20\n$\
12+
\#define PLT_ENT_SIZE 12\n$\
13+
\#define R_ARCH_JUMP_SLOT 0x16\n$\
914
"
15+
RUNNER_LD_PREFIX=-L /usr/arm-linux-gnueabihf/

mk/common.mk

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ ifneq ($(HOST_ARCH),$(ARCH_NAME))
3636

3737
# Generate the path to the architecture-specific qemu
3838
TARGET_EXEC = $(shell which $(ARCH_RUNNER))
39+
ifeq ($(LINK_MODE),dynamic)
40+
TARGET_EXEC += $(RUNNER_LD_PREFIX)
41+
endif
3942
endif
4043
export TARGET_EXEC
4144

mk/riscv.mk

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,12 @@ ARCH_DEFS = \
77
\#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\
88
\#define ELF_MACHINE 0xf3\n$\
99
\#define ELF_FLAGS 0\n$\
10+
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
11+
\#define LIBC_SO \"libc.so.6\"\n$\
12+
\#define PLT_FIXUP_SIZE 20\n$\
13+
\#define PLT_ENT_SIZE 12\n$\
14+
\#define R_ARCH_JUMP_SLOT 0x5\n$\
1015
"
16+
17+
# TODO: Set this variable for RISC-V architecture
18+
RUNNER_LD_PREFIX=

src/arm-codegen.c

Lines changed: 133 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,18 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
136136

137137
void cfg_flatten(void)
138138
{
139-
func_t *func = find_func("__syscall");
140-
func->bbs->elf_offset = 48; /* offset of start + branch + exit in codegen */
139+
func_t *func;
140+
141+
if (dynlink)
142+
elf_offset =
143+
84; /* offset of __libc_start_main + main_wrapper in codegen */
144+
else {
145+
func = find_func("__syscall");
146+
func->bbs->elf_offset = 48; /* offset of start + exit in codegen */
147+
elf_offset =
148+
84; /* offset of start + branch + exit + syscall in codegen */
149+
}
141150

142-
elf_offset = 84; /* offset of start + branch + exit + syscall in codegen */
143151
GLOBAL_FUNC->bbs->elf_offset = elf_offset;
144152

145153
for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -148,7 +156,10 @@ void cfg_flatten(void)
148156
}
149157

150158
/* prepare 'argc' and 'argv', then proceed to 'main' function */
151-
elf_offset += 32; /* 6 insns for main call + 2 for exit */
159+
if (dynlink)
160+
elf_offset += 12;
161+
else
162+
elf_offset += 32; /* 6 insns for main call + 2 for exit */
152163

153164
for (func = FUNC_LIST.head; func; func = func->next) {
154165
/* Skip function declarations without bodies */
@@ -287,7 +298,16 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
287298
return;
288299
case OP_call:
289300
func = find_func(ph2_ir->func_name);
290-
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size));
301+
if (func->bbs)
302+
ofs = func->bbs->elf_offset - elf_code->size;
303+
else if (dynlink)
304+
ofs = (elf_plt_start + func->plt_offset) -
305+
(elf_code_start + elf_code->size);
306+
else {
307+
printf("The '%s' function is not implemented\n", ph2_ir->func_name);
308+
abort();
309+
}
310+
emit(__bl(__AL, ofs));
291311
return;
292312
case OP_load_data_address:
293313
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
@@ -299,7 +319,14 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
299319
return;
300320
case OP_address_of_func:
301321
func = find_func(ph2_ir->func_name);
302-
ofs = elf_code_start + func->bbs->elf_offset;
322+
if (func->bbs)
323+
ofs = elf_code_start + func->bbs->elf_offset;
324+
else if (dynlink)
325+
ofs = elf_plt_start + func->plt_offset;
326+
else {
327+
printf("The '%s' function is not implemented\n", ph2_ir->func_name);
328+
abort();
329+
}
303330
emit(__movw(__AL, __r8, ofs));
304331
emit(__movt(__AL, __r8, ofs));
305332
emit(__sw(__AL, __r8, rn, 0));
@@ -456,39 +483,74 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
456483
}
457484
}
458485

486+
void plt_generate(void);
459487
void code_generate(void)
460488
{
461-
elf_data_start = elf_code_start + elf_offset;
462-
elf_rodata_start = elf_data_start + elf_data->size;
463-
elf_bss_start = elf_rodata_start + elf_rodata->size;
489+
if (dynlink) {
490+
plt_generate();
491+
/* Call __libc_start_main() */
492+
emit(__mov_i(__AL, __r11, 0));
493+
emit(__mov_i(__AL, __lr, 0));
494+
emit(__pop_word(__AL, __r1));
495+
emit(__mov_r(__AL, __r2, __sp));
496+
emit(__push_reg(__AL, __r2));
497+
emit(__push_reg(__AL, __r0));
498+
emit(__mov_i(__AL, __r12, 0));
499+
emit(__push_reg(__AL, __r12));
464500

465-
/* start */
501+
int main_wrapper_offset = elf_code->size + 28;
502+
emit(__movw(__AL, __r0, elf_code_start + main_wrapper_offset));
503+
emit(__movt(__AL, __r0, elf_code_start + main_wrapper_offset));
504+
emit(__mov_i(__AL, __r3, 0));
505+
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) -
506+
(elf_code_start + elf_code->size)));
507+
/* Call '_exit' (syscall) to terminate the program if __libc_start_main
508+
* returns. */
509+
emit(__mov_i(__AL, __r0, 127));
510+
emit(__mov_i(__AL, __r7, 1));
511+
emit(__svc());
512+
513+
/* If the compiled program is dynamically linked, the starting
514+
* point of 'main_wrapper' is located here.
515+
*
516+
* Preserve 'argc' and 'argv' for the 'main' function.
517+
* */
518+
emit(__mov_r(__AL, __r9, __r0));
519+
emit(__mov_r(__AL, __r10, __r1));
520+
}
521+
/* For both static and dynamic linking, we need to set up the stack
522+
* and call the main function.
523+
* */
466524
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
467525
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
468526
emit(__sub_r(__AL, __sp, __sp, __r8));
469527
emit(__mov_r(__AL, __r12, __sp));
470-
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));
471-
/* After global init, jump to main preparation */
472-
emit(__b(__AL, 56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */
473528

474-
/* exit */
475-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
476-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
477-
emit(__add_r(__AL, __sp, __sp, __r8));
478-
emit(__mov_r(__AL, __r0, __r0));
479-
emit(__mov_i(__AL, __r7, 1));
480-
emit(__svc());
481-
482-
/* syscall */
483-
emit(__mov_r(__AL, __r7, __r0));
484-
emit(__mov_r(__AL, __r0, __r1));
485-
emit(__mov_r(__AL, __r1, __r2));
486-
emit(__mov_r(__AL, __r2, __r3));
487-
emit(__mov_r(__AL, __r3, __r4));
488-
emit(__mov_r(__AL, __r4, __r5));
489-
emit(__mov_r(__AL, __r5, __r6));
490-
emit(__svc());
491-
emit(__bx(__AL, __lr));
529+
if (!dynlink) {
530+
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));
531+
/* After global init, jump to main preparation */
532+
emit(__b(__AL,
533+
56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */
534+
535+
/* exit - only for static linking */
536+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
537+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
538+
emit(__add_r(__AL, __sp, __sp, __r8));
539+
emit(__mov_r(__AL, __r0, __r0));
540+
emit(__mov_i(__AL, __r7, 1));
541+
emit(__svc());
542+
543+
/* syscall */
544+
emit(__mov_r(__AL, __r7, __r0));
545+
emit(__mov_r(__AL, __r0, __r1));
546+
emit(__mov_r(__AL, __r1, __r2));
547+
emit(__mov_r(__AL, __r2, __r3));
548+
emit(__mov_r(__AL, __r3, __r4));
549+
emit(__mov_r(__AL, __r4, __r5));
550+
emit(__mov_r(__AL, __r5, __r6));
551+
emit(__svc());
552+
emit(__bx(__AL, __lr));
553+
}
492554

493555
ph2_ir_t *ph2_ir;
494556
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -497,20 +559,51 @@ void code_generate(void)
497559

498560
/* prepare 'argc' and 'argv', then proceed to 'main' function */
499561
if (MAIN_BB) {
500-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
501-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
502-
emit(__add_r(__AL, __r8, __r12, __r8));
503-
emit(__lw(__AL, __r0, __r8, 0));
504-
emit(__add_i(__AL, __r1, __r8, 4));
505-
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
562+
if (dynlink) {
563+
emit(__mov_r(__AL, __r0, __r9));
564+
emit(__mov_r(__AL, __r1, __r10));
565+
/* Jump directly to the main function.
566+
*
567+
* When the main function returns, the program
568+
* will return to __libc_start_main. */
569+
emit(__b(__AL, MAIN_BB->elf_offset - elf_code->size));
570+
} else {
571+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
572+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
573+
emit(__add_r(__AL, __r8, __r12, __r8));
574+
emit(__lw(__AL, __r0, __r8, 0));
575+
emit(__add_i(__AL, __r1, __r8, 4));
506576

507-
/* exit with main's return value - r0 already has the return value */
508-
emit(__mov_i(__AL, __r7, 1));
509-
emit(__svc());
577+
/* Call main function, and call '_exit' syscall to
578+
* terminate the program. */
579+
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
580+
581+
/* exit with main's return value - r0 already has the
582+
* return value */
583+
emit(__mov_i(__AL, __r7, 1));
584+
emit(__svc());
585+
}
510586
}
511587

512588
for (int i = 0; i < ph2_ir_idx; i++) {
513589
ph2_ir = PH2_IR_FLATTEN[i];
514590
emit_ph2_ir(ph2_ir);
515591
}
516592
}
593+
594+
void plt_generate(void)
595+
{
596+
int addr_of_got = elf_got_start + PTR_SIZE * 2;
597+
int end = plt_size - PLT_FIXUP_SIZE;
598+
elf_write_int(elf_plt, __push_reg(__AL, __lr));
599+
elf_write_int(elf_plt, __movw(__AL, __r10, addr_of_got));
600+
elf_write_int(elf_plt, __movt(__AL, __r10, addr_of_got));
601+
elf_write_int(elf_plt, __mov_r(__AL, __lr, __r10));
602+
elf_write_int(elf_plt, __lw(__AL, __pc, __lr, 0));
603+
for (int i = 0; i * PLT_ENT_SIZE < end; i++) {
604+
addr_of_got = elf_got_start + PTR_SIZE * (i + 3);
605+
elf_write_int(elf_plt, __movw(__AL, __r12, addr_of_got));
606+
elf_write_int(elf_plt, __movt(__AL, __r12, addr_of_got));
607+
elf_write_int(elf_plt, __lw(__AL, __pc, __r12, 0));
608+
}
609+
}

src/arm.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,16 @@ int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
312312
return arm_encode(cond, arm_ldm + (0x2 << 6) + (w << 1), rn, 0, reg_list);
313313
}
314314

315+
/* Build the instruction word that pushes the single register 'rt'.
 *
 * The word is produced by arm_encode() with opcode field 0x52, base
 * register 0xd and an operand value of 4.
 *
 * NOTE(review): this presumably corresponds to 'str rt, [sp, #-4]!' -
 * confirm against arm_encode()'s field layout.
 */
int __push_reg(arm_cond_t cond, arm_reg rt)
{
    int opcode = (0x5 << 4) | 0x2;
    return arm_encode(cond, opcode, 0xd, rt, 0x4);
}
319+
320+
/* Build the instruction word that pops one word into register 'rt'.
 *
 * The word is produced by arm_encode() with opcode field 0x49, base
 * register 0xd and an operand value of 4.
 *
 * NOTE(review): this presumably corresponds to 'ldr rt, [sp], #4' -
 * confirm against arm_encode()'s field layout.
 */
int __pop_word(arm_cond_t cond, arm_reg rt)
{
    int opcode = (0x4 << 4) | 0x9;
    return arm_encode(cond, opcode, 0xd, rt, 0x4);
}
324+
315325
int __b(arm_cond_t cond, int ofs)
316326
{
317327
int o = (ofs - 8) >> 2;

0 commit comments

Comments
 (0)