Skip to content

Commit 9bdd0b5

Browse files
committed
Support Arm dynamic linking
This commit implements dynamic linking for the Arm target so that the compiler can generate dynamically linked programs that use an external C library such as glibc. It includes the following changes: - Modify arm.mk to make the compiled executable use the hard-float ABI. - Add a new header named 'c.h' that only includes declarations of C standard functions. In dynamic linking mode, the compiler uses this file to generate the C function declarations for the compiled program. - Generate a dynamically linked program by adding the following data: - Dynamic sections such as '.interp', '.plt', '.got' and so on. - Additional program headers and section headers. - Make the entire compilation process able to compile functions without an implementation, which are considered to be external functions. - Add a new input argument "--dynlink" to enable the dynamic linking mode. - Improve the build system to build the compiler in dynamic linking mode with the command "make LINK_MODE=dynamic". - Stop the build process if the target architecture is RISC-V and the mode is dynamic linking, because this commit only implements dynamic linking for the Arm architecture.
1 parent 4da0b6e commit 9bdd0b5

File tree

15 files changed

+858
-181
lines changed

15 files changed

+858
-181
lines changed

Makefile

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,19 @@ HOST_ARCH = $(shell arch 2>/dev/null)
4343
SRCDIR := $(shell find src -type d)
4444
LIBDIR := $(shell find lib -type d)
4545

46+
BUILTIN_LIBC ?= c.c
47+
STAGE0_FLAGS ?= --dump-ir
48+
STAGE1_FLAGS ?=
49+
ifeq ($(LINK_MODE),dynamic)
50+
ifeq ($(ARCH),riscv)
51+
# TODO: implement dynamic linking for RISC-V.
52+
$(error "Dynamic linking mode is not implemented for RISC-V")
53+
endif
54+
BUILTIN_LIBC := c.h
55+
STAGE0_FLAGS += --dynlink
56+
STAGE1_FLAGS += --dynlink
57+
endif
58+
4659
SRCS := $(wildcard $(patsubst %,%/main.c, $(SRCDIR)))
4760
OBJS := $(SRCS:%.c=$(OUT)/%.o)
4861
deps := $(OBJS:%.o=%.o.d)
@@ -72,7 +85,7 @@ config:
7285

7386
$(OUT)/tests/%.elf: tests/%.c $(OUT)/$(STAGE0)
7487
$(VECHO) " SHECC\t$@\n"
75-
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $< > $(basename $@).log ; \
88+
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $< > $(basename $@).log ; \
7689
chmod +x $@ ; $(PRINTF) "Running $@ ...\n"
7790
$(Q)$(TARGET_EXEC) $@ && $(call pass)
7891

@@ -122,9 +135,9 @@ $(OUT)/norm-lf: tools/norm-lf.c
122135
$(VECHO) " CC+LD\t$@\n"
123136
$(Q)$(CC) $(CFLAGS) -o $@ $^
124137

125-
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/c.c
138+
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC)
126139
$(VECHO) " GEN\t$@\n"
127-
$(Q)$(OUT)/norm-lf $(LIBDIR)/c.c $(OUT)/c.normalized.c
140+
$(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC) $(OUT)/c.normalized.c
128141
$(Q)$(OUT)/inliner $(OUT)/c.normalized.c $@
129142
$(Q)$(RM) $(OUT)/c.normalized.c
130143

@@ -143,12 +156,12 @@ $(OUT)/$(STAGE0)-sanitizer: $(OUT)/libc.inc $(OBJS)
143156
$(OUT)/$(STAGE1): $(OUT)/$(STAGE0)
144157
$(Q)$(STAGE1_CHECK_CMD)
145158
$(VECHO) " SHECC\t$@\n"
146-
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
159+
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
147160
$(Q)chmod a+x $@
148161

149162
$(OUT)/$(STAGE2): $(OUT)/$(STAGE1)
150163
$(VECHO) " SHECC\t$@\n"
151-
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) -o $@ $(SRCDIR)/main.c
164+
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) $(STAGE1_FLAGS) -o $@ $(SRCDIR)/main.c
152165

153166
bootstrap: $(OUT)/$(STAGE2)
154167
$(Q)chmod 775 $(OUT)/$(STAGE2)

lib/c.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* shecc - Self-Hosting and Educational C Compiler.
3+
*
4+
* shecc is freely redistributable under the BSD 2 clause license. See the
5+
* file "LICENSE" for information on usage and redistribution of this file.
6+
*/
7+
8+
#pragma once
9+
/* Declarations of C standard library functions */
10+
11+
#define NULL 0
12+
13+
#define bool _Bool
14+
#define true 1
15+
#define false 0
16+
17+
/* va_list support for variadic functions */
18+
typedef int *va_list;
19+
20+
/* File I/O */
21+
typedef int FILE;
22+
FILE *fopen(char *filename, char *mode);
23+
int fclose(FILE *stream);
24+
int fgetc(FILE *stream);
25+
char *fgets(char *str, int n, FILE *stream);
26+
int fputc(int c, FILE *stream);
27+
28+
/* string-related functions */
29+
int strlen(char *str);
30+
int strcmp(char *s1, char *s2);
31+
int strncmp(char *s1, char *s2, int len);
32+
char *strcpy(char *dest, char *src);
33+
char *strncpy(char *dest, char *src, int len);
34+
char *memcpy(char *dest, char *src, int count);
35+
int memcmp(void *s1, void *s2, int n);
36+
void *memset(void *s, int c, int n);
37+
38+
/* formatted output string */
39+
int printf(char *str, ...);
40+
int sprintf(char *buffer, char *str, ...);
41+
int snprintf(char *buffer, int n, char *str, ...);
42+
43+
/* Terminating program */
44+
void exit(int exit_code);
45+
void abort(void);
46+
47+
/* Dynamic memory allocation/deallocation functions */
48+
void *malloc(int size);
49+
void *calloc(int n, int size);
50+
void free(void *ptr);

mk/arm.mk

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,11 @@ ARCH_DEFS = \
55
\#pragma once\n$\
66
\#define ARCH_PREDEFINED \"__arm__\" /* defined by GNU C and RealView */\n$\
77
\#define ELF_MACHINE 0x28 /* up to ARMv7/Aarch32 */\n$\
8-
\#define ELF_FLAGS 0x5000200\n$\
8+
\#define ELF_FLAGS 0x5000400\n$\
9+
\#define DYN_LINKER \"/lib/ld-linux-armhf.so.3\"\n$\
10+
\#define LIBC_SO \"libc.so.6\"\n$\
11+
\#define PLT_FIXUP_SIZE 20\n$\
12+
\#define PLT_ENT_SIZE 12\n$\
13+
\#define R_ARCH_JUMP_SLOT 0x16\n$\
914
"
15+
RUNNER_LD_PREFIX=-L /usr/arm-linux-gnueabihf/

mk/common.mk

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ ifneq ($(HOST_ARCH),$(ARCH_NAME))
3636

3737
# Generate the path to the architecture-specific qemu
3838
TARGET_EXEC = $(shell which $(ARCH_RUNNER))
39+
ifeq ($(LINK_MODE),dynamic)
40+
TARGET_EXEC += $(RUNNER_LD_PREFIX)
41+
endif
3942
endif
4043
export TARGET_EXEC
4144

mk/riscv.mk

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,12 @@ ARCH_DEFS = \
77
\#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\
88
\#define ELF_MACHINE 0xf3\n$\
99
\#define ELF_FLAGS 0\n$\
10+
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
11+
\#define LIBC_SO \"libc.so.6\"\n$\
12+
\#define PLT_FIXUP_SIZE 20\n$\
13+
\#define PLT_ENT_SIZE 12\n$\
14+
\#define R_ARCH_JUMP_SLOT 0x5\n$\
1015
"
16+
17+
# TODO: Set this variable for RISC-V architecture
18+
RUNNER_LD_PREFIX=

src/arm-codegen.c

Lines changed: 133 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,18 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
139139

140140
void cfg_flatten(void)
141141
{
142-
func_t *func = find_func("__syscall");
143-
func->bbs->elf_offset = 48; /* offset of start + branch + exit in codegen */
142+
func_t *func;
143+
144+
if (dynlink)
145+
elf_offset =
146+
84; /* offset of __libc_start_main + main_wrapper in codegen */
147+
else {
148+
func = find_func("__syscall");
149+
func->bbs->elf_offset = 48; /* offset of start + exit in codegen */
150+
elf_offset =
151+
84; /* offset of start + branch + exit + syscall in codegen */
152+
}
144153

145-
elf_offset = 84; /* offset of start + branch + exit + syscall in codegen */
146154
GLOBAL_FUNC->bbs->elf_offset = elf_offset;
147155

148156
for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -151,7 +159,10 @@ void cfg_flatten(void)
151159
}
152160

153161
/* prepare 'argc' and 'argv', then proceed to 'main' function */
154-
elf_offset += 32; /* 6 insns for main call + 2 for exit */
162+
if (dynlink)
163+
elf_offset += 12;
164+
else
165+
elf_offset += 32; /* 6 insns for main call + 2 for exit */
155166

156167
for (func = FUNC_LIST.head; func; func = func->next) {
157168
/* Skip function declarations without bodies */
@@ -294,7 +305,16 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
294305
return;
295306
case OP_call:
296307
func = find_func(ph2_ir->func_name);
297-
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size));
308+
if (func->bbs)
309+
ofs = func->bbs->elf_offset - elf_code->size;
310+
else if (dynlink)
311+
ofs = (elf_plt_start + func->plt_offset) -
312+
(elf_code_start + elf_code->size);
313+
else {
314+
printf("The '%s' function is not implemented\n", ph2_ir->func_name);
315+
abort();
316+
}
317+
emit(__bl(__AL, ofs));
298318
return;
299319
case OP_load_data_address:
300320
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
@@ -306,7 +326,14 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
306326
return;
307327
case OP_address_of_func:
308328
func = find_func(ph2_ir->func_name);
309-
ofs = elf_code_start + func->bbs->elf_offset;
329+
if (func->bbs)
330+
ofs = elf_code_start + func->bbs->elf_offset;
331+
else if (dynlink)
332+
ofs = elf_plt_start + func->plt_offset;
333+
else {
334+
printf("The '%s' function is not implemented\n", ph2_ir->func_name);
335+
abort();
336+
}
310337
emit(__movw(__AL, __r8, ofs));
311338
emit(__movt(__AL, __r8, ofs));
312339
emit(__sw(__AL, __r8, rn, 0));
@@ -470,39 +497,74 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
470497
}
471498
}
472499

500+
void plt_generate(void);
473501
void code_generate(void)
474502
{
475-
elf_data_start = elf_code_start + elf_offset;
476-
elf_rodata_start = elf_data_start + elf_data->size;
477-
elf_bss_start = elf_rodata_start + elf_rodata->size;
503+
if (dynlink) {
504+
plt_generate();
505+
/* Call __libc_start_main() */
506+
emit(__mov_i(__AL, __r11, 0));
507+
emit(__mov_i(__AL, __lr, 0));
508+
emit(__pop_word(__AL, __r1));
509+
emit(__mov_r(__AL, __r2, __sp));
510+
emit(__push_reg(__AL, __r2));
511+
emit(__push_reg(__AL, __r0));
512+
emit(__mov_i(__AL, __r12, 0));
513+
emit(__push_reg(__AL, __r12));
478514

479-
/* start */
515+
int main_wrapper_offset = elf_code->size + 28;
516+
emit(__movw(__AL, __r0, elf_code_start + main_wrapper_offset));
517+
emit(__movt(__AL, __r0, elf_code_start + main_wrapper_offset));
518+
emit(__mov_i(__AL, __r3, 0));
519+
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) -
520+
(elf_code_start + elf_code->size)));
521+
/* Call '_exit' (syscall) to terminate the program if __libc_start_main
522+
* returns. */
523+
emit(__mov_i(__AL, __r0, 127));
524+
emit(__mov_i(__AL, __r7, 1));
525+
emit(__svc());
526+
527+
/* If the compiled program is dynamic linking, the starting
528+
* point of 'main_wrapper' is located here.
529+
*
530+
* Preserve 'argc' and 'argv' for the 'main' function.
531+
* */
532+
emit(__mov_r(__AL, __r9, __r0));
533+
emit(__mov_r(__AL, __r10, __r1));
534+
}
535+
/* For both static and dynamic linking, we need to set up the stack
536+
* and call the main function.
537+
* */
480538
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
481539
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
482540
emit(__sub_r(__AL, __sp, __sp, __r8));
483541
emit(__mov_r(__AL, __r12, __sp));
484-
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));
485-
/* After global init, jump to main preparation */
486-
emit(__b(__AL, 56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */
487542

488-
/* exit */
489-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
490-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
491-
emit(__add_r(__AL, __sp, __sp, __r8));
492-
emit(__mov_r(__AL, __r0, __r0));
493-
emit(__mov_i(__AL, __r7, 1));
494-
emit(__svc());
495-
496-
/* syscall */
497-
emit(__mov_r(__AL, __r7, __r0));
498-
emit(__mov_r(__AL, __r0, __r1));
499-
emit(__mov_r(__AL, __r1, __r2));
500-
emit(__mov_r(__AL, __r2, __r3));
501-
emit(__mov_r(__AL, __r3, __r4));
502-
emit(__mov_r(__AL, __r4, __r5));
503-
emit(__mov_r(__AL, __r5, __r6));
504-
emit(__svc());
505-
emit(__bx(__AL, __lr));
543+
if (!dynlink) {
544+
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));
545+
/* After global init, jump to main preparation */
546+
emit(__b(__AL,
547+
56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */
548+
549+
/* exit - only for static linking */
550+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
551+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
552+
emit(__add_r(__AL, __sp, __sp, __r8));
553+
emit(__mov_r(__AL, __r0, __r0));
554+
emit(__mov_i(__AL, __r7, 1));
555+
emit(__svc());
556+
557+
/* syscall */
558+
emit(__mov_r(__AL, __r7, __r0));
559+
emit(__mov_r(__AL, __r0, __r1));
560+
emit(__mov_r(__AL, __r1, __r2));
561+
emit(__mov_r(__AL, __r2, __r3));
562+
emit(__mov_r(__AL, __r3, __r4));
563+
emit(__mov_r(__AL, __r4, __r5));
564+
emit(__mov_r(__AL, __r5, __r6));
565+
emit(__svc());
566+
emit(__bx(__AL, __lr));
567+
}
506568

507569
ph2_ir_t *ph2_ir;
508570
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -511,20 +573,51 @@ void code_generate(void)
511573

512574
/* prepare 'argc' and 'argv', then proceed to 'main' function */
513575
if (MAIN_BB) {
514-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
515-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
516-
emit(__add_r(__AL, __r8, __r12, __r8));
517-
emit(__lw(__AL, __r0, __r8, 0));
518-
emit(__add_i(__AL, __r1, __r8, 4));
519-
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
576+
if (dynlink) {
577+
emit(__mov_r(__AL, __r0, __r9));
578+
emit(__mov_r(__AL, __r1, __r10));
579+
/* Jump directly to the main function.
580+
*
581+
* When the main function returns, the program
582+
* will return to __libc_start_main. */
583+
emit(__b(__AL, MAIN_BB->elf_offset - elf_code->size));
584+
} else {
585+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
586+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
587+
emit(__add_r(__AL, __r8, __r12, __r8));
588+
emit(__lw(__AL, __r0, __r8, 0));
589+
emit(__add_i(__AL, __r1, __r8, 4));
520590

521-
/* exit with main's return value - r0 already has the return value */
522-
emit(__mov_i(__AL, __r7, 1));
523-
emit(__svc());
591+
/* Call main function, and call '_exit' syscall to
592+
* terminate the program. */
593+
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
594+
595+
/* exit with main's return value - r0 already has the
596+
* return value */
597+
emit(__mov_i(__AL, __r7, 1));
598+
emit(__svc());
599+
}
524600
}
525601

526602
for (int i = 0; i < ph2_ir_idx; i++) {
527603
ph2_ir = PH2_IR_FLATTEN[i];
528604
emit_ph2_ir(ph2_ir);
529605
}
530606
}
607+
608+
/* Emit the procedure linkage table (PLT) into 'elf_plt'.
 *
 * Layout written here:
 * - A five-instruction header: push lr, build the address of the third GOT
 *   word (elf_got_start + PTR_SIZE * 2) in r10, copy it to lr, then load pc
 *   from that word.
 * - One three-instruction entry per loop iteration: build the address of GOT
 *   word (i + 3) in r12 and load pc from it.
 *
 * The loop runs while i * PLT_ENT_SIZE is below plt_size - PLT_FIXUP_SIZE, so
 * PLT_FIXUP_SIZE and PLT_ENT_SIZE are presumably the byte sizes of the header
 * and of one entry - TODO confirm they stay in sync with the instruction
 * counts emitted here.
 *
 * NOTE(review): the header uses r10 as scratch, while code_generate() keeps
 * 'argv' in r10 for the dynamic-linking main wrapper; confirm the header can
 * never execute between the save and the restore of r10.
 */
void plt_generate(void)
609+
{
610+
int addr_of_got = elf_got_start + PTR_SIZE * 2;
611+
int end = plt_size - PLT_FIXUP_SIZE;
612+
/* Header: save lr, then jump through the third GOT word with lr pointing
 * at that word. */
elf_write_int(elf_plt, __push_reg(__AL, __lr));
613+
elf_write_int(elf_plt, __movw(__AL, __r10, addr_of_got));
614+
elf_write_int(elf_plt, __movt(__AL, __r10, addr_of_got));
615+
elf_write_int(elf_plt, __mov_r(__AL, __lr, __r10));
616+
elf_write_int(elf_plt, __lw(__AL, __pc, __lr, 0));
617+
/* Per-function entries: entry i jumps through GOT word (i + 3), leaving the
 * address of that GOT word in r12. */
for (int i = 0; i * PLT_ENT_SIZE < end; i++) {
618+
addr_of_got = elf_got_start + PTR_SIZE * (i + 3);
619+
elf_write_int(elf_plt, __movw(__AL, __r12, addr_of_got));
620+
elf_write_int(elf_plt, __movt(__AL, __r12, addr_of_got));
621+
elf_write_int(elf_plt, __lw(__AL, __pc, __r12, 0));
622+
}
623+
}

src/arm.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,16 @@ int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
357357
return arm_encode(cond, arm_ldm + (0x2 << 6) + (w << 1), rn, 0, reg_list);
358358
}
359359

360+
/* Encode an instruction that pushes the single register 'rt' onto the stack,
 * executed under condition 'cond'. Returns the encoded instruction word.
 *
 * The base register passed to arm_encode() is 0xd (r13, the stack pointer) and
 * the last argument is 4, i.e. one word.
 * NOTE(review): the opcode bits (0x5 << 4) | 0x2 appear to select a
 * pre-indexed store with write-back ("str rt, [sp, #-4]!") - confirm against
 * the field layout used by arm_encode().
 */
int __push_reg(arm_cond_t cond, arm_reg rt)
361+
{
362+
return arm_encode(cond, (0x5 << 4) | 0x2, 0xd, rt, 0x4);
363+
}
364+
365+
/* Encode an instruction that pops one word from the stack into register 'rt',
 * executed under condition 'cond'. Returns the encoded instruction word.
 *
 * The base register passed to arm_encode() is 0xd (r13, the stack pointer) and
 * the last argument is 4, i.e. one word.
 * NOTE(review): the opcode bits (0x4 << 4) | 0x9 appear to select a
 * post-indexed load ("ldr rt, [sp], #4") - confirm against the field layout
 * used by arm_encode().
 */
int __pop_word(arm_cond_t cond, arm_reg rt)
366+
{
367+
return arm_encode(cond, (0x4 << 4) | 0x9, 0xd, rt, 0x4);
368+
}
369+
360370
int __b(arm_cond_t cond, int ofs)
361371
{
362372
int o = (ofs - 8) >> 2;

0 commit comments

Comments
 (0)