Skip to content

Commit 13dfa35

Browse files
committed
Support dynamic linking
1 parent b5e0cbc commit 13dfa35

File tree

15 files changed

+782
-115
lines changed

15 files changed

+782
-115
lines changed

Makefile

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,15 @@ STAGE0 := shecc
3636
STAGE1 := shecc-stage1.elf
3737
STAGE2 := shecc-stage2.elf
3838

39+
BUILTIN_LIBC ?= c.c
40+
STAGE0_FLAGS ?= --dump-ir
41+
STAGE1_FLAGS ?=
42+
ifeq ($(DYNLINK),1)
43+
BUILTIN_LIBC := c.h
44+
STAGE0_FLAGS += --dynlink
45+
STAGE1_FLAGS += --dynlink
46+
endif
47+
3948
OUT ?= out
4049
ARCHS = arm riscv
4150
ARCH ?= $(firstword $(ARCHS))
@@ -111,9 +120,9 @@ $(OUT)/norm-lf: tools/norm-lf.c
111120
$(VECHO) " CC+LD\t$@\n"
112121
$(Q)$(CC) $(CFLAGS) -o $@ $^
113122

114-
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/c.c
123+
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC)
115124
$(VECHO) " GEN\t$@\n"
116-
$(Q)$(OUT)/norm-lf $(LIBDIR)/c.c $(OUT)/c.normalized.c
125+
$(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC) $(OUT)/c.normalized.c
117126
$(Q)$(OUT)/inliner $(OUT)/c.normalized.c $@
118127
$(Q)$(RM) $(OUT)/c.normalized.c
119128

@@ -128,12 +137,12 @@ $(OUT)/$(STAGE0): $(OUT)/libc.inc $(OBJS)
128137
$(OUT)/$(STAGE1): $(OUT)/$(STAGE0)
129138
$(Q)$(STAGE1_CHECK_CMD)
130139
$(VECHO) " SHECC\t$@\n"
131-
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
140+
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
132141
$(Q)chmod a+x $@
133142

134143
$(OUT)/$(STAGE2): $(OUT)/$(STAGE1)
135144
$(VECHO) " SHECC\t$@\n"
136-
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) -o $@ $(SRCDIR)/main.c
145+
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) $(STAGE1_FLAGS) -o $@ $(SRCDIR)/main.c
137146

138147
bootstrap: $(OUT)/$(STAGE2)
139148
$(Q)chmod 775 $(OUT)/$(STAGE2)

lib/c.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* shecc - Self-Hosting and Educational C Compiler.
3+
*
4+
* shecc is freely redistributable under the BSD 2 clause license. See the
5+
* file "LICENSE" for information on usage and redistribution of this file.
6+
*/
7+
8+
/* Declarations of C Standard library functions */
9+
10+
#define NULL 0
11+
12+
#define bool _Bool
13+
#define true 1
14+
#define false 0
15+
16+
/* File I/O */
17+
typedef int FILE;
18+
FILE *fopen(char *filename, char *mode);
19+
int fclose(FILE *stream);
20+
int fgetc(FILE *stream);
21+
char *fgets(char *str, int n, FILE *stream);
22+
int fputc(int c, FILE *stream);
23+
24+
/* string-related functions */
25+
int strlen(char *str);
26+
int strcmp(char *s1, char *s2);
27+
int strncmp(char *s1, char *s2, int len);
28+
char *strcpy(char *dest, char *src);
29+
char *strncpy(char *dest, char *src, int len);
30+
char *memcpy(char *dest, char *src, int count);
31+
void *memset(void *s, int c, int n);
32+
33+
/* formatted output string */
34+
int printf(char *str, ...);
35+
int sprintf(char *buffer, char *str, ...);
36+
int snprintf(char *buffer, int n, char *str, ...);
37+
38+
/* Terminating program */
39+
void exit(int exit_code);
40+
void abort(void);
41+
42+
/* Dynamic memory allocation/deallocation functions */
43+
void *malloc(int size);
44+
void *calloc(int n, int size);
45+
void free(void *ptr);

mk/arm.mk

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,10 @@ ARCH_DEFS = \
66
\#define ARCH_PREDEFINED \"__arm__\" /* defined by GNU C and RealView */\n$\
77
\#define ELF_MACHINE 0x28 /* up to ARMv7/Aarch32 */\n$\
88
\#define ELF_FLAGS 0x5000200\n$\
9+
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
10+
\#define LIBC_SO \"libc.so.6\"\n$\
11+
\#define PLT_FIXUP_SIZE 20\n$\
12+
\#define PLT_ENT_SIZE 12\n$\
13+
\#define R_ARCH_JUMP_SLOT 0x16\n$\
914
"
15+
RUNNER_LD_PREFIX=-L /usr/arm-linux-gnueabi/

mk/common.mk

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ ifneq ($(HOST_ARCH),$(ARCH_NAME))
3636

3737
# Generate the path to the architecture-specific qemu
3838
TARGET_EXEC = $(shell which $(ARCH_RUNNER))
39+
ifeq ($(DYNLINK),1)
40+
TARGET_EXEC += $(RUNNER_LD_PREFIX)
41+
endif
3942
endif
4043
export TARGET_EXEC
4144

mk/riscv.mk

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,12 @@ ARCH_DEFS = \
77
\#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\
88
\#define ELF_MACHINE 0xf3\n$\
99
\#define ELF_FLAGS 0\n$\
10+
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
11+
\#define LIBC_SO \"libc.so.6\"\n$\
12+
\#define PLT_FIXUP_SIZE 20\n$\
13+
\#define PLT_ENT_SIZE 12\n$\
14+
\#define R_ARCH_JUMP_SLOT 0x5\n$\
1015
"
16+
17+
# TODO: Set this variable for RISC-V architecture
18+
RUNNER_LD_PREFIX=

src/arm-codegen.c

Lines changed: 96 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,16 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
135135

136136
void cfg_flatten(void)
137137
{
138-
func_t *func = find_func("__syscall");
139-
func->bbs->elf_offset = 44; /* offset of start + exit in codegen */
138+
func_t *func;
139+
140+
if (dynlink)
141+
elf_offset = 108; /* offset of start + exit in codegen */
142+
else {
143+
func = find_func("__syscall");
144+
func->bbs->elf_offset = 44; /* offset of start + exit in codegen */
145+
elf_offset = 80; /* offset of start + exit + syscall in codegen */
146+
}
140147

141-
elf_offset = 80; /* offset of start + exit + syscall in codegen */
142148
GLOBAL_FUNC->bbs->elf_offset = elf_offset;
143149

144150
for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -147,9 +153,15 @@ void cfg_flatten(void)
147153
}
148154

149155
/* prepare 'argc' and 'argv', then proceed to 'main' function */
150-
elf_offset += 32; /* 6 insns for main call + 2 for exit */
156+
if (dynlink)
157+
elf_offset += 20;
158+
else
159+
elf_offset += 32; /* 6 insns for main call + 2 for exit */
151160

152161
for (func = FUNC_LIST.head; func; func = func->next) {
162+
if (!func->bbs)
163+
continue;
164+
153165
/* reserve stack */
154166
ph2_ir_t *flatten_ir = add_ph2_ir(OP_define);
155167
flatten_ir->src0 = func->stack_size;
@@ -282,15 +294,23 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
282294
return;
283295
case OP_call:
284296
func = find_func(ph2_ir->func_name);
285-
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size));
297+
if (func->bbs)
298+
ofs = func->bbs->elf_offset - elf_code->size;
299+
else
300+
ofs = (elf_plt_start + func->plt_offset) -
301+
(elf_code_start + elf_code->size);
302+
emit(__bl(__AL, ofs));
286303
return;
287304
case OP_load_data_address:
288305
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
289306
emit(__movt(__AL, rd, ph2_ir->src0 + elf_data_start));
290307
return;
291308
case OP_address_of_func:
292309
func = find_func(ph2_ir->func_name);
293-
ofs = elf_code_start + func->bbs->elf_offset;
310+
if (func->bbs)
311+
ofs = elf_code_start + func->bbs->elf_offset;
312+
else
313+
ofs = elf_plt_start + func->plt_offset;
294314
emit(__movw(__AL, __r8, ofs));
295315
emit(__movt(__AL, __r8, ofs));
296316
emit(__sw(__AL, __r8, rn, 0));
@@ -447,11 +467,40 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
447467
}
448468
}
449469

470+
void plt_generate(void);
450471
void code_generate(void)
451472
{
452-
elf_data_start = elf_code_start + elf_offset;
473+
if (dynlink) {
474+
plt_generate();
475+
/* Call __libc_start_main() */
476+
emit(__mov_i(__AL, __r11, 0));
477+
emit(__mov_i(__AL, __lr, 0));
478+
emit(__pop_word(__AL, __r1));
479+
emit(__mov_r(__AL, __r2, __sp));
480+
emit(__push_reg(__AL, __r2));
481+
emit(__push_reg(__AL, __r0));
482+
emit(__mov_i(__AL, __r12, 0));
483+
emit(__push_reg(__AL, __r12));
484+
emit(__movw(__AL, __r0, elf_code_start + 56));
485+
emit(__movt(__AL, __r0, elf_code_start + 56));
486+
emit(__mov_i(__AL, __r3, 0));
487+
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) -
488+
(elf_code_start + elf_code->size)));
489+
/* Goto the 'exit' code snippet if __libc_start_main returns */
490+
emit(__mov_i(__AL, __r0, 127));
491+
emit(__bl(__AL, 28));
453492

454-
/* start */
493+
/* If the compiled program is dynamically linked, the starting
494+
* point of 'start' is located here.
495+
*
496+
* Preserve the 'argc' and 'argv' for the 'main' function.
497+
* */
498+
emit(__mov_r(__AL, __r9, __r0));
499+
emit(__mov_r(__AL, __r10, __r1));
500+
}
501+
/* If the compiled program is statically linked, the starting point
502+
* of 'start' is here.
503+
* */
455504
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
456505
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
457506
emit(__sub_r(__AL, __sp, __sp, __r8));
@@ -466,28 +515,35 @@ void code_generate(void)
466515
emit(__mov_i(__AL, __r7, 1));
467516
emit(__svc());
468517

469-
/* syscall */
470-
emit(__mov_r(__AL, __r7, __r0));
471-
emit(__mov_r(__AL, __r0, __r1));
472-
emit(__mov_r(__AL, __r1, __r2));
473-
emit(__mov_r(__AL, __r2, __r3));
474-
emit(__mov_r(__AL, __r3, __r4));
475-
emit(__mov_r(__AL, __r4, __r5));
476-
emit(__mov_r(__AL, __r5, __r6));
477-
emit(__svc());
478-
emit(__mov_r(__AL, __pc, __lr));
518+
if (!dynlink) {
519+
/* syscall */
520+
emit(__mov_r(__AL, __r7, __r0));
521+
emit(__mov_r(__AL, __r0, __r1));
522+
emit(__mov_r(__AL, __r1, __r2));
523+
emit(__mov_r(__AL, __r2, __r3));
524+
emit(__mov_r(__AL, __r3, __r4));
525+
emit(__mov_r(__AL, __r4, __r5));
526+
emit(__mov_r(__AL, __r5, __r6));
527+
emit(__svc());
528+
emit(__mov_r(__AL, __pc, __lr));
529+
}
479530

480531
ph2_ir_t *ph2_ir;
481532
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
482533
ph2_ir = ph2_ir->next)
483534
emit_ph2_ir(ph2_ir);
484535

485536
/* prepare 'argc' and 'argv', then proceed to 'main' function */
486-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
487-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
488-
emit(__add_r(__AL, __r8, __r12, __r8));
489-
emit(__lw(__AL, __r0, __r8, 0));
490-
emit(__add_i(__AL, __r1, __r8, 4));
537+
if (dynlink) {
538+
emit(__mov_r(__AL, __r0, __r9));
539+
emit(__mov_r(__AL, __r1, __r10));
540+
} else {
541+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
542+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
543+
emit(__add_r(__AL, __r8, __r12, __r8));
544+
emit(__lw(__AL, __r0, __r8, 0));
545+
emit(__add_i(__AL, __r1, __r8, 4));
546+
}
491547
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
492548

493549
/* exit with main's return value */
@@ -499,3 +555,20 @@ void code_generate(void)
499555
emit_ph2_ir(ph2_ir);
500556
}
501557
}
558+
559+
void plt_generate(void)
560+
{
561+
int addr_of_got = elf_got_start + PTR_SIZE * 2;
562+
int end = plt_sz - PLT_FIXUP_SIZE;
563+
elf_write_int(elf_plt, __push_reg(__AL, __lr));
564+
elf_write_int(elf_plt, __movw(__AL, __r10, addr_of_got));
565+
elf_write_int(elf_plt, __movt(__AL, __r10, addr_of_got));
566+
elf_write_int(elf_plt, __mov_r(__AL, __lr, __r10));
567+
elf_write_int(elf_plt, __lw(__AL, __pc, __lr, 0));
568+
for (int i = 0; i * PLT_ENT_SIZE < end; i++) {
569+
addr_of_got = elf_got_start + PTR_SIZE * (i + 3);
570+
elf_write_int(elf_plt, __movw(__AL, __r12, addr_of_got));
571+
elf_write_int(elf_plt, __movt(__AL, __r12, addr_of_got));
572+
elf_write_int(elf_plt, __lw(__AL, __pc, __r12, 0));
573+
}
574+
}

src/arm.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,16 @@ int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
308308
return arm_encode(cond, arm_ldm + (0x2 << 6) + (w << 1), rn, 0, reg_list);
309309
}
310310

311+
int __push_reg(arm_cond_t cond, arm_reg rt)
312+
{
313+
return arm_encode(cond, (0x5 << 4) | 0x2, 0xd, rt, 0x4);
314+
}
315+
316+
int __pop_word(arm_cond_t cond, arm_reg rt)
317+
{
318+
return arm_encode(cond, (0x4 << 4) | 0x9, 0xd, rt, 0x4);
319+
}
320+
311321
int __b(arm_cond_t cond, int ofs)
312322
{
313323
int o = (ofs - 8) >> 2;

src/defs.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,18 @@
3131
#define MAX_SYMTAB 65536
3232
#define MAX_STRTAB 65536
3333
#define MAX_HEADER 1024
34+
#define MAX_PROGRAM_HEADER 1024
3435
#define MAX_SECTION 1024
3536
#define MAX_ALIASES 128
37+
#define MAX_SECTION_HEADER 1024
38+
#define MAX_SHSTR 1024
39+
#define MAX_INTERP 1024
40+
#define MAX_DYNAMIC 1024
41+
#define MAX_DYNSYM 1024
42+
#define MAX_DYNSTR 1024
43+
#define MAX_RELPLT 1024
44+
#define MAX_PLT 1024
45+
#define MAX_GOTPLT 1024
3646
#define MAX_CONSTANTS 1024
3747
#define MAX_CASES 128
3848
#define MAX_NESTING 128
@@ -516,6 +526,11 @@ struct func {
516526
int bb_cnt;
517527
int visited;
518528

529+
/* Information used for dynamic linking */
530+
bool is_used;
531+
int plt_offset;
532+
int got_offset;
533+
519534
struct func *next;
520535
};
521536

@@ -578,3 +593,26 @@ typedef struct {
578593
int sh_addralign;
579594
int sh_entsize;
580595
} elf32_shdr_t;
596+
597+
/* Structures for dynamic linked program */
598+
/* For .dynsym section. */
599+
typedef struct {
600+
int st_name;
601+
int st_value;
602+
int st_size;
603+
char st_info;
604+
char st_other;
605+
char st_shndx[2];
606+
} elf32_sym_t;
607+
608+
/* For .rel.plt section */
609+
typedef struct {
610+
int r_offset;
611+
int r_info;
612+
} elf32_rel_t;
613+
614+
/* For .dynamic section */
615+
typedef struct {
616+
int d_tag;
617+
int d_un;
618+
} elf32_dyn_t;

0 commit comments

Comments
 (0)