Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ STAGE0 := shecc
STAGE1 := shecc-stage1.elf
STAGE2 := shecc-stage2.elf

# Library source file, relative to $(LIBDIR), that is normalized and inlined
# into $(OUT)/libc.inc.
BUILTIN_LIBC ?= c.c
# Extra options handed to the stage 0 and stage 1 compilers when each one
# builds the next bootstrap stage. Comments are kept on their own lines so no
# trailing whitespace leaks into the values.
STAGE0_FLAGS ?= --dump-ir
STAGE1_FLAGS ?=
# DYNLINK=1 switches the bootstrap to dynamic linking: the declarations-only
# header c.h replaces c.c as the embedded library source, and --dynlink is
# appended to the flags of both stages. The := assignment below replaces any
# value picked up by the ?= default above (a command-line setting still wins).
ifeq ($(DYNLINK),1)
BUILTIN_LIBC := c.h
STAGE0_FLAGS += --dynlink
STAGE1_FLAGS += --dynlink
endif

OUT ?= out
ARCHS = arm riscv
ARCH ?= $(firstword $(ARCHS))
Expand Down Expand Up @@ -122,9 +131,9 @@ $(OUT)/norm-lf: tools/norm-lf.c
$(VECHO) " CC+LD\t$@\n"
$(Q)$(CC) $(CFLAGS) -o $@ $^

$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/c.c
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC)
$(VECHO) " GEN\t$@\n"
$(Q)$(OUT)/norm-lf $(LIBDIR)/c.c $(OUT)/c.normalized.c
$(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC) $(OUT)/c.normalized.c
$(Q)$(OUT)/inliner $(OUT)/c.normalized.c $@
$(Q)$(RM) $(OUT)/c.normalized.c

Expand All @@ -143,12 +152,12 @@ $(OUT)/$(STAGE0)-sanitizer: $(OUT)/libc.inc $(OBJS)
$(OUT)/$(STAGE1): $(OUT)/$(STAGE0)
$(Q)$(STAGE1_CHECK_CMD)
$(VECHO) " SHECC\t$@\n"
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
$(Q)chmod a+x $@

$(OUT)/$(STAGE2): $(OUT)/$(STAGE1)
$(VECHO) " SHECC\t$@\n"
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) -o $@ $(SRCDIR)/main.c
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) $(STAGE1_FLAGS) -o $@ $(SRCDIR)/main.c

bootstrap: $(OUT)/$(STAGE2)
$(Q)chmod 775 $(OUT)/$(STAGE2)
Expand Down
47 changes: 47 additions & 0 deletions lib/c.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* shecc - Self-Hosting and Educational C Compiler.
*
* shecc is freely redistributable under the BSD 2 clause license. See the
* file "LICENSE" for information on usage and redistribution of this file.
*/

#pragma once
/* Declarations of C standard library functions.
 *
 * This header carries prototypes only; no function bodies are defined here.
 * NOTE(review): sizes, lengths and counts are declared as 'int' rather than
 * 'size_t', and pointer parameters are not const-qualified - presumably to
 * stay within the subset of C that shecc accepts; confirm before changing.
 */

/* Null pointer constant */
#define NULL 0

/* Boolean type and its two constants */
#define bool _Bool
#define true 1
#define false 0

/* File I/O */
/* FILE is a typedef of int here; the functions below only pass FILE pointers
 * around.
 */
typedef int FILE;
FILE *fopen(char *filename, char *mode);
int fclose(FILE *stream);
int fgetc(FILE *stream);
char *fgets(char *str, int n, FILE *stream);
int fputc(int c, FILE *stream);

/* string-related functions */
/* NOTE(review): memcpy is declared with 'char *' while memcmp and memset use
 * 'void *' - verify this matches the definitions these prototypes stand for.
 */
int strlen(char *str);
int strcmp(char *s1, char *s2);
int strncmp(char *s1, char *s2, int len);
char *strcpy(char *dest, char *src);
char *strncpy(char *dest, char *src, int len);
char *memcpy(char *dest, char *src, int count);
int memcmp(void *s1, void *s2, int n);
void *memset(void *s, int c, int n);

/* formatted output string */
int printf(char *str, ...);
int sprintf(char *buffer, char *str, ...);
int snprintf(char *buffer, int n, char *str, ...);

/* Terminating program */
void exit(int exit_code);
void abort(void);

/* Dynamic memory allocation/deallocation functions */
void *malloc(int size);
void *calloc(int n, int size);
void free(void *ptr);
6 changes: 6 additions & 0 deletions mk/arm.mk
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@ ARCH_DEFS = \
\#define ARCH_PREDEFINED \"__arm__\" /* defined by GNU C and RealView */\n$\
\#define ELF_MACHINE 0x28 /* up to ARMv7/Aarch32 */\n$\
\#define ELF_FLAGS 0x5000200\n$\
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
\#define LIBC_SO \"libc.so.6\"\n$\
\#define PLT_FIXUP_SIZE 20\n$\
\#define PLT_ENT_SIZE 12\n$\
\#define R_ARCH_JUMP_SLOT 0x16\n$\
"
# Extra arguments for the target runner; mk/common.mk appends this value to
# TARGET_EXEC when DYNLINK=1.
# NOTE(review): with qemu user-mode emulation, -L presumably names the prefix
# under which the target's dynamic linker and shared libraries are looked up -
# confirm the path matches the installed ARM cross toolchain.
RUNNER_LD_PREFIX=-L /usr/arm-linux-gnueabi/
3 changes: 3 additions & 0 deletions mk/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ ifneq ($(HOST_ARCH),$(ARCH_NAME))

# Generate the path to the architecture-specific qemu
TARGET_EXEC = $(shell which $(ARCH_RUNNER))
ifeq ($(DYNLINK),1)
TARGET_EXEC += $(RUNNER_LD_PREFIX)
endif
endif
export TARGET_EXEC

Expand Down
8 changes: 8 additions & 0 deletions mk/riscv.mk
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,12 @@ ARCH_DEFS = \
\#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\
\#define ELF_MACHINE 0xf3\n$\
\#define ELF_FLAGS 0\n$\
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
\#define LIBC_SO \"libc.so.6\"\n$\
\#define PLT_FIXUP_SIZE 20\n$\
\#define PLT_ENT_SIZE 12\n$\
\#define R_ARCH_JUMP_SLOT 0x5\n$\
"

# TODO: Set this variable for RISC-V architecture
RUNNER_LD_PREFIX=
148 changes: 118 additions & 30 deletions src/arm-codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,18 @@ void update_elf_offset(ph2_ir_t *ph2_ir)

void cfg_flatten(void)
{
func_t *func = find_func("__syscall");
func->bbs->elf_offset = 48; /* offset of start + branch + exit in codegen */
func_t *func;

if (dynlink)
elf_offset =
88; /* offset of dynamic linking setup + global init call */
else {
func = find_func("__syscall");
func->bbs->elf_offset = 48; /* offset of start + exit in codegen */
elf_offset =
84; /* offset of start + branch + exit + syscall in codegen */
}

elf_offset = 84; /* offset of start + branch + exit + syscall in codegen */
GLOBAL_FUNC->bbs->elf_offset = elf_offset;

for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
Expand All @@ -147,9 +155,15 @@ void cfg_flatten(void)
}

/* prepare 'argc' and 'argv', then proceed to 'main' function */
elf_offset += 32; /* 6 insns for main call + 2 for exit */
if (dynlink)
elf_offset += 20; /* 5 insns: restore r0/r1 from r9/r10, bl to main */
else
elf_offset += 32; /* 6 insns for main call + 2 for exit */

for (func = FUNC_LIST.head; func; func = func->next) {
if (!func->bbs)
continue;

/* reserve stack */
ph2_ir_t *flatten_ir = add_ph2_ir(OP_define);
flatten_ir->src0 = func->stack_size;
Expand Down Expand Up @@ -282,15 +296,23 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
return;
case OP_call:
func = find_func(ph2_ir->func_name);
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size));
if (func->bbs)
ofs = func->bbs->elf_offset - elf_code->size;
else
ofs = (elf_plt_start + func->plt_offset) -
(elf_code_start + elf_code->size);
emit(__bl(__AL, ofs));
return;
case OP_load_data_address:
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
emit(__movt(__AL, rd, ph2_ir->src0 + elf_data_start));
return;
case OP_address_of_func:
func = find_func(ph2_ir->func_name);
ofs = elf_code_start + func->bbs->elf_offset;
if (func->bbs)
ofs = elf_code_start + func->bbs->elf_offset;
else
ofs = elf_plt_start + func->plt_offset;
emit(__movw(__AL, __r8, ofs));
emit(__movt(__AL, __r8, ofs));
emit(__sw(__AL, __r8, rn, 0));
Expand Down Expand Up @@ -447,49 +469,98 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
}
}

void plt_generate(void);
void code_generate(void)
{
elf_data_start = elf_code_start + elf_offset;
if (dynlink) {
plt_generate();
/* Call __libc_start_main() */
emit(__mov_i(__AL, __r11, 0));
emit(__mov_i(__AL, __lr, 0));
emit(__pop_word(__AL, __r1));
emit(__mov_r(__AL, __r2, __sp));
emit(__push_reg(__AL, __r2));
emit(__push_reg(__AL, __r0));
emit(__mov_i(__AL, __r12, 0));
emit(__push_reg(__AL, __r12));
/* Pass the address of our main wrapper function;
* After these two mov movw/movt, we have:
* - mov r3, #0
* - bl to __libc_start_main@plt
* - mov r0, #127
* - bl +28
* - (main wrapper starts here)
*
* Total offset = current + 8 + 16 = current + 24
*
* That is, the current code size + 24 is the starting address
* of main wrapper.
* */
int main_wrapper_offset = elf_code->size + 24;
emit(__movw(__AL, __r0, elf_code_start + main_wrapper_offset));
emit(__movt(__AL, __r0, elf_code_start + main_wrapper_offset));
emit(__mov_i(__AL, __r3, 0));
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) -
(elf_code_start + elf_code->size)));
/* Goto the 'exit' code snippet if __libc_start_main returns */
emit(__mov_i(__AL, __r0, 127));
emit(__bl(__AL, 28));

/* start */
/* If the compiled program is dynamic linking, it needs to
* preserve the 'argc' and 'argv' for the 'main' function.
* */
emit(__mov_r(__AL, __r9, __r0));
emit(__mov_r(__AL, __r10, __r1));
}
/* For both static and dynamic linking, we need to set up the stack
* and call the main function.
* */
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__sub_r(__AL, __sp, __sp, __r8));
emit(__mov_r(__AL, __r12, __sp));
/* Calculate the branch offset to the global initialization code */
emit(__bl(__AL, GLOBAL_FUNC->bbs->elf_offset - elf_code->size));
/* After global init, jump to main preparation */
emit(__b(__AL, 56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */

/* exit */
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__add_r(__AL, __sp, __sp, __r8));
emit(__mov_r(__AL, __r0, __r0));
emit(__mov_i(__AL, __r7, 1));
emit(__svc());
if (!dynlink) {
/* exit - only for static linking */
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__add_r(__AL, __sp, __sp, __r8));
emit(__mov_r(__AL, __r0, __r0));
emit(__mov_i(__AL, __r7, 1));
emit(__svc());

/* syscall */
emit(__mov_r(__AL, __r7, __r0));
emit(__mov_r(__AL, __r0, __r1));
emit(__mov_r(__AL, __r1, __r2));
emit(__mov_r(__AL, __r2, __r3));
emit(__mov_r(__AL, __r3, __r4));
emit(__mov_r(__AL, __r4, __r5));
emit(__mov_r(__AL, __r5, __r6));
emit(__svc());
emit(__mov_r(__AL, __pc, __lr));
/* syscall */
emit(__mov_r(__AL, __r7, __r0));
emit(__mov_r(__AL, __r0, __r1));
emit(__mov_r(__AL, __r1, __r2));
emit(__mov_r(__AL, __r2, __r3));
emit(__mov_r(__AL, __r3, __r4));
emit(__mov_r(__AL, __r4, __r5));
emit(__mov_r(__AL, __r5, __r6));
emit(__svc());
emit(__mov_r(__AL, __pc, __lr));
}

ph2_ir_t *ph2_ir;
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
ph2_ir = ph2_ir->next)
emit_ph2_ir(ph2_ir);

/* prepare 'argc' and 'argv', then proceed to 'main' function */
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__add_r(__AL, __r8, __r12, __r8));
emit(__lw(__AL, __r0, __r8, 0));
emit(__add_i(__AL, __r1, __r8, 4));
if (dynlink) {
emit(__mov_r(__AL, __r0, __r9));
emit(__mov_r(__AL, __r1, __r10));
} else {
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
emit(__add_r(__AL, __r8, __r12, __r8));
emit(__lw(__AL, __r0, __r8, 0));
emit(__add_i(__AL, __r1, __r8, 4));
}
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));

/* exit with main's return value - r0 already has the return value */
Expand All @@ -501,3 +572,20 @@ void code_generate(void)
emit_ph2_ir(ph2_ir);
}
}

/* Write the procedure linkage table into 'elf_plt'.
 *
 * The table starts with a five-instruction stub: it pushes lr, builds the
 * address of GOT slot 2 in r10 (movw/movt pair), copies it into lr and then
 * loads pc from that slot. The stub is followed by three-instruction entries,
 * one for every PLT_ENT_SIZE bytes of 'plt_sz' that remain after
 * PLT_FIXUP_SIZE is subtracted. Entry number i (counting from 0) builds the
 * address of GOT slot i + 3 in r12 and loads pc from it.
 *
 * Note that the stub overwrites r10.
 */
void plt_generate(void)
{
    /* Running GOT slot address; the stub uses slot 2. */
    int got_slot = elf_got_start + PTR_SIZE * 2;
    /* Number of bytes taken by the per-function entries. */
    int entries_size = plt_sz - PLT_FIXUP_SIZE;
    int ent_ofs;

    /* Leading stub */
    elf_write_int(elf_plt, __push_reg(__AL, __lr));
    elf_write_int(elf_plt, __movw(__AL, __r10, got_slot));
    elf_write_int(elf_plt, __movt(__AL, __r10, got_slot));
    elf_write_int(elf_plt, __mov_r(__AL, __lr, __r10));
    elf_write_int(elf_plt, __lw(__AL, __pc, __lr, 0));

    /* Per-function entries; the first one refers to GOT slot 3 */
    for (ent_ofs = 0; ent_ofs < entries_size; ent_ofs += PLT_ENT_SIZE) {
        got_slot += PTR_SIZE;
        elf_write_int(elf_plt, __movw(__AL, __r12, got_slot));
        elf_write_int(elf_plt, __movt(__AL, __r12, got_slot));
        elf_write_int(elf_plt, __lw(__AL, __pc, __r12, 0));
    }
}
10 changes: 10 additions & 0 deletions src/arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,16 @@ int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
return arm_encode(cond, arm_ldm + (0x2 << 6) + (w << 1), rn, 0, reg_list);
}

/* Build the instruction word that pushes the single register 'rt'.
 *
 * The word is produced by arm_encode() with opcode field 0x52, register
 * field 0xd and a trailing operand of 4.
 * NOTE(review): with the usual ARM field layout this presumably corresponds
 * to "str rt, [sp, #-4]!" - confirm against arm_encode().
 *
 * cond: condition under which the instruction executes
 * rt:   register to push
 */
int __push_reg(arm_cond_t cond, arm_reg rt)
{
    int opcode = 0x52; /* same value as (0x5 << 4) | 0x2 */
    int base = 0xd;
    int step = 0x4;

    return arm_encode(cond, opcode, base, rt, step);
}

/* Build the instruction word that pops one word into register 'rt'.
 *
 * The word is produced by arm_encode() with opcode field 0x49, register
 * field 0xd and a trailing operand of 4.
 * NOTE(review): with the usual ARM field layout this presumably corresponds
 * to "ldr rt, [sp], #4" - confirm against arm_encode().
 *
 * cond: condition under which the instruction executes
 * rt:   register that receives the popped word
 */
int __pop_word(arm_cond_t cond, arm_reg rt)
{
    int opcode = 0x49; /* same value as (0x4 << 4) | 0x9 */
    int base = 0xd;
    int step = 0x4;

    return arm_encode(cond, opcode, base, rt, step);
}

int __b(arm_cond_t cond, int ofs)
{
int o = (ofs - 8) >> 2;
Expand Down
Loading