-
Notifications
You must be signed in to change notification settings - Fork 961
Feature: Complete RISC-V baremetal support with automated qemu emulation #3254
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
1c3e5cf
3fd7a8d
81f2327
816dbd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| ### **RISC-V 32-Bit Bare-Metal Support** | ||
|
|
||
| This target provides complete bare-metal support for the 32-bit RISC-V toolchain, including a UART driver and an automated QEMU emulation setup using `qemu-system-riscv32`. | ||
| It enables formatted output and testing on microcontroller-class RISC-V devices without relying on an operating system. | ||
|
|
||
| Note: This target is compatible with a riscv32 bare-metal toolchain configured with `-march=rv32imac` and `-mabi=ilp32`. | ||
|
|
||
| **Building an Example:** | ||
|
|
||
| To build any example for the 'riscv32_baremetal' target, run: | ||
|
|
||
| ``` | ||
| make -f tensorflow/lite/micro/tools/make/Makefile TARGET=riscv32_baremetal <example_name> | ||
| ``` | ||
|
|
||
| This command performs cross-compilation for RISC-V and then automatically runs the binary under QEMU full-system emulation. | ||
|
|
||
| **Example:** | ||
|
|
||
| ``` | ||
| make -f tensorflow/lite/micro/tools/make/Makefile TARGET=riscv32_baremetal person_detection_test | ||
| ``` | ||
|
|
||
| **Expected Output:** | ||
|
|
||
| ``` | ||
| Testing TestInvoke | ||
| person data. person score: 113, no person score: -113 | ||
| no person data. person score: -57, no person score: 57 | ||
| Ran successfully | ||
| 1/1 tests passed | ||
| ~~~ALL TESTS PASSED~~~ | ||
| ``` | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| /* Copyright 2023 The TensorFlow Authors. All Rights Reserved. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2025? |
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| ==============================================================================*/ | ||
|
|
||
| #include "tensorflow/lite/micro/debug_log.h" | ||
| #include <stdarg.h> | ||
| #include <stdio.h> | ||
| #include "tensorflow/lite/micro/riscv32_baremetal/lightweight_snprintf.cc" | ||
|
|
||
| // UART definitions- Datasheet: https://pdos.csail.mit.edu/6.S081/2024/lec/16550.pdf | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about using Semihosting instead of having UART support here? |
||
// Base address of the memory-mapped UART used by this target.
#define UART0_BASE 0x10000000

// Byte-wide device register located at (base + offset). The access goes
// through a volatile pointer so that every read and write is actually
// performed. Both arguments are parenthesized so that expression arguments
// (e.g. a conditional offset) expand with the intended precedence.
#define REG(base, offset) (*(volatile unsigned char *)((base) + (offset)))

#define UART0_DR REG(UART0_BASE, 0x00)   // Data register (read/write)
#define UART0_FCR REG(UART0_BASE, 0x02)  // FIFO Control register
#define UART0_LSR REG(UART0_BASE, 0x05)  // Line Status register

#define UARTFCR_FFENA 0x01  // FIFO Enable bit
#define UARTLSR_THRE 0x20   // Transmitter Holding Register Empty bit

// Non-zero when the transmitter is ready to accept a new character.
#define UART0_FF_THR_EMPTY (UART0_LSR & UARTLSR_THRE)

// uart_putc()
// Waits until the UART transmitter is ready, then writes one character.
static void uart_putc(char c) {
  while (!UART0_FF_THR_EMPTY) {
    // Busy-wait: the line status register is re-read on every iteration.
  }
  UART0_DR = c;
}

// uart_puts()
// Sends a null-terminated string over UART, one character at a time.
// The terminating NUL itself is not transmitted.
static void uart_puts(const char *str) {
  while (*str) {
    uart_putc(*str++);
  }
}
|
|
||
| // DebugLog() | ||
| // TFLM's platform hook for debug logging. | ||
|
|
||
| extern "C" void DebugLog(const char* format, va_list args) { | ||
| #ifndef TF_LITE_STRIP_ERROR_STRINGS | ||
| char buffer[256]; // Temporary buffer for formatted string | ||
| int len = mini_vsnprintf(buffer, sizeof(buffer), format, args); | ||
| if (len > 0) { | ||
| UART0_FCR = UARTFCR_FFENA; | ||
| uart_puts(buffer); | ||
| } | ||
| #endif | ||
| } | ||
|
|
||
| // DebugVsnprintf() | ||
| // A utility for TFLM internal formatting, using the same lightweight function. | ||
|
|
||
| #ifndef TF_LITE_STRIP_ERROR_STRINGS | ||
| extern "C" int DebugVsnprintf(char* buffer, size_t buf_size, | ||
| const char* format, va_list vlist) { | ||
| return mini_vsnprintf(buffer, buf_size, format, vlist); | ||
| } | ||
| #endif | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| #ifdef __cplusplus | ||
| extern "C" { | ||
| #endif | ||
|
|
||
| #include <stdarg.h> | ||
| #include <stddef.h> | ||
| #include <stdint.h> | ||
|
|
||
// mini_vsnprintf()
// A minimal implementation of vsnprintf() supporting %s, %d, and %p.
// Any other character (including an unsupported conversion such as "%x")
// is copied to the output verbatim.
//
// Writes formatted output into 'buf', storing at most 'size' - 1 characters
// followed by a terminating NUL (when 'size' > 0). Output that does not fit
// is silently dropped.
//
// Returns the number of characters actually stored, not counting the NUL.
// Note that this differs from standard vsnprintf(), which returns the length
// the untruncated output would have had.
//
// A NULL 'buf' is treated as a zero-sized buffer and a NULL 'fmt' as an empty
// format string; both yield a return value of 0.
int mini_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) {
  if (buf == NULL) size = 0;  // Nothing can be stored without a buffer
  if (fmt == NULL) fmt = "";

  char *out = buf;
  size_t remaining = size;

  // Stores one character while always keeping one byte free for the NUL.
#define PUTCHAR(c) \
  do { if (remaining > 1) { *out++ = (c); remaining--; } } while (0)

  while (*fmt) {
    if (*fmt == '%' && *(fmt + 1) == 's') {
      fmt += 2;
      const char *s = va_arg(args, const char *);
      if (!s) s = "(null)";
      while (*s) PUTCHAR(*s++);
    } else if (*fmt == '%' && *(fmt + 1) == 'd') {
      fmt += 2;
      int v = va_arg(args, int);
      // Work on the magnitude as an unsigned value: negating a signed int is
      // undefined for INT_MIN, while unsigned negation is well defined.
      unsigned int magnitude = (unsigned int)v;
      if (v < 0) {
        PUTCHAR('-');
        magnitude = 0u - magnitude;
      }
      char tmp[16];
      int i = 0;
      // Digits are produced least-significant first, then emitted reversed.
      do {
        tmp[i++] = (char)('0' + (magnitude % 10u));
        magnitude /= 10u;
      } while (magnitude && i < (int)sizeof(tmp));
      while (i--) PUTCHAR(tmp[i]);
    } else if (*fmt == '%' && *(fmt + 1) == 'p') {
      fmt += 2;
      void *ptr = va_arg(args, void *);
      uintptr_t val = (uintptr_t)ptr;
      char tmp[2 * sizeof(void *) + 1];
      int i = 0;
      if (!val) {
        const char *null_str = "(nil)";
        while (*null_str) PUTCHAR(*null_str++);
      } else {
        do {
          tmp[i++] = "0123456789abcdef"[val % 16];
          val /= 16;
        } while (val && i < (int)sizeof(tmp));
        PUTCHAR('0');
        PUTCHAR('x');
        while (i--) PUTCHAR(tmp[i]);
      }
    } else {
      PUTCHAR(*fmt++);
    }
  }

  // Keep the helper macro private to this function; this source file is
  // pulled into another translation unit via #include.
#undef PUTCHAR

  if (remaining > 0) *out = '\0';
  return (int)(size - remaining);
}

// mini_snprintf()
// Simple wrapper around mini_vsnprintf() that takes a variable number of
// args. Returns whatever mini_vsnprintf() returns.
int mini_snprintf(char *buf, size_t size, const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  int ret = mini_vsnprintf(buf, size, fmt, args);
  va_end(args);
  return ret;
}
|
|
||
| #ifdef __cplusplus | ||
| } | ||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
/*
 * Linker script for the riscv32_baremetal target.
 * Lays out code, data, bss and a fixed-size stack contiguously starting at
 * 0x80000000, and exports __stack_top (initial stack pointer, used by the
 * startup code) and _end (first address after the image, used by _sbrk()).
 *
 * Comments use the C-style form, which is the comment syntax documented for
 * GNU ld linker scripts.
 */
ENTRY(_start)
SECTIONS
{
  /* Typical start for RISC-V bare-metal (QEMU virt)'s memory layout */
  . = 0x80000000;

  /* Code section: contains all executable instructions */
  .text : {
    *(.text._start)   /* Put _start first */
    *(.text*)
  }

  /*
   * Enforces a page boundary between code and data, letting the linker
   * separate permissions (RX for code, RW for data), removing the RWX warning.
   */
  . = ALIGN(0x1000);

  /* Data section: contains initialized global and static variables */
  .data : {
    *(.data*)
    *(.rodata*)
  }

  /* BSS section: contains zero-initialized or uninitialized variables */
  .bss : {
    *(.bss*)
    *(COMMON)
  }

  /*
   * Stack section: reserves memory for the runtime stack.
   * Without proper initialization of stack top, sometimes QEMU halts and
   * sometimes produces no output (blinking cursor).
   * The start is aligned to 16 bytes and the size (0x10000) is a multiple of
   * 16, so __stack_top meets the 16-byte stack alignment of the RISC-V
   * calling convention.
   */
  . = ALIGN(16);
  .stack : {
    . = . + 0x10000;
    __stack_top = .;
  }

  /* Avoids undefined reference to _end in _sbrk function */
  _end = .;
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
# Reset entry point for the riscv32_baremetal target.
# Places code in .text._start so linker puts it at reset address.
    .section .text._start
    .global _start
_start:
    la   sp, __stack_top      # Load stack pointer (symbol from the linker script)
    add  s0, sp, zero         # Set frame pointer
    call main                 # Run main with ra set, so a return lands on 'loop'
loop:
    j    loop                 # Park here forever if main ever returns
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| extern "C" { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might not be necessary if Semihosting is used? |
||
|
|
||
| #include <sys/types.h> | ||
| #include <sys/stat.h> | ||
| #include <errno.h> | ||
|
|
||
// _trap()
// Final resting place of the program: _exit() calls this and it never
// returns. Each iteration executes the RISC-V "wfi" instruction.
static void _trap(void) {
  for (;;) {
    __asm__("wfi");
  }
}
|
|
||
| // _sbrk() grows the heap by moving its end pointer and returns the old end address, where the new data can be stored. | ||
| // The heap grows upwards and the stack grows downwards. | ||
// Moves the program break by 'incr' bytes and returns the previous break,
// i.e. the start of the newly available region when 'incr' is positive.
// The heap begins at the linker-provided symbol _end.
//
// A negative 'incr' that would move the break below _end is rejected:
// the break is left unchanged, errno is set to ENOMEM and (void*)-1 is
// returned. No upper bound is enforced here, because no heap-limit symbol
// is available in this file.
void* _sbrk(ptrdiff_t incr) {
  extern char _end;       // Symbol from linker script = end of the linked image
  static char* heap_end;  // Current program break; set on the first call
  if (!heap_end) {
    heap_end = &_end;     // Heap starts just after _end
  }

  // Bytes currently handed out; never negative thanks to the check below.
  const ptrdiff_t in_use = heap_end - &_end;
  if (incr < -in_use) {
    errno = ENOMEM;
    return (void*)-1;
  }

  char* prev_heap_end = heap_end;  // Old break is the caller's block start
  heap_end += incr;
  return (void*)prev_heap_end;
}
|
|
||
| // Minimal syscall stubs for bare-metal (no OS). | ||
| // These dummy functions satisfy newlib/stdio dependencies so code using | ||
| // printf, malloc, etc. can link. None of the stubs performs real I/O: _write() reports success without sending the data anywhere, | ||
| // while the others just return defaults or errors since there is no filesystem or OS. | ||
| int _close(int file) { return -1; } | ||
| int _fstat(int file, struct stat* st) { st->st_mode = S_IFCHR; return 0; } | ||
| int _isatty(int file) { return 1; } | ||
| int _lseek(int file, int ptr, int dir) { return 0; } | ||
| int _read(int file, char* ptr, int len) { errno = EINVAL; return -1; } | ||
| int _write(int file, const char* ptr, int len) { return len; } | ||
| void _exit(int status) { _trap(); } | ||
| int _kill(int pid, int sig) { errno = EINVAL; return -1; } | ||
| int _getpid(void) { return 1; } | ||
|
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -881,6 +881,36 @@ $(BINDIR)%_test : $(CORE_OBJDIR)%_test.o $(MICROLITE_LIB_PATH) | |
| -o $@ $< \ | ||
| $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS) | ||
|
|
||
| # Build rules and object setup for the RISC-V 32-bit bare-metal target, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The primary Makefile should only contain common definitions. All platform-specific configurations must be moved into their respective target makefiles. |
||
| # including startup code and system call stubs (e.g., write, sbrk) needed for C/C++ runtime support. | ||
|
|
||
ifeq ($(TARGET),riscv32_baremetal)

# Startup code (_start) and syscall stubs that every binary for this target needs.
STARTUP_SRC := tensorflow/lite/micro/riscv32_baremetal/start.s
STUBS_SRC := tensorflow/lite/micro/riscv32_baremetal/stubs.cc

# Object paths mirror the source paths under $(CORE_OBJDIR); deriving them from
# the source variables keeps the two from drifting apart.
STARTUP_OBJ := $(CORE_OBJDIR)$(STARTUP_SRC:.s=.o)
STUBS_OBJ := $(CORE_OBJDIR)$(STUBS_SRC:.cc=.o)

# Keep the objects around even when make considers them intermediate files.
.SECONDARY: $(STARTUP_OBJ) $(STUBS_OBJ)

$(STARTUP_OBJ): $(STARTUP_SRC)
	@mkdir -p $(dir $@)
	$(CC) $(ASFLAGS) -c $< -o $@

$(STUBS_OBJ): $(STUBS_SRC)
	@mkdir -p $(dir $@)
	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@

# Make sure both objects are built before the library is.
$(MICROLITE_LIB_PATH): $(STARTUP_OBJ) $(STUBS_OBJ)

OBJS += $(STARTUP_OBJ) $(STUBS_OBJ)
LDFLAGS += $(STARTUP_OBJ)
# Pass stubs.o explicitly on the link line so the syscall stubs stay linked.
LDFLAGS += -Wl,--whole-archive $(STUBS_OBJ) -Wl,--no-whole-archive

endif
|
|
||
| $(BINDIR)%.test_target: $(BINDIR)%_test | ||
| @test -f $(TEST_SCRIPT) || (echo 'Unable to find the test script. Is the software emulation available in $(TARGET)?'; exit 1) | ||
| $(TEST_SCRIPT) $< $(TEST_PASS_STRING) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| # Toolchain Configuration | ||
| TARGET_ARCH := riscv32 | ||
| TARGET_TOOLCHAIN_PREFIX := riscv32-unknown-elf- | ||
|
|
||
| # Toolchain path (machine-specific default; override with TARGET_TOOLCHAIN_ROOT=<dir>/ on the make command line) | ||
| TARGET_TOOLCHAIN_ROOT := /home/mips/riscv32/bin/ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is the test code? |
||
|
|
||
# Minimum necessary flags: avoid pulling in unnecessary C++ runtime support
# and use the lightweight global min and max.
# (The last line must not end with a backslash, otherwise the following line
# is joined onto this assignment.)
CXXFLAGS += -fno-use-cxa-atexit \
  -DTF_LITE_USE_GLOBAL_MIN \
  -DTF_LITE_USE_GLOBAL_MAX

# Minimum necessary flags to link our custom startup file instead of the
# default one, and to lay out the image with this target's linker script.
LDFLAGS = -nostartfiles \
  -T tensorflow/lite/micro/riscv32_baremetal/linker.ld

# Include c standard library, math library and compiler support library
MICROLITE_LIBS = -lc -lm -lgcc
|
|
||
| TEST_SCRIPT := $(TENSORFLOW_ROOT)tensorflow/lite/micro/testing/test_with_qemu.sh | ||
|
|
||
| # List of available examples | ||
| EXAMPLES := person_detection hello_world micro_speech person_detection_test hello_world_test micro_speech_test dtln_test | ||
|
|
||
| # Run a given example in QEMU using test_with_qemu.sh | ||
| $(EXAMPLES): | ||
| @$(TEST_SCRIPT) system-riscv32 - gen/riscv32_baremetal_x86_64_default_gcc/bin/$@ non_test_binary riscv32_baremetal | ||
|
Comment on lines
+24
to
+29
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't seem like the best place for the examples instructions. Why do we need this? |
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The current setup appears to be highly specific to QEMU. If this is the case, perhaps we should rename
`riscv32_baremetal` to `riscv32_qemu` for better clarity.