From cd565d05859442700009efba1a0f2df15e51b7a9 Mon Sep 17 00:00:00 2001 From: Mes Date: Sun, 16 Feb 2025 16:28:54 +0800 Subject: [PATCH] Manage ticks to suppress RCU CPU stall warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the emulator currently operates using sequential emulation, the execution time for the boot process is relatively long, which can result in the generation of RCU CPU stall warnings. To address this issue, there are several potential solutions: 1. Scale the frequency to slow down emulator time during the boot process, thereby eliminating RCU CPU stall warnings. 2. During the boot process, avoid using 'clock_gettime' to update ticks and instead manage the tick increment relationship manually. 3. Implement multi-threaded emulation to accelerate the emulator's execution speed. For the third point, while implementing multi-threaded emulation can significantly accelerate the emulator's execution speed, it cannot guarantee that this issue will not reappear as the number of cores increases in the future. Therefore, a better approach is to use method 1 or 2 to allow the emulator to set an expected time for completing the boot process. The scale method requires three pieces of information to be implemented: - the cost of 'semu_timer_clocksource' - the percentage of boot time spent in 'semu_timer_clocksource' - the number of times 'semu_timer_clocksource' is called. In contrast, the increment method only requires the third piece of information to be implemented, making its implementation simpler. Furthermore, through statistical analysis, we found that the other two values (cost and percentage) exhibit different distributions across different environments. Therefore, using the scale method would require an additional profiling step when the emulator starts, adding complexity to the implementation. 
Finally, since the increment method does not obtain real-time timestamps during the boot process, its overhead is lower compared to the scale method. Because 'semu_timer_clocksource' is called frequently, this reduction in overhead accumulates and results in a noticeable performance improvement. Therefore, this commit opts for the increment method to address this issue. This commit divides time into emulator time and real time. During the boot process, the timer manages the tick increment to slow down the growth of emulator time, eliminating RCU CPU stall warnings. After the boot process is complete, the growth of emulator time aligns with real time. According to "Using RCU’s CPU Stall Detector" [1], the grace period for RCU CPU stalls is typically set to 21 seconds. By dividing this value by two as the expected completion time, we can provide a sufficient buffer to reduce the impact of errors and avoid RCU CPU stall warnings. According to statistics, the number of times 'semu_timer_clocksource' is called is approximately 'SMP count * 2.15 * 1e8'. By the time the boot process is completed, the emulator will have a total of 'boot seconds * frequency' ticks. Therefore, each time, '(boot seconds * frequency) / (2.15 * 1e8 * SMP count)' ticks need to be added. Close #51 [1] https://docs.kernel.org/RCU/stallwarn.html#config-rcu-cpu-stall-timeout --- Makefile | 10 ++++++ main.c | 2 +- riscv.c | 8 +++++ utils.c | 99 +++++++++++++++++++++++++++++++++++++++++++++----------- utils.h | 12 ++++++- 5 files changed, 111 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 0c6ee75..1897d46 100644 --- a/Makefile +++ b/Makefile @@ -132,6 +132,16 @@ DTC ?= dtc E := S := $E $E +# During the boot process, the emulator manually manages the growth of ticks to +# suppress RCU CPU stall warnings. Thus, we need a target time to set the +# increment of ticks. According to "Using RCU’s CPU Stall Detector" [1], the +# grace period for RCU CPU stalls is typically set to 21 seconds. 
+# By dividing this value by two as the expected completion time, we can +# provide a sufficient buffer to reduce the impact of errors and avoid +# RCU CPU stall warnings. +# [1] docs.kernel.org/RCU/stallwarn.html#config-rcu-cpu-stall-timeout +CFLAGS += -D SEMU_BOOT_TARGET_TIME=10 + SMP ?= 1 .PHONY: riscv-harts.dtsi riscv-harts.dtsi: diff --git a/main.c b/main.c index 024ce9e..ed11996 100644 --- a/main.c +++ b/main.c @@ -678,7 +678,7 @@ static int semu_init(emu_state_t *emu, int argc, char **argv) virtio_rng_init(); #endif /* Set up ACLINT */ - semu_timer_init(&emu->mtimer.mtime, CLOCK_FREQ); + semu_timer_init(&emu->mtimer.mtime, CLOCK_FREQ, hart_count); emu->mtimer.mtimecmp = calloc(vm->n_hart, sizeof(uint64_t)); emu->mswi.msip = calloc(vm->n_hart, sizeof(uint32_t)); emu->sswi.ssip = calloc(vm->n_hart, sizeof(uint32_t)); diff --git a/riscv.c b/riscv.c index c3fd394..bd92f1f 100644 --- a/riscv.c +++ b/riscv.c @@ -382,6 +382,14 @@ static void op_sret(hart_t *vm) vm->s_mode = vm->sstatus_spp; vm->sstatus_sie = vm->sstatus_spie; + /* After the booting process is complete, initrd will be loaded. At this + * point, the system will switch to U mode for the first time. Therefore, + * by checking whether the switch to U mode has already occurred, we can + * determine if the boot process has been completed. + */ + if (!boot_complete && !vm->s_mode) + boot_complete = true; + /* Reset stack */ vm->sstatus_spp = false; vm->sstatus_spie = true; diff --git a/utils.c b/utils.c index 29f9575..1094ccb 100644 --- a/utils.c +++ b/utils.c @@ -1,3 +1,4 @@ +#include #include #include "utils.h" @@ -19,6 +20,10 @@ #endif #endif +bool boot_complete = false; +static double ticks_increment; +static double boot_ticks; + /* Calculate "x * n / d" without unnecessary overflow or loss of precision. 
* * Reference: @@ -32,35 +37,93 @@ static inline uint64_t mult_frac(uint64_t x, uint64_t n, uint64_t d) return q * n + r * n / d; } -void semu_timer_init(semu_timer_t *timer, uint64_t freq) -{ - timer->freq = freq; - semu_timer_rebase(timer, 0); -} - -static uint64_t semu_timer_clocksource(uint64_t freq) +/* High-precision time measurement: + * - POSIX systems: clock_gettime() for nanosecond precision + * - macOS: mach_absolute_time() with timebase conversion + * - Other platforms: time(0) with conversion to nanoseconds as fallback + * + * The platform-specific timing logic is now clearly separated: POSIX and macOS + * implementations provide high-precision measurements, while the fallback path + * uses time(0) for a coarser but portable approach. + */ +static inline uint64_t host_time_ns() { #if defined(HAVE_POSIX_TIMER) - struct timespec t; - clock_gettime(CLOCKID, &t); - return t.tv_sec * freq + mult_frac(t.tv_nsec, freq, 1e9); + struct timespec ts; + clock_gettime(CLOCKID, &ts); + return (uint64_t) ts.tv_sec * 1e9 + (uint64_t) ts.tv_nsec; + #elif defined(HAVE_MACH_TIMER) - static mach_timebase_info_data_t t; - if (t.denom == 0) - (void) mach_timebase_info(&t); - return mult_frac(mult_frac(mach_absolute_time(), t.numer, t.denom), freq, - 1e9); + static mach_timebase_info_data_t ts = {0}; + if (ts.denom == 0) + (void) mach_timebase_info(&ts); + + uint64_t now = mach_absolute_time(); + /* convert to nanoseconds: (now * ts.numer / ts.denom) */ + return mult_frac(now, ts.numer, (uint64_t) ts.denom); + #else - return time(0) * freq; + /* Fall back to the low-resolution time(0) (seconds) and convert to ns. */ + time_t now_sec = time(0); + return (uint64_t) now_sec * 1e9; #endif } +/* The function that returns the "emulator time" in ticks. + * + * Before the boot process is completed, the emulator manually manages the + * growth of ticks to suppress RCU CPU stall warnings. 
After the boot process is + * completed, the emulator switches back to the real-time timer, using a + * bridging offset to ensure that the ticks of both timers remain consistent. + */ +static uint64_t semu_timer_clocksource(semu_timer_t *timer) +{ + /* After the boot process completes, the timer will switch to real time. Thus, + * there is an offset between the real time and the emulator time. + * + * After switching to real time, the correct way to update time is to + * calculate the increment of time and then add it to the emulator time. + */ + static int64_t offset = 0; + static bool first_switch = true; + + if (!boot_complete) { + boot_ticks += ticks_increment; + return (uint64_t) boot_ticks; + } + + uint64_t real_ticks = mult_frac(host_time_ns(), timer->freq, 1e9); + if (first_switch) { + first_switch = false; + + /* Calculate the offset between the real time and the emulator time */ + offset = (int64_t) (real_ticks - boot_ticks); + } + return (uint64_t) ((int64_t) real_ticks - offset); +} + +void semu_timer_init(semu_timer_t *timer, uint64_t freq, int n_harts) +{ + timer->freq = freq; + timer->begin = mult_frac(host_time_ns(), timer->freq, 1e9); + boot_ticks = timer->begin; /* Initialize the fake ticks for the boot process */ + + /* According to statistics, the number of times 'semu_timer_clocksource' + * is called is approximately 'SMP count * 2.15 * 1e8'. By the time the boot + * process is completed, the emulator will have a total of 'boot seconds * + * frequency' ticks. Therefore, each time, '(boot seconds * frequency) / + * (2.15 * 1e8 * SMP count)' ticks need to be added. 
+ */ + ticks_increment = + (SEMU_BOOT_TARGET_TIME * CLOCK_FREQ) / (2.15 * 1e8 * n_harts); +} + uint64_t semu_timer_get(semu_timer_t *timer) { - return semu_timer_clocksource(timer->freq) - timer->begin; + return semu_timer_clocksource(timer) - timer->begin; } void semu_timer_rebase(semu_timer_t *timer, uint64_t time) { - timer->begin = semu_timer_clocksource(timer->freq) - time; + timer->begin = semu_timer_clocksource(timer) - time; } diff --git a/utils.h b/utils.h index 5774bc4..159002b 100644 --- a/utils.h +++ b/utils.h @@ -3,13 +3,23 @@ #include #include +/* To suppress RCU CPU stall warnings, the emulator provides a fake timer to + * the Guest OS during the boot process. After the boot process is complete, the + * emulator will switch to real-time timer. + * + * Since the Guest OS transitions to U mode for the first time when it loads the + * initial user-mode process, we use this transition to determine whether the + * boot process has completed. + */ +extern bool boot_complete; + /* TIMER */ typedef struct { uint64_t begin; uint64_t freq; } semu_timer_t; -void semu_timer_init(semu_timer_t *timer, uint64_t freq); +void semu_timer_init(semu_timer_t *timer, uint64_t freq, int n_harts); uint64_t semu_timer_get(semu_timer_t *timer); void semu_timer_rebase(semu_timer_t *timer, uint64_t time);