Skip to content

Commit 0b8b266

Browse files
benzea authored and jmberg-intel committed
um: insert scheduler ticks when userspace does not yield
In time-travel mode userspace can do a lot of work without any time passing. Unfortunately, this can result in OOM situations as the RCU core code will never be run. Work around this by keeping track of userspace processes that do not yield for a lot of operations. When this happens, insert a jiffy into the sched_clock to account time against the process and cause the bookkeeping to run. As sched_clock is used for tracing, it is useful to keep it in sync between the different VMs. As such, try to remove added ticks again when the actual clock ticks. Signed-off-by: Benjamin Berg <[email protected]> Link: https://patch.msgid.link/[email protected] Signed-off-by: Johannes Berg <[email protected]>
1 parent 9b08818 commit 0b8b266

File tree

4 files changed

+66
-1
lines changed

4 files changed

+66
-1
lines changed

arch/um/Kconfig

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,21 @@ config UML_TIME_TRAVEL_SUPPORT
227227

228228
It is safe to say Y, but you probably don't need this.
229229

230+
config UML_MAX_USERSPACE_ITERATIONS
231+
int
232+
prompt "Maximum number of unscheduled userspace iterations"
233+
default 10000
234+
depends on UML_TIME_TRAVEL_SUPPORT
235+
help
236+
In UML inf-cpu and ext time-travel mode userspace can run without being
237+
interrupted. This will eventually overwhelm the kernel and create OOM
238+
situations (mainly RCU not running). This setting specifies the number
239+
of kernel/userspace switches (minor/major page fault, signal or syscall)
240+
for the same userspace thread before the sched_clock is advanced by a
241+
jiffie to trigger scheduling.
242+
243+
Setting it to zero disables the feature.
244+
230245
config KASAN_SHADOW_OFFSET
231246
hex
232247
depends on KASAN

arch/um/include/shared/common-offsets.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,8 @@ DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
2828
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
2929
DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT);
3030
#endif
31-
31+
/*
 * Export the Kconfig iteration limit under a UML_CONFIG_ name. The Kconfig
 * symbol depends on UML_TIME_TRAVEL_SUPPORT, so it may be undefined; fall
 * back to 0 in that case, which is the value documented as "feature
 * disabled".
 */
#ifdef CONFIG_UML_MAX_USERSPACE_ITERATIONS
32+
DEFINE(UML_CONFIG_UML_MAX_USERSPACE_ITERATIONS, CONFIG_UML_MAX_USERSPACE_ITERATIONS);
33+
#else
34+
DEFINE(UML_CONFIG_UML_MAX_USERSPACE_ITERATIONS, 0);
35+
#endif

arch/um/kernel/time.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include <shared/init.h>
2626

2727
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
28+
#include <linux/sched/clock.h>
29+
2830
enum time_travel_mode time_travel_mode;
2931
EXPORT_SYMBOL_GPL(time_travel_mode);
3032

@@ -47,6 +49,15 @@ static u16 time_travel_shm_id;
4749
static struct um_timetravel_schedshm *time_travel_shm;
4850
static union um_timetravel_schedshm_client *time_travel_shm_client;
4951

52+
/*
 * Number of artificial jiffies currently added on top of the real jiffies
 * counter for sched_clock() purposes. Incremented by the userspace() loop
 * when a thread runs too long without yielding, and decremented again by
 * the time-travel timer handlers on each clock tick.
 */
unsigned long tt_extra_sched_jiffies;
53+
54+
/*
 * Scheduler clock for time-travel builds: the jiffies elapsed since
 * INITIAL_JIFFIES plus any inserted extra jiffies, converted to nanoseconds
 * (one jiffy is NSEC_PER_SEC / HZ ns).
 *
 * Returns the scheduler clock value in nanoseconds.
 */
notrace unsigned long long sched_clock(void)
55+
{
56+
return (unsigned long long)(jiffies - INITIAL_JIFFIES +
57+
tt_extra_sched_jiffies)
58+
* (NSEC_PER_SEC / HZ);
59+
}
60+
5061
static void time_travel_set_time(unsigned long long ns)
5162
{
5263
if (unlikely(ns < time_travel_time))
@@ -443,6 +454,11 @@ static void time_travel_periodic_timer(struct time_travel_event *e)
443454
{
444455
time_travel_add_event(&time_travel_timer_event,
445456
time_travel_time + time_travel_timer_interval);
457+
458+
/* clock tick; decrease extra jiffies by keeping sched_clock constant */
459+
if (tt_extra_sched_jiffies > 0)
460+
tt_extra_sched_jiffies -= 1;
461+
446462
deliver_alarm();
447463
}
448464

@@ -594,6 +610,10 @@ EXPORT_SYMBOL_GPL(time_travel_add_irq_event);
594610

595611
/*
 * Handler for the one-shot time-travel timer event. The event argument @e is
 * not used in the body; the handler only adjusts the extra-jiffies bookkeeping
 * and then delivers the alarm.
 */
static void time_travel_oneshot_timer(struct time_travel_event *e)
596612
{
613+
/*
 * This is a clock tick. If extra jiffies were inserted into sched_clock()
 * earlier, drop one of them now so that the inserted time is given back
 * and sched_clock() does not run ahead permanently (presumably the tick
 * delivered below advances jiffies by one, keeping the sum constant --
 * TODO confirm against deliver_alarm()).
 */
614+
if (tt_extra_sched_jiffies > 0)
615+
tt_extra_sched_jiffies -= 1;
616+
597617
deliver_alarm();
598618
}
599619

arch/um/os-Linux/skas/process.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,9 @@ int start_userspace(unsigned long stub_stack)
388388
return err;
389389
}
390390

391+
/*
 * Count of userspace() loop iterations since the counter was last cleared.
 * It is cleared in switch_threads() and whenever userspace() inserts an
 * extra scheduler jiffy.
 */
int unscheduled_userspace_iterations;
392+
/*
 * Extra jiffies accounted in sched_clock(); defined in arch/um/kernel/time.c.
 * NOTE(review): that definition sits under CONFIG_UML_TIME_TRAVEL_SUPPORT, and
 * this extern lives in a .c file rather than a shared header -- confirm this
 * is intended.
 */
extern unsigned long tt_extra_sched_jiffies;
393+
391394
void userspace(struct uml_pt_regs *regs)
392395
{
393396
int err, status, op, pid = userspace_pid[0];
@@ -397,6 +400,27 @@ void userspace(struct uml_pt_regs *regs)
397400
interrupt_end();
398401

399402
while (1) {
403+
/*
404+
* When we are in time-travel mode, userspace can theoretically
405+
* do a *lot* of work without being scheduled. The problem with
406+
* this is that it will prevent kernel bookkeeping (primarily
407+
* the RCU) from running and this can for example cause OOM
408+
* situations.
409+
*
410+
* This code accounts a jiffie against the scheduling clock
411+
* after the defined userspace iterations in the same thread.
412+
* By doing so the situation is effectively prevented.
413+
*/
414+
if (time_travel_mode == TT_MODE_INFCPU ||
415+
time_travel_mode == TT_MODE_EXTERNAL) {
416+
if (UML_CONFIG_UML_MAX_USERSPACE_ITERATIONS &&
417+
unscheduled_userspace_iterations++ >
418+
UML_CONFIG_UML_MAX_USERSPACE_ITERATIONS) {
419+
tt_extra_sched_jiffies += 1;
420+
unscheduled_userspace_iterations = 0;
421+
}
422+
}
423+
400424
time_travel_print_bc_msg();
401425

402426
current_mm_sync();
@@ -539,6 +563,8 @@ void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
539563

540564
/*
 * Switch from the current thread context to another one.
 *
 * @me:  jump buffer in which the current context is saved
 * @you: jump buffer of the context to resume
 */
void switch_threads(jmp_buf *me, jmp_buf *you)
541565
{
566+
/* A thread switch is happening, so restart the non-yielding count. */
unscheduled_userspace_iterations = 0;
567+
542568
/*
 * UML_SETJMP() yields 0 when the context is first saved; only then jump
 * to the other context. A later jump back into @me takes the non-zero
 * path and simply returns.
 */
if (UML_SETJMP(me) == 0)
543569
UML_LONGJMP(you, 1);
544570
}

0 commit comments

Comments
 (0)