Skip to content

Commit aebacb7

Browse files
vianplFrederic Weisbecker
authored andcommitted
timers: Fix get_next_timer_interrupt() with no timers pending
31cd0e1 ("timers: Recalculate next timer interrupt only when necessary") subtly altered get_next_timer_interrupt()'s behaviour. The function no longer consistently returns KTIME_MAX with no timers pending. In order to decide if there are any timers pending we check whether the next expiry will happen NEXT_TIMER_MAX_DELTA jiffies from now. Unfortunately, the next expiry time and the timer base clock are no longer updated in unison. The former changes upon certain timer operations (enqueue, expire, detach), whereas the latter keeps track of jiffies as they move forward. Ultimately breaking the logic above. A simplified example: - Upon entering get_next_timer_interrupt() with: jiffies = 1 base->clk = 0; base->next_expiry = NEXT_TIMER_MAX_DELTA; 'base->next_expiry == base->clk + NEXT_TIMER_MAX_DELTA', the function returns KTIME_MAX. - 'base->clk' is updated to the jiffies value. - The next time we enter get_next_timer_interrupt(), taking into account no timer operations happened: base->clk = 1; base->next_expiry = NEXT_TIMER_MAX_DELTA; 'base->next_expiry != base->clk + NEXT_TIMER_MAX_DELTA', the function returns a valid expire time, which is incorrect. This ultimately might unnecessarily rearm sched's timer on nohz_full setups, and add latency to the system[1]. So, introduce 'base->timers_pending'[2], update it every time 'base->next_expiry' changes, and use it in get_next_timer_interrupt(). [1] See tick_nohz_stop_tick(). [2] A quick pahole check on x86_64 and arm64 shows it doesn't make 'struct timer_base' any bigger. Fixes: 31cd0e1 ("timers: Recalculate next timer interrupt only when necessary") Signed-off-by: Nicolas Saenz Julienne <[email protected]> Signed-off-by: Frederic Weisbecker <[email protected]>
1 parent 1a3402d commit aebacb7

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

kernel/time/timer.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ struct timer_base {
207207
unsigned int cpu;
208208
bool next_expiry_recalc;
209209
bool is_idle;
210+
bool timers_pending;
210211
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
211212
struct hlist_head vectors[WHEEL_SIZE];
212213
} ____cacheline_aligned;
@@ -595,6 +596,7 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
595596
* can reevaluate the wheel:
596597
*/
597598
base->next_expiry = bucket_expiry;
599+
base->timers_pending = true;
598600
base->next_expiry_recalc = false;
599601
trigger_dyntick_cpu(base, timer);
600602
}
@@ -1582,6 +1584,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base)
15821584
}
15831585

15841586
base->next_expiry_recalc = false;
1587+
base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);
15851588

15861589
return next;
15871590
}
@@ -1633,7 +1636,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
16331636
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
16341637
u64 expires = KTIME_MAX;
16351638
unsigned long nextevt;
1636-
bool is_max_delta;
16371639

16381640
/*
16391641
* Pretend that there is no timer pending if the cpu is offline.
@@ -1646,7 +1648,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
16461648
if (base->next_expiry_recalc)
16471649
base->next_expiry = __next_timer_interrupt(base);
16481650
nextevt = base->next_expiry;
1649-
is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
16501651

16511652
/*
16521653
* We have a fresh next event. Check whether we can forward the
@@ -1664,7 +1665,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
16641665
expires = basem;
16651666
base->is_idle = false;
16661667
} else {
1667-
if (!is_max_delta)
1668+
if (base->timers_pending)
16681669
expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
16691670
/*
16701671
* If we expect to sleep more than a tick, mark the base idle.
@@ -1947,6 +1948,7 @@ int timers_prepare_cpu(unsigned int cpu)
19471948
base = per_cpu_ptr(&timer_bases[b], cpu);
19481949
base->clk = jiffies;
19491950
base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
1951+
base->timers_pending = false;
19501952
base->is_idle = false;
19511953
}
19521954
return 0;

0 commit comments

Comments
 (0)