Skip to content

Commit 757b000

Browse files
johnstultz-workKAGA-KOKO
authored andcommitted
timekeeping: Fix possible inconsistencies in _COARSE clockids
Lei Chen raised an issue with CLOCK_MONOTONIC_COARSE seeing time inconsistencies. Lei tracked down that this was being caused by the adjustment tk->tkr_mono.xtime_nsec -= offset; which is made to compensate for the unaccumulated cycles in offset when the multiplicator is adjusted forward, so that the non-_COARSE clockids don't see inconsistencies. However, the _COARSE clockid getter functions use the adjusted xtime_nsec value directly and do not compensate the negative offset via the clocksource delta multiplied with the new multiplicator. In that case the caller can observe time going backwards in consecutive calls. By design, this negative adjustment should be fine, because the logic run from timekeeping_adjust() is done after it accumulated approximately multiplicator * interval_cycles into xtime_nsec. The accumulated value is always larger then the mult_adj * offset value, which is subtracted from xtime_nsec. Both operations are done together under the tk_core.lock, so the net change to xtime_nsec is always always be positive. However, do_adjtimex() calls into timekeeping_advance() as well, to to apply the NTP frequency adjustment immediately. In this case, timekeeping_advance() does not return early when the offset is smaller then interval_cycles. In that case there is no time accumulated into xtime_nsec. But the subsequent call into timekeeping_adjust(), which modifies the multiplicator, subtracts from xtime_nsec to correct for the new multiplicator. Here because there was no accumulation, xtime_nsec becomes smaller than before, which opens a window up to the next accumulation, where the _COARSE clockid getters, which don't compensate for the offset, can observe the inconsistency. To fix this, rework the timekeeping_advance() logic so that when invoked from do_adjtimex(), the time is immediately forwarded to accumulate also the sub-interval portion into xtime. That means the remaining offset becomes zero and the subsequent multiplier adjustment therefore does not modify xtime_nsec. There is another related inconsistency. If xtime is forwarded due to the instantaneous multiplier adjustment, the NTP error, which was accumulated with the previous setting, becomes meaningless. Therefore clear the NTP error as well, after forwarding the clock for the instantaneous multiplier update. Fixes: da15cfd ("time: Introduce CLOCK_REALTIME_COARSE") Reported-by: Lei Chen <[email protected]> Signed-off-by: John Stultz <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Link: https://lore.kernel.org/all/[email protected] Closes: https://lore.kernel.org/lkml/[email protected]/
1 parent d1c3a3f commit 757b000

File tree

1 file changed

+69
-25
lines changed

1 file changed

+69
-25
lines changed

kernel/time/timekeeping.c

Lines changed: 69 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -682,20 +682,19 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act
682682
}
683683

684684
/**
685-
* timekeeping_forward_now - update clock to the current time
685+
* timekeeping_forward - update clock to given cycle now value
686686
* @tk: Pointer to the timekeeper to update
687+
* @cycle_now: Current clocksource read value
687688
*
688689
* Forward the current clock to update its state since the last call to
689690
* update_wall_time(). This is useful before significant clock changes,
690691
* as it avoids having to deal with this time offset explicitly.
691692
*/
692-
static void timekeeping_forward_now(struct timekeeper *tk)
693+
static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now)
693694
{
694-
u64 cycle_now, delta;
695+
u64 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
696+
tk->tkr_mono.clock->max_raw_delta);
695697

696-
cycle_now = tk_clock_read(&tk->tkr_mono);
697-
delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
698-
tk->tkr_mono.clock->max_raw_delta);
699698
tk->tkr_mono.cycle_last = cycle_now;
700699
tk->tkr_raw.cycle_last = cycle_now;
701700

@@ -710,6 +709,21 @@ static void timekeeping_forward_now(struct timekeeper *tk)
710709
}
711710
}
712711

712+
/**
713+
* timekeeping_forward_now - update clock to the current time
714+
* @tk: Pointer to the timekeeper to update
715+
*
716+
* Forward the current clock to update its state since the last call to
717+
* update_wall_time(). This is useful before significant clock changes,
718+
* as it avoids having to deal with this time offset explicitly.
719+
*/
720+
static void timekeeping_forward_now(struct timekeeper *tk)
721+
{
722+
u64 cycle_now = tk_clock_read(&tk->tkr_mono);
723+
724+
timekeeping_forward(tk, cycle_now);
725+
}
726+
713727
/**
714728
* ktime_get_real_ts64 - Returns the time of day in a timespec64.
715729
* @ts: pointer to the timespec to be set
@@ -2151,6 +2165,54 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
21512165
return offset;
21522166
}
21532167

2168+
static u64 timekeeping_accumulate(struct timekeeper *tk, u64 offset,
2169+
enum timekeeping_adv_mode mode,
2170+
unsigned int *clock_set)
2171+
{
2172+
int shift = 0, maxshift;
2173+
2174+
/*
2175+
* TK_ADV_FREQ indicates that adjtimex(2) directly set the
2176+
* frequency or the tick length.
2177+
*
2178+
* Accumulate the offset, so that the new multiplier starts from
2179+
* now. This is required as otherwise for offsets, which are
2180+
* smaller than tk::cycle_interval, timekeeping_adjust() could set
2181+
* xtime_nsec backwards, which subsequently causes time going
2182+
* backwards in the coarse time getters. But even for the case
2183+
* where offset is greater than tk::cycle_interval the periodic
2184+
* accumulation does not have much value.
2185+
*
2186+
* Also reset tk::ntp_error as it does not make sense to keep the
2187+
* old accumulated error around in this case.
2188+
*/
2189+
if (mode == TK_ADV_FREQ) {
2190+
timekeeping_forward(tk, tk->tkr_mono.cycle_last + offset);
2191+
tk->ntp_error = 0;
2192+
return 0;
2193+
}
2194+
2195+
/*
2196+
* With NO_HZ we may have to accumulate many cycle_intervals
2197+
* (think "ticks") worth of time at once. To do this efficiently,
2198+
* we calculate the largest doubling multiple of cycle_intervals
2199+
* that is smaller than the offset. We then accumulate that
2200+
* chunk in one go, and then try to consume the next smaller
2201+
* doubled multiple.
2202+
*/
2203+
shift = ilog2(offset) - ilog2(tk->cycle_interval);
2204+
shift = max(0, shift);
2205+
/* Bound shift to one less than what overflows tick_length */
2206+
maxshift = (64 - (ilog2(ntp_tick_length()) + 1)) - 1;
2207+
shift = min(shift, maxshift);
2208+
while (offset >= tk->cycle_interval) {
2209+
offset = logarithmic_accumulation(tk, offset, shift, clock_set);
2210+
if (offset < tk->cycle_interval << shift)
2211+
shift--;
2212+
}
2213+
return offset;
2214+
}
2215+
21542216
/*
21552217
* timekeeping_advance - Updates the timekeeper to the current time and
21562218
* current NTP tick length
@@ -2160,7 +2222,6 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
21602222
struct timekeeper *tk = &tk_core.shadow_timekeeper;
21612223
struct timekeeper *real_tk = &tk_core.timekeeper;
21622224
unsigned int clock_set = 0;
2163-
int shift = 0, maxshift;
21642225
u64 offset;
21652226

21662227
guard(raw_spinlock_irqsave)(&tk_core.lock);
@@ -2177,24 +2238,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
21772238
if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
21782239
return false;
21792240

2180-
/*
2181-
* With NO_HZ we may have to accumulate many cycle_intervals
2182-
* (think "ticks") worth of time at once. To do this efficiently,
2183-
* we calculate the largest doubling multiple of cycle_intervals
2184-
* that is smaller than the offset. We then accumulate that
2185-
* chunk in one go, and then try to consume the next smaller
2186-
* doubled multiple.
2187-
*/
2188-
shift = ilog2(offset) - ilog2(tk->cycle_interval);
2189-
shift = max(0, shift);
2190-
/* Bound shift to one less than what overflows tick_length */
2191-
maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
2192-
shift = min(shift, maxshift);
2193-
while (offset >= tk->cycle_interval) {
2194-
offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
2195-
if (offset < tk->cycle_interval<<shift)
2196-
shift--;
2197-
}
2241+
offset = timekeeping_accumulate(tk, offset, mode, &clock_set);
21982242

21992243
/* Adjust the multiplier to correct NTP error */
22002244
timekeeping_adjust(tk, offset);

0 commit comments

Comments
 (0)