Skip to content

Commit d7c898a

Browse files
committed
Merge tag 'timers-core-for-vfs' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/tip/tip into vfs.mgtime
Timekeeping interfaces for consumption by the VFS tree. Signed-off-by: Christian Brauner <[email protected]>
2 parents 8cf0b93 + 2a15385 commit d7c898a

File tree

4 files changed

+138
-0
lines changed

4 files changed

+138
-0
lines changed

include/linux/timekeeping.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ extern void ktime_get_real_ts64(struct timespec64 *tv);
4545
extern void ktime_get_coarse_ts64(struct timespec64 *ts);
4646
extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
4747

48+
/*
 * Multigrain timestamp interfaces
 *
 * Filesystem-specific helpers built around the mg_floor value tracked in
 * kernel/time/timekeeping.c:
 *  - ktime_get_coarse_real_ts64_mg() fills @ts with the later of the
 *    coarse-grained realtime clock and the floor converted to realtime.
 *  - ktime_get_real_ts64_mg() samples a fine-grained time, attempts to
 *    install it as the new floor and fills @ts with the resulting floor
 *    value expressed as realtime.
 *  - timekeeping_get_mg_floor_swaps() returns the sum of the per-CPU
 *    counters of floor updates (defined in kernel/time/timekeeping_debug.c).
 */
49+
extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
50+
extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
51+
extern unsigned long timekeeping_get_mg_floor_swaps(void);
52+
4853
void getboottime64(struct timespec64 *ts);
4954

5055
/*

kernel/time/timekeeping.c

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,23 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned = {
114114
.base[1] = FAST_TK_INIT,
115115
};
116116

117+
/*
118+
* Multigrain timestamps require tracking the latest fine-grained timestamp
119+
* that has been issued, and never returning a coarse-grained timestamp that is
120+
* earlier than that value.
121+
*
122+
* mg_floor represents the latest fine-grained time that has been handed out as
123+
* a file timestamp on the system. This is tracked as a monotonic ktime_t, and
124+
* converted to a realtime clock value on an as-needed basis.
125+
*
126+
* Maintaining mg_floor ensures the multigrain interfaces never issue a
127+
* timestamp earlier than one that has been previously issued.
128+
*
129+
* The exception to this rule is when there is a backward realtime clock jump. If
130+
* such an event occurs, a timestamp can appear to be earlier than a previous one.
*
* mg_floor is read with atomic64_read() and advanced with
* atomic64_try_cmpxchg() by the multigrain getters in this file. As a static
* object without an initializer it starts out as zero.
131+
*/
132+
static __cacheline_aligned_in_smp atomic64_t mg_floor;
133+
117134
static inline void tk_normalize_xtime(struct timekeeper *tk)
118135
{
119136
while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
@@ -2394,6 +2411,94 @@ void ktime_get_coarse_real_ts64(struct timespec64 *ts)
23942411
}
23952412
EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
23962413

2414+
/**
2415+
* ktime_get_coarse_real_ts64_mg - return later of coarse grained time or floor
2416+
* @ts: timespec64 to be filled
2417+
*
2418+
* Fetch the global mg_floor value, convert it to realtime and compare it
2419+
* to the current coarse-grained time. Fill @ts with whichever is
2420+
* latest. Note that this is a filesystem-specific interface and should be
2421+
* avoided outside of that context.
2422+
*/
2423+
void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts)
2424+
{
2425+
struct timekeeper *tk = &tk_core.timekeeper;
2426+
/* The floor is sampled once, before the clock is read below. */
u64 floor = atomic64_read(&mg_floor);
2427+
ktime_t f_real, offset, coarse;
2428+
unsigned int seq;
2429+
2430+
/*
 * Read the coarse time and the realtime offset under tk_core.seq,
 * repeating the read whenever read_seqcount_retry() asks for it, so
 * that both values come from the same timekeeper update.
 */
do {
2431+
seq = read_seqcount_begin(&tk_core.seq);
2432+
*ts = tk_xtime(tk);
2433+
offset = tk_core.timekeeper.offs_real;
2434+
} while (read_seqcount_retry(&tk_core.seq, seq));
2435+
2436+
coarse = timespec64_to_ktime(*ts);
2437+
/* mg_floor is kept as a monotonic value; add offs_real to compare it as realtime. */
f_real = ktime_add(floor, offset);
2438+
/* Only replace the coarse time when the realtime floor is later. */
if (ktime_after(f_real, coarse))
2439+
*ts = ktime_to_timespec64(f_real);
2440+
}
2441+
2442+
/**
2443+
* ktime_get_real_ts64_mg - attempt to update floor value and return result
2444+
* @ts: pointer to the timespec to be set
2445+
*
2446+
* Get a monotonic fine-grained time value and attempt to swap it into
2447+
* mg_floor. If that succeeds then accept the new floor value. If it fails
2448+
* then another task raced in during the interim time and updated the
2449+
* floor. Since any update to the floor must be later than the previous
2450+
* floor, either outcome is acceptable.
2451+
*
2452+
* Typically this will be called after calling ktime_get_coarse_real_ts64_mg(),
2453+
* and determining that the resulting coarse-grained timestamp did not effect
2454+
* a change in ctime. Any more recent floor value would effect a change to
2455+
* ctime, so there is no need to retry the atomic64_try_cmpxchg() on failure.
2456+
*
2457+
* @ts will be filled with the latest floor value, regardless of the outcome of
2458+
* the cmpxchg. Note that this is a filesystem specific interface and should be
2459+
* avoided outside of that context.
2460+
*/
2461+
void ktime_get_real_ts64_mg(struct timespec64 *ts)
2462+
{
2463+
struct timekeeper *tk = &tk_core.timekeeper;
2464+
/* Expected value for the cmpxchg below; sampled before the clock is read. */
ktime_t old = atomic64_read(&mg_floor);
2465+
ktime_t offset, mono;
2466+
unsigned int seq;
2467+
u64 nsecs;
2468+
2469+
/*
 * Under tk_core.seq, snapshot the realtime seconds, the monotonic base,
 * the nanoseconds reported by timekeeping_get_ns() and the realtime
 * offset; repeat whenever read_seqcount_retry() asks for it.
 */
do {
2470+
seq = read_seqcount_begin(&tk_core.seq);
2471+
2472+
ts->tv_sec = tk->xtime_sec;
2473+
mono = tk->tkr_mono.base;
2474+
nsecs = timekeeping_get_ns(&tk->tkr_mono);
2475+
offset = tk_core.timekeeper.offs_real;
2476+
} while (read_seqcount_retry(&tk_core.seq, seq));
2477+
2478+
/* Fine-grained monotonic time: the base plus the sampled nanoseconds. */
mono = ktime_add_ns(mono, nsecs);
2479+
2480+
/*
2481+
* Attempt to update the floor with the new time value. As any
2482+
* update must be later than the existing floor, and would effect
2483+
* a change to ctime from the perspective of the current task,
2484+
* accept the resulting floor value regardless of the outcome of
2485+
* the swap.
2486+
*/
2487+
if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) {
2488+
/*
 * Swap succeeded: report xtime_sec plus the sampled nanoseconds as
 * the realtime timestamp and account the floor update.
 */
ts->tv_nsec = 0;
2489+
timespec64_add_ns(ts, nsecs);
2490+
timekeeping_inc_mg_floor_swaps();
2491+
} else {
2492+
/*
2493+
* Another task changed mg_floor since "old" was fetched.
2494+
* "old" has been updated with the latest value of "mg_floor".
2495+
* That value is newer than the previous floor value, which
2496+
* is enough to effect a change to ctime. Accept it.
2497+
*/
2498+
*ts = ktime_to_timespec64(ktime_add(old, offset));
2499+
}
2500+
}
2501+
23972502
void ktime_get_coarse_ts64(struct timespec64 *ts)
23982503
{
23992504
struct timekeeper *tk = &tk_core.timekeeper;

kernel/time/timekeeping_debug.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717

1818
#define NUM_BINS 32
1919

20+
/* Incremented every time mg_floor is updated */
21+
DEFINE_PER_CPU(unsigned long, timekeeping_mg_floor_swaps);
22+
2023
static unsigned int sleep_time_bin[NUM_BINS] = {0};
2124

2225
static int tk_debug_sleep_time_show(struct seq_file *s, void *data)
@@ -53,3 +56,13 @@ void tk_debug_account_sleep_time(const struct timespec64 *t)
5356
(s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC);
5457
}
5558

59+
/**
 * timekeeping_get_mg_floor_swaps - total number of mg_floor updates
 *
 * Adds up the timekeeping_mg_floor_swaps counter of every possible CPU.
 * The per-CPU reads are wrapped in data_race(); NOTE(review): presumably
 * because the counters are read without synchronization and an approximate
 * total is acceptable - confirm.
 *
 * Return: the summed counter value.
 */
unsigned long timekeeping_get_mg_floor_swaps(void)
60+
{
61+
unsigned long sum = 0;
62+
int cpu;
63+
64+
for_each_possible_cpu(cpu)
65+
sum += data_race(per_cpu(timekeeping_mg_floor_swaps, cpu));
66+
67+
return sum;
68+
}

kernel/time/timekeeping_internal.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,24 @@
1010
* timekeeping debug functions
1111
*/
1212
#ifdef CONFIG_DEBUG_FS
13+
14+
/* Per-CPU count of mg_floor updates; the definition lives in timekeeping_debug.c. */
DECLARE_PER_CPU(unsigned long, timekeeping_mg_floor_swaps);
15+
16+
/* Increment the calling CPU's mg_floor swap counter (CONFIG_DEBUG_FS variant). */
static inline void timekeeping_inc_mg_floor_swaps(void)
17+
{
18+
this_cpu_inc(timekeeping_mg_floor_swaps);
19+
}
20+
1321
extern void tk_debug_account_sleep_time(const struct timespec64 *t);
22+
1423
#else
24+
1525
#define tk_debug_account_sleep_time(x)
26+
27+
/*
 * No-op stub used when CONFIG_DEBUG_FS is not set: the per-CPU swap counter
 * is not declared in that configuration, so there is nothing to increment.
 */
static inline void timekeeping_inc_mg_floor_swaps(void)
28+
{
29+
}
30+
1631
#endif
1732

1833
#ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE

0 commit comments

Comments
 (0)