Skip to content

Commit 02dc9d1

Browse files
committed
Merge tag 'timers-ptp-2025-07-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timekeeping and VDSO updates from Thomas Gleixner: - Introduce support for auxiliary timekeepers PTP clocks can be disconnected from the universal CLOCK_TAI reality for various reasons including regulatory requirements for functional safety redundancy. The kernel so far only supports a single notion of time, which means that all clocks are correlated in frequency and only differ by offset to each other. Access to non-correlated PTP clocks has been available so far only through the file descriptor based "POSIX clock IDs", which are subject to locking and have to go all the way out to the hardware. The access is not only horribly slow, as it has to go all the way out to the NIC/PTP hardware, but it also prevents the kernel from reading the time of such clocks e.g. from the network stack, where it is required for TSN networking both on the transmit and receive side unless the hardware provides offloading. The auxiliary clocks provide a mechanism to support arbitrary clocks which are not correlated to the system clock. This is not restricted to the PTP use case on purpose as there is no kernel side association of these clocks to a particular PTP device because that's a pure user space configuration decision. Having them independent allows them to be utilized for other purposes and also enables them to be tested without hardware dependencies. To avoid pointless overhead these clocks have to be enabled individually via a new sysfs interface to reduce the overhead to a single compare in the hotpath if they are enabled at the Kconfig level at all. These clocks utilize the existing timekeeping/NTP infrastructures, which has been made possible over the recent releases by incrementally converting these infrastructures over from a single static instance to a multi-instance pointer based implementation without any performance regression reported. 
The auxiliary clocks provide the same "emulation" of a "correct" clock as the existing CLOCK_* variants do with an independent instance of data and provide the same steering mechanism through the existing sys_clock_adjtime() interface, which has been confirmed to work by the chronyd(8) maintainer. That allows lockless kernel internal and VDSO support to be provided so that applications and kernel internal functionalities can access these clocks without restrictions and at the same performance as the existing system clocks. - Avoid double notifications in the adjtimex() syscall. Not a big issue, but a trivially avoidable latency source. * tag 'timers-ptp-2025-07-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits) vdso/gettimeofday: Add support for auxiliary clocks vdso/vsyscall: Update auxiliary clock data in the datapage vdso: Introduce aux_clock_resolution_ns() vdso/gettimeofday: Introduce vdso_get_timestamp() vdso/gettimeofday: Introduce vdso_set_timespec() vdso/gettimeofday: Introduce vdso_clockid_valid() vdso/gettimeofday: Return bool from clock_gettime() helpers vdso/gettimeofday: Return bool from clock_getres() helpers vdso/helpers: Add helpers for seqlocks of single vdso_clock vdso/vsyscall: Split up __arch_update_vsyscall() into __arch_update_vdso_clock() vdso/vsyscall: Introduce a helper to fill clock configurations timekeeping: Remove the temporary CLOCK_AUX workaround timekeeping: Provide ktime_get_clock_ts64() timekeeping: Provide interface to control auxiliary clocks timekeeping: Provide update for auxiliary timekeepers timekeeping: Provide adjtimex() for auxiliary clocks timekeeping: Prepare do_adtimex() for auxiliary clocks timekeeping: Make do_adjtimex() reusable timekeeping: Add auxiliary clock support to __timekeeping_inject_offset() timekeeping: Make timekeeping_inject_offset() reusable ...
2 parents d614399 + cd3557a commit 02dc9d1

File tree

20 files changed

+950
-236
lines changed

20 files changed

+950
-236
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
What: /sys/kernel/time/aux_clocks/<ID>/enable
2+
Date: May 2025
3+
Contact: Thomas Gleixner <[email protected]>
4+
Description:
5+
Controls the enablement of auxiliary clock timekeepers.

arch/arm64/include/asm/vdso/vsyscall.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,11 @@
1313
* Update the vDSO data page to keep in sync with kernel timekeeping.
1414
*/
1515
static __always_inline
16-
void __arm64_update_vsyscall(struct vdso_time_data *vdata)
16+
void __arch_update_vdso_clock(struct vdso_clock *vc)
1717
{
18-
vdata->clock_data[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
19-
vdata->clock_data[CS_RAW].mask = VDSO_PRECISION_MASK;
18+
vc->mask = VDSO_PRECISION_MASK;
2019
}
21-
#define __arch_update_vsyscall __arm64_update_vsyscall
20+
#define __arch_update_vdso_clock __arch_update_vdso_clock
2221

2322
/* The asm-generic header needs to be included after the definitions above */
2423
#include <asm-generic/vdso/vsyscall.h>

include/asm-generic/vdso/vsyscall.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(vo
2222

2323
#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */
2424

25-
#ifndef __arch_update_vsyscall
26-
static __always_inline void __arch_update_vsyscall(struct vdso_time_data *vdata)
25+
#ifndef __arch_update_vdso_clock
26+
static __always_inline void __arch_update_vdso_clock(struct vdso_clock *vc)
2727
{
2828
}
29-
#endif /* __arch_update_vsyscall */
29+
#endif /* __arch_update_vdso_clock */
3030

3131
#ifndef __arch_sync_vdso_time_data
3232
static __always_inline void __arch_sync_vdso_time_data(struct vdso_time_data *vdata)

include/linux/posix-timers.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ static inline int clockid_to_fd(const clockid_t clk)
3737
return ~(clk >> 3);
3838
}
3939

40+
static inline bool clockid_aux_valid(clockid_t id)
41+
{
42+
return IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) && id >= CLOCK_AUX && id <= CLOCK_AUX_LAST;
43+
}
44+
4045
#ifdef CONFIG_POSIX_TIMERS
4146

4247
#include <linux/signal_types.h>

include/linux/timekeeper_internal.h

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,22 @@
1111
#include <linux/jiffies.h>
1212
#include <linux/time.h>
1313

14+
/**
15+
* enum timekeeper_ids - IDs for various timekeepers in the kernel
16+
* @TIMEKEEPER_CORE: The central core timekeeper managing system time
17+
* @TIMEKEEPER_AUX_FIRST: The first AUX timekeeper
18+
* @TIMEKEEPER_AUX_LAST: The last AUX timekeeper
19+
* @TIMEKEEPERS_MAX: The maximum number of timekeepers managed
20+
*/
21+
enum timekeeper_ids {
22+
TIMEKEEPER_CORE,
23+
#ifdef CONFIG_POSIX_AUX_CLOCKS
24+
TIMEKEEPER_AUX_FIRST,
25+
TIMEKEEPER_AUX_LAST = TIMEKEEPER_AUX_FIRST + MAX_AUX_CLOCKS - 1,
26+
#endif
27+
TIMEKEEPERS_MAX,
28+
};
29+
1430
/**
1531
* struct tk_read_base - base structure for timekeeping readout
1632
* @clock: Current clocksource used for timekeeping.
@@ -51,11 +67,14 @@ struct tk_read_base {
5167
* @offs_real: Offset clock monotonic -> clock realtime
5268
* @offs_boot: Offset clock monotonic -> clock boottime
5369
* @offs_tai: Offset clock monotonic -> clock tai
70+
* @offs_aux: Offset clock monotonic -> clock AUX
5471
* @coarse_nsec: The nanoseconds part for coarse time getters
72+
* @id: The timekeeper ID
5573
* @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
5674
* @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
5775
* @clock_was_set_seq: The sequence number of clock was set events
5876
* @cs_was_changed_seq: The sequence number of clocksource change events
77+
* @clock_valid: Indicator for valid clock
5978
* @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
6079
* @cycle_interval: Number of clock cycles in one NTP interval
6180
* @xtime_interval: Number of clock shifted nano seconds in one NTP
@@ -95,13 +114,16 @@ struct tk_read_base {
95114
* @monotonic_to_boot is a timespec64 representation of @offs_boot to
96115
* accelerate the VDSO update for CLOCK_BOOTTIME.
97116
*
117+
* @offs_aux is used by the auxiliary timekeepers which do not utilize any
118+
* of the regular timekeeper offset fields.
119+
*
98120
* The cacheline ordering of the structure is optimized for in kernel usage of
99121
* the ktime_get() and ktime_get_ts64() family of time accessors. Struct
100122
* timekeeper is prepended in the core timekeeping code with a sequence count,
101123
* which results in the following cacheline layout:
102124
*
103125
* 0: seqcount, tkr_mono
104-
* 1: xtime_sec ... coarse_nsec
126+
* 1: xtime_sec ... id
105127
* 2: tkr_raw, raw_sec
106128
* 3,4: Internal variables
107129
*
@@ -121,8 +143,12 @@ struct timekeeper {
121143
struct timespec64 wall_to_monotonic;
122144
ktime_t offs_real;
123145
ktime_t offs_boot;
124-
ktime_t offs_tai;
146+
union {
147+
ktime_t offs_tai;
148+
ktime_t offs_aux;
149+
};
125150
u32 coarse_nsec;
151+
enum timekeeper_ids id;
126152

127153
/* Cacheline 2: */
128154
struct tk_read_base tkr_raw;
@@ -131,6 +157,7 @@ struct timekeeper {
131157
/* Cacheline 3 and 4 (timekeeping internal variables): */
132158
unsigned int clock_was_set_seq;
133159
u8 cs_was_changed_seq;
160+
u8 clock_valid;
134161

135162
struct timespec64 monotonic_to_boot;
136163

@@ -163,4 +190,10 @@ static inline void update_vsyscall_tz(void)
163190
}
164191
#endif
165192

193+
#if defined(CONFIG_GENERIC_GETTIMEOFDAY) && defined(CONFIG_POSIX_AUX_CLOCKS)
194+
extern void vdso_time_update_aux(struct timekeeper *tk);
195+
#else
196+
static inline void vdso_time_update_aux(struct timekeeper *tk) { }
197+
#endif
198+
166199
#endif /* _LINUX_TIMEKEEPER_INTERNAL_H */

include/linux/timekeeping.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ extern void ktime_get_ts64(struct timespec64 *ts);
4444
extern void ktime_get_real_ts64(struct timespec64 *tv);
4545
extern void ktime_get_coarse_ts64(struct timespec64 *ts);
4646
extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
47+
extern void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts);
4748

4849
/* Multigrain timestamp interfaces */
4950
extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
@@ -263,6 +264,17 @@ extern bool timekeeping_rtc_skipresume(void);
263264

264265
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
265266

267+
/*
268+
* Auxiliary clock interfaces
269+
*/
270+
#ifdef CONFIG_POSIX_AUX_CLOCKS
271+
extern bool ktime_get_aux(clockid_t id, ktime_t *kt);
272+
extern bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt);
273+
#else
274+
static inline bool ktime_get_aux(clockid_t id, ktime_t *kt) { return false; }
275+
static inline bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt) { return false; }
276+
#endif
277+
266278
/**
267279
* struct system_time_snapshot - simultaneous raw/real time capture with
268280
* counter value

include/uapi/linux/time.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,17 @@ struct timezone {
6464
#define CLOCK_TAI 11
6565

6666
#define MAX_CLOCKS 16
67+
68+
/*
69+
* AUX clock support. AUXiliary clocks are dynamically configured by
70+
* enabling a clock ID. These clocks can be steered independently of the
71+
* core timekeeper. The kernel can support up to 8 auxiliary clocks, but
72+
* the actual limit depends on eventual architecture constraints vs. VDSO.
73+
*/
74+
#define CLOCK_AUX MAX_CLOCKS
75+
#define MAX_AUX_CLOCKS 8
76+
#define CLOCK_AUX_LAST (CLOCK_AUX + MAX_AUX_CLOCKS - 1)
77+
6778
#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC)
6879
#define CLOCKS_MONO CLOCK_MONOTONIC
6980

include/vdso/auxclock.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#ifndef _VDSO_AUXCLOCK_H
3+
#define _VDSO_AUXCLOCK_H
4+
5+
#include <uapi/linux/time.h>
6+
#include <uapi/linux/types.h>
7+
8+
static __always_inline u64 aux_clock_resolution_ns(void)
9+
{
10+
return 1;
11+
}
12+
13+
#endif /* _VDSO_AUXCLOCK_H */

include/vdso/datapage.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#ifndef __ASSEMBLY__
66

77
#include <linux/compiler.h>
8+
#include <uapi/linux/bits.h>
89
#include <uapi/linux/time.h>
910
#include <uapi/linux/types.h>
1011
#include <uapi/asm-generic/errno-base.h>
@@ -38,13 +39,15 @@ struct vdso_arch_data {
3839
#endif
3940

4041
#define VDSO_BASES (CLOCK_TAI + 1)
42+
#define VDSO_BASE_AUX 0
4143
#define VDSO_HRES (BIT(CLOCK_REALTIME) | \
4244
BIT(CLOCK_MONOTONIC) | \
4345
BIT(CLOCK_BOOTTIME) | \
4446
BIT(CLOCK_TAI))
4547
#define VDSO_COARSE (BIT(CLOCK_REALTIME_COARSE) | \
4648
BIT(CLOCK_MONOTONIC_COARSE))
4749
#define VDSO_RAW (BIT(CLOCK_MONOTONIC_RAW))
50+
#define VDSO_AUX __GENMASK(CLOCK_AUX_LAST, CLOCK_AUX)
4851

4952
#define CS_HRES_COARSE 0
5053
#define CS_RAW 1
@@ -117,6 +120,7 @@ struct vdso_clock {
117120
* @arch_data: architecture specific data (optional, defaults
118121
* to an empty struct)
119122
* @clock_data: clocksource related data (array)
123+
* @aux_clock_data: auxiliary clocksource related data (array)
120124
* @tz_minuteswest: minutes west of Greenwich
121125
* @tz_dsttime: type of DST correction
122126
* @hrtimer_res: hrtimer resolution
@@ -133,6 +137,7 @@ struct vdso_time_data {
133137
struct arch_vdso_time_data arch_data;
134138

135139
struct vdso_clock clock_data[CS_BASES];
140+
struct vdso_clock aux_clock_data[MAX_AUX_CLOCKS];
136141

137142
s32 tz_minuteswest;
138143
s32 tz_dsttime;

include/vdso/helpers.h

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,32 +28,58 @@ static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc,
2828
return seq != start;
2929
}
3030

31-
static __always_inline void vdso_write_begin(struct vdso_time_data *vd)
31+
static __always_inline void vdso_write_seq_begin(struct vdso_clock *vc)
3232
{
33-
struct vdso_clock *vc = vd->clock_data;
33+
/*
34+
* WRITE_ONCE() is required otherwise the compiler can validly tear
35+
* updates to vc->seq and it is possible that the value seen by the
36+
* reader is inconsistent.
37+
*/
38+
WRITE_ONCE(vc->seq, vc->seq + 1);
39+
}
3440

41+
static __always_inline void vdso_write_seq_end(struct vdso_clock *vc)
42+
{
3543
/*
3644
* WRITE_ONCE() is required otherwise the compiler can validly tear
37-
* updates to vd[x].seq and it is possible that the value seen by the
45+
* updates to vc->seq and it is possible that the value seen by the
3846
* reader is inconsistent.
3947
*/
40-
WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1);
41-
WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1);
48+
WRITE_ONCE(vc->seq, vc->seq + 1);
49+
}
50+
51+
static __always_inline void vdso_write_begin_clock(struct vdso_clock *vc)
52+
{
53+
vdso_write_seq_begin(vc);
54+
/* Ensure the sequence invalidation is visible before data is modified */
55+
smp_wmb();
56+
}
57+
58+
static __always_inline void vdso_write_end_clock(struct vdso_clock *vc)
59+
{
60+
/* Ensure the data update is visible before the sequence is set valid again */
61+
smp_wmb();
62+
vdso_write_seq_end(vc);
63+
}
64+
65+
static __always_inline void vdso_write_begin(struct vdso_time_data *vd)
66+
{
67+
struct vdso_clock *vc = vd->clock_data;
68+
69+
vdso_write_seq_begin(&vc[CS_HRES_COARSE]);
70+
vdso_write_seq_begin(&vc[CS_RAW]);
71+
/* Ensure the sequence invalidation is visible before data is modified */
4272
smp_wmb();
4373
}
4474

4575
static __always_inline void vdso_write_end(struct vdso_time_data *vd)
4676
{
4777
struct vdso_clock *vc = vd->clock_data;
4878

79+
/* Ensure the data update is visible before the sequence is set valid again */
4980
smp_wmb();
50-
/*
51-
* WRITE_ONCE() is required otherwise the compiler can validly tear
52-
* updates to vd[x].seq and it is possible that the value seen by the
53-
* reader is inconsistent.
54-
*/
55-
WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1);
56-
WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1);
81+
vdso_write_seq_end(&vc[CS_HRES_COARSE]);
82+
vdso_write_seq_end(&vc[CS_RAW]);
5783
}
5884

5985
#endif /* !__ASSEMBLY__ */

0 commit comments

Comments
 (0)