Skip to content

Commit 3d7a348

Browse files
author
sf
committed
pvclock: Fix reading clock, add time sensor
* Fix integer overflows during multiplication. This caused time to run at the wrong speed on some machines, depending on the TSC frequency.
* Increase accuracy by disabling interrupts while reading the clock.
* Fix the non-TSC_STABLE handling, which was broken because it wrongly compared 32-bit and 64-bit values. (This requires some atomic hackery on i386.)
* Add a timedelta sensor using the KVM WALL_CLOCK MSR, similar to the sensor in vmmci(4).

Partially inspired by an earlier diff by cheloha@.

ok kettenis@ mlarkin@
1 parent 6498ec4 commit 3d7a348

File tree

2 files changed

+175
-23
lines changed

2 files changed

+175
-23
lines changed

sys/arch/i386/include/cpufunc.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* $OpenBSD: cpufunc.h,v 1.33 2020/09/13 11:53:16 jsg Exp $ */
1+
/* $OpenBSD: cpufunc.h,v 1.34 2025/06/20 14:06:34 sf Exp $ */
22
/* $NetBSD: cpufunc.h,v 1.8 1994/10/27 04:15:59 cgd Exp $ */
33

44
/*
@@ -229,6 +229,15 @@ rdtsc(void)
229229
return (tsc);
230230
}
231231

232+
/*
 * Read the time stamp counter, issuing an lfence immediately before the
 * rdtsc instruction.  The "=A" output constraint makes the compiler take
 * the 64-bit result from the edx:eax register pair (i386 only).
 * NOTE(review): the lfence is presumably there to keep rdtsc from being
 * executed ahead of earlier loads -- confirm against the CPU vendor manuals.
 */
static inline uint64_t
233+
rdtsc_lfence(void)
234+
{
235+
uint64_t tsc;
236+
237+
__asm volatile("lfence; rdtsc" : "=A" (tsc));
238+
return tsc;
239+
}
240+
232241
static __inline void
233242
wrmsr(u_int msr, u_int64_t newval)
234243
{

sys/dev/pv/pvclock.c

Lines changed: 165 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* $OpenBSD: pvclock.c,v 1.13 2025/03/31 15:40:22 sf Exp $ */
1+
/* $OpenBSD: pvclock.c,v 1.14 2025/06/20 14:06:34 sf Exp $ */
22

33
/*
44
* Copyright (c) 2018 Reyk Floeter <[email protected]>
@@ -22,6 +22,7 @@
2222

2323
#include <sys/param.h>
2424
#include <sys/systm.h>
25+
#include <sys/timeout.h>
2526
#include <sys/timetc.h>
2627

2728
#include <machine/cpu.h>
@@ -35,13 +36,65 @@
3536
#define PMAP_NOCRYPT 0
3637
#endif
3738

38-
uint pvclock_lastcount;
39+
#if defined(__amd64__)
40+
41+
/*
 * amd64 variant: fetch the 64-bit value behind ptr with one plain
 * volatile read and hand it back.
 */
static inline uint64_t
pvclock_atomic_load(volatile uint64_t *ptr)
{
	uint64_t cur = *ptr;

	return (cur);
}
46+
47+
/*
 * amd64 variant: 64-bit compare-and-swap implemented on top of
 * atomic_cas_ulong().  p is the location, e the expected value and n the
 * replacement; whatever atomic_cas_ulong() returns is passed back to the
 * caller unchanged.
 */
static inline uint64_t
pvclock_atomic_cas(volatile uint64_t *p, uint64_t e,
    uint64_t n)
{
	volatile unsigned long *word = (volatile unsigned long *)p;

	return (atomic_cas_ulong(word, e, n));
}
53+
54+
#elif defined(__i386__)
55+
56+
/*
57+
* We are running on virtualization. Therefore we can assume that we
58+
* have cmpxchg8b, available on pentium and newer.
59+
*/
60+
/*
 * i386 variant: read a 64-bit value in one locked operation.
 *
 * The two movl instructions copy ecx:ebx into edx:eax, so the comparand
 * and the replacement handed to cmpxchg8b are the same value.  If memory
 * happens to match, that same value is stored back; if it does not match,
 * cmpxchg8b loads the memory contents into edx:eax.  Either way the memory
 * contents are unchanged and edx:eax (the "A" output) ends up holding the
 * current 64-bit value, which is returned.
 */
static inline uint64_t
61+
pvclock_atomic_load(volatile uint64_t *ptr)
62+
{
63+
uint64_t val;
64+
__asm__ volatile ("movl %%ebx,%%eax; movl %%ecx, %%edx; "
65+
"lock cmpxchg8b %1" : "=&A" (val) : "m" (*ptr));
66+
return val;
67+
}
68+
69+
/*
 * i386 variant: 64-bit compare-and-swap using lock cmpxchg8b.
 *
 * e is passed in edx:eax as the expected value and n is split into
 * ebx (low 32 bits) and ecx (high 32 bits) as the replacement.  The
 * instruction stores n to *p only when *p equals e; otherwise it loads
 * the current contents of *p into edx:eax.  The function returns e as it
 * stands after the instruction, i.e. the value that was found in *p, so
 * the caller detects success by comparing the result with what it passed.
 */
static inline uint64_t
70+
pvclock_atomic_cas(volatile uint64_t *p, uint64_t e,
71+
uint64_t n)
72+
{
73+
__asm volatile("lock cmpxchg8b %1" : "+A" (e), "+m" (*p)
74+
: "b" ((uint32_t)n), "c" ((uint32_t)(n >> 32)));
75+
return (e);
76+
}
77+
78+
#else
79+
#error "pvclock: unsupported x86 architecture?"
80+
#endif
81+
82+
83+
uint64_t pvclock_lastcount;
84+
85+
/*
 * Layout of the page shared with the hypervisor.  The physical address of
 * the page is written to KVM_MSR_SYSTEM_TIME (so ti sits at offset 0) and
 * the address of wc, computed with offsetof(), is written to
 * KVM_MSR_WALL_CLOCK.
 */
struct pvpage {
86+
/* time info read by pvclock_get() */
struct pvclock_time_info ti;
87+
/* wall clock read by pvclock_tick() */
struct pvclock_wall_clock wc;
88+
};
3989

4090
/*
 * Per-device state; device pointers handed to the driver are cast to this
 * structure.  In this diff the untyped sc_time pointer is replaced by
 * sc_page, and the sensor device, sensor and timeout used by
 * pvclock_tick() are added.  sc_paddr holds the physical address of the
 * page behind sc_page.
 */
struct pvclock_softc {
4191
struct device sc_dev;
42-
void *sc_time;
92+
struct pvpage *sc_page;
4393
paddr_t sc_paddr;
4494
struct timecounter *sc_tc;
95+
struct ksensordev sc_sensordev;
96+
struct ksensor sc_sensor;
97+
struct timeout sc_tick;
4598
};
4699

47100
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)
@@ -50,12 +103,16 @@ int pvclock_match(struct device *, void *, void *);
50103
void pvclock_attach(struct device *, struct device *, void *);
51104
int pvclock_activate(struct device *, int);
52105

106+
uint64_t pvclock_get(struct timecounter *);
53107
uint pvclock_get_timecount(struct timecounter *);
108+
void pvclock_tick_hook(struct device *);
54109

55110
static inline uint32_t
56111
pvclock_read_begin(const struct pvclock_time_info *);
57112
static inline int
58113
pvclock_read_done(const struct pvclock_time_info *, uint32_t);
114+
static inline uint64_t
115+
pvclock_scale_delta(uint64_t, uint32_t, int);
59116

60117
const struct cfattach pvclock_ca = {
61118
sizeof(struct pvclock_softc),
@@ -123,28 +180,31 @@ void
123180
pvclock_attach(struct device *parent, struct device *self, void *aux)
124181
{
125182
struct pvclock_softc *sc = (struct pvclock_softc *)self;
183+
struct pv_attach_args *pva = aux;
126184
struct pvclock_time_info *ti;
127-
paddr_t pa;
185+
paddr_t pa;
128186
uint32_t version;
129187
uint8_t flags;
130188
struct vm_page *page;
189+
struct pvbus_hv *kvm;
131190

132191
page = uvm_pagealloc(NULL, 0, NULL, 0);
133192
if (page == NULL)
134193
goto err;
135-
sc->sc_time = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
136-
if (sc->sc_time == NULL)
194+
sc->sc_page = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
195+
if (sc->sc_page == NULL)
137196
goto err;
138197

139198
pa = VM_PAGE_TO_PHYS(page);
140-
pmap_kenter_pa((vaddr_t)sc->sc_time, pa | PMAP_NOCRYPT,
199+
pmap_kenter_pa((vaddr_t)sc->sc_page, pa | PMAP_NOCRYPT,
141200
PROT_READ | PROT_WRITE);
142-
memset(sc->sc_time, 0, PAGE_SIZE);
201+
pmap_update(pmap_kernel());
202+
memset(sc->sc_page, 0, PAGE_SIZE);
143203

144204
wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
145205
sc->sc_paddr = pa;
146206

147-
ti = sc->sc_time;
207+
ti = &sc->sc_page->ti;
148208
do {
149209
version = pvclock_read_begin(ti);
150210
flags = ti->ti_flags;
@@ -168,6 +228,22 @@ pvclock_attach(struct device *parent, struct device *self, void *aux)
168228

169229
tc_init(sc->sc_tc);
170230

231+
/*
232+
* The openbsd vmm pvclock does not support the WALL_CLOCK msr,
233+
* therefore we look only for kvm.
234+
*/
235+
kvm = &pva->pva_hv[PVBUS_KVM];
236+
if (kvm->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) {
237+
strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname,
238+
sizeof(sc->sc_sensordev.xname));
239+
sc->sc_sensor.type = SENSOR_TIMEDELTA;
240+
sc->sc_sensor.status = SENSOR_S_UNKNOWN;
241+
sensor_attach(&sc->sc_sensordev, &sc->sc_sensor);
242+
sensordev_install(&sc->sc_sensordev);
243+
244+
config_mountroot(self, pvclock_tick_hook);
245+
}
246+
171247
printf("\n");
172248
return;
173249
err:
@@ -211,44 +287,111 @@ pvclock_read_done(const struct pvclock_time_info *ti,
211287
return (ti->ti_version == version);
212288
}
213289

214-
uint
215-
pvclock_get_timecount(struct timecounter *tc)
290+
/*
 * Scale a TSC delta: shift it by `shift' bits (left when positive, right
 * when negative) and multiply by the 32.32 fixed-point fraction
 * mul_frac / 2^32.
 *
 * The multiplication is split into the low and high 32-bit halves of the
 * shifted delta so that the 96-bit intermediate product is never
 * truncated before the final ">> 32"; only the end result is reduced to
 * 64 bits.
 *
 * Shift counts of 64 or more (in either direction) would be undefined
 * behaviour on a 64-bit operand, and negating INT_MIN would be as well.
 * Such counts move every bit out of the value, so the delta is treated
 * as 0 instead of being shifted.
 */
static inline uint64_t
pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
	uint64_t lower, upper;

	if (shift <= -64 || shift >= 64)
		delta = 0;
	else if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

	/* (mul_frac * low half) contributes only its upper 32 bits */
	lower = ((uint64_t)mul_frac * ((uint32_t)delta)) >> 32;
	/* (mul_frac * high half) is already scaled by 2^32 / 2^32 */
	upper = (uint64_t)mul_frac * (delta >> 32);
	return lower + upper;
}
304+
305+
static uint64_t
306+
pvclock_cmp_last(uint64_t ctr)
307+
{
308+
uint64_t last;
309+
310+
do {
311+
last = pvclock_atomic_load(&pvclock_lastcount);
312+
if (ctr < last)
313+
return (last);
314+
} while (pvclock_atomic_cas(&pvclock_lastcount, last, ctr) != last);
315+
return (ctr);
316+
}
317+
318+
/*
 * Return the current 64-bit pvclock reading for the timecounter tc.
 *
 * This span is a diff: lines following a "NNN-" marker are the removed
 * code, lines following a "NNN+" marker are the new code described here.
 *
 * With interrupts blocked via splhigh(), the time info fields and the TSC
 * (rdtsc_lfence()) are sampled inside a retry loop bracketed by
 * pvclock_read_begin()/pvclock_read_done(); the loop repeats until
 * pvclock_read_done() accepts the version obtained at the start, so all
 * values come from one consistent update.
 *
 * The TSC delta relative to ti_tsc_timestamp is clamped to 0 when the TSC
 * is not ahead of the timestamp, scaled by pvclock_scale_delta() and added
 * to ti_system_time.  If PVCLOCK_FLAG_TSC_STABLE is set the result is
 * returned directly; otherwise it goes through pvclock_cmp_last().
 */
uint64_t
319+
pvclock_get(struct timecounter *tc)
216320
{
217321
struct pvclock_softc *sc = tc->tc_priv;
218322
struct pvclock_time_info *ti;
219323
uint64_t tsc_timestamp, system_time, delta, ctr;
220324
uint32_t version, mul_frac;
221325
int8_t shift;
222326
uint8_t flags;
327+
int s;
223328

224-
ti = sc->sc_time;
329+
ti = &sc->sc_page->ti;
330+
s = splhigh();
225331
do {
226332
version = pvclock_read_begin(ti);
227333
system_time = ti->ti_system_time;
228334
tsc_timestamp = ti->ti_tsc_timestamp;
229335
mul_frac = ti->ti_tsc_to_system_mul;
230336
shift = ti->ti_tsc_shift;
231337
flags = ti->ti_flags;
338+
delta = rdtsc_lfence();
232339
} while (!pvclock_read_done(ti, version));
340+
splx(s);
233341

234342
/*
235343
* The algorithm is described in
236-
* linux/Documentation/virtual/kvm/msr.txt
344+
* linux/Documentation/virt/kvm/x86/msr.rst
237345
*/
238-
delta = rdtsc() - tsc_timestamp;
239-
if (shift < 0)
240-
delta >>= -shift;
346+
if (delta > tsc_timestamp)
347+
delta -= tsc_timestamp;
241348
else
242-
delta <<= shift;
243-
ctr = ((delta * mul_frac) >> 32) + system_time;
349+
delta = 0;
350+
ctr = pvclock_scale_delta(delta, mul_frac, shift) + system_time;
244351

245352
if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
246353
return (ctr);
247354

248-
if (ctr < pvclock_lastcount)
249-
return (pvclock_lastcount);
355+
return pvclock_cmp_last(ctr);
356+
}
250357

251-
atomic_swap_uint(&pvclock_lastcount, ctr);
358+
uint
359+
pvclock_get_timecount(struct timecounter *tc)
360+
{
361+
return (pvclock_get(tc));
362+
}
252363

253-
return (ctr);
364+
void
365+
pvclock_tick(void *arg)
366+
{
367+
struct pvclock_softc *sc = arg;
368+
struct timespec ts;
369+
struct pvclock_wall_clock *wc = &sc->sc_page->wc;
370+
int64_t value;
371+
372+
wrmsr(KVM_MSR_WALL_CLOCK, sc->sc_paddr + offsetof(struct pvpage, wc));
373+
while (wc->wc_version & 0x1)
374+
virtio_membar_sync();
375+
if (wc->wc_sec) {
376+
nanotime(&ts);
377+
value = TIMESPEC_TO_NSEC(&ts) -
378+
SEC_TO_NSEC(wc->wc_sec) - wc->wc_nsec -
379+
pvclock_get(&pvclock_timecounter);
380+
381+
TIMESPEC_TO_TIMEVAL(&sc->sc_sensor.tv, &ts);
382+
sc->sc_sensor.value = value;
383+
sc->sc_sensor.status = SENSOR_S_OK;
384+
} else
385+
sc->sc_sensor.status = SENSOR_S_UNKNOWN;
386+
387+
timeout_add_sec(&sc->sc_tick, 15);
388+
}
389+
390+
void
391+
pvclock_tick_hook(struct device *self)
392+
{
393+
struct pvclock_softc *sc = (struct pvclock_softc *)self;
394+
395+
timeout_set(&sc->sc_tick, pvclock_tick, sc);
396+
pvclock_tick(sc);
254397
}

0 commit comments

Comments
 (0)