1
- /* $OpenBSD: pvclock.c,v 1.13 2025/03/31 15:40:22 sf Exp $ */
1
+ /* $OpenBSD: pvclock.c,v 1.14 2025/06/20 14:06:34 sf Exp $ */
2
2
3
3
/*
4
4
* Copyright (c) 2018 Reyk Floeter <[email protected] >
22
22
23
23
#include <sys/param.h>
24
24
#include <sys/systm.h>
25
+ #include <sys/timeout.h>
25
26
#include <sys/timetc.h>
26
27
27
28
#include <machine/cpu.h>
35
36
#define PMAP_NOCRYPT 0
36
37
#endif
37
38
38
- uint pvclock_lastcount ;
39
+ #if defined(__amd64__ )
40
+
41
/*
 * Fetch the 64-bit value behind a volatile pointer (amd64 variant).
 * This is a single plain dereference.
 * NOTE(review): presumably relies on naturally aligned 64-bit loads
 * being atomic on amd64 -- confirm.
 */
static inline uint64_t
pvclock_atomic_load(volatile uint64_t *ptr)
{
	uint64_t val = *ptr;

	return val;
}
46
+
47
/*
 * 64-bit compare-and-swap (amd64 variant): hand the operation to
 * atomic_cas_ulong() with expected value e and new value n, and pass
 * its result back.  The caller in this file treats a result equal to
 * e as success.
 */
static inline uint64_t
pvclock_atomic_cas(volatile uint64_t *p, uint64_t e, uint64_t n)
{
	volatile unsigned long *lp = (volatile unsigned long *)p;

	return atomic_cas_ulong(lp, e, n);
}
53
+
54
+ #elif defined(__i386__ )
55
+
56
+ /*
57
+ * We are running on virtualization. Therefore we can assume that we
58
+ * have cmpxchg8b, available on pentium and newer.
59
+ */
60
/*
 * 64-bit load for i386, built on "lock cmpxchg8b".
 *
 * %edx:%eax is first set to whatever %ecx:%ebx currently hold, so the
 * value cmpxchg8b compares against is also the value it would store on
 * a match.  Either way the contents of *ptr end up in %edx:%eax and
 * the memory location keeps its value.
 */
static inline uint64_t
pvclock_atomic_load(volatile uint64_t *ptr)
{
	uint64_t cur;

	__asm volatile("movl %%ebx,%%eax; movl %%ecx, %%edx; "
	    "lock cmpxchg8b %1" : "=&A" (cur) : "m" (*ptr));
	return cur;
}
68
+
69
/*
 * 64-bit compare-and-swap for i386 using "lock cmpxchg8b".
 *
 * e travels in %edx:%eax and the new value n is split across
 * %ecx:%ebx.  The value left in %edx:%eax after the instruction is
 * returned; the caller in this file treats a result equal to the
 * expected value as success.
 */
static inline uint64_t
pvclock_atomic_cas(volatile uint64_t *p, uint64_t e, uint64_t n)
{
	uint32_t n_lo = (uint32_t)n;
	uint32_t n_hi = (uint32_t)(n >> 32);

	__asm volatile("lock cmpxchg8b %1" : "+A" (e), "+m" (*p)
	    : "b" (n_lo), "c" (n_hi));
	return (e);
}
77
+
78
+ #else
79
+ #error "pvclock: unsupported x86 architecture?"
80
+ #endif
81
+
82
+
83
/*
 * Largest counter value returned so far through pvclock_cmp_last();
 * consulted when the hypervisor does not flag the TSC as stable so
 * that readings never go backwards.
 */
uint64_t pvclock_lastcount;
84
+
85
+ struct pvpage {
86
+ struct pvclock_time_info ti ;
87
+ struct pvclock_wall_clock wc ;
88
+ };
39
89
40
90
struct pvclock_softc {
41
91
struct device sc_dev ;
42
- void * sc_time ;
92
+ struct pvpage * sc_page ;
43
93
paddr_t sc_paddr ;
44
94
struct timecounter * sc_tc ;
95
+ struct ksensordev sc_sensordev ;
96
+ struct ksensor sc_sensor ;
97
+ struct timeout sc_tick ;
45
98
};
46
99
47
100
/*
 * Name of the device behind softc pointer _s.  There must be no space
 * between the macro name and its parameter list, otherwise the
 * preprocessor treats DEVNAME as an object-like macro.
 */
#define DEVNAME(_s)	((_s)->sc_dev.dv_xname)
@@ -50,12 +103,16 @@ int pvclock_match(struct device *, void *, void *);
50
103
void pvclock_attach (struct device * , struct device * , void * );
51
104
int pvclock_activate (struct device * , int );
52
105
106
+ uint64_t pvclock_get (struct timecounter * );
53
107
uint pvclock_get_timecount (struct timecounter * );
108
+ void pvclock_tick_hook (struct device * );
54
109
55
110
static inline uint32_t
56
111
pvclock_read_begin (const struct pvclock_time_info * );
57
112
static inline int
58
113
pvclock_read_done (const struct pvclock_time_info * , uint32_t );
114
+ static inline uint64_t
115
+ pvclock_scale_delta (uint64_t , uint32_t , int );
59
116
60
117
const struct cfattach pvclock_ca = {
61
118
sizeof (struct pvclock_softc ),
@@ -123,28 +180,31 @@ void
123
180
pvclock_attach (struct device * parent , struct device * self , void * aux )
124
181
{
125
182
struct pvclock_softc * sc = (struct pvclock_softc * )self ;
183
+ struct pv_attach_args * pva = aux ;
126
184
struct pvclock_time_info * ti ;
127
- paddr_t pa ;
185
+ paddr_t pa ;
128
186
uint32_t version ;
129
187
uint8_t flags ;
130
188
struct vm_page * page ;
189
+ struct pvbus_hv * kvm ;
131
190
132
191
page = uvm_pagealloc (NULL , 0 , NULL , 0 );
133
192
if (page == NULL )
134
193
goto err ;
135
- sc -> sc_time = km_alloc (PAGE_SIZE , & kv_any , & kp_none , & kd_nowait );
136
- if (sc -> sc_time == NULL )
194
+ sc -> sc_page = km_alloc (PAGE_SIZE , & kv_any , & kp_none , & kd_nowait );
195
+ if (sc -> sc_page == NULL )
137
196
goto err ;
138
197
139
198
pa = VM_PAGE_TO_PHYS (page );
140
- pmap_kenter_pa ((vaddr_t )sc -> sc_time , pa | PMAP_NOCRYPT ,
199
+ pmap_kenter_pa ((vaddr_t )sc -> sc_page , pa | PMAP_NOCRYPT ,
141
200
PROT_READ | PROT_WRITE );
142
- memset (sc -> sc_time , 0 , PAGE_SIZE );
201
+ pmap_update (pmap_kernel ());
202
+ memset (sc -> sc_page , 0 , PAGE_SIZE );
143
203
144
204
wrmsr (KVM_MSR_SYSTEM_TIME , pa | PVCLOCK_SYSTEM_TIME_ENABLE );
145
205
sc -> sc_paddr = pa ;
146
206
147
- ti = sc -> sc_time ;
207
+ ti = & sc -> sc_page -> ti ;
148
208
do {
149
209
version = pvclock_read_begin (ti );
150
210
flags = ti -> ti_flags ;
@@ -168,6 +228,22 @@ pvclock_attach(struct device *parent, struct device *self, void *aux)
168
228
169
229
tc_init (sc -> sc_tc );
170
230
231
+ /*
232
+ * The openbsd vmm pvclock does not support the WALL_CLOCK msr,
233
+ * therefore we look only for kvm.
234
+ */
235
+ kvm = & pva -> pva_hv [PVBUS_KVM ];
236
+ if (kvm -> hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2 )) {
237
+ strlcpy (sc -> sc_sensordev .xname , sc -> sc_dev .dv_xname ,
238
+ sizeof (sc -> sc_sensordev .xname ));
239
+ sc -> sc_sensor .type = SENSOR_TIMEDELTA ;
240
+ sc -> sc_sensor .status = SENSOR_S_UNKNOWN ;
241
+ sensor_attach (& sc -> sc_sensordev , & sc -> sc_sensor );
242
+ sensordev_install (& sc -> sc_sensordev );
243
+
244
+ config_mountroot (self , pvclock_tick_hook );
245
+ }
246
+
171
247
printf ("\n" );
172
248
return ;
173
249
err :
@@ -211,44 +287,111 @@ pvclock_read_done(const struct pvclock_time_info *ti,
211
287
return (ti -> ti_version == version );
212
288
}
213
289
214
- uint
215
- pvclock_get_timecount (struct timecounter * tc )
290
/*
 * Scale a TSC delta: shift it by `shift' bits (left when positive,
 * right when negative) and multiply by the 32.32 fixed-point fraction
 * mul_frac / 2^32.
 *
 * The product is built from the two 32-bit halves of the shifted delta
 * so that the intermediate result cannot overflow 64 bits:
 *
 *	(delta * mul_frac) >> 32 ==
 *	    hi(delta) * mul_frac + ((lo(delta) * mul_frac) >> 32)
 *
 * Shift counts with a magnitude of 64 or more would be undefined
 * behaviour in C.  The caller feeds in a value read from the page
 * shared with the hypervisor, so such counts are handled explicitly:
 * every bit is shifted out and the scaled delta is 0.
 */
static inline uint64_t
pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
	uint64_t lower, upper;

	if (shift <= -64 || shift >= 64)
		delta = 0;
	else if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

	lower = ((uint64_t)mul_frac * (uint32_t)delta) >> 32;
	upper = (uint64_t)mul_frac * (delta >> 32);
	return lower + upper;
}
304
+
305
+ static uint64_t
306
+ pvclock_cmp_last (uint64_t ctr )
307
+ {
308
+ uint64_t last ;
309
+
310
+ do {
311
+ last = pvclock_atomic_load (& pvclock_lastcount );
312
+ if (ctr < last )
313
+ return (last );
314
+ } while (pvclock_atomic_cas (& pvclock_lastcount , last , ctr ) != last );
315
+ return (ctr );
316
+ }
317
+
318
+ uint64_t
319
+ pvclock_get (struct timecounter * tc )
216
320
{
217
321
struct pvclock_softc * sc = tc -> tc_priv ;
218
322
struct pvclock_time_info * ti ;
219
323
uint64_t tsc_timestamp , system_time , delta , ctr ;
220
324
uint32_t version , mul_frac ;
221
325
int8_t shift ;
222
326
uint8_t flags ;
327
+ int s ;
223
328
224
- ti = sc -> sc_time ;
329
+ ti = & sc -> sc_page -> ti ;
330
+ s = splhigh ();
225
331
do {
226
332
version = pvclock_read_begin (ti );
227
333
system_time = ti -> ti_system_time ;
228
334
tsc_timestamp = ti -> ti_tsc_timestamp ;
229
335
mul_frac = ti -> ti_tsc_to_system_mul ;
230
336
shift = ti -> ti_tsc_shift ;
231
337
flags = ti -> ti_flags ;
338
+ delta = rdtsc_lfence ();
232
339
} while (!pvclock_read_done (ti , version ));
340
+ splx (s );
233
341
234
342
/*
235
343
* The algorithm is described in
236
- * linux/Documentation/virtual /kvm/msr.txt
344
+ * linux/Documentation/virt /kvm/x86/ msr.rst
237
345
*/
238
- delta = rdtsc () - tsc_timestamp ;
239
- if (shift < 0 )
240
- delta >>= - shift ;
346
+ if (delta > tsc_timestamp )
347
+ delta -= tsc_timestamp ;
241
348
else
242
- delta <<= shift ;
243
- ctr = (( delta * mul_frac ) >> 32 ) + system_time ;
349
+ delta = 0 ;
350
+ ctr = pvclock_scale_delta ( delta , mul_frac , shift ) + system_time ;
244
351
245
352
if ((flags & PVCLOCK_FLAG_TSC_STABLE ) != 0 )
246
353
return (ctr );
247
354
248
- if (ctr < pvclock_lastcount )
249
- return ( pvclock_lastcount );
355
+ return pvclock_cmp_last (ctr );
356
+ }
250
357
251
- atomic_swap_uint (& pvclock_lastcount , ctr );
358
+ uint
359
+ pvclock_get_timecount (struct timecounter * tc )
360
+ {
361
+ return (pvclock_get (tc ));
362
+ }
252
363
253
- return (ctr );
364
+ void
365
+ pvclock_tick (void * arg )
366
+ {
367
+ struct pvclock_softc * sc = arg ;
368
+ struct timespec ts ;
369
+ struct pvclock_wall_clock * wc = & sc -> sc_page -> wc ;
370
+ int64_t value ;
371
+
372
+ wrmsr (KVM_MSR_WALL_CLOCK , sc -> sc_paddr + offsetof(struct pvpage , wc ));
373
+ while (wc -> wc_version & 0x1 )
374
+ virtio_membar_sync ();
375
+ if (wc -> wc_sec ) {
376
+ nanotime (& ts );
377
+ value = TIMESPEC_TO_NSEC (& ts ) -
378
+ SEC_TO_NSEC (wc -> wc_sec ) - wc -> wc_nsec -
379
+ pvclock_get (& pvclock_timecounter );
380
+
381
+ TIMESPEC_TO_TIMEVAL (& sc -> sc_sensor .tv , & ts );
382
+ sc -> sc_sensor .value = value ;
383
+ sc -> sc_sensor .status = SENSOR_S_OK ;
384
+ } else
385
+ sc -> sc_sensor .status = SENSOR_S_UNKNOWN ;
386
+
387
+ timeout_add_sec (& sc -> sc_tick , 15 );
388
+ }
389
+
390
+ void
391
+ pvclock_tick_hook (struct device * self )
392
+ {
393
+ struct pvclock_softc * sc = (struct pvclock_softc * )self ;
394
+
395
+ timeout_set (& sc -> sc_tick , pvclock_tick , sc );
396
+ pvclock_tick (sc );
254
397
}
0 commit comments