Skip to content

Commit ab02bb3

Browse files
Alexey Makhalov, suryasaimadhu
authored and committed
x86/vmware: Add steal time clock support for VMware guests
Steal time is the amount of CPU time needed by a guest virtual machine that is not provided by the host. Steal time occurs when the host allocates this CPU time elsewhere, for example, to another guest. Steal time can be enabled by adding the VM configuration option stealclock.enable = "TRUE". It is supported by VMs that run hardware version 13 or newer. Introduce the VMware steal time infrastructure. The high level code (such as enabling, disabling and hot-plug routines) was derived from KVM. [ Tomer: use READ_ONCE macros and 32bit guests support. ] [ bp: Massage. ] Co-developed-by: Tomer Zeltzer <[email protected]> Signed-off-by: Alexey Makhalov <[email protected]> Signed-off-by: Tomer Zeltzer <[email protected]> Signed-off-by: Borislav Petkov <[email protected]> Reviewed-by: Thomas Hellstrom <[email protected]> Reviewed-by: Thomas Gleixner <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent dd735f4 commit ab02bb3

File tree

1 file changed

+197
-0
lines changed

1 file changed

+197
-0
lines changed

arch/x86/kernel/cpu/vmware.c

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include <linux/init.h>
2626
#include <linux/export.h>
2727
#include <linux/clocksource.h>
28+
#include <linux/cpu.h>
29+
#include <linux/reboot.h>
2830
#include <asm/div64.h>
2931
#include <asm/x86_init.h>
3032
#include <asm/hypervisor.h>
@@ -47,6 +49,11 @@
4749
#define VMWARE_CMD_GETVCPU_INFO 68
4850
#define VMWARE_CMD_LEGACY_X2APIC 3
4951
#define VMWARE_CMD_VCPU_RESERVED 31
52+
#define VMWARE_CMD_STEALCLOCK 91
53+
54+
#define STEALCLOCK_NOT_AVAILABLE (-1)
55+
#define STEALCLOCK_DISABLED 0
56+
#define STEALCLOCK_ENABLED 1
5057

5158
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
5259
__asm__("inl (%%dx), %%eax" : \
@@ -86,6 +93,18 @@
8693
} \
8794
} while (0)
8895

96+
/*
 * Per-vCPU steal time area shared with the hypervisor: its physical
 * address is handed to the host via the STEALCLOCK hypercall (see
 * stealclock_enable()). 64 bytes total; the per-cpu instance is
 * declared __aligned(64).
 */
struct vmware_steal_time {
	union {
		uint64_t clock; /* stolen time counter in units of vtsc */
		struct {
			/* only for little-endian */
			uint32_t clock_low;	/* low 32 bits of the counter */
			uint32_t clock_high;	/* high 32 bits of the counter */
		};
	};
	uint64_t reserved[7];	/* pad the structure to 64 bytes */
};
107+
89108
static unsigned long vmware_tsc_khz __ro_after_init;
90109
static u8 vmware_hypercall_mode __ro_after_init;
91110

@@ -104,6 +123,8 @@ static unsigned long vmware_get_tsc_khz(void)
104123
#ifdef CONFIG_PARAVIRT
105124
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
106125
static int vmw_sched_clock __initdata = 1;
126+
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
127+
static bool has_steal_clock;
107128

108129
static __init int setup_vmw_sched_clock(char *s)
109130
{
@@ -135,6 +156,163 @@ static void __init vmware_cyc2ns_setup(void)
135156
pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
136157
}
137158

159+
/*
 * vmware_cmd_stealclock() - issue the STEALCLOCK hypercall.
 * @arg1: high 32 bits of the argument (physical address of the per-cpu
 *        steal time area when enabling; 0 when disabling)
 * @arg2: low 32 bits of the argument (1 when disabling)
 *
 * Return: the steal clock status reported by the hypervisor, one of
 * STEALCLOCK_NOT_AVAILABLE, STEALCLOCK_DISABLED or STEALCLOCK_ENABLED.
 */
static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
	uint32_t result, info;

	/*
	 * %eax carries the magic in and the status out; %ecx carries the
	 * command in and extra info out (discarded). The "memory" clobber
	 * is needed because the hypervisor may begin or stop updating the
	 * steal time structure as a result of this call.
	 */
	asm volatile (VMWARE_HYPERCALL :
		"=a"(result),
		"=c"(info) :
		"a"(VMWARE_HYPERVISOR_MAGIC),
		"b"(0),
		"c"(VMWARE_CMD_STEALCLOCK),
		"d"(0),
		"S"(arg1),
		"D"(arg2) :
		"memory");
	return result;
}
175+
176+
/*
 * Enable steal time accounting for this vCPU, pointing the hypervisor
 * at the steal time area located at physical address @pa.
 *
 * Return: true if the hypervisor reports the steal clock as enabled.
 */
static bool stealclock_enable(phys_addr_t pa)
{
	int status;

	status = vmware_cmd_stealclock(upper_32_bits(pa), lower_32_bits(pa));
	return status == STEALCLOCK_ENABLED;
}
181+
182+
/*
 * Ask the hypervisor to stop steal time accounting for this vCPU
 * (contrast with stealclock_enable(), which passes a physical address).
 *
 * Return: the resulting steal clock status (STEALCLOCK_*), which also
 * serves as the availability probe in vmware_is_stealclock_available().
 */
static int __stealclock_disable(void)
{
	int status = vmware_cmd_stealclock(0, 1);

	return status;
}
186+
187+
/* Disable steal time accounting, ignoring the reported status. */
static void stealclock_disable(void)
{
	(void)__stealclock_disable();
}
191+
192+
static bool vmware_is_stealclock_available(void)
193+
{
194+
return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
195+
}
196+
197+
/**
 * vmware_steal_clock() - read the per-cpu steal clock
 * @cpu: the cpu number whose steal clock we want to read
 *
 * The function reads the steal clock if we are on a 64-bit system, otherwise
 * reads it in parts, checking that the high part didn't change in the
 * meantime.
 *
 * Return:
 * The steal clock reading in ns.
 */
static uint64_t vmware_steal_clock(int cpu)
{
	struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
	uint64_t clock;

	if (IS_ENABLED(CONFIG_64BIT))
		clock = READ_ONCE(steal->clock);
	else {
		uint32_t initial_high, low, high;

		/*
		 * 32-bit guests cannot read the 64-bit counter atomically
		 * while the hypervisor updates it, so read it in halves
		 * and retry if the high word changed underneath us.
		 */
		do {
			initial_high = READ_ONCE(steal->clock_high);
			/* Do not reorder initial_high and high readings */
			virt_rmb();
			low = READ_ONCE(steal->clock_low);
			/* Keep low reading in between */
			virt_rmb();
			high = READ_ONCE(steal->clock_high);
		} while (initial_high != high);

		clock = ((uint64_t)high << 32) | low;
	}

	/* Convert from vtsc units to ns using the precomputed cyc2ns factors. */
	return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
			       vmware_cyc2ns.cyc2ns_shift);
}
234+
235+
static void vmware_register_steal_time(void)
236+
{
237+
int cpu = smp_processor_id();
238+
struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);
239+
240+
if (!has_steal_clock)
241+
return;
242+
243+
if (!stealclock_enable(slow_virt_to_phys(st))) {
244+
has_steal_clock = false;
245+
return;
246+
}
247+
248+
pr_info("vmware-stealtime: cpu %d, pa %llx\n",
249+
cpu, (unsigned long long) slow_virt_to_phys(st));
250+
}
251+
252+
static void vmware_disable_steal_time(void)
253+
{
254+
if (!has_steal_clock)
255+
return;
256+
257+
stealclock_disable();
258+
}
259+
260+
static void vmware_guest_cpu_init(void)
261+
{
262+
if (has_steal_clock)
263+
vmware_register_steal_time();
264+
}
265+
266+
/*
 * Cross-call callback run on each CPU at reboot (via on_each_cpu() in
 * vmware_pv_reboot_notify()): stop steal time accounting for that CPU.
 */
static void vmware_pv_guest_cpu_reboot(void *unused)
{
	vmware_disable_steal_time();
}
270+
271+
static int vmware_pv_reboot_notify(struct notifier_block *nb,
272+
unsigned long code, void *unused)
273+
{
274+
if (code == SYS_RESTART)
275+
on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
276+
return NOTIFY_DONE;
277+
}
278+
279+
/* Registered at setup time; used only to disable the steal clock on reboot. */
static struct notifier_block vmware_pv_reboot_nb = {
	.notifier_call = vmware_pv_reboot_notify,
};
282+
283+
#ifdef CONFIG_SMP
284+
/*
 * Wrapper around native_smp_prepare_boot_cpu() that also registers the
 * boot CPU's steal time area (secondary CPUs are handled by the CPU
 * hotplug online callback installed in vmware_paravirt_ops_setup()).
 */
static void __init vmware_smp_prepare_boot_cpu(void)
{
	vmware_guest_cpu_init();
	native_smp_prepare_boot_cpu();
}
289+
290+
/*
 * CPU hotplug "online" callback: enable steal time accounting for the
 * incoming CPU. Runs with interrupts disabled around the registration.
 * Always returns 0 (success).
 */
static int vmware_cpu_online(unsigned int cpu)
{
	local_irq_disable();
	vmware_guest_cpu_init();
	local_irq_enable();
	return 0;
}
297+
298+
/*
 * CPU hotplug "down prepare" callback: stop steal time accounting before
 * the CPU goes offline, so the hypervisor no longer writes to its area.
 * Always returns 0 (success).
 */
static int vmware_cpu_down_prepare(unsigned int cpu)
{
	local_irq_disable();
	vmware_disable_steal_time();
	local_irq_enable();
	return 0;
}
305+
#endif
306+
307+
static __init int activate_jump_labels(void)
308+
{
309+
if (has_steal_clock)
310+
static_key_slow_inc(&paravirt_steal_enabled);
311+
312+
return 0;
313+
}
314+
arch_initcall(activate_jump_labels);
315+
138316
static void __init vmware_paravirt_ops_setup(void)
139317
{
140318
pv_info.name = "VMware hypervisor";
@@ -148,6 +326,25 @@ static void __init vmware_paravirt_ops_setup(void)
148326
if (vmw_sched_clock)
149327
pv_ops.time.sched_clock = vmware_sched_clock;
150328

329+
if (vmware_is_stealclock_available()) {
330+
has_steal_clock = true;
331+
pv_ops.time.steal_clock = vmware_steal_clock;
332+
333+
/* We use reboot notifier only to disable steal clock */
334+
register_reboot_notifier(&vmware_pv_reboot_nb);
335+
336+
#ifdef CONFIG_SMP
337+
smp_ops.smp_prepare_boot_cpu =
338+
vmware_smp_prepare_boot_cpu;
339+
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
340+
"x86/vmware:online",
341+
vmware_cpu_online,
342+
vmware_cpu_down_prepare) < 0)
343+
pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
344+
#else
345+
vmware_guest_cpu_init();
346+
#endif
347+
}
151348
}
152349
#else
153350
#define vmware_paravirt_ops_setup() do {} while (0)

0 commit comments

Comments
 (0)