Skip to content

Commit 64d6a12

Browse files
committed
Merge branch 'x86-hyperv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 hyperv updates from Ingo Molnar: "Misc updates to the hyperv guest code: - Rework clockevents initialization to better support hibernation - Allow guests to enable InvariantTSC - Micro-optimize send_ipi_one" * 'x86-hyperv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/hyperv: Initialize clockevents earlier in CPU onlining x86/hyperv: Allow guests to enable InvariantTSC x86/hyperv: Micro-optimize send_ipi_one()
2 parents cd4771f + 4df4cb9 commit 64d6a12

File tree

10 files changed

+190
-55
lines changed

10 files changed

+190
-55
lines changed

arch/x86/hyperv/hv_apic.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,10 +194,20 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
194194

195195
static bool __send_ipi_one(int cpu, int vector)
196196
{
197-
struct cpumask mask = CPU_MASK_NONE;
197+
int vp = hv_cpu_number_to_vp_number(cpu);
198198

199-
cpumask_set_cpu(cpu, &mask);
200-
return __send_ipi_mask(&mask, vector);
199+
trace_hyperv_send_ipi_one(cpu, vector);
200+
201+
if (!hv_hypercall_pg || (vp == VP_INVAL))
202+
return false;
203+
204+
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
205+
return false;
206+
207+
if (vp >= 64)
208+
return __send_ipi_mask_ex(cpumask_of(cpu), vector);
209+
210+
return !hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
201211
}
202212

203213
static void hv_send_ipi(int cpu, int vector)

arch/x86/hyperv/hv_init.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,12 @@ void __init hyperv_init(void)
311311
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
312312
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
313313

314+
/*
315+
* Ignore any errors in setting up stimer clockevents
316+
* as we can run with the LAPIC timer as a fallback.
317+
*/
318+
(void)hv_stimer_alloc();
319+
314320
hv_apic_init();
315321

316322
x86_init.pci.arch_init = hv_pci_init;

arch/x86/include/asm/hyperv-tlfs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@
8686
#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
8787
/* AccessReenlightenmentControls privilege */
8888
#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
89+
/* AccessTscInvariantControls privilege */
90+
#define HV_X64_ACCESS_TSC_INVARIANT BIT(15)
8991

9092
/*
9193
* Feature identification: indicates which flags were specified at partition
@@ -278,6 +280,9 @@
278280
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
279281
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
280282

283+
/* TSC invariant control */
284+
#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
285+
281286
/*
282287
* Declare the MSR used to setup pages used to communicate with the hypervisor.
283288
*/

arch/x86/include/asm/trace/hyperv.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask,
7171
__entry->ncpus, __entry->vector)
7272
);
7373

74+
TRACE_EVENT(hyperv_send_ipi_one,
75+
TP_PROTO(int cpu,
76+
int vector),
77+
TP_ARGS(cpu, vector),
78+
TP_STRUCT__entry(
79+
__field(int, cpu)
80+
__field(int, vector)
81+
),
82+
TP_fast_assign(__entry->cpu = cpu;
83+
__entry->vector = vector;
84+
),
85+
TP_printk("cpu %d vector %x",
86+
__entry->cpu, __entry->vector)
87+
);
88+
7489
#endif /* CONFIG_HYPERV */
7590

7691
#undef TRACE_INCLUDE_PATH

arch/x86/kernel/cpu/mshyperv.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,12 @@ static void __init ms_hyperv_init_platform(void)
290290
machine_ops.shutdown = hv_machine_shutdown;
291291
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
292292
#endif
293-
mark_tsc_unstable("running on Hyper-V");
293+
if (ms_hyperv.features & HV_X64_ACCESS_TSC_INVARIANT) {
294+
wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
295+
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
296+
} else {
297+
mark_tsc_unstable("running on Hyper-V");
298+
}
294299

295300
/*
296301
* Generation 2 instances don't support reading the NMI status from

drivers/clocksource/hyperv_timer.c

Lines changed: 124 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <linux/clocksource.h>
1818
#include <linux/sched_clock.h>
1919
#include <linux/mm.h>
20+
#include <linux/cpuhotplug.h>
2021
#include <clocksource/hyperv_timer.h>
2122
#include <asm/hyperv-tlfs.h>
2223
#include <asm/mshyperv.h>
@@ -30,6 +31,15 @@ static u64 hv_sched_clock_offset __ro_after_init;
3031
* mechanism is used when running on older versions of Hyper-V
3132
* that don't support Direct Mode. While Hyper-V provides
3233
* four stimers per CPU, Linux uses only stimer0.
34+
*
35+
* Because Direct Mode does not require processing a VMbus
36+
* message, stimer interrupts can be enabled earlier in the
37+
* process of booting a CPU, and consistent with when timer
38+
* interrupts are enabled for other clocksource drivers.
39+
* However, for legacy versions of Hyper-V when Direct Mode
40+
* is not enabled, setting up stimer interrupts must be
41+
* delayed until VMbus is initialized and can process the
42+
* interrupt message.
3343
*/
3444
static bool direct_mode_enabled;
3545

@@ -102,17 +112,12 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
102112
/*
103113
* hv_stimer_init - Per-cpu initialization of the clockevent
104114
*/
105-
void hv_stimer_init(unsigned int cpu)
115+
static int hv_stimer_init(unsigned int cpu)
106116
{
107117
struct clock_event_device *ce;
108118

109-
/*
110-
* Synthetic timers are always available except on old versions of
111-
* Hyper-V on x86. In that case, just return as Linux will use a
112-
* clocksource based on emulated PIT or LAPIC timer hardware.
113-
*/
114-
if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
115-
return;
119+
if (!hv_clock_event)
120+
return 0;
116121

117122
ce = per_cpu_ptr(hv_clock_event, cpu);
118123
ce->name = "Hyper-V clockevent";
@@ -127,28 +132,55 @@ void hv_stimer_init(unsigned int cpu)
127132
HV_CLOCK_HZ,
128133
HV_MIN_DELTA_TICKS,
129134
HV_MAX_MAX_DELTA_TICKS);
135+
return 0;
130136
}
131-
EXPORT_SYMBOL_GPL(hv_stimer_init);
132137

133138
/*
134139
* hv_stimer_cleanup - Per-cpu cleanup of the clockevent
135140
*/
136-
void hv_stimer_cleanup(unsigned int cpu)
141+
int hv_stimer_cleanup(unsigned int cpu)
137142
{
138143
struct clock_event_device *ce;
139144

140-
/* Turn off clockevent device */
141-
if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
142-
ce = per_cpu_ptr(hv_clock_event, cpu);
145+
if (!hv_clock_event)
146+
return 0;
147+
148+
/*
149+
* In the legacy case where Direct Mode is not enabled
150+
* (which can only be on x86/64), stimer cleanup happens
151+
* relatively early in the CPU offlining process. We
152+
* must unbind the stimer-based clockevent device so
153+
* that the LAPIC timer can take over until clockevents
154+
* are no longer needed in the offlining process. Note
155+
* that clockevents_unbind_device() eventually calls
156+
* hv_ce_shutdown().
157+
*
158+
* The unbind should not be done when Direct Mode is
159+
* enabled because we may be on an architecture where
160+
* there are no other clockevent devices to fall back to.
161+
*/
162+
ce = per_cpu_ptr(hv_clock_event, cpu);
163+
if (direct_mode_enabled)
143164
hv_ce_shutdown(ce);
144-
}
165+
else
166+
clockevents_unbind_device(ce, cpu);
167+
168+
return 0;
145169
}
146170
EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
147171

148172
/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
149-
int hv_stimer_alloc(int sint)
173+
int hv_stimer_alloc(void)
150174
{
151-
int ret;
175+
int ret = 0;
176+
177+
/*
178+
* Synthetic timers are always available except on old versions of
179+
* Hyper-V on x86. In that case, return an error as Linux will use a
180+
* clockevent based on emulated LAPIC timer hardware.
181+
*/
182+
if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
183+
return -EINVAL;
152184

153185
hv_clock_event = alloc_percpu(struct clock_event_device);
154186
if (!hv_clock_event)
@@ -159,22 +191,78 @@ int hv_stimer_alloc(int sint)
159191
if (direct_mode_enabled) {
160192
ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
161193
hv_stimer0_isr);
162-
if (ret) {
163-
free_percpu(hv_clock_event);
164-
hv_clock_event = NULL;
165-
return ret;
166-
}
194+
if (ret)
195+
goto free_percpu;
196+
197+
/*
198+
* Since we are in Direct Mode, stimer initialization
199+
* can be done now with a CPUHP value in the same range
200+
* as other clockevent devices.
201+
*/
202+
ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
203+
"clockevents/hyperv/stimer:starting",
204+
hv_stimer_init, hv_stimer_cleanup);
205+
if (ret < 0)
206+
goto free_stimer0_irq;
167207
}
208+
return ret;
168209

169-
stimer0_message_sint = sint;
170-
return 0;
210+
free_stimer0_irq:
211+
hv_remove_stimer0_irq(stimer0_irq);
212+
stimer0_irq = 0;
213+
free_percpu:
214+
free_percpu(hv_clock_event);
215+
hv_clock_event = NULL;
216+
return ret;
171217
}
172218
EXPORT_SYMBOL_GPL(hv_stimer_alloc);
173219

220+
/*
221+
* hv_stimer_legacy_init -- Called from the VMbus driver to handle
222+
* the case when Direct Mode is not enabled, and the stimer
223+
* must be initialized late in the CPU onlining process.
224+
*
225+
*/
226+
void hv_stimer_legacy_init(unsigned int cpu, int sint)
227+
{
228+
if (direct_mode_enabled)
229+
return;
230+
231+
/*
232+
* This function gets called by each vCPU, so setting the
233+
* global stimer0_message_sint value each time is conceptually
234+
* not ideal, but the value passed in is always the same and
235+
* it avoids introducing yet another interface into this
236+
* clocksource driver just to set the sint in the legacy case.
237+
*/
238+
stimer0_message_sint = sint;
239+
(void)hv_stimer_init(cpu);
240+
}
241+
EXPORT_SYMBOL_GPL(hv_stimer_legacy_init);
242+
243+
/*
244+
* hv_stimer_legacy_cleanup -- Called from the VMbus driver to
245+
* handle the case when Direct Mode is not enabled, and the
246+
* stimer must be cleaned up early in the CPU offlining
247+
* process.
248+
*/
249+
void hv_stimer_legacy_cleanup(unsigned int cpu)
250+
{
251+
if (direct_mode_enabled)
252+
return;
253+
(void)hv_stimer_cleanup(cpu);
254+
}
255+
EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);
256+
257+
174258
/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
175259
void hv_stimer_free(void)
176260
{
177-
if (direct_mode_enabled && (stimer0_irq != 0)) {
261+
if (!hv_clock_event)
262+
return;
263+
264+
if (direct_mode_enabled) {
265+
cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
178266
hv_remove_stimer0_irq(stimer0_irq);
179267
stimer0_irq = 0;
180268
}
@@ -190,14 +278,20 @@ EXPORT_SYMBOL_GPL(hv_stimer_free);
190278
void hv_stimer_global_cleanup(void)
191279
{
192280
int cpu;
193-
struct clock_event_device *ce;
194281

195-
if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
196-
for_each_present_cpu(cpu) {
197-
ce = per_cpu_ptr(hv_clock_event, cpu);
198-
clockevents_unbind_device(ce, cpu);
199-
}
282+
/*
283+
* hv_stimer_legacy_cleanup() will stop the stimer if Direct
284+
* Mode is not enabled, and falls back to the LAPIC timer.
285+
*/
286+
for_each_present_cpu(cpu) {
287+
hv_stimer_legacy_cleanup(cpu);
200288
}
289+
290+
/*
291+
* If Direct Mode is enabled, the cpuhp teardown callback
292+
* (hv_stimer_cleanup) will be run on all CPUs to stop the
293+
* stimers.
294+
*/
201295
hv_stimer_free();
202296
}
203297
EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);

drivers/hv/hv.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ int hv_synic_init(unsigned int cpu)
202202
{
203203
hv_synic_enable_regs(cpu);
204204

205-
hv_stimer_init(cpu);
205+
hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
206206

207207
return 0;
208208
}
@@ -277,7 +277,7 @@ int hv_synic_cleanup(unsigned int cpu)
277277
if (channel_found && vmbus_connection.conn_state == CONNECTED)
278278
return -EBUSY;
279279

280-
hv_stimer_cleanup(cpu);
280+
hv_stimer_legacy_cleanup(cpu);
281281

282282
hv_synic_disable_regs(cpu);
283283

drivers/hv/vmbus_drv.c

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,10 +1340,6 @@ static int vmbus_bus_init(void)
13401340
if (ret)
13411341
goto err_alloc;
13421342

1343-
ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
1344-
if (ret < 0)
1345-
goto err_alloc;
1346-
13471343
/*
13481344
* Initialize the per-cpu interrupt state and stimer state.
13491345
* Then connect to the host.
@@ -1400,9 +1396,8 @@ static int vmbus_bus_init(void)
14001396
err_connect:
14011397
cpuhp_remove_state(hyperv_cpuhp_online);
14021398
err_cpuhp:
1403-
hv_stimer_free();
1404-
err_alloc:
14051399
hv_synic_free();
1400+
err_alloc:
14061401
hv_remove_vmbus_irq();
14071402

14081403
bus_unregister(&hv_bus);
@@ -2315,20 +2310,23 @@ static void hv_crash_handler(struct pt_regs *regs)
23152310
static int hv_synic_suspend(void)
23162311
{
23172312
/*
2318-
* When we reach here, all the non-boot CPUs have been offlined, and
2319-
* the stimers on them have been unbound in hv_synic_cleanup() ->
2313+
* When we reach here, all the non-boot CPUs have been offlined.
2314+
* If we're in a legacy configuration where stimer Direct Mode is
2315+
* not enabled, the stimers on the non-boot CPUs have been unbound
2316+
* in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
23202317
* hv_stimer_cleanup() -> clockevents_unbind_device().
23212318
*
2322-
* hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
2323-
* we do not unbind the stimer on CPU0 because: 1) it's unnecessary
2324-
* because the interrupts remain disabled between syscore_suspend()
2325-
* and syscore_resume(): see create_image() and resume_target_kernel();
2319+
* hv_synic_suspend() only runs on CPU0 with interrupts disabled.
2320+
* Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
2321+
* 1) it's unnecessary as interrupts remain disabled between
2322+
* syscore_suspend() and syscore_resume(): see create_image() and
2323+
* resume_target_kernel()
23262324
* 2) the stimer on CPU0 is automatically disabled later by
23272325
* syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
2328-
* -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
2329-
* would be triggered if we call clockevents_unbind_device(), which
2330-
* may sleep, in an interrupts-disabled context. So, we intentionally
2331-
* don't call hv_stimer_cleanup(0) here.
2326+
* -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
2327+
* 3) a warning would be triggered if we call
2328+
* clockevents_unbind_device(), which may sleep, in an
2329+
* interrupts-disabled context.
23322330
*/
23332331

23342332
hv_synic_disable_regs(0);

0 commit comments

Comments
 (0)