Skip to content

Commit 4eb87dd

Browse files
committed
Merge branch 'remotes/lorenzo/pci/hv'
- Add hibernation support for Hyper-V virtual PCI devices (Dexuan Cui)
- Track Hyper-V pci_protocol_version per-hbus, not globally (Dexuan Cui)
- Avoid kmemleak false positive on hv hbus buffer (Dexuan Cui)

* remotes/lorenzo/pci/hv:
  PCI: hv: Avoid a kmemleak false positive caused by the hbus buffer
  PCI: hv: Change pci_protocol_version to per-hbus
  PCI: hv: Add hibernation support
  PCI: hv: Reorganize the code in preparation of hibernation
2 parents 454f4de + 877b911 commit 4eb87dd

File tree

1 file changed

+179
-29
lines changed

1 file changed

+179
-29
lines changed

drivers/pci/controller/pci-hyperv.c

Lines changed: 179 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,6 @@ static enum pci_protocol_version_t pci_protocol_versions[] = {
7676
PCI_PROTOCOL_VERSION_1_1,
7777
};
7878

79-
/*
80-
* Protocol version negotiated by hv_pci_protocol_negotiation().
81-
*/
82-
static enum pci_protocol_version_t pci_protocol_version;
83-
8479
#define PCI_CONFIG_MMIO_LENGTH 0x2000
8580
#define CFG_PAGE_OFFSET 0x1000
8681
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
@@ -455,12 +450,15 @@ enum hv_pcibus_state {
455450
hv_pcibus_init = 0,
456451
hv_pcibus_probed,
457452
hv_pcibus_installed,
453+
hv_pcibus_removing,
458454
hv_pcibus_removed,
459455
hv_pcibus_maximum
460456
};
461457

462458
struct hv_pcibus_device {
463459
struct pci_sysdata sysdata;
460+
/* Protocol version negotiated with the host */
461+
enum pci_protocol_version_t protocol_version;
464462
enum hv_pcibus_state state;
465463
refcount_t remove_lock;
466464
struct hv_device *hdev;
@@ -1224,7 +1222,7 @@ static void hv_irq_unmask(struct irq_data *data)
12241222
* negative effect (yet?).
12251223
*/
12261224

1227-
if (pci_protocol_version >= PCI_PROTOCOL_VERSION_1_2) {
1225+
if (hbus->protocol_version >= PCI_PROTOCOL_VERSION_1_2) {
12281226
/*
12291227
* PCI_PROTOCOL_VERSION_1_2 supports the VP_SET version of the
12301228
* HVCALL_RETARGET_INTERRUPT hypercall, which also coincides
@@ -1394,7 +1392,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
13941392
ctxt.pci_pkt.completion_func = hv_pci_compose_compl;
13951393
ctxt.pci_pkt.compl_ctxt = &comp;
13961394

1397-
switch (pci_protocol_version) {
1395+
switch (hbus->protocol_version) {
13981396
case PCI_PROTOCOL_VERSION_1_1:
13991397
size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
14001398
dest,
@@ -1681,6 +1679,23 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
16811679

16821680
spin_lock_irqsave(&hbus->device_list_lock, flags);
16831681

1682+
/*
1683+
* Clear the memory enable bit, in case it's already set. This occurs
1684+
* in the suspend path of hibernation, where the device is suspended,
1685+
* resumed and suspended again: see hibernation_snapshot() and
1686+
* hibernation_platform_enter().
1687+
*
1688+
* If the memory enable bit is already set, Hyper-V silently ignores
1689+
* the below BAR updates, and the related PCI device driver can not
1690+
* work, because reading from the device register(s) always returns
1691+
* 0xFFFFFFFF.
1692+
*/
1693+
list_for_each_entry(hpdev, &hbus->children, list_entry) {
1694+
_hv_pcifront_read_config(hpdev, PCI_COMMAND, 2, &command);
1695+
command &= ~PCI_COMMAND_MEMORY;
1696+
_hv_pcifront_write_config(hpdev, PCI_COMMAND, 2, command);
1697+
}
1698+
16841699
/* Pick addresses for the BARs. */
16851700
do {
16861701
list_for_each_entry(hpdev, &hbus->children, list_entry) {
@@ -2107,6 +2122,12 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
21072122
unsigned long flags;
21082123
bool pending_dr;
21092124

2125+
if (hbus->state == hv_pcibus_removing) {
2126+
dev_info(&hbus->hdev->device,
2127+
"PCI VMBus BUS_RELATIONS: ignored\n");
2128+
return;
2129+
}
2130+
21102131
dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
21112132
if (!dr_wrk)
21122133
return;
@@ -2223,11 +2244,19 @@ static void hv_eject_device_work(struct work_struct *work)
22232244
*/
22242245
static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
22252246
{
2247+
struct hv_pcibus_device *hbus = hpdev->hbus;
2248+
struct hv_device *hdev = hbus->hdev;
2249+
2250+
if (hbus->state == hv_pcibus_removing) {
2251+
dev_info(&hdev->device, "PCI VMBus EJECT: ignored\n");
2252+
return;
2253+
}
2254+
22262255
hpdev->state = hv_pcichild_ejecting;
22272256
get_pcichild(hpdev);
22282257
INIT_WORK(&hpdev->wrk, hv_eject_device_work);
2229-
get_hvpcibus(hpdev->hbus);
2230-
queue_work(hpdev->hbus->wq, &hpdev->wrk);
2258+
get_hvpcibus(hbus);
2259+
queue_work(hbus->wq, &hpdev->wrk);
22312260
}
22322261

22332262
/**
@@ -2379,8 +2408,11 @@ static void hv_pci_onchannelcallback(void *context)
23792408
* failing if the host doesn't support the necessary protocol
23802409
* level.
23812410
*/
2382-
static int hv_pci_protocol_negotiation(struct hv_device *hdev)
2411+
static int hv_pci_protocol_negotiation(struct hv_device *hdev,
2412+
enum pci_protocol_version_t version[],
2413+
int num_version)
23832414
{
2415+
struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
23842416
struct pci_version_request *version_req;
23852417
struct hv_pci_compl comp_pkt;
23862418
struct pci_packet *pkt;
@@ -2403,8 +2435,8 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
24032435
version_req = (struct pci_version_request *)&pkt->message;
24042436
version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
24052437

2406-
for (i = 0; i < ARRAY_SIZE(pci_protocol_versions); i++) {
2407-
version_req->protocol_version = pci_protocol_versions[i];
2438+
for (i = 0; i < num_version; i++) {
2439+
version_req->protocol_version = version[i];
24082440
ret = vmbus_sendpacket(hdev->channel, version_req,
24092441
sizeof(struct pci_version_request),
24102442
(unsigned long)pkt, VM_PKT_DATA_INBAND,
@@ -2420,10 +2452,10 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
24202452
}
24212453

24222454
if (comp_pkt.completion_status >= 0) {
2423-
pci_protocol_version = pci_protocol_versions[i];
2455+
hbus->protocol_version = version[i];
24242456
dev_info(&hdev->device,
24252457
"PCI VMBus probing: Using version %#x\n",
2426-
pci_protocol_version);
2458+
hbus->protocol_version);
24272459
goto exit;
24282460
}
24292461

@@ -2707,7 +2739,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
27072739
u32 wslot;
27082740
int ret;
27092741

2710-
size_res = (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2)
2742+
size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
27112743
? sizeof(*res_assigned) : sizeof(*res_assigned2);
27122744

27132745
pkt = kmalloc(sizeof(*pkt) + size_res, GFP_KERNEL);
@@ -2726,7 +2758,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
27262758
pkt->completion_func = hv_pci_generic_compl;
27272759
pkt->compl_ctxt = &comp_pkt;
27282760

2729-
if (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2) {
2761+
if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2) {
27302762
res_assigned =
27312763
(struct pci_resources_assigned *)&pkt->message;
27322764
res_assigned->message_type.type =
@@ -2870,9 +2902,27 @@ static int hv_pci_probe(struct hv_device *hdev,
28702902
* hv_pcibus_device contains the hypercall arguments for retargeting in
28712903
* hv_irq_unmask(). Those must not cross a page boundary.
28722904
*/
2873-
BUILD_BUG_ON(sizeof(*hbus) > PAGE_SIZE);
2905+
BUILD_BUG_ON(sizeof(*hbus) > HV_HYP_PAGE_SIZE);
28742906

2875-
hbus = (struct hv_pcibus_device *)get_zeroed_page(GFP_KERNEL);
2907+
/*
2908+
* With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural
2909+
* alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate
2910+
* a 4KB buffer that is guaranteed to be 4KB-aligned. Here the size and
2911+
* alignment of hbus is important because hbus's field
2912+
* retarget_msi_interrupt_params must not cross a 4KB page boundary.
2913+
*
2914+
* Here we prefer kzalloc to get_zeroed_page(), because a buffer
2915+
* allocated by the latter is not tracked and scanned by kmemleak, and
2916+
* hence kmemleak reports the pointer contained in the hbus buffer
2917+
* (i.e. the hpdev struct, which is created in new_pcichild_device() and
2918+
* is tracked by hbus->children) as memory leak (false positive).
2919+
*
2920+
* If the kernel doesn't have 59bb47985c1d, get_zeroed_page() *must* be
2921+
* used to allocate the hbus buffer and we can avoid the kmemleak false
2922+
* positive by using kmemleak_alloc() and kmemleak_free() to ask
2923+
* kmemleak to track and scan the hbus buffer.
2924+
*/
2925+
hbus = (struct hv_pcibus_device *)kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
28762926
if (!hbus)
28772927
return -ENOMEM;
28782928
hbus->state = hv_pcibus_init;
@@ -2930,7 +2980,8 @@ static int hv_pci_probe(struct hv_device *hdev,
29302980

29312981
hv_set_drvdata(hdev, hbus);
29322982

2933-
ret = hv_pci_protocol_negotiation(hdev);
2983+
ret = hv_pci_protocol_negotiation(hdev, pci_protocol_versions,
2984+
ARRAY_SIZE(pci_protocol_versions));
29342985
if (ret)
29352986
goto close;
29362987

@@ -3011,7 +3062,7 @@ static int hv_pci_probe(struct hv_device *hdev,
30113062
return ret;
30123063
}
30133064

3014-
static void hv_pci_bus_exit(struct hv_device *hdev)
3065+
static int hv_pci_bus_exit(struct hv_device *hdev, bool hibernating)
30153066
{
30163067
struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
30173068
struct {
@@ -3027,16 +3078,20 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
30273078
* access the per-channel ringbuffer any longer.
30283079
*/
30293080
if (hdev->channel->rescind)
3030-
return;
3081+
return 0;
30313082

3032-
/* Delete any children which might still exist. */
3033-
memset(&relations, 0, sizeof(relations));
3034-
hv_pci_devices_present(hbus, &relations);
3083+
if (!hibernating) {
3084+
/* Delete any children which might still exist. */
3085+
memset(&relations, 0, sizeof(relations));
3086+
hv_pci_devices_present(hbus, &relations);
3087+
}
30353088

30363089
ret = hv_send_resources_released(hdev);
3037-
if (ret)
3090+
if (ret) {
30383091
dev_err(&hdev->device,
30393092
"Couldn't send resources released packet(s)\n");
3093+
return ret;
3094+
}
30403095

30413096
memset(&pkt.teardown_packet, 0, sizeof(pkt.teardown_packet));
30423097
init_completion(&comp_pkt.host_event);
@@ -3049,8 +3104,13 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
30493104
(unsigned long)&pkt.teardown_packet,
30503105
VM_PKT_DATA_INBAND,
30513106
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
3052-
if (!ret)
3053-
wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ);
3107+
if (ret)
3108+
return ret;
3109+
3110+
if (wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ) == 0)
3111+
return -ETIMEDOUT;
3112+
3113+
return 0;
30543114
}
30553115

30563116
/**
@@ -3062,6 +3122,7 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
30623122
static int hv_pci_remove(struct hv_device *hdev)
30633123
{
30643124
struct hv_pcibus_device *hbus;
3125+
int ret;
30653126

30663127
hbus = hv_get_drvdata(hdev);
30673128
if (hbus->state == hv_pcibus_installed) {
@@ -3074,7 +3135,7 @@ static int hv_pci_remove(struct hv_device *hdev)
30743135
hbus->state = hv_pcibus_removed;
30753136
}
30763137

3077-
hv_pci_bus_exit(hdev);
3138+
ret = hv_pci_bus_exit(hdev, false);
30783139

30793140
vmbus_close(hdev->channel);
30803141

@@ -3090,10 +3151,97 @@ static int hv_pci_remove(struct hv_device *hdev)
30903151

30913152
hv_put_dom_num(hbus->sysdata.domain);
30923153

3093-
free_page((unsigned long)hbus);
3154+
kfree(hbus);
3155+
return ret;
3156+
}
3157+
3158+
static int hv_pci_suspend(struct hv_device *hdev)
3159+
{
3160+
struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
3161+
enum hv_pcibus_state old_state;
3162+
int ret;
3163+
3164+
/*
3165+
* hv_pci_suspend() must make sure there are no pending work items
3166+
* before calling vmbus_close(), since it runs in a process context
3167+
* as a callback in dpm_suspend(). When it starts to run, the channel
3168+
* callback hv_pci_onchannelcallback(), which runs in a tasklet
3169+
* context, can be still running concurrently and scheduling new work
3170+
* items onto hbus->wq in hv_pci_devices_present() and
3171+
* hv_pci_eject_device(), and the work item handlers can access the
3172+
* vmbus channel, which can be being closed by hv_pci_suspend(), e.g.
3173+
* the work item handler pci_devices_present_work() ->
3174+
* new_pcichild_device() writes to the vmbus channel.
3175+
*
3176+
* To eliminate the race, hv_pci_suspend() disables the channel
3177+
* callback tasklet, sets hbus->state to hv_pcibus_removing, and
3178+
* re-enables the tasklet. This way, when hv_pci_suspend() proceeds,
3179+
* it knows that no new work item can be scheduled, and then it flushes
3180+
* hbus->wq and safely closes the vmbus channel.
3181+
*/
3182+
tasklet_disable(&hdev->channel->callback_event);
3183+
3184+
/* Change the hbus state to prevent new work items. */
3185+
old_state = hbus->state;
3186+
if (hbus->state == hv_pcibus_installed)
3187+
hbus->state = hv_pcibus_removing;
3188+
3189+
tasklet_enable(&hdev->channel->callback_event);
3190+
3191+
if (old_state != hv_pcibus_installed)
3192+
return -EINVAL;
3193+
3194+
flush_workqueue(hbus->wq);
3195+
3196+
ret = hv_pci_bus_exit(hdev, true);
3197+
if (ret)
3198+
return ret;
3199+
3200+
vmbus_close(hdev->channel);
3201+
30943202
return 0;
30953203
}
30963204

3205+
static int hv_pci_resume(struct hv_device *hdev)
3206+
{
3207+
struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
3208+
enum pci_protocol_version_t version[1];
3209+
int ret;
3210+
3211+
hbus->state = hv_pcibus_init;
3212+
3213+
ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
3214+
hv_pci_onchannelcallback, hbus);
3215+
if (ret)
3216+
return ret;
3217+
3218+
/* Only use the version that was in use before hibernation. */
3219+
version[0] = hbus->protocol_version;
3220+
ret = hv_pci_protocol_negotiation(hdev, version, 1);
3221+
if (ret)
3222+
goto out;
3223+
3224+
ret = hv_pci_query_relations(hdev);
3225+
if (ret)
3226+
goto out;
3227+
3228+
ret = hv_pci_enter_d0(hdev);
3229+
if (ret)
3230+
goto out;
3231+
3232+
ret = hv_send_resources_allocated(hdev);
3233+
if (ret)
3234+
goto out;
3235+
3236+
prepopulate_bars(hbus);
3237+
3238+
hbus->state = hv_pcibus_installed;
3239+
return 0;
3240+
out:
3241+
vmbus_close(hdev->channel);
3242+
return ret;
3243+
}
3244+
30973245
static const struct hv_vmbus_device_id hv_pci_id_table[] = {
30983246
/* PCI Pass-through Class ID */
30993247
/* 44C4F61D-4444-4400-9D52-802E27EDE19F */
@@ -3108,6 +3256,8 @@ static struct hv_driver hv_pci_drv = {
31083256
.id_table = hv_pci_id_table,
31093257
.probe = hv_pci_probe,
31103258
.remove = hv_pci_remove,
3259+
.suspend = hv_pci_suspend,
3260+
.resume = hv_pci_resume,
31113261
};
31123262

31133263
static void __exit exit_hv_pci_drv(void)

0 commit comments

Comments
 (0)