@@ -76,11 +76,6 @@ static enum pci_protocol_version_t pci_protocol_versions[] = {
76
76
PCI_PROTOCOL_VERSION_1_1 ,
77
77
};
78
78
79
- /*
80
- * Protocol version negotiated by hv_pci_protocol_negotiation().
81
- */
82
- static enum pci_protocol_version_t pci_protocol_version ;
83
-
84
79
#define PCI_CONFIG_MMIO_LENGTH 0x2000
85
80
#define CFG_PAGE_OFFSET 0x1000
86
81
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
@@ -455,12 +450,15 @@ enum hv_pcibus_state {
455
450
hv_pcibus_init = 0 ,
456
451
hv_pcibus_probed ,
457
452
hv_pcibus_installed ,
453
+ hv_pcibus_removing ,
458
454
hv_pcibus_removed ,
459
455
hv_pcibus_maximum
460
456
};
461
457
462
458
struct hv_pcibus_device {
463
459
struct pci_sysdata sysdata ;
460
+ /* Protocol version negotiated with the host */
461
+ enum pci_protocol_version_t protocol_version ;
464
462
enum hv_pcibus_state state ;
465
463
refcount_t remove_lock ;
466
464
struct hv_device * hdev ;
@@ -1224,7 +1222,7 @@ static void hv_irq_unmask(struct irq_data *data)
1224
1222
* negative effect (yet?).
1225
1223
*/
1226
1224
1227
- if (pci_protocol_version >= PCI_PROTOCOL_VERSION_1_2 ) {
1225
+ if (hbus -> protocol_version >= PCI_PROTOCOL_VERSION_1_2 ) {
1228
1226
/*
1229
1227
* PCI_PROTOCOL_VERSION_1_2 supports the VP_SET version of the
1230
1228
* HVCALL_RETARGET_INTERRUPT hypercall, which also coincides
@@ -1394,7 +1392,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
1394
1392
ctxt .pci_pkt .completion_func = hv_pci_compose_compl ;
1395
1393
ctxt .pci_pkt .compl_ctxt = & comp ;
1396
1394
1397
- switch (pci_protocol_version ) {
1395
+ switch (hbus -> protocol_version ) {
1398
1396
case PCI_PROTOCOL_VERSION_1_1 :
1399
1397
size = hv_compose_msi_req_v1 (& ctxt .int_pkts .v1 ,
1400
1398
dest ,
@@ -1681,6 +1679,23 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
1681
1679
1682
1680
spin_lock_irqsave (& hbus -> device_list_lock , flags );
1683
1681
1682
+ /*
1683
+ * Clear the memory enable bit, in case it's already set. This occurs
1684
+ * in the suspend path of hibernation, where the device is suspended,
1685
+ * resumed and suspended again: see hibernation_snapshot() and
1686
+ * hibernation_platform_enter().
1687
+ *
1688
+ * If the memory enable bit is already set, Hyper-V silently ignores
1689
+ * the below BAR updates, and the related PCI device driver can not
1690
+ * work, because reading from the device register(s) always returns
1691
+ * 0xFFFFFFFF.
1692
+ */
1693
+ list_for_each_entry (hpdev , & hbus -> children , list_entry ) {
1694
+ _hv_pcifront_read_config (hpdev , PCI_COMMAND , 2 , & command );
1695
+ command &= ~PCI_COMMAND_MEMORY ;
1696
+ _hv_pcifront_write_config (hpdev , PCI_COMMAND , 2 , command );
1697
+ }
1698
+
1684
1699
/* Pick addresses for the BARs. */
1685
1700
do {
1686
1701
list_for_each_entry (hpdev , & hbus -> children , list_entry ) {
@@ -2107,6 +2122,12 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
2107
2122
unsigned long flags ;
2108
2123
bool pending_dr ;
2109
2124
2125
+ if (hbus -> state == hv_pcibus_removing ) {
2126
+ dev_info (& hbus -> hdev -> device ,
2127
+ "PCI VMBus BUS_RELATIONS: ignored\n" );
2128
+ return ;
2129
+ }
2130
+
2110
2131
dr_wrk = kzalloc (sizeof (* dr_wrk ), GFP_NOWAIT );
2111
2132
if (!dr_wrk )
2112
2133
return ;
@@ -2223,11 +2244,19 @@ static void hv_eject_device_work(struct work_struct *work)
2223
2244
*/
2224
2245
static void hv_pci_eject_device (struct hv_pci_dev * hpdev )
2225
2246
{
2247
+ struct hv_pcibus_device * hbus = hpdev -> hbus ;
2248
+ struct hv_device * hdev = hbus -> hdev ;
2249
+
2250
+ if (hbus -> state == hv_pcibus_removing ) {
2251
+ dev_info (& hdev -> device , "PCI VMBus EJECT: ignored\n" );
2252
+ return ;
2253
+ }
2254
+
2226
2255
hpdev -> state = hv_pcichild_ejecting ;
2227
2256
get_pcichild (hpdev );
2228
2257
INIT_WORK (& hpdev -> wrk , hv_eject_device_work );
2229
- get_hvpcibus (hpdev -> hbus );
2230
- queue_work (hpdev -> hbus -> wq , & hpdev -> wrk );
2258
+ get_hvpcibus (hbus );
2259
+ queue_work (hbus -> wq , & hpdev -> wrk );
2231
2260
}
2232
2261
2233
2262
/**
@@ -2379,8 +2408,11 @@ static void hv_pci_onchannelcallback(void *context)
2379
2408
* failing if the host doesn't support the necessary protocol
2380
2409
* level.
2381
2410
*/
2382
- static int hv_pci_protocol_negotiation (struct hv_device * hdev )
2411
+ static int hv_pci_protocol_negotiation (struct hv_device * hdev ,
2412
+ enum pci_protocol_version_t version [],
2413
+ int num_version )
2383
2414
{
2415
+ struct hv_pcibus_device * hbus = hv_get_drvdata (hdev );
2384
2416
struct pci_version_request * version_req ;
2385
2417
struct hv_pci_compl comp_pkt ;
2386
2418
struct pci_packet * pkt ;
@@ -2403,8 +2435,8 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
2403
2435
version_req = (struct pci_version_request * )& pkt -> message ;
2404
2436
version_req -> message_type .type = PCI_QUERY_PROTOCOL_VERSION ;
2405
2437
2406
- for (i = 0 ; i < ARRAY_SIZE ( pci_protocol_versions ) ; i ++ ) {
2407
- version_req -> protocol_version = pci_protocol_versions [i ];
2438
+ for (i = 0 ; i < num_version ; i ++ ) {
2439
+ version_req -> protocol_version = version [i ];
2408
2440
ret = vmbus_sendpacket (hdev -> channel , version_req ,
2409
2441
sizeof (struct pci_version_request ),
2410
2442
(unsigned long )pkt , VM_PKT_DATA_INBAND ,
@@ -2420,10 +2452,10 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
2420
2452
}
2421
2453
2422
2454
if (comp_pkt .completion_status >= 0 ) {
2423
- pci_protocol_version = pci_protocol_versions [i ];
2455
+ hbus -> protocol_version = version [i ];
2424
2456
dev_info (& hdev -> device ,
2425
2457
"PCI VMBus probing: Using version %#x\n" ,
2426
- pci_protocol_version );
2458
+ hbus -> protocol_version );
2427
2459
goto exit ;
2428
2460
}
2429
2461
@@ -2707,7 +2739,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
2707
2739
u32 wslot ;
2708
2740
int ret ;
2709
2741
2710
- size_res = (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2 )
2742
+ size_res = (hbus -> protocol_version < PCI_PROTOCOL_VERSION_1_2 )
2711
2743
? sizeof (* res_assigned ) : sizeof (* res_assigned2 );
2712
2744
2713
2745
pkt = kmalloc (sizeof (* pkt ) + size_res , GFP_KERNEL );
@@ -2726,7 +2758,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
2726
2758
pkt -> completion_func = hv_pci_generic_compl ;
2727
2759
pkt -> compl_ctxt = & comp_pkt ;
2728
2760
2729
- if (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2 ) {
2761
+ if (hbus -> protocol_version < PCI_PROTOCOL_VERSION_1_2 ) {
2730
2762
res_assigned =
2731
2763
(struct pci_resources_assigned * )& pkt -> message ;
2732
2764
res_assigned -> message_type .type =
@@ -2870,9 +2902,27 @@ static int hv_pci_probe(struct hv_device *hdev,
2870
2902
* hv_pcibus_device contains the hypercall arguments for retargeting in
2871
2903
* hv_irq_unmask(). Those must not cross a page boundary.
2872
2904
*/
2873
- BUILD_BUG_ON (sizeof (* hbus ) > PAGE_SIZE );
2905
+ BUILD_BUG_ON (sizeof (* hbus ) > HV_HYP_PAGE_SIZE );
2874
2906
2875
- hbus = (struct hv_pcibus_device * )get_zeroed_page (GFP_KERNEL );
2907
+ /*
2908
+ * With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural
2909
+ * alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate
2910
+ * a 4KB buffer that is guaranteed to be 4KB-aligned. Here the size and
2911
+ * alignment of hbus is important because hbus's field
2912
+ * retarget_msi_interrupt_params must not cross a 4KB page boundary.
2913
+ *
2914
+ * Here we prefer kzalloc to get_zeroed_page(), because a buffer
2915
+ * allocated by the latter is not tracked and scanned by kmemleak, and
2916
+ * hence kmemleak reports the pointer contained in the hbus buffer
2917
+ * (i.e. the hpdev struct, which is created in new_pcichild_device() and
2918
+ * is tracked by hbus->children) as memory leak (false positive).
2919
+ *
2920
+ * If the kernel doesn't have 59bb47985c1d, get_zeroed_page() *must* be
2921
+ * used to allocate the hbus buffer and we can avoid the kmemleak false
2922
+ * positive by using kmemleak_alloc() and kmemleak_free() to ask
2923
+ * kmemleak to track and scan the hbus buffer.
2924
+ */
2925
+ hbus = (struct hv_pcibus_device * )kzalloc (HV_HYP_PAGE_SIZE , GFP_KERNEL );
2876
2926
if (!hbus )
2877
2927
return - ENOMEM ;
2878
2928
hbus -> state = hv_pcibus_init ;
@@ -2930,7 +2980,8 @@ static int hv_pci_probe(struct hv_device *hdev,
2930
2980
2931
2981
hv_set_drvdata (hdev , hbus );
2932
2982
2933
- ret = hv_pci_protocol_negotiation (hdev );
2983
+ ret = hv_pci_protocol_negotiation (hdev , pci_protocol_versions ,
2984
+ ARRAY_SIZE (pci_protocol_versions ));
2934
2985
if (ret )
2935
2986
goto close ;
2936
2987
@@ -3011,7 +3062,7 @@ static int hv_pci_probe(struct hv_device *hdev,
3011
3062
return ret ;
3012
3063
}
3013
3064
3014
- static void hv_pci_bus_exit (struct hv_device * hdev )
3065
+ static int hv_pci_bus_exit (struct hv_device * hdev , bool hibernating )
3015
3066
{
3016
3067
struct hv_pcibus_device * hbus = hv_get_drvdata (hdev );
3017
3068
struct {
@@ -3027,16 +3078,20 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
3027
3078
* access the per-channel ringbuffer any longer.
3028
3079
*/
3029
3080
if (hdev -> channel -> rescind )
3030
- return ;
3081
+ return 0 ;
3031
3082
3032
- /* Delete any children which might still exist. */
3033
- memset (& relations , 0 , sizeof (relations ));
3034
- hv_pci_devices_present (hbus , & relations );
3083
+ if (!hibernating ) {
3084
+ /* Delete any children which might still exist. */
3085
+ memset (& relations , 0 , sizeof (relations ));
3086
+ hv_pci_devices_present (hbus , & relations );
3087
+ }
3035
3088
3036
3089
ret = hv_send_resources_released (hdev );
3037
- if (ret )
3090
+ if (ret ) {
3038
3091
dev_err (& hdev -> device ,
3039
3092
"Couldn't send resources released packet(s)\n" );
3093
+ return ret ;
3094
+ }
3040
3095
3041
3096
memset (& pkt .teardown_packet , 0 , sizeof (pkt .teardown_packet ));
3042
3097
init_completion (& comp_pkt .host_event );
@@ -3049,8 +3104,13 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
3049
3104
(unsigned long )& pkt .teardown_packet ,
3050
3105
VM_PKT_DATA_INBAND ,
3051
3106
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED );
3052
- if (!ret )
3053
- wait_for_completion_timeout (& comp_pkt .host_event , 10 * HZ );
3107
+ if (ret )
3108
+ return ret ;
3109
+
3110
+ if (wait_for_completion_timeout (& comp_pkt .host_event , 10 * HZ ) == 0 )
3111
+ return - ETIMEDOUT ;
3112
+
3113
+ return 0 ;
3054
3114
}
3055
3115
3056
3116
/**
@@ -3062,6 +3122,7 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
3062
3122
static int hv_pci_remove (struct hv_device * hdev )
3063
3123
{
3064
3124
struct hv_pcibus_device * hbus ;
3125
+ int ret ;
3065
3126
3066
3127
hbus = hv_get_drvdata (hdev );
3067
3128
if (hbus -> state == hv_pcibus_installed ) {
@@ -3074,7 +3135,7 @@ static int hv_pci_remove(struct hv_device *hdev)
3074
3135
hbus -> state = hv_pcibus_removed ;
3075
3136
}
3076
3137
3077
- hv_pci_bus_exit (hdev );
3138
+ ret = hv_pci_bus_exit (hdev , false );
3078
3139
3079
3140
vmbus_close (hdev -> channel );
3080
3141
@@ -3090,10 +3151,97 @@ static int hv_pci_remove(struct hv_device *hdev)
3090
3151
3091
3152
hv_put_dom_num (hbus -> sysdata .domain );
3092
3153
3093
- free_page ((unsigned long )hbus );
3154
+ kfree (hbus );
3155
+ return ret ;
3156
+ }
3157
+
3158
+ static int hv_pci_suspend (struct hv_device * hdev )
3159
+ {
3160
+ struct hv_pcibus_device * hbus = hv_get_drvdata (hdev );
3161
+ enum hv_pcibus_state old_state ;
3162
+ int ret ;
3163
+
3164
+ /*
3165
+ * hv_pci_suspend() must make sure there are no pending work items
3166
+ * before calling vmbus_close(), since it runs in a process context
3167
+ * as a callback in dpm_suspend(). When it starts to run, the channel
3168
+ * callback hv_pci_onchannelcallback(), which runs in a tasklet
3169
+ * context, can be still running concurrently and scheduling new work
3170
+ * items onto hbus->wq in hv_pci_devices_present() and
3171
+ * hv_pci_eject_device(), and the work item handlers can access the
3172
+ * vmbus channel, which may concurrently be being closed by hv_pci_suspend(), e.g.
3173
+ * the work item handler pci_devices_present_work() ->
3174
+ * new_pcichild_device() writes to the vmbus channel.
3175
+ *
3176
+ * To eliminate the race, hv_pci_suspend() disables the channel
3177
+ * callback tasklet, sets hbus->state to hv_pcibus_removing, and
3178
+ * re-enables the tasklet. This way, when hv_pci_suspend() proceeds,
3179
+ * it knows that no new work item can be scheduled, and then it flushes
3180
+ * hbus->wq and safely closes the vmbus channel.
3181
+ */
3182
+ tasklet_disable (& hdev -> channel -> callback_event );
3183
+
3184
+ /* Change the hbus state to prevent new work items. */
3185
+ old_state = hbus -> state ;
3186
+ if (hbus -> state == hv_pcibus_installed )
3187
+ hbus -> state = hv_pcibus_removing ;
3188
+
3189
+ tasklet_enable (& hdev -> channel -> callback_event );
3190
+
3191
+ if (old_state != hv_pcibus_installed )
3192
+ return - EINVAL ;
3193
+
3194
+ flush_workqueue (hbus -> wq );
3195
+
3196
+ ret = hv_pci_bus_exit (hdev , true);
3197
+ if (ret )
3198
+ return ret ;
3199
+
3200
+ vmbus_close (hdev -> channel );
3201
+
3094
3202
return 0 ;
3095
3203
}
3096
3204
3205
+ static int hv_pci_resume (struct hv_device * hdev )
3206
+ {
3207
+ struct hv_pcibus_device * hbus = hv_get_drvdata (hdev );
3208
+ enum pci_protocol_version_t version [1 ];
3209
+ int ret ;
3210
+
3211
+ hbus -> state = hv_pcibus_init ;
3212
+
3213
+ ret = vmbus_open (hdev -> channel , pci_ring_size , pci_ring_size , NULL , 0 ,
3214
+ hv_pci_onchannelcallback , hbus );
3215
+ if (ret )
3216
+ return ret ;
3217
+
3218
+ /* Only use the version that was in use before hibernation. */
3219
+ version [0 ] = hbus -> protocol_version ;
3220
+ ret = hv_pci_protocol_negotiation (hdev , version , 1 );
3221
+ if (ret )
3222
+ goto out ;
3223
+
3224
+ ret = hv_pci_query_relations (hdev );
3225
+ if (ret )
3226
+ goto out ;
3227
+
3228
+ ret = hv_pci_enter_d0 (hdev );
3229
+ if (ret )
3230
+ goto out ;
3231
+
3232
+ ret = hv_send_resources_allocated (hdev );
3233
+ if (ret )
3234
+ goto out ;
3235
+
3236
+ prepopulate_bars (hbus );
3237
+
3238
+ hbus -> state = hv_pcibus_installed ;
3239
+ return 0 ;
3240
+ out :
3241
+ vmbus_close (hdev -> channel );
3242
+ return ret ;
3243
+ }
3244
+
3097
3245
static const struct hv_vmbus_device_id hv_pci_id_table [] = {
3098
3246
/* PCI Pass-through Class ID */
3099
3247
/* 44C4F61D-4444-4400-9D52-802E27EDE19F */
@@ -3108,6 +3256,8 @@ static struct hv_driver hv_pci_drv = {
3108
3256
.id_table = hv_pci_id_table ,
3109
3257
.probe = hv_pci_probe ,
3110
3258
.remove = hv_pci_remove ,
3259
+ .suspend = hv_pci_suspend ,
3260
+ .resume = hv_pci_resume ,
3111
3261
};
3112
3262
3113
3263
static void __exit exit_hv_pci_drv (void )
0 commit comments