Skip to content

Commit 081f359

Browse files
committed
Merge tag 'hyperv-fixes-signed-20221125' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv fixes from Wei Liu: - Fix IRTE allocation in Hyper-V PCI controller (Dexuan Cui) - Fix handling of SCSI srb_status and capacity change events (Michael Kelley) - Restore VP assist page after CPU offlining and onlining (Vitaly Kuznetsov) - Fix some memory leak issues in VMBus (Yang Yingliang) * tag 'hyperv-fixes-signed-20221125' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: Drivers: hv: vmbus: fix possible memory leak in vmbus_device_register() Drivers: hv: vmbus: fix double free in the error path of vmbus_add_channel_work() PCI: hv: Only reuse existing IRTE allocation for Multi-MSI scsi: storvsc: Fix handling of srb_status and capacity change events x86/hyperv: Restore VP assist page after cpu offlining/onlining
2 parents 0b1dcc2 + 25c94b0 commit 081f359

File tree

5 files changed

+141
-79
lines changed

5 files changed

+141
-79
lines changed

arch/x86/hyperv/hv_init.c

Lines changed: 26 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void)
7777
static int hv_cpu_init(unsigned int cpu)
7878
{
7979
union hv_vp_assist_msr_contents msr = { 0 };
80-
struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
80+
struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
8181
int ret;
8282

8383
ret = hv_common_cpu_init(cpu);
@@ -87,34 +87,32 @@ static int hv_cpu_init(unsigned int cpu)
8787
if (!hv_vp_assist_page)
8888
return 0;
8989

90-
if (!*hvp) {
91-
if (hv_root_partition) {
92-
/*
93-
* For root partition we get the hypervisor provided VP assist
94-
* page, instead of allocating a new page.
95-
*/
96-
rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
97-
*hvp = memremap(msr.pfn <<
98-
HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
99-
PAGE_SIZE, MEMREMAP_WB);
100-
} else {
101-
/*
102-
* The VP assist page is an "overlay" page (see Hyper-V TLFS's
103-
* Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
104-
* out to make sure we always write the EOI MSR in
105-
* hv_apic_eoi_write() *after* the EOI optimization is disabled
106-
* in hv_cpu_die(), otherwise a CPU may not be stopped in the
107-
* case of CPU offlining and the VM will hang.
108-
*/
90+
if (hv_root_partition) {
91+
/*
92+
* For root partition we get the hypervisor provided VP assist
93+
* page, instead of allocating a new page.
94+
*/
95+
rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
96+
*hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
97+
PAGE_SIZE, MEMREMAP_WB);
98+
} else {
99+
/*
100+
* The VP assist page is an "overlay" page (see Hyper-V TLFS's
101+
* Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
102+
* out to make sure we always write the EOI MSR in
103+
* hv_apic_eoi_write() *after* the EOI optimization is disabled
104+
* in hv_cpu_die(), otherwise a CPU may not be stopped in the
105+
* case of CPU offlining and the VM will hang.
106+
*/
107+
if (!*hvp)
109108
*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
110-
if (*hvp)
111-
msr.pfn = vmalloc_to_pfn(*hvp);
112-
}
113-
WARN_ON(!(*hvp));
114-
if (*hvp) {
115-
msr.enable = 1;
116-
wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
117-
}
109+
if (*hvp)
110+
msr.pfn = vmalloc_to_pfn(*hvp);
111+
112+
}
113+
if (!WARN_ON(!(*hvp))) {
114+
msr.enable = 1;
115+
wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
118116
}
119117

120118
return hyperv_init_ghcb();

drivers/hv/channel_mgmt.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,13 +533,17 @@ static void vmbus_add_channel_work(struct work_struct *work)
533533
* Add the new device to the bus. This will kick off device-driver
534534
* binding which eventually invokes the device driver's AddDevice()
535535
* method.
536+
*
537+
* If vmbus_device_register() fails, the 'device_obj' is freed in
538+
* vmbus_device_release() as called by device_unregister() in the
539+
* error path of vmbus_device_register(). In the outside error
540+
* path, there's no need to free it.
536541
*/
537542
ret = vmbus_device_register(newchannel->device_obj);
538543

539544
if (ret != 0) {
540545
pr_err("unable to add child device object (relid %d)\n",
541546
newchannel->offermsg.child_relid);
542-
kfree(newchannel->device_obj);
543547
goto err_deq_chan;
544548
}
545549

drivers/hv/vmbus_drv.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2082,6 +2082,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
20822082
ret = device_register(&child_device_obj->device);
20832083
if (ret) {
20842084
pr_err("Unable to register child device\n");
2085+
put_device(&child_device_obj->device);
20852086
return ret;
20862087
}
20872088

drivers/pci/controller/pci-hyperv.c

Lines changed: 75 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,7 +1613,7 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp,
16131613
}
16141614

16151615
static u32 hv_compose_msi_req_v1(
1616-
struct pci_create_interrupt *int_pkt, const struct cpumask *affinity,
1616+
struct pci_create_interrupt *int_pkt,
16171617
u32 slot, u8 vector, u16 vector_count)
16181618
{
16191619
int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
@@ -1631,6 +1631,35 @@ static u32 hv_compose_msi_req_v1(
16311631
return sizeof(*int_pkt);
16321632
}
16331633

1634+
/*
1635+
* The vCPU selected by hv_compose_multi_msi_req_get_cpu() and
1636+
* hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be
1637+
* interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V
1638+
* via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is
1639+
* not irrelevant because Hyper-V chooses the physical CPU to handle the
1640+
* interrupts based on the vCPU specified in message sent to the vPCI VSP in
1641+
* hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest,
1642+
* but assigning too many vPCI device interrupts to the same pCPU can cause a
1643+
* performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V
1644+
* to spread out the pCPUs that it selects.
1645+
*
1646+
* For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu()
1647+
* to always return the same dummy vCPU, because a second call to
1648+
* hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a
1649+
* new pCPU for the interrupt. But for the multi-MSI case, the second call to
1650+
* hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the
1651+
* original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that
1652+
* the pCPUs are spread out. All interrupts for a multi-MSI device end up using
1653+
* the same pCPU, even though the vCPUs will be spread out by later calls
1654+
* to hv_irq_unmask(), but that is the best we can do now.
1655+
*
1656+
* With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not*
1657+
* cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an
1658+
* enhancement is planned for a future version. With that enhancement, the
1659+
* dummy vCPU selection won't matter, and interrupts for the same multi-MSI
1660+
* device will be spread across multiple pCPUs.
1661+
*/
1662+
16341663
/*
16351664
* Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
16361665
* by subsequent retarget in hv_irq_unmask().
@@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity)
16401669
return cpumask_first_and(affinity, cpu_online_mask);
16411670
}
16421671

1643-
static u32 hv_compose_msi_req_v2(
1644-
struct pci_create_interrupt2 *int_pkt, const struct cpumask *affinity,
1645-
u32 slot, u8 vector, u16 vector_count)
1672+
/*
1673+
* Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
1674+
*/
1675+
static int hv_compose_multi_msi_req_get_cpu(void)
16461676
{
1677+
static DEFINE_SPINLOCK(multi_msi_cpu_lock);
1678+
1679+
/* -1 means starting with CPU 0 */
1680+
static int cpu_next = -1;
1681+
1682+
unsigned long flags;
16471683
int cpu;
16481684

1685+
spin_lock_irqsave(&multi_msi_cpu_lock, flags);
1686+
1687+
cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids,
1688+
false);
1689+
cpu = cpu_next;
1690+
1691+
spin_unlock_irqrestore(&multi_msi_cpu_lock, flags);
1692+
1693+
return cpu;
1694+
}
1695+
1696+
static u32 hv_compose_msi_req_v2(
1697+
struct pci_create_interrupt2 *int_pkt, int cpu,
1698+
u32 slot, u8 vector, u16 vector_count)
1699+
{
16491700
int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
16501701
int_pkt->wslot.slot = slot;
16511702
int_pkt->int_desc.vector = vector;
16521703
int_pkt->int_desc.vector_count = vector_count;
16531704
int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
1654-
cpu = hv_compose_msi_req_get_cpu(affinity);
16551705
int_pkt->int_desc.processor_array[0] =
16561706
hv_cpu_number_to_vp_number(cpu);
16571707
int_pkt->int_desc.processor_count = 1;
@@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2(
16601710
}
16611711

16621712
static u32 hv_compose_msi_req_v3(
1663-
struct pci_create_interrupt3 *int_pkt, const struct cpumask *affinity,
1713+
struct pci_create_interrupt3 *int_pkt, int cpu,
16641714
u32 slot, u32 vector, u16 vector_count)
16651715
{
1666-
int cpu;
1667-
16681716
int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3;
16691717
int_pkt->wslot.slot = slot;
16701718
int_pkt->int_desc.vector = vector;
16711719
int_pkt->int_desc.reserved = 0;
16721720
int_pkt->int_desc.vector_count = vector_count;
16731721
int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
1674-
cpu = hv_compose_msi_req_get_cpu(affinity);
16751722
int_pkt->int_desc.processor_array[0] =
16761723
hv_cpu_number_to_vp_number(cpu);
16771724
int_pkt->int_desc.processor_count = 1;
@@ -1715,20 +1762,25 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
17151762
struct pci_create_interrupt3 v3;
17161763
} int_pkts;
17171764
} __packed ctxt;
1765+
bool multi_msi;
17181766
u64 trans_id;
17191767
u32 size;
17201768
int ret;
1769+
int cpu;
1770+
1771+
msi_desc = irq_data_get_msi_desc(data);
1772+
multi_msi = !msi_desc->pci.msi_attrib.is_msix &&
1773+
msi_desc->nvec_used > 1;
17211774

17221775
/* Reuse the previous allocation */
1723-
if (data->chip_data) {
1776+
if (data->chip_data && multi_msi) {
17241777
int_desc = data->chip_data;
17251778
msg->address_hi = int_desc->address >> 32;
17261779
msg->address_lo = int_desc->address & 0xffffffff;
17271780
msg->data = int_desc->data;
17281781
return;
17291782
}
17301783

1731-
msi_desc = irq_data_get_msi_desc(data);
17321784
pdev = msi_desc_to_pci_dev(msi_desc);
17331785
dest = irq_data_get_effective_affinity_mask(data);
17341786
pbus = pdev->bus;
@@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
17381790
if (!hpdev)
17391791
goto return_null_message;
17401792

1793+
/* Free any previous message that might have already been composed. */
1794+
if (data->chip_data && !multi_msi) {
1795+
int_desc = data->chip_data;
1796+
data->chip_data = NULL;
1797+
hv_int_desc_free(hpdev, int_desc);
1798+
}
1799+
17411800
int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
17421801
if (!int_desc)
17431802
goto drop_reference;
17441803

1745-
if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) {
1804+
if (multi_msi) {
17461805
/*
17471806
* If this is not the first MSI of Multi MSI, we already have
17481807
* a mapping. Can exit early.
@@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
17671826
*/
17681827
vector = 32;
17691828
vector_count = msi_desc->nvec_used;
1829+
cpu = hv_compose_multi_msi_req_get_cpu();
17701830
} else {
17711831
vector = hv_msi_get_int_vector(data);
17721832
vector_count = 1;
1833+
cpu = hv_compose_msi_req_get_cpu(dest);
17731834
}
17741835

17751836
/*
@@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
17851846
switch (hbus->protocol_version) {
17861847
case PCI_PROTOCOL_VERSION_1_1:
17871848
size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
1788-
dest,
17891849
hpdev->desc.win_slot.slot,
17901850
(u8)vector,
17911851
vector_count);
@@ -1794,15 +1854,15 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
17941854
case PCI_PROTOCOL_VERSION_1_2:
17951855
case PCI_PROTOCOL_VERSION_1_3:
17961856
size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
1797-
dest,
1857+
cpu,
17981858
hpdev->desc.win_slot.slot,
17991859
(u8)vector,
18001860
vector_count);
18011861
break;
18021862

18031863
case PCI_PROTOCOL_VERSION_1_4:
18041864
size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
1805-
dest,
1865+
cpu,
18061866
hpdev->desc.win_slot.slot,
18071867
vector,
18081868
vector_count);

drivers/scsi/storvsc_drv.c

Lines changed: 34 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -303,16 +303,21 @@ enum storvsc_request_type {
303303
};
304304

305305
/*
306-
* SRB status codes and masks; a subset of the codes used here.
306+
* SRB status codes and masks. In the 8-bit field, the two high order bits
307+
* are flags, while the remaining 6 bits are an integer status code. The
308+
* definitions here include only the subset of the integer status codes that
309+
* are tested for in this driver.
307310
*/
308-
309311
#define SRB_STATUS_AUTOSENSE_VALID 0x80
310312
#define SRB_STATUS_QUEUE_FROZEN 0x40
311-
#define SRB_STATUS_INVALID_LUN 0x20
312-
#define SRB_STATUS_SUCCESS 0x01
313-
#define SRB_STATUS_ABORTED 0x02
314-
#define SRB_STATUS_ERROR 0x04
315-
#define SRB_STATUS_DATA_OVERRUN 0x12
313+
314+
/* SRB status integer codes */
315+
#define SRB_STATUS_SUCCESS 0x01
316+
#define SRB_STATUS_ABORTED 0x02
317+
#define SRB_STATUS_ERROR 0x04
318+
#define SRB_STATUS_INVALID_REQUEST 0x06
319+
#define SRB_STATUS_DATA_OVERRUN 0x12
320+
#define SRB_STATUS_INVALID_LUN 0x20
316321

317322
#define SRB_STATUS(status) \
318323
(status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
@@ -969,38 +974,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
969974
void (*process_err_fn)(struct work_struct *work);
970975
struct hv_host_device *host_dev = shost_priv(host);
971976

972-
/*
973-
* In some situations, Hyper-V sets multiple bits in the
974-
* srb_status, such as ABORTED and ERROR. So process them
975-
* individually, with the most specific bits first.
976-
*/
977-
978-
if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) {
979-
set_host_byte(scmnd, DID_NO_CONNECT);
980-
process_err_fn = storvsc_remove_lun;
981-
goto do_work;
982-
}
977+
switch (SRB_STATUS(vm_srb->srb_status)) {
978+
case SRB_STATUS_ERROR:
979+
case SRB_STATUS_ABORTED:
980+
case SRB_STATUS_INVALID_REQUEST:
981+
if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) {
982+
/* Check for capacity change */
983+
if ((asc == 0x2a) && (ascq == 0x9)) {
984+
process_err_fn = storvsc_device_scan;
985+
/* Retry the I/O that triggered this. */
986+
set_host_byte(scmnd, DID_REQUEUE);
987+
goto do_work;
988+
}
983989

984-
if (vm_srb->srb_status & SRB_STATUS_ABORTED) {
985-
if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
986-
/* Capacity data has changed */
987-
(asc == 0x2a) && (ascq == 0x9)) {
988-
process_err_fn = storvsc_device_scan;
989990
/*
990-
* Retry the I/O that triggered this.
991+
* Otherwise, let upper layer deal with the
992+
* error when sense message is present
991993
*/
992-
set_host_byte(scmnd, DID_REQUEUE);
993-
goto do_work;
994-
}
995-
}
996-
997-
if (vm_srb->srb_status & SRB_STATUS_ERROR) {
998-
/*
999-
* Let upper layer deal with error when
1000-
* sense message is present.
1001-
*/
1002-
if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
1003994
return;
995+
}
1004996

1005997
/*
1006998
* If there is an error; offline the device since all
@@ -1023,6 +1015,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
10231015
default:
10241016
set_host_byte(scmnd, DID_ERROR);
10251017
}
1018+
return;
1019+
1020+
case SRB_STATUS_INVALID_LUN:
1021+
set_host_byte(scmnd, DID_NO_CONNECT);
1022+
process_err_fn = storvsc_remove_lun;
1023+
goto do_work;
1024+
10261025
}
10271026
return;
10281027

0 commit comments

Comments
 (0)