Skip to content

Commit afaa33d

Browse files
andrea-parri, liuw
authored and committed
Drivers: hv: vmbus: Resolve more races involving init_vp_index()
init_vp_index() uses the (per-node) hv_numa_map[] masks to record the CPUs allocated for channel interrupts at a given time, and to distribute the performance-critical channels across the available CPUs: in particular, the mask of "candidate" target CPUs in a given NUMA node, for a newly offered channel, is determined by XOR-ing the node's CPU mask and the node's hv_numa_map. This operation/mechanism assumes that no offline CPU is set in the hv_numa_map mask, an assumption that does not hold since this mask is currently not updated when a channel is removed or assigned to a different CPU.

To address the issues described above, this adds hooks in the channel removal path (hv_process_channel_removal()) and in target_cpu_store() in order to clear and to update, respectively, the hv_numa_map[] masks as needed. This also adds a (missed) update of the masks in init_vp_index() (cf., e.g., the memory-allocation failure path in this function).

As in the case of init_vp_index(), such hooks need to determine whether the given channel is performance critical. init_vp_index() does this by parsing the channel's offer; it cannot rely on the device data structure (device_obj) to retrieve such information because the device data structure has not been allocated/linked with the channel by the time that init_vp_index() executes. A similar situation may hold in hv_is_alloced_cpu() (defined below); the adopted approach is to "cache" the device type of the channel, as computed by parsing the channel's offer, in the channel structure itself.

Fixes: 7527810 ("Drivers: hv: vmbus: Introduce the CHANNELMSG_MODIFYCHANNEL message type")
Signed-off-by: Andrea Parri (Microsoft) <[email protected]>
Reviewed-by: Michael Kelley <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Wei Liu <[email protected]>
1 parent a949e86 commit afaa33d

File tree

4 files changed

+84
-12
lines changed

4 files changed

+84
-12
lines changed

drivers/hv/channel_mgmt.c

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,9 @@
2424

2525
#include "hyperv_vmbus.h"
2626

27-
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
27+
static void init_vp_index(struct vmbus_channel *channel);
2828

29-
static const struct vmbus_device vmbus_devs[] = {
29+
const struct vmbus_device vmbus_devs[] = {
3030
/* IDE */
3131
{ .dev_type = HV_IDE,
3232
HV_IDE_GUID,
@@ -431,6 +431,13 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
431431
spin_unlock_irqrestore(&primary_channel->lock, flags);
432432
}
433433

434+
/*
435+
* If this is a "perf" channel, updates the hv_numa_map[] masks so that
436+
* init_vp_index() can (re-)use the CPU.
437+
*/
438+
if (hv_is_perf_channel(channel))
439+
hv_clear_alloced_cpu(channel->target_cpu);
440+
434441
/*
435442
* Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
436443
* the relid is invalidated; after hibernation, when the user-space app
@@ -497,7 +504,7 @@ static void vmbus_add_channel_work(struct work_struct *work)
497504
if (!newchannel->device_obj)
498505
goto err_deq_chan;
499506

500-
newchannel->device_obj->device_id = hv_get_dev_type(newchannel);
507+
newchannel->device_obj->device_id = newchannel->device_id;
501508
/*
502509
* Add the new device to the bus. This will kick off device-driver
503510
* binding which eventually invokes the device driver's AddDevice()
@@ -580,7 +587,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
580587
*/
581588
mutex_lock(&vmbus_connection.channel_mutex);
582589

583-
init_vp_index(newchannel, hv_get_dev_type(newchannel));
590+
init_vp_index(newchannel);
584591

585592
/* Remember the channels that should be cleaned up upon suspend. */
586593
if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
@@ -676,9 +683,9 @@ static int next_numa_node_id;
676683
* evenly among all the available NUMA nodes. Once the node is assigned,
677684
* we will assign the CPU based on a simple round robin scheme.
678685
*/
679-
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
686+
static void init_vp_index(struct vmbus_channel *channel)
680687
{
681-
bool perf_chn = vmbus_devs[dev_type].perf_device;
688+
bool perf_chn = hv_is_perf_channel(channel);
682689
cpumask_var_t available_mask;
683690
struct cpumask *alloced_mask;
684691
u32 target_cpu;
@@ -699,6 +706,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
699706
channel->target_cpu = VMBUS_CONNECT_CPU;
700707
channel->target_vp =
701708
hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
709+
if (perf_chn)
710+
hv_set_alloced_cpu(VMBUS_CONNECT_CPU);
702711
return;
703712
}
704713

@@ -862,6 +871,7 @@ static void vmbus_setup_channel_state(struct vmbus_channel *channel,
862871
sizeof(struct vmbus_channel_offer_channel));
863872
channel->monitor_grp = (u8)offer->monitorid / 32;
864873
channel->monitor_bit = (u8)offer->monitorid % 32;
874+
channel->device_id = hv_get_dev_type(channel);
865875
}
866876

867877
/*

drivers/hv/hyperv_vmbus.h

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -395,6 +395,54 @@ enum delay {
395395
MESSAGE_DELAY = 1,
396396
};
397397

398+
extern const struct vmbus_device vmbus_devs[];
399+
400+
static inline bool hv_is_perf_channel(struct vmbus_channel *channel)
401+
{
402+
return vmbus_devs[channel->device_id].perf_device;
403+
}
404+
405+
static inline bool hv_is_alloced_cpu(unsigned int cpu)
406+
{
407+
struct vmbus_channel *channel, *sc;
408+
409+
lockdep_assert_held(&vmbus_connection.channel_mutex);
410+
/*
411+
* List additions/deletions as well as updates of the target CPUs are
412+
* protected by channel_mutex.
413+
*/
414+
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
415+
if (!hv_is_perf_channel(channel))
416+
continue;
417+
if (channel->target_cpu == cpu)
418+
return true;
419+
list_for_each_entry(sc, &channel->sc_list, sc_list) {
420+
if (sc->target_cpu == cpu)
421+
return true;
422+
}
423+
}
424+
return false;
425+
}
426+
427+
static inline void hv_set_alloced_cpu(unsigned int cpu)
428+
{
429+
cpumask_set_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
430+
}
431+
432+
static inline void hv_clear_alloced_cpu(unsigned int cpu)
433+
{
434+
if (hv_is_alloced_cpu(cpu))
435+
return;
436+
cpumask_clear_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
437+
}
438+
439+
static inline void hv_update_alloced_cpus(unsigned int old_cpu,
440+
unsigned int new_cpu)
441+
{
442+
hv_set_alloced_cpu(new_cpu);
443+
hv_clear_alloced_cpu(old_cpu);
444+
}
445+
398446
#ifdef CONFIG_HYPERV_TESTING
399447

400448
int hv_debug_add_dev_dir(struct hv_device *dev);

drivers/hv/vmbus_drv.c

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1687,8 +1687,8 @@ static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
16871687
static ssize_t target_cpu_store(struct vmbus_channel *channel,
16881688
const char *buf, size_t count)
16891689
{
1690+
u32 target_cpu, origin_cpu;
16901691
ssize_t ret = count;
1691-
u32 target_cpu;
16921692

16931693
if (vmbus_proto_version < VERSION_WIN10_V4_1)
16941694
return -EIO;
@@ -1741,7 +1741,8 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
17411741
goto cpu_store_unlock;
17421742
}
17431743

1744-
if (channel->target_cpu == target_cpu)
1744+
origin_cpu = channel->target_cpu;
1745+
if (target_cpu == origin_cpu)
17451746
goto cpu_store_unlock;
17461747

17471748
if (vmbus_send_modifychannel(channel->offermsg.child_relid,
@@ -1763,14 +1764,20 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
17631764
* in on a CPU that is different from the channel target_cpu value.
17641765
*/
17651766

1766-
if (channel->change_target_cpu_callback)
1767-
(*channel->change_target_cpu_callback)(channel,
1768-
channel->target_cpu, target_cpu);
1769-
17701767
channel->target_cpu = target_cpu;
17711768
channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
17721769
channel->numa_node = cpu_to_node(target_cpu);
17731770

1771+
/* See init_vp_index(). */
1772+
if (hv_is_perf_channel(channel))
1773+
hv_update_alloced_cpus(origin_cpu, target_cpu);
1774+
1775+
/* Currently set only for storvsc channels. */
1776+
if (channel->change_target_cpu_callback) {
1777+
(*channel->change_target_cpu_callback)(channel,
1778+
origin_cpu, target_cpu);
1779+
}
1780+
17741781
cpu_store_unlock:
17751782
mutex_unlock(&vmbus_connection.channel_mutex);
17761783
cpus_read_unlock();

include/linux/hyperv.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -901,6 +901,13 @@ struct vmbus_channel {
901901

902902
bool probe_done;
903903

904+
/*
905+
* Cache the device ID here for easy access; this is useful, in
906+
* particular, in situations where the channel's device_obj has
907+
* not been allocated/initialized yet.
908+
*/
909+
u16 device_id;
910+
904911
/*
905912
* We must offload the handling of the primary/sub channels
906913
* from the single-threaded vmbus_connection.work_queue to

0 commit comments

Comments
 (0)