@@ -1406,14 +1406,19 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
14061406 }
14071407
14081408 /*
1409- * Our channel array is sparsley populated and we
1409+ * Our channel array could be sparsely populated and we
14101410 * initiated I/O on a processor/hw-q that does not
14111411 * currently have a designated channel. Fix this.
14121412 * The strategy is simple:
1413- * I. Ensure NUMA locality
1414- * II. Distribute evenly (best effort)
1413+ * I. Prefer the channel associated with the current CPU
1414+ * II. Ensure NUMA locality
1415+ * III. Distribute evenly (best effort)
14151416 */
14161417
1418+ /* Prefer the channel on the I/O issuing processor/hw-q */
1419+ if (cpumask_test_cpu (q_num , & stor_device -> alloced_cpus ))
1420+ return stor_device -> stor_chns [q_num ];
1421+
14171422 node_mask = cpumask_of_node (cpu_to_node (q_num ));
14181423
14191424 num_channels = 0 ;
@@ -1469,59 +1474,48 @@ static int storvsc_do_io(struct hv_device *device,
14691474 /* See storvsc_change_target_cpu(). */
14701475 outgoing_channel = READ_ONCE (stor_device -> stor_chns [q_num ]);
14711476 if (outgoing_channel != NULL ) {
1472- if (outgoing_channel -> target_cpu == q_num ) {
1473- /*
1474- * Ideally, we want to pick a different channel if
1475- * available on the same NUMA node.
1476- */
1477- node_mask = cpumask_of_node (cpu_to_node (q_num ));
1478- for_each_cpu_wrap (tgt_cpu ,
1479- & stor_device -> alloced_cpus , q_num + 1 ) {
1480- if (!cpumask_test_cpu (tgt_cpu , node_mask ))
1481- continue ;
1482- if (tgt_cpu == q_num )
1483- continue ;
1484- channel = READ_ONCE (
1485- stor_device -> stor_chns [tgt_cpu ]);
1486- if (channel == NULL )
1487- continue ;
1488- if (hv_get_avail_to_write_percent (
1489- & channel -> outbound )
1490- > ring_avail_percent_lowater ) {
1491- outgoing_channel = channel ;
1492- goto found_channel ;
1493- }
1494- }
1477+ if (hv_get_avail_to_write_percent (& outgoing_channel -> outbound )
1478+ > ring_avail_percent_lowater )
1479+ goto found_channel ;
14951480
1496- /*
1497- * All the other channels on the same NUMA node are
1498- * busy. Try to use the channel on the current CPU
1499- */
1500- if (hv_get_avail_to_write_percent (
1501- & outgoing_channel -> outbound )
1502- > ring_avail_percent_lowater )
1481+ /*
1482+ * Channel is busy, try to find a channel on the same NUMA node
1483+ */
1484+ node_mask = cpumask_of_node (cpu_to_node (q_num ));
1485+ for_each_cpu_wrap (tgt_cpu , & stor_device -> alloced_cpus ,
1486+ q_num + 1 ) {
1487+ if (!cpumask_test_cpu (tgt_cpu , node_mask ))
1488+ continue ;
1489+ channel = READ_ONCE (stor_device -> stor_chns [tgt_cpu ]);
1490+ if (!channel )
1491+ continue ;
1492+ if (hv_get_avail_to_write_percent (& channel -> outbound )
1493+ > ring_avail_percent_lowater ) {
1494+ outgoing_channel = channel ;
15031495 goto found_channel ;
1496+ }
1497+ }
15041498
1505- /*
1506- * If we reach here, all the channels on the current
1507- * NUMA node are busy. Try to find a channel in
1508- * other NUMA nodes
1509- */
1510- for_each_cpu (tgt_cpu , & stor_device -> alloced_cpus ) {
1511- if (cpumask_test_cpu (tgt_cpu , node_mask ))
1512- continue ;
1513- channel = READ_ONCE (
1514- stor_device -> stor_chns [tgt_cpu ]);
1515- if (channel == NULL )
1516- continue ;
1517- if (hv_get_avail_to_write_percent (
1518- & channel -> outbound )
1519- > ring_avail_percent_lowater ) {
1520- outgoing_channel = channel ;
1521- goto found_channel ;
1522- }
1499+ /*
1500+ * If we reach here, all the channels on the current
1501+ * NUMA node are busy. Try to find a channel in
1502+ * all NUMA nodes
1503+ */
1504+ for_each_cpu_wrap (tgt_cpu , & stor_device -> alloced_cpus ,
1505+ q_num + 1 ) {
1506+ channel = READ_ONCE (stor_device -> stor_chns [tgt_cpu ]);
1507+ if (!channel )
1508+ continue ;
1509+ if (hv_get_avail_to_write_percent (& channel -> outbound )
1510+ > ring_avail_percent_lowater ) {
1511+ outgoing_channel = channel ;
1512+ goto found_channel ;
15231513 }
15241514 }
1515+ /*
1516+ * If we reach here, all the channels are busy. Use the
1517+ * original channel found.
1518+ */
15251519 } else {
15261520 spin_lock_irqsave (& stor_device -> lock , flags );
15271521 outgoing_channel = stor_device -> stor_chns [q_num ];
0 commit comments