@@ -621,6 +621,64 @@ static inline struct storvsc_device *get_in_stor_device(
 
 }
 
+static void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old,
+				      u32 new)
+{
+	struct storvsc_device *stor_device;
+	struct vmbus_channel *cur_chn;
+	bool old_is_alloced = false;
+	struct hv_device *device;
+	unsigned long flags;
+	int cpu;
+
+	device = channel->primary_channel ?
+			channel->primary_channel->device_obj
+				: channel->device_obj;
+	stor_device = get_out_stor_device(device);
+	if (!stor_device)
+		return;
+
+	/* See storvsc_do_io() -> get_og_chn(). */
+	spin_lock_irqsave(&device->channel->lock, flags);
+
+	/*
+	 * Determines if the storvsc device has other channels assigned to
+	 * the "old" CPU to update the alloced_cpus mask and the stor_chns
+	 * array.
+	 */
+	if (device->channel != channel && device->channel->target_cpu == old) {
+		cur_chn = device->channel;
+		old_is_alloced = true;
+		goto old_is_alloced;
+	}
+	list_for_each_entry(cur_chn, &device->channel->sc_list, sc_list) {
+		if (cur_chn == channel)
+			continue;
+		if (cur_chn->target_cpu == old) {
+			old_is_alloced = true;
+			goto old_is_alloced;
+		}
+	}
+
+old_is_alloced:
+	if (old_is_alloced)
+		WRITE_ONCE(stor_device->stor_chns[old], cur_chn);
+	else
+		cpumask_clear_cpu(old, &stor_device->alloced_cpus);
+
+	/* "Flush" the stor_chns array. */
+	for_each_possible_cpu(cpu) {
+		if (stor_device->stor_chns[cpu] && !cpumask_test_cpu(
+					cpu, &stor_device->alloced_cpus))
+			WRITE_ONCE(stor_device->stor_chns[cpu], NULL);
+	}
+
+	WRITE_ONCE(stor_device->stor_chns[new], channel);
+	cpumask_set_cpu(new, &stor_device->alloced_cpus);
+
+	spin_unlock_irqrestore(&device->channel->lock, flags);
+}
+
 static void handle_sc_creation(struct vmbus_channel *new_sc)
 {
 	struct hv_device *device = new_sc->primary_channel->device_obj;
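Note: a minimal, self-contained sketch of the update/lookup discipline this new callback relies on, illustrative only. The identifiers below (struct chan, chan_table, table_lock, table_move(), table_lookup()) are hypothetical stand-ins, not storvsc code; they mirror the stor_chns[]/device->channel->lock pattern above, where writers serialize on a spinlock and publish pointers with WRITE_ONCE(), while the I/O path may read the table locklessly with READ_ONCE() and must tolerate a NULL slot.

/*
 * Illustrative sketch only: struct chan, chan_table, table_lock,
 * table_move() and table_lookup() are hypothetical, not part of storvsc.
 */
#include <linux/spinlock.h>
#include <linux/compiler.h>
#include <linux/threads.h>
#include <linux/types.h>

struct chan;					/* opaque channel type */

static struct chan *chan_table[NR_CPUS];	/* sparse per-CPU cache */
static DEFINE_SPINLOCK(table_lock);		/* serializes every writer */

/* Writer: retire the old slot and publish the new one under the lock. */
static void table_move(struct chan *c, u32 old, u32 new)
{
	unsigned long flags;

	spin_lock_irqsave(&table_lock, flags);
	WRITE_ONCE(chan_table[old], NULL);
	WRITE_ONCE(chan_table[new], c);
	spin_unlock_irqrestore(&table_lock, flags);
}

/* Reader: lockless snapshot; a NULL result sends the caller to a slow path. */
static struct chan *table_lookup(u32 cpu)
{
	return READ_ONCE(chan_table[cpu]);
}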
@@ -648,6 +706,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
 		return;
 	}
 
+	new_sc->change_target_cpu_callback = storvsc_change_target_cpu;
+
 	/* Add the sub-channel to the array of available channels. */
 	stor_device->stor_chns[new_sc->target_cpu] = new_sc;
 	cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
@@ -876,6 +936,8 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
 	if (stor_device->stor_chns == NULL)
 		return -ENOMEM;
 
+	device->channel->change_target_cpu_callback = storvsc_change_target_cpu;
+
 	stor_device->stor_chns[device->channel->target_cpu] = device->channel;
 	cpumask_set_cpu(device->channel->target_cpu,
 			&stor_device->alloced_cpus);
@@ -1248,8 +1310,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
 	const struct cpumask *node_mask;
 	int num_channels, tgt_cpu;
 
-	if (stor_device->num_sc == 0)
+	if (stor_device->num_sc == 0) {
+		stor_device->stor_chns[q_num] = stor_device->device->channel;
 		return stor_device->device->channel;
+	}
 
 	/*
 	 * Our channel array is sparsley populated and we
@@ -1258,7 +1322,6 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
 	 * The strategy is simple:
 	 * I. Ensure NUMA locality
 	 * II. Distribute evenly (best effort)
-	 * III. Mapping is persistent.
 	 */
 
 	node_mask = cpumask_of_node(cpu_to_node(q_num));
@@ -1268,8 +1331,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
 		if (cpumask_test_cpu(tgt_cpu, node_mask))
 			num_channels++;
 	}
-	if (num_channels == 0)
+	if (num_channels == 0) {
+		stor_device->stor_chns[q_num] = stor_device->device->channel;
 		return stor_device->device->channel;
+	}
 
 	hash_qnum = q_num;
 	while (hash_qnum >= num_channels)
@@ -1295,6 +1360,7 @@ static int storvsc_do_io(struct hv_device *device,
 	struct storvsc_device *stor_device;
 	struct vstor_packet *vstor_packet;
 	struct vmbus_channel *outgoing_channel, *channel;
+	unsigned long flags;
 	int ret = 0;
 	const struct cpumask *node_mask;
 	int tgt_cpu;
@@ -1308,10 +1374,11 @@ static int storvsc_do_io(struct hv_device *device,
 
 	request->device = device;
 	/*
-	 * Select an an appropriate channel to send the request out.
+	 * Select an appropriate channel to send the request out.
 	 */
-	if (stor_device->stor_chns[q_num] != NULL) {
-		outgoing_channel = stor_device->stor_chns[q_num];
+	/* See storvsc_change_target_cpu(). */
+	outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
+	if (outgoing_channel != NULL) {
 		if (outgoing_channel->target_cpu == q_num) {
 			/*
 			 * Ideally, we want to pick a different channel if
@@ -1324,7 +1391,10 @@ static int storvsc_do_io(struct hv_device *device,
 					continue;
 				if (tgt_cpu == q_num)
 					continue;
-				channel = stor_device->stor_chns[tgt_cpu];
+				channel = READ_ONCE(
+					stor_device->stor_chns[tgt_cpu]);
+				if (channel == NULL)
+					continue;
 				if (hv_get_avail_to_write_percent(
 							&channel->outbound)
 						> ring_avail_percent_lowater) {
@@ -1350,7 +1420,10 @@ static int storvsc_do_io(struct hv_device *device,
 			for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
 				if (cpumask_test_cpu(tgt_cpu, node_mask))
 					continue;
-				channel = stor_device->stor_chns[tgt_cpu];
+				channel = READ_ONCE(
+					stor_device->stor_chns[tgt_cpu]);
+				if (channel == NULL)
+					continue;
 				if (hv_get_avail_to_write_percent(
 							&channel->outbound)
 						> ring_avail_percent_lowater) {
@@ -1360,7 +1433,14 @@ static int storvsc_do_io(struct hv_device *device,
 			}
 		}
 	} else {
+		spin_lock_irqsave(&device->channel->lock, flags);
+		outgoing_channel = stor_device->stor_chns[q_num];
+		if (outgoing_channel != NULL) {
+			spin_unlock_irqrestore(&device->channel->lock, flags);
+			goto found_channel;
+		}
 		outgoing_channel = get_og_chn(stor_device, q_num);
+		spin_unlock_irqrestore(&device->channel->lock, flags);
 	}
 
 found_channel:
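Note: the slow path added to storvsc_do_io() above is a double-checked lookup: once the lockless READ_ONCE() misses, the cached slot is re-checked under the same lock that storvsc_change_target_cpu() takes, and only then is a new mapping computed (get_og_chn() in the real driver). A sketch under the same assumptions as the earlier example (chan_table and table_lock are the hypothetical names introduced there; compute_chan() is a hypothetical get_og_chn() analogue):

/* Hypothetical slow path, mirroring the "} else {" branch above. */
static struct chan *compute_chan(u32 cpu);	/* picks a channel and caches it */

static struct chan *table_lookup_slow(u32 cpu)
{
	struct chan *c;
	unsigned long flags;

	spin_lock_irqsave(&table_lock, flags);
	c = chan_table[cpu];		/* re-check: a racing CPU-change callback
					 * may have filled the slot meanwhile */
	if (!c)
		c = compute_chan(cpu);
	spin_unlock_irqrestore(&table_lock, flags);

	return c;
}

The lock only covers the writers and this fallback; the common case stays lock-free. That is also why get_og_chn() now records its choice in stor_chns[] before returning the primary channel: the "Mapping is persistent" guarantee was dropped, so the table itself must be kept current.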