@@ -206,10 +206,73 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev)
206
206
};
207
207
}
208
208
209
+ static void ublk_print_cpu_set (const cpu_set_t * set , char * buf , unsigned len )
210
+ {
211
+ unsigned done = 0 ;
212
+ int i ;
213
+
214
+ for (i = 0 ; i < CPU_SETSIZE ; i ++ ) {
215
+ if (CPU_ISSET (i , set ))
216
+ done += snprintf (& buf [done ], len - done , "%d " , i );
217
+ }
218
+ }
219
+
220
+ static void ublk_adjust_affinity (cpu_set_t * set )
221
+ {
222
+ int j , updated = 0 ;
223
+
224
+ /*
225
+ * Just keep the 1st CPU now.
226
+ *
227
+ * In future, auto affinity selection can be tried.
228
+ */
229
+ for (j = 0 ; j < CPU_SETSIZE ; j ++ ) {
230
+ if (CPU_ISSET (j , set )) {
231
+ if (!updated ) {
232
+ updated = 1 ;
233
+ continue ;
234
+ }
235
+ CPU_CLR (j , set );
236
+ }
237
+ }
238
+ }
239
+
240
+ /* Caller must free the allocated buffer */
241
+ static int ublk_ctrl_get_affinity (struct ublk_dev * ctrl_dev , cpu_set_t * * ptr_buf )
242
+ {
243
+ struct ublk_ctrl_cmd_data data = {
244
+ .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY ,
245
+ .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF ,
246
+ };
247
+ cpu_set_t * buf ;
248
+ int i , ret ;
249
+
250
+ buf = malloc (sizeof (cpu_set_t ) * ctrl_dev -> dev_info .nr_hw_queues );
251
+ if (!buf )
252
+ return - ENOMEM ;
253
+
254
+ for (i = 0 ; i < ctrl_dev -> dev_info .nr_hw_queues ; i ++ ) {
255
+ data .data [0 ] = i ;
256
+ data .len = sizeof (cpu_set_t );
257
+ data .addr = (__u64 )& buf [i ];
258
+
259
+ ret = __ublk_ctrl_cmd (ctrl_dev , & data );
260
+ if (ret < 0 ) {
261
+ free (buf );
262
+ return ret ;
263
+ }
264
+ ublk_adjust_affinity (& buf [i ]);
265
+ }
266
+
267
+ * ptr_buf = buf ;
268
+ return 0 ;
269
+ }
270
+
209
271
static void ublk_ctrl_dump (struct ublk_dev * dev )
210
272
{
211
273
struct ublksrv_ctrl_dev_info * info = & dev -> dev_info ;
212
274
struct ublk_params p ;
275
+ cpu_set_t * affinity ;
213
276
int ret ;
214
277
215
278
ret = ublk_ctrl_get_params (dev , & p );
@@ -218,12 +281,31 @@ static void ublk_ctrl_dump(struct ublk_dev *dev)
218
281
return ;
219
282
}
220
283
284
+ ret = ublk_ctrl_get_affinity (dev , & affinity );
285
+ if (ret < 0 ) {
286
+ ublk_err ("failed to get affinity %m\n" );
287
+ return ;
288
+ }
289
+
221
290
ublk_log ("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n" ,
222
291
info -> dev_id , info -> nr_hw_queues , info -> queue_depth ,
223
292
1 << p .basic .logical_bs_shift , p .basic .dev_sectors );
224
293
ublk_log ("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n" ,
225
294
info -> max_io_buf_bytes , info -> ublksrv_pid , info -> flags ,
226
295
ublk_dev_state_desc (dev ));
296
+
297
+ if (affinity ) {
298
+ char buf [512 ];
299
+ int i ;
300
+
301
+ for (i = 0 ; i < info -> nr_hw_queues ; i ++ ) {
302
+ ublk_print_cpu_set (& affinity [i ], buf , sizeof (buf ));
303
+ printf ("\tqueue %u: tid %d affinity(%s)\n" ,
304
+ i , dev -> q [i ].tid , buf );
305
+ }
306
+ free (affinity );
307
+ }
308
+
227
309
fflush (stdout );
228
310
}
229
311
@@ -603,9 +685,24 @@ static int ublk_process_io(struct ublk_queue *q)
603
685
return reapped ;
604
686
}
605
687
688
+ static void ublk_queue_set_sched_affinity (const struct ublk_queue * q ,
689
+ cpu_set_t * cpuset )
690
+ {
691
+ if (sched_setaffinity (0 , sizeof (* cpuset ), cpuset ) < 0 )
692
+ ublk_err ("ublk dev %u queue %u set affinity failed" ,
693
+ q -> dev -> dev_info .dev_id , q -> q_id );
694
+ }
695
+
696
/*
 * Per-queue startup arguments handed to ublk_io_handler_fn() through
 * pthread_create().  Owned by the parent (ublk_start_daemon()'s qinfo[]
 * array); valid until the queue thread posts queue_sem.
 */
struct ublk_queue_info {
	struct ublk_queue *q;		/* the queue this thread services */
	sem_t *queue_sem;		/* posted once the thread has set its CPU affinity */
	cpu_set_t *affinity;		/* single-CPU mask from ublk_ctrl_get_affinity() */
};
701
+
606
702
static void * ublk_io_handler_fn (void * data )
607
703
{
608
- struct ublk_queue * q = data ;
704
+ struct ublk_queue_info * info = data ;
705
+ struct ublk_queue * q = info -> q ;
609
706
int dev_id = q -> dev -> dev_info .dev_id ;
610
707
int ret ;
611
708
@@ -615,6 +712,10 @@ static void *ublk_io_handler_fn(void *data)
615
712
dev_id , q -> q_id );
616
713
return NULL ;
617
714
}
715
+ /* IO perf is sensitive with queue pthread affinity on NUMA machine*/
716
+ ublk_queue_set_sched_affinity (q , info -> affinity );
717
+ sem_post (info -> queue_sem );
718
+
618
719
ublk_dbg (UBLK_DBG_QUEUE , "tid %d: ublk dev %d queue %d started\n" ,
619
720
q -> tid , dev_id , q -> q_id );
620
721
@@ -640,7 +741,7 @@ static void ublk_set_parameters(struct ublk_dev *dev)
640
741
dev -> dev_info .dev_id , ret );
641
742
}
642
743
643
- static int ublk_send_dev_event (const struct dev_ctx * ctx , int dev_id )
744
+ static int ublk_send_dev_event (const struct dev_ctx * ctx , struct ublk_dev * dev , int dev_id )
644
745
{
645
746
uint64_t id ;
646
747
int evtfd = ctx -> _evtfd ;
@@ -653,35 +754,61 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
653
754
else
654
755
id = ERROR_EVTFD_DEVID ;
655
756
757
+ if (dev && ctx -> shadow_dev )
758
+ memcpy (& ctx -> shadow_dev -> q , & dev -> q , sizeof (dev -> q ));
759
+
656
760
if (write (evtfd , & id , sizeof (id )) != sizeof (id ))
657
761
return - EINVAL ;
658
762
659
763
close (evtfd );
764
+ shmdt (ctx -> shadow_dev );
660
765
661
766
return 0 ;
662
767
}
663
768
664
769
665
770
static int ublk_start_daemon (const struct dev_ctx * ctx , struct ublk_dev * dev )
666
771
{
667
- int ret , i ;
668
- void * thread_ret ;
669
772
const struct ublksrv_ctrl_dev_info * dinfo = & dev -> dev_info ;
773
+ struct ublk_queue_info * qinfo ;
774
+ cpu_set_t * affinity_buf ;
775
+ void * thread_ret ;
776
+ sem_t queue_sem ;
777
+ int ret , i ;
670
778
671
779
ublk_dbg (UBLK_DBG_DEV , "%s enter\n" , __func__ );
672
780
781
+ qinfo = (struct ublk_queue_info * )calloc (sizeof (struct ublk_queue_info ),
782
+ dinfo -> nr_hw_queues );
783
+ if (!qinfo )
784
+ return - ENOMEM ;
785
+
786
+ sem_init (& queue_sem , 0 , 0 );
673
787
ret = ublk_dev_prep (ctx , dev );
674
788
if (ret )
675
789
return ret ;
676
790
791
+ ret = ublk_ctrl_get_affinity (dev , & affinity_buf );
792
+ if (ret )
793
+ return ret ;
794
+
677
795
for (i = 0 ; i < dinfo -> nr_hw_queues ; i ++ ) {
678
796
dev -> q [i ].dev = dev ;
679
797
dev -> q [i ].q_id = i ;
798
+
799
+ qinfo [i ].q = & dev -> q [i ];
800
+ qinfo [i ].queue_sem = & queue_sem ;
801
+ qinfo [i ].affinity = & affinity_buf [i ];
680
802
pthread_create (& dev -> q [i ].thread , NULL ,
681
803
ublk_io_handler_fn ,
682
- & dev -> q [i ]);
804
+ & qinfo [i ]);
683
805
}
684
806
807
+ for (i = 0 ; i < dinfo -> nr_hw_queues ; i ++ )
808
+ sem_wait (& queue_sem );
809
+ free (qinfo );
810
+ free (affinity_buf );
811
+
685
812
/* everything is fine now, start us */
686
813
ublk_set_parameters (dev );
687
814
ret = ublk_ctrl_start_dev (dev , getpid ());
@@ -694,7 +821,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
694
821
if (ctx -> fg )
695
822
ublk_ctrl_dump (dev );
696
823
else
697
- ublk_send_dev_event (ctx , dev -> dev_info .dev_id );
824
+ ublk_send_dev_event (ctx , dev , dev -> dev_info .dev_id );
698
825
699
826
/* wait until we are terminated */
700
827
for (i = 0 ; i < dinfo -> nr_hw_queues ; i ++ )
@@ -873,7 +1000,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
873
1000
874
1001
fail :
875
1002
if (ret < 0 )
876
- ublk_send_dev_event (ctx , -1 );
1003
+ ublk_send_dev_event (ctx , dev , -1 );
877
1004
ublk_ctrl_deinit (dev );
878
1005
return ret ;
879
1006
}
@@ -887,6 +1014,16 @@ static int cmd_dev_add(struct dev_ctx *ctx)
887
1014
if (ctx -> fg )
888
1015
goto run ;
889
1016
1017
+ ctx -> _shmid = shmget (IPC_PRIVATE , sizeof (struct ublk_dev ), IPC_CREAT | 0666 );
1018
+ if (ctx -> _shmid < 0 ) {
1019
+ ublk_err ("%s: failed to shmget %s\n" , __func__ , strerror (errno ));
1020
+ exit (-1 );
1021
+ }
1022
+ ctx -> shadow_dev = (struct ublk_dev * )shmat (ctx -> _shmid , NULL , 0 );
1023
+ if (ctx -> shadow_dev == (struct ublk_dev * )-1 ) {
1024
+ ublk_err ("%s: failed to shmat %s\n" , __func__ , strerror (errno ));
1025
+ exit (-1 );
1026
+ }
890
1027
ctx -> _evtfd = eventfd (0 , 0 );
891
1028
if (ctx -> _evtfd < 0 ) {
892
1029
ublk_err ("%s: failed to create eventfd %s\n" , __func__ , strerror (errno ));
@@ -922,6 +1059,8 @@ static int cmd_dev_add(struct dev_ctx *ctx)
922
1059
if (__cmd_dev_list (ctx ) >= 0 )
923
1060
exit_code = EXIT_SUCCESS ;
924
1061
}
1062
+ shmdt (ctx -> shadow_dev );
1063
+ shmctl (ctx -> _shmid , IPC_RMID , NULL );
925
1064
/* wait for child and detach from it */
926
1065
wait (NULL );
927
1066
exit (exit_code );
@@ -988,6 +1127,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx)
988
1127
ublk_err ("%s: can't get dev info from %d: %d\n" ,
989
1128
__func__ , ctx -> dev_id , ret );
990
1129
} else {
1130
+ if (ctx -> shadow_dev )
1131
+ memcpy (& dev -> q , ctx -> shadow_dev -> q , sizeof (dev -> q ));
1132
+
991
1133
ublk_ctrl_dump (dev );
992
1134
}
993
1135
0 commit comments