@@ -421,13 +421,20 @@ static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl)
421421static int openib_btl_size_queues (struct mca_btl_openib_module_t * openib_btl )
422422{
423423 uint32_t send_cqes , recv_cqes ;
424- int rc = OPAL_SUCCESS , qp ;
424+ int rc = OPAL_SUCCESS ;
425425 mca_btl_openib_device_t * device = openib_btl -> device ;
426+ uint32_t requested [BTL_OPENIB_MAX_CQ ];
427+ bool need_resize = false;
426428
427429 opal_mutex_lock (& openib_btl -> ib_lock );
430+
431+ for (int cq = 0 ; cq < BTL_OPENIB_MAX_CQ ; ++ cq ) {
432+ requested [cq ] = 0 ;
433+ }
434+
428435 /* figure out reasonable sizes for completion queues */
429- for ( qp = 0 ; qp < mca_btl_openib_component .num_qps ; qp ++ ) {
430- if (BTL_OPENIB_QP_TYPE_SRQ (qp )) {
436+ for ( int qp = 0 ; qp < mca_btl_openib_component .num_qps ; qp ++ ) {
437+ if (BTL_OPENIB_QP_TYPE_SRQ (qp )) {
431438 send_cqes = mca_btl_openib_component .qp_infos [qp ].u .srq_qp .sd_max ;
432439 recv_cqes = mca_btl_openib_component .qp_infos [qp ].rd_num ;
433440 } else {
@@ -436,24 +443,30 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl)
436443 recv_cqes = send_cqes ;
437444 }
438445
439- opal_mutex_lock (& openib_btl -> device -> device_lock );
440- openib_btl -> device -> cq_size [qp_cq_prio (qp )] += recv_cqes ;
441- openib_btl -> device -> cq_size [BTL_OPENIB_LP_CQ ] += send_cqes ;
442- opal_mutex_unlock (& openib_btl -> device -> device_lock );
446+ requested [qp_cq_prio (qp )] += recv_cqes ;
447+ requested [BTL_OPENIB_LP_CQ ] += send_cqes ;
443448 }
444449
445- rc = adjust_cq (device , BTL_OPENIB_HP_CQ );
446- if (OPAL_SUCCESS != rc ) {
447- goto out ;
448- }
450+ opal_mutex_lock (& openib_btl -> device -> device_lock );
451+ for (int cq = 0 ; cq < BTL_OPENIB_MAX_CQ ; ++ cq ) {
452+ if (requested [cq ] < mca_btl_openib_component .ib_cq_size [cq ]) {
453+ requested [cq ] = mca_btl_openib_component .ib_cq_size [cq ];
454+ } else if (requested [cq ] > openib_btl -> device -> ib_dev_attr .max_cqe ) {
455+ requested [cq ] = openib_btl -> device -> ib_dev_attr .max_cqe ;
456+ }
449457
450- rc = adjust_cq (device , BTL_OPENIB_LP_CQ );
451- if (OPAL_SUCCESS != rc ) {
452- goto out ;
453- }
458+ if (openib_btl -> device -> cq_size [cq ] < requested [cq ]) {
459+ openib_btl -> device -> cq_size [cq ] = requested [cq ];
454460
455- out :
461+ rc = adjust_cq (device , cq );
462+ if (OPAL_SUCCESS != rc ) {
463+ break ;
464+ }
465+ }
466+ }
467+ opal_mutex_unlock (& openib_btl -> device -> device_lock );
456468 opal_mutex_unlock (& openib_btl -> ib_lock );
469+
457470 return rc ;
458471}
459472
@@ -1081,7 +1094,7 @@ int mca_btl_openib_add_procs(
10811094 }
10821095
10831096 if (nprocs_new ) {
1084- OPAL_THREAD_ADD32 (& openib_btl -> num_peers , nprocs_new );
1097+ opal_atomic_add_32 (& openib_btl -> num_peers , nprocs_new );
10851098
10861099 /* adjust cq sizes given the new procs */
10871100 rc = openib_btl_size_queues (openib_btl );
@@ -1191,7 +1204,7 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11911204
11921205 /* this is a new process to this openib btl
11931206 * account this procs if need */
1194- OPAL_THREAD_ADD32 (& openib_btl -> num_peers , 1 );
1207+ opal_atomic_add_32 (& openib_btl -> num_peers , 1 );
11951208 rc = openib_btl_size_queues (openib_btl );
11961209 if (OPAL_SUCCESS != rc ) {
11971210 BTL_ERROR (("error creating cqs" ));
0 commit comments