@@ -57,7 +57,8 @@ static int cray_resolve_peers(const char *nodename,
5757 opal_list_t * procs );
5858static int cray_resolve_nodes (opal_jobid_t jobid , char * * nodelist );
5959static int cray_put (opal_pmix_scope_t scope , opal_value_t * kv );
60- static int cray_fence (opal_list_t * procs , int collect_data );
60+ static int cray_fencenb (opal_list_t * procs , int collect_data ,
61+ opal_pmix_op_cbfunc_t cbfunc , void * cbdata );
6162static int cray_commit (void );
6263static int cray_get (const opal_process_name_t * id ,
6364 const char * key , opal_list_t * info ,
@@ -90,8 +91,8 @@ const opal_pmix_base_module_t opal_pmix_cray_module = {
9091 .initialized = cray_initialized ,
9192 .abort = cray_abort ,
9293 .commit = cray_commit ,
93- .fence = cray_fence ,
94- .fence_nb = NULL ,
94+ .fence = NULL ,
95+ .fence_nb = cray_fencenb ,
9596 .put = cray_put ,
9697 .get = cray_get ,
9798 .get_nb = cray_get_nb ,
@@ -119,6 +120,17 @@ const opal_pmix_base_module_t opal_pmix_cray_module = {
119120// usage accounting
120121static int pmix_init_count = 0 ;
121122
123+ // local object: caddy that carries a fence request's completion callback through an event
124+ typedef struct {
125+ opal_object_t super ; // OPAL object base; instances are created with OBJ_NEW and dropped with OBJ_RELEASE
126+ opal_event_t ev ; // event that cray_fencenb assigns to the fencenb handler and then activates
127+ opal_pmix_op_cbfunc_t opcbfunc ; // caller's completion callback; fencenb skips the call when this is NULL
128+ void * cbdata ; // opaque caller pointer handed back as the second argument of opcbfunc
129+ } pmi_opcaddy_t ;
130+ OBJ_CLASS_INSTANCE (pmi_opcaddy_t ,
131+ opal_object_t ,
132+ NULL , NULL ); // NULL, NULL: presumably no constructor/destructor hooks -- confirm against OPAL's class macros
133+
122134// PMI constant values:
123135static int pmix_kvslen_max = 0 ;
124136static int pmix_keylen_max = 0 ;
@@ -512,8 +524,9 @@ static int cray_commit(void)
512524 return OPAL_SUCCESS ;
513525}
514526
515- static int cray_fence ( opal_list_t * procs , int collect_data )
527+ static void fencenb ( int sd , short args , void * cbdata )
516528{
529+ pmi_opcaddy_t * op = (pmi_opcaddy_t * )cbdata ;
517530 int rc , cnt ;
518531 int32_t i ;
519532 int * all_lens = NULL ;
@@ -550,7 +563,8 @@ static int cray_fence(opal_list_t *procs, int collect_data)
550563
551564 send_buffer = OBJ_NEW (opal_buffer_t );
552565 if (NULL == send_buffer ) {
553- return OPAL_ERR_OUT_OF_RESOURCE ;
566+ rc = OPAL_ERR_OUT_OF_RESOURCE ;
567+ goto fn_exit ;
554568 }
555569
556570 opal_dss .copy_payload (send_buffer , mca_pmix_cray_component .cache_global );
@@ -668,7 +682,7 @@ static int cray_fence(opal_list_t *procs, int collect_data)
668682 * for every process in the job.
669683 *
670684 * we only need to set locality for each local rank as "not found"
671- * equates to "non-local"
685+ * equates to "non-local"
672686 */
673687
674688 for (i = 0 ; i < pmix_nlranks ; i ++ ) {
@@ -732,7 +746,27 @@ static int cray_fence(opal_list_t *procs, int collect_data)
732746 if (r_bytes_and_ranks != NULL ) {
733747 free (r_bytes_and_ranks );
734748 }
735- return rc ;
749+ if (NULL != op -> opcbfunc ) {
750+ op -> opcbfunc (rc , op -> cbdata );
751+ }
752+ OBJ_RELEASE (op );
753+ return ;
754+ }
755+
756+ static int cray_fencenb (opal_list_t * procs , int collect_data ,
757+ opal_pmix_op_cbfunc_t cbfunc , void * cbdata )
758+ {
759+ pmi_opcaddy_t * op ;
760+
761+ /* thread-shift this so we don't block in Cray's barrier */
762+ op = OBJ_NEW (pmi_opcaddy_t );
763+ op -> opcbfunc = cbfunc ;
764+ op -> cbdata = cbdata ;
765+ event_assign (& op -> ev , opal_pmix_base .evbase , -1 ,
766+ EV_WRITE , fencenb , op );
767+ event_active (& op -> ev , EV_WRITE , 1 );
768+
769+ return OPAL_SUCCESS ;
736770}
737771
738772static int cray_get (const opal_process_name_t * id , const char * key , opal_list_t * info , opal_value_t * * kv )
0 commit comments