@@ -57,7 +57,8 @@ static int cray_resolve_peers(const char *nodename,
5757 opal_list_t * procs );
5858static int cray_resolve_nodes (opal_jobid_t jobid , char * * nodelist );
5959static int cray_put (opal_pmix_scope_t scope , opal_value_t * kv );
60- static int cray_fence (opal_list_t * procs , int collect_data );
60+ static int cray_fencenb (opal_list_t * procs , int collect_data ,
61+ opal_pmix_op_cbfunc_t cbfunc , void * cbdata );
6162static int cray_commit (void );
6263static int cray_get (const opal_process_name_t * id ,
6364 const char * key , opal_list_t * info ,
@@ -90,8 +91,8 @@ const opal_pmix_base_module_t opal_pmix_cray_module = {
9091 .initialized = cray_initialized ,
9192 .abort = cray_abort ,
9293 .commit = cray_commit ,
93- .fence = cray_fence ,
94- .fence_nb = NULL ,
94+ .fence = NULL ,
95+ .fence_nb = cray_fencenb ,
9596 .put = cray_put ,
9697 .get = cray_get ,
9798 .get_nb = cray_get_nb ,
@@ -119,6 +120,17 @@ const opal_pmix_base_module_t opal_pmix_cray_module = {
119120// usage accounting
120121static int pmix_init_count = 0 ;
121122
123+ // local caddy object: carries a fence request's completion callback while the request is thread-shifted onto the event base
124+ typedef struct {
125+ opal_object_t super ; // parent object; instances are created with OBJ_NEW and dropped with OBJ_RELEASE
126+ opal_event_t ev ; // event that cray_fencenb assigns and activates to run fencenb()
127+ opal_pmix_op_cbfunc_t opcbfunc ; // caller's completion callback; fencenb() skips it when NULL
128+ void * cbdata ; // opaque pointer handed back to opcbfunc together with the status
129+ } pmi_opcaddy_t ;
130+ OBJ_CLASS_INSTANCE (pmi_opcaddy_t ,
131+ opal_object_t ,
132+ NULL , NULL ); // no constructor/destructor registered, so fields must be set by the creator
133+
122134// PMI constant values:
123135static int pmix_kvslen_max = 0 ;
124136static int pmix_keylen_max = 0 ;
@@ -524,8 +536,9 @@ static int cray_commit(void)
524536 return OPAL_SUCCESS ;
525537}
526538
527- static int cray_fence ( opal_list_t * procs , int collect_data )
539+ static void fencenb ( int sd , short args , void * cbdata )
528540{
541+ pmi_opcaddy_t * op = (pmi_opcaddy_t * )cbdata ;
529542 int rc , cnt ;
530543 int32_t i ;
531544 int * all_lens = NULL ;
@@ -562,7 +575,8 @@ static int cray_fence(opal_list_t *procs, int collect_data)
562575
563576 send_buffer = OBJ_NEW (opal_buffer_t );
564577 if (NULL == send_buffer ) {
565- return OPAL_ERR_OUT_OF_RESOURCE ;
578+ rc = OPAL_ERR_OUT_OF_RESOURCE ;
579+ goto fn_exit ;
566580 }
567581
568582 opal_dss .copy_payload (send_buffer , mca_pmix_cray_component .cache_global );
@@ -680,7 +694,7 @@ static int cray_fence(opal_list_t *procs, int collect_data)
680694 * for every process in the job.
681695 *
682696 * we only need to set locality for each local rank as "not found"
683- * equates to "non-local"
697+ * equates to "non-local"
684698 */
685699
686700 for (i = 0 ; i < pmix_nlranks ; i ++ ) {
@@ -744,7 +758,27 @@ static int cray_fence(opal_list_t *procs, int collect_data)
744758 if (r_bytes_and_ranks != NULL ) {
745759 free (r_bytes_and_ranks );
746760 }
747- return rc ;
761+ if (NULL != op -> opcbfunc ) {
762+ op -> opcbfunc (rc , op -> cbdata );
763+ }
764+ OBJ_RELEASE (op );
765+ return ;
766+ }
767+
768+ static int cray_fencenb (opal_list_t * procs , int collect_data ,
769+ opal_pmix_op_cbfunc_t cbfunc , void * cbdata )
770+ {
771+ pmi_opcaddy_t * op ;
772+
773+ /* thread-shift this so we don't block in Cray's barrier */
774+ op = OBJ_NEW (pmi_opcaddy_t );
775+ op -> opcbfunc = cbfunc ;
776+ op -> cbdata = cbdata ;
777+ event_assign (& op -> ev , opal_pmix_base .evbase , -1 ,
778+ EV_WRITE , fencenb , op );
779+ event_active (& op -> ev , EV_WRITE , 1 );
780+
781+ return OPAL_SUCCESS ;
748782}
749783
750784static int cray_get (const opal_process_name_t * id , const char * key , opal_list_t * info , opal_value_t * * kv )
0 commit comments