11/*
2- * Copyright (c) 2013 Mellanox Technologies, Inc.
2+ * Copyright (c) 2013-2018 Mellanox Technologies, Inc.
33 * All rights reserved.
44 * Copyright (c) 2014-2016 Research Organization for Information Science
55 * and Technology (RIST). All rights reserved.
1717#include "oshmem/constants.h"
1818#include "oshmem/runtime/runtime.h"
1919#include "oshmem/mca/scoll/base/base.h"
20+ #include "oshmem/proc/proc_group_cache.h"
2021
2122#ifdef HAVE_STRINGS_H
2223#include <strings.h>
@@ -65,40 +66,67 @@ oshmem_group_t* oshmem_group_null = NULL;
6566
6667OBJ_CLASS_INSTANCE (oshmem_group_t , opal_object_t , NULL , NULL );
6768
69+ static void oshmem_proc_group_destroy_internal (oshmem_group_t * group ,
70+ int scoll_unselect );
71+
6872int oshmem_proc_group_init (void )
6973{
74+ int rc ;
75+
76+ rc = oshmem_group_cache_init ();
77+ if (OSHMEM_SUCCESS != rc ) {
78+ return rc ;
79+ }
80+
7081 /* Setup communicator array */
7182 OBJ_CONSTRUCT (& oshmem_group_array , opal_pointer_array_t );
72- if (OPAL_SUCCESS
73- != opal_pointer_array_init (& oshmem_group_array ,
74- 0 ,
75- ORTE_GLOBAL_ARRAY_MAX_SIZE ,
76- 1 )) {
77- return OSHMEM_ERROR ;
83+
84+ rc = opal_pointer_array_init (& oshmem_group_array , 0 ,
85+ ORTE_GLOBAL_ARRAY_MAX_SIZE , 1 );
86+ if (OPAL_SUCCESS != rc ) {
87+ goto err1 ;
7888 }
7989
8090 /* Setup SHMEM_GROUP_ALL */
81- if (NULL
82- == (oshmem_group_all =
83- oshmem_proc_group_create (0 ,
84- 1 ,
85- ompi_comm_size (oshmem_comm_world )))) {
86- return OSHMEM_ERROR ;
91+ oshmem_group_all = oshmem_proc_group_create (0 , 1 , ompi_comm_size (oshmem_comm_world ));
92+ if (NULL == oshmem_group_all ) {
93+ goto err2 ;
8794 }
8895
8996 /* Setup SHMEM_GROUP_SELF */
90- if (NULL
91- == (oshmem_group_self = oshmem_proc_group_create (oshmem_proc_pe (oshmem_proc_local ()),
92- 0 ,
93- 1 ))) {
94- oshmem_proc_group_destroy (oshmem_group_self );
95- return OSHMEM_ERROR ;
97+ oshmem_group_self = oshmem_proc_group_create (oshmem_proc_pe (oshmem_proc_local ()), 0 , 1 );
98+ if (NULL == oshmem_group_self ) {
99+ goto err3 ;
96100 }
97101
98102 /* Setup SHMEM_GROUP_NULL */
99103 oshmem_group_null = NULL ;
100104
101105 return OSHMEM_SUCCESS ;
106+
107+ err3 :
108+ oshmem_proc_group_destroy_internal (oshmem_group_all , 1 );
109+ err2 :
110+ OBJ_DESTRUCT (& oshmem_group_array );
111+ err1 :
112+ oshmem_group_cache_destroy ();
113+ return OSHMEM_ERROR ;
114+ }
115+
116+ void oshmem_proc_group_finalize_scoll (void )
117+ {
118+ int max , i ;
119+ oshmem_group_t * group ;
120+
121+ /* Check whether we have some left */
122+ max = opal_pointer_array_get_size (& oshmem_group_array );
123+ for (i = 0 ; i < max ; i ++ ) {
124+ group = (oshmem_group_t * ) opal_pointer_array_get_item (& oshmem_group_array ,
125+ i );
126+ if (NULL != group ) {
127+ mca_scoll_base_group_unselect (group );
128+ }
129+ }
102130}
103131
104132int oshmem_proc_group_finalize (void )
@@ -114,18 +142,17 @@ int oshmem_proc_group_finalize(void)
114142 i );
115143 if (NULL != group ) {
116144 /* Group has not been freed before finalize */
117- oshmem_proc_group_destroy (group );
145+ oshmem_proc_group_destroy_internal (group , 0 );
118146 }
119147 }
120148
121149 OBJ_DESTRUCT (& oshmem_group_array );
122150
151+ oshmem_group_cache_destroy ();
123152 return OSHMEM_SUCCESS ;
124153}
125154
126- oshmem_group_t * oshmem_proc_group_create (int pe_start ,
127- int pe_stride ,
128- size_t pe_size )
155+ oshmem_group_t * oshmem_proc_group_create (int pe_start , int pe_stride , int pe_size )
129156{
130157 int cur_pe , count_pe ;
131158 int i ;
@@ -135,107 +162,133 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start,
135162
136163 assert (oshmem_proc_local ());
137164
165+ group = oshmem_group_cache_find (pe_start , pe_stride , pe_size );
166+ if (NULL != group ) {
167+ return group ;
168+ }
169+
138170 group = OBJ_NEW (oshmem_group_t );
171+ if (NULL == group ) {
172+ return NULL ;
173+ }
139174
140- if (group ) {
141- cur_pe = 0 ;
142- count_pe = 0 ;
175+ cur_pe = 0 ;
176+ count_pe = 0 ;
143177
144- OPAL_THREAD_LOCK (& oshmem_proc_lock );
178+ OPAL_THREAD_LOCK (& oshmem_proc_lock );
179+
180+ /* allocate an array */
181+ proc_array = (ompi_proc_t * * ) malloc (pe_size * sizeof (ompi_proc_t * ));
182+ if (NULL == proc_array ) {
183+ OBJ_RELEASE (group );
184+ OPAL_THREAD_UNLOCK (& oshmem_proc_lock );
185+ return NULL ;
186+ }
145187
146- /* allocate an array */
147- proc_array = (ompi_proc_t * * ) malloc (pe_size * sizeof (ompi_proc_t * ));
148- if (NULL == proc_array ) {
188+ group -> my_pe = oshmem_proc_pe (oshmem_proc_local ());
189+ group -> is_member = 0 ;
190+ for (i = 0 ; i < ompi_comm_size (oshmem_comm_world ) ; i ++ ) {
191+ proc = oshmem_proc_find (i );
192+ if (NULL == proc ) {
193+ opal_output (0 ,
194+ "Error: Can not find proc object for pe = %d" , i );
195+ free (proc_array );
149196 OBJ_RELEASE (group );
150197 OPAL_THREAD_UNLOCK (& oshmem_proc_lock );
151- return NULL ;
198+ return NULL ;
152199 }
153-
154- group -> my_pe = oshmem_proc_pe (oshmem_proc_local ());
155- group -> is_member = 0 ;
156- for (i = 0 ; i < ompi_comm_size (oshmem_comm_world ) ; i ++ ) {
157- proc = oshmem_proc_find (i );
158- if (NULL == proc ) {
159- opal_output (0 ,
160- "Error: Can not find proc object for pe = %d" , i );
161- free (proc_array );
162- OBJ_RELEASE (group );
163- OPAL_THREAD_UNLOCK (& oshmem_proc_lock );
164- return NULL ;
165- }
166- if (count_pe >= (int ) pe_size ) {
167- break ;
168- } else if ((cur_pe >= pe_start )
169- && ((pe_stride == 0 )
170- || (((cur_pe - pe_start ) % pe_stride ) == 0 ))) {
171- proc_array [count_pe ++ ] = proc ;
172- if (oshmem_proc_pe (proc ) == group -> my_pe )
173- group -> is_member = 1 ;
174- }
175- cur_pe ++ ;
200+ if (count_pe >= (int ) pe_size ) {
201+ break ;
202+ } else if ((cur_pe >= pe_start )
203+ && ((pe_stride == 0 )
204+ || (((cur_pe - pe_start ) % pe_stride ) == 0 ))) {
205+ proc_array [count_pe ++ ] = proc ;
206+ if (oshmem_proc_pe (proc ) == group -> my_pe )
207+ group -> is_member = 1 ;
176208 }
177- group -> proc_array = proc_array ;
178- group -> proc_count = (int ) count_pe ;
179- group -> ompi_comm = NULL ;
180-
181- /* Prepare peers list */
182- OBJ_CONSTRUCT (& (group -> peer_list ), opal_list_t );
183- {
184- orte_namelist_t * peer = NULL ;
185-
186- for (i = 0 ; i < group -> proc_count ; i ++ ) {
187- peer = OBJ_NEW (orte_namelist_t );
188- peer -> name .jobid = OSHMEM_PROC_JOBID (group -> proc_array [i ]);
189- peer -> name .vpid = OSHMEM_PROC_VPID (group -> proc_array [i ]);
190- opal_list_append (& (group -> peer_list ), & peer -> super );
191- }
209+ cur_pe ++ ;
210+ }
211+ group -> proc_array = proc_array ;
212+ group -> proc_count = (int ) count_pe ;
213+ group -> ompi_comm = NULL ;
214+
215+ /* Prepare peers list */
216+ OBJ_CONSTRUCT (& (group -> peer_list ), opal_list_t );
217+ {
218+ orte_namelist_t * peer = NULL ;
219+
220+ for (i = 0 ; i < group -> proc_count ; i ++ ) {
221+ peer = OBJ_NEW (orte_namelist_t );
222+ peer -> name .jobid = OSHMEM_PROC_JOBID (group -> proc_array [i ]);
223+ peer -> name .vpid = OSHMEM_PROC_VPID (group -> proc_array [i ]);
224+ opal_list_append (& (group -> peer_list ), & peer -> super );
192225 }
193- group -> id = opal_pointer_array_add (& oshmem_group_array , group );
226+ }
227+ group -> id = opal_pointer_array_add (& oshmem_group_array , group );
194228
195- memset (& group -> g_scoll , 0 , sizeof (mca_scoll_base_group_scoll_t ));
229+ memset (& group -> g_scoll , 0 , sizeof (mca_scoll_base_group_scoll_t ));
196230
197- if (OSHMEM_SUCCESS != mca_scoll_base_select (group )) {
198- opal_output (0 ,
199- "Error: No collective modules are available: group is not created, returning NULL" );
200- oshmem_proc_group_destroy (group );
201- OPAL_THREAD_UNLOCK (& oshmem_proc_lock );
202- return NULL ;
203- }
231+ if (OSHMEM_SUCCESS != mca_scoll_base_select (group )) {
232+ opal_output (0 ,
233+ "Error: No collective modules are available: group is not created, returning NULL" );
234+ oshmem_proc_group_destroy_internal (group , 0 );
204235 OPAL_THREAD_UNLOCK (& oshmem_proc_lock );
236+ return NULL ;
205237 }
206238
239+ if (OSHMEM_SUCCESS != oshmem_group_cache_insert (group , pe_start ,
240+ pe_stride , pe_size )) {
241+ oshmem_proc_group_destroy_internal (group , 1 );
242+ OPAL_THREAD_UNLOCK (& oshmem_proc_lock );
243+ return NULL ;
244+ }
245+
246+ OPAL_THREAD_UNLOCK (& oshmem_proc_lock );
207247 return group ;
208248}
209249
210- void oshmem_proc_group_destroy (oshmem_group_t * group )
250+ static void
251+ oshmem_proc_group_destroy_internal (oshmem_group_t * group , int scoll_unselect )
211252{
212- if (group ) {
253+ if (NULL == group ) {
254+ return ;
255+ }
256+
257+ if (scoll_unselect ) {
213258 mca_scoll_base_group_unselect (group );
259+ }
214260
215- /* Destroy proc array */
216- if (group -> proc_array ) {
217- free (group -> proc_array );
218- }
261+ /* Destroy proc array */
262+ if (group -> proc_array ) {
263+ free (group -> proc_array );
264+ }
219265
220- /* Destroy peer list */
221- {
222- opal_list_item_t * item ;
266+ /* Destroy peer list */
267+ {
268+ opal_list_item_t * item ;
223269
224- while (NULL != (item = opal_list_remove_first (& (group -> peer_list )))) {
225- /* destruct the item (we constructed it), then free the memory chunk */
226- OBJ_RELEASE (item );
227- }
228- OBJ_DESTRUCT (& (group -> peer_list ));
270+ while (NULL != (item = opal_list_remove_first (& (group -> peer_list )))) {
271+ /* destruct the item (we constructed it), then free the memory chunk */
272+ OBJ_RELEASE (item );
229273 }
274+ OBJ_DESTRUCT (& (group -> peer_list ));
275+ }
230276
231- /* reset the oshmem_group_array entry - make sure that the
232- * entry is in the table */
233- if (NULL
234- != opal_pointer_array_get_item (& oshmem_group_array ,
235- group -> id )) {
236- opal_pointer_array_set_item (& oshmem_group_array , group -> id , NULL );
237- }
277+ /* reset the oshmem_group_array entry - make sure that the
278+ * entry is in the table */
279+ if (NULL
280+ != opal_pointer_array_get_item (& oshmem_group_array ,
281+ group -> id )) {
282+ opal_pointer_array_set_item (& oshmem_group_array , group -> id , NULL );
283+ }
238284
239- OBJ_RELEASE (group );
285+ OBJ_RELEASE (group );
286+ }
287+
288+ void oshmem_proc_group_destroy (oshmem_group_t * group )
289+ {
290+ if (oshmem_group_cache_enabled ()) {
291+ return ;
240292 }
293+ oshmem_proc_group_destroy_internal (group , 1 );
241294}
0 commit comments