1313 * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
1414 * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved.
1515 * Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
16- * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
16+ * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
1717 * reserved.
1818 * $COPYRIGHT$
1919 *
2828#include "opal/align.h"
2929#include "opal/util/output.h"
3030#include "opal/mca/mpool/mpool.h"
31+ #include "opal/mca/mpool/base/base.h"
32+ #include "opal/mca/rcache/rcache.h"
33+ #include "opal/util/sys_limits.h"
3134
3235typedef struct opal_free_list_item_t opal_free_list_memory_t ;
3336
@@ -49,17 +52,22 @@ static void opal_free_list_construct(opal_free_list_t* fl)
4952 fl -> fl_payload_buffer_alignment = 0 ;
5053 fl -> fl_frag_class = OBJ_CLASS (opal_free_list_item_t );
5154 fl -> fl_mpool = NULL ;
55+ fl -> fl_rcache = NULL ;
5256 /* default flags */
53- fl -> fl_mpool_reg_flags = MCA_MPOOL_FLAGS_CACHE_BYPASS |
54- MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM ;
57+ fl -> fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS |
58+ MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM ;
5559 fl -> ctx = NULL ;
5660 OBJ_CONSTRUCT (& (fl -> fl_allocations ), opal_list_t );
5761}
5862
5963static void opal_free_list_allocation_release (opal_free_list_t * fl , opal_free_list_memory_t * fl_mem )
6064{
65+ if (NULL != fl -> fl_rcache ) {
66+ fl -> fl_rcache -> rcache_deregister (fl -> fl_rcache , fl_mem -> registration );
67+ }
68+
6169 if (NULL != fl -> fl_mpool ) {
62- fl -> fl_mpool -> mpool_free (fl -> fl_mpool , fl_mem -> ptr , fl_mem -> registration );
70+ fl -> fl_mpool -> mpool_free (fl -> fl_mpool , fl_mem -> ptr );
6371 } else if (fl_mem -> ptr ) {
6472 free (fl_mem -> ptr );
6573 }
@@ -108,8 +116,9 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
108116 opal_class_t * frag_class , size_t payload_buffer_size ,
109117 size_t payload_buffer_alignment , int num_elements_to_alloc ,
110118 int max_elements_to_alloc , int num_elements_per_alloc ,
111- mca_mpool_base_module_t * mpool , int mpool_reg_flags ,
112- void * unused0 , opal_free_list_item_init_fn_t item_init , void * ctx )
119+ mca_mpool_base_module_t * mpool , int rcache_reg_flags ,
120+ mca_rcache_base_module_t * rcache , opal_free_list_item_init_fn_t item_init ,
121+ void * ctx )
113122{
114123 /* alignment must be greater than one and a power of two */
115124 if (frag_alignment <= 1 || (frag_alignment & (frag_alignment - 1 ))) {
@@ -137,11 +146,12 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
137146 flist -> fl_max_to_alloc = max_elements_to_alloc ;
138147 flist -> fl_num_allocated = 0 ;
139148 flist -> fl_num_per_alloc = num_elements_per_alloc ;
140- flist -> fl_mpool = mpool ;
149+ flist -> fl_mpool = mpool ? mpool : mca_mpool_base_default_module ;
150+ flist -> fl_rcache = rcache ;
141151 flist -> fl_frag_alignment = frag_alignment ;
142152 flist -> fl_payload_buffer_alignment = payload_buffer_alignment ;
143153 flist -> item_init = item_init ;
144- flist -> fl_mpool_reg_flags |= mpool_reg_flags ;
154+ flist -> fl_rcache_reg_flags |= rcache_reg_flags ;
145155 flist -> ctx = ctx ;
146156
147157 if (num_elements_to_alloc ) {
@@ -153,10 +163,10 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
153163
154164int opal_free_list_grow_st (opal_free_list_t * flist , size_t num_elements )
155165{
156- unsigned char * ptr , * mpool_alloc_ptr = NULL , * payload_ptr = NULL ;
166+ unsigned char * ptr , * payload_ptr = NULL ;
157167 opal_free_list_memory_t * alloc_ptr ;
158- size_t alloc_size , head_size , elem_size = 0 ;
159- mca_mpool_base_registration_t * reg = NULL ;
168+ size_t alloc_size , head_size , elem_size = 0 , buffer_size , align ;
169+ mca_rcache_base_registration_t * reg = NULL ;
160170 int rc = OPAL_SUCCESS ;
161171
162172 if (flist -> fl_max_to_alloc && (flist -> fl_num_allocated + num_elements ) >
@@ -170,6 +180,29 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
170180
171181 head_size = OPAL_ALIGN (flist -> fl_frag_size , flist -> fl_frag_alignment , size_t );
172182
183+ /* NTH: calculate allocation alignment first as it might change the number of elements */
184+ if (0 != flist -> fl_payload_buffer_size ) {
185+ elem_size = OPAL_ALIGN (flist -> fl_payload_buffer_size ,
186+ flist -> fl_payload_buffer_alignment , size_t );
187+
188+ /* elem_size should not be 0 here */
189+ assert (elem_size > 0 );
190+
191+ buffer_size = num_elements * elem_size ;
192+ align = flist -> fl_payload_buffer_alignment ;
193+
194+ if (MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM & flist -> fl_rcache_reg_flags ) {
195+ size_t pagesize = opal_getpagesize ();
196+ /* CUDA cannot handle registering overlapping regions, so make
197+ * sure each region is page sized and page aligned. */
198+ align = OPAL_ALIGN (align , pagesize , size_t );
199+ buffer_size = OPAL_ALIGN (buffer_size , pagesize , size_t );
200+
201+ /* avoid wasting space in the buffer */
202+ num_elements = buffer_size / elem_size ;
203+ }
204+ }
205+
173206 /* calculate head allocation size */
174207 alloc_size = num_elements * head_size + sizeof (opal_free_list_memory_t ) +
175208 flist -> fl_frag_alignment ;
@@ -180,37 +213,23 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
180213 }
181214
182215 if (0 != flist -> fl_payload_buffer_size ) {
183- elem_size = OPAL_ALIGN (flist -> fl_payload_buffer_size ,
184- flist -> fl_payload_buffer_alignment , size_t );
185-
186- /* elem_size should not be 0 here */
187- assert (elem_size > 0 );
188-
189216 /* allocate the rest from the mpool (or use memalign/malloc) */
190- if (flist -> fl_mpool != NULL ) {
191- payload_ptr = mpool_alloc_ptr =
192- (unsigned char * ) flist -> fl_mpool -> mpool_alloc (flist -> fl_mpool ,
193- num_elements * elem_size ,
194- flist -> fl_payload_buffer_alignment ,
195- flist -> fl_mpool_reg_flags , & reg );
196- } else {
197- #ifdef HAVE_POSIX_MEMALIGN
198- posix_memalign ((void * * ) & mpool_alloc_ptr , flist -> fl_payload_buffer_alignment ,
199- num_elements * elem_size );
200- payload_ptr = mpool_alloc_ptr ;
201- #else
202- mpool_alloc_ptr = (unsigned char * ) malloc (num_elements * elem_size +
203- flist -> fl_payload_buffer_alignment );
204- payload_ptr = (unsigned char * ) OPAL_ALIGN ((uintptr_t )mpool_alloc_ptr ,
205- flist -> fl_payload_buffer_alignment ,
206- uintptr_t );
207- #endif
208- }
209-
210- if (NULL == mpool_alloc_ptr ) {
217+ payload_ptr = (unsigned char * ) flist -> fl_mpool -> mpool_alloc (flist -> fl_mpool , buffer_size , align , 0 );
218+ if (NULL == payload_ptr ) {
211219 free (alloc_ptr );
212220 return OPAL_ERR_TEMP_OUT_OF_RESOURCE ;
213221 }
222+
223+ if (flist -> fl_rcache ) {
224+ rc = flist -> fl_rcache -> rcache_register (flist -> fl_rcache , payload_ptr , num_elements * elem_size ,
225+ flist -> fl_rcache_reg_flags , MCA_RCACHE_ACCESS_ANY , & reg );
226+ if (OPAL_UNLIKELY (OPAL_SUCCESS != rc )) {
227+ free (alloc_ptr );
228+ flist -> fl_mpool -> mpool_free (flist -> fl_mpool , payload_ptr );
229+
230+ return rc ;
231+ }
232+ }
214233 }
215234
216235 /* make the alloc_ptr a list item, save the chunk in the allocations list,
@@ -219,7 +238,7 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
219238 opal_list_append (& (flist -> fl_allocations ), (opal_list_item_t * )alloc_ptr );
220239
221240 alloc_ptr -> registration = reg ;
222- alloc_ptr -> ptr = mpool_alloc_ptr ;
241+ alloc_ptr -> ptr = payload_ptr ;
223242
224243 ptr = (unsigned char * )alloc_ptr + sizeof (opal_free_list_memory_t );
225244 ptr = OPAL_ALIGN_PTR (ptr , flist -> fl_frag_alignment , unsigned char * );
0 commit comments