Skip to content

Commit 9410574

Browse files
authored
Merge pull request #3149 from hjelmn/btl_ugni_2_0
Improve multi-threaded RMA performance of the ugni btl
2 parents e4a35f2 + d5aaeb7 commit 9410574

27 files changed

+1992
-1597
lines changed

opal/mca/btl/ugni/Makefile.am

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- indent-tabs-mode:nil -*-
22
#
3-
# Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
3+
# Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
44
# reserved.
55
# Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
66
#
@@ -40,14 +40,15 @@ ugni_SOURCES = \
4040
btl_ugni_smsg.c \
4141
btl_ugni_progress_thread.c \
4242
btl_ugni_prepare.h \
43-
btl_ugni_atomic.c
43+
btl_ugni_atomic.c \
44+
btl_ugni_init.c \
45+
btl_ugni_device.h
4446

4547
mcacomponentdir = $(opallibdir)
4648
mcacomponent_LTLIBRARIES = $(component_install)
4749
mca_btl_ugni_la_SOURCES = $(ugni_SOURCES)
4850
nodist_mca_btl_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
49-
mca_btl_ugni_la_LIBADD = $(btl_ugni_LIBS) \
50-
$(OPAL_TOP_BUILDDIR)/opal/mca/common/ugni/lib@OPAL_LIB_PREFIX@mca_common_ugni.la
51+
mca_btl_ugni_la_LIBADD = $(btl_ugni_LIBS)
5152
mca_btl_ugni_la_LDFLAGS = -module -avoid-version $(btl_ugni_LDFLAGS)
5253

5354
noinst_LTLIBRARIES = $(component_noinst)

opal/mca/btl/ugni/btl_ugni.h

Lines changed: 212 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
3+
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
44
* reserved.
55
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
66
* Copyright (c) 2014 Research Organization for Information Science
@@ -34,7 +34,6 @@
3434
#include "opal/mca/btl/base/btl_base_error.h"
3535
#include "opal/class/opal_hash_table.h"
3636
#include "opal/class/opal_free_list.h"
37-
#include "opal/mca/common/ugni/common_ugni.h"
3837

3938
#include <errno.h>
4039
#include <stdint.h>
@@ -48,6 +47,23 @@
4847
#define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x8000000000000000ull
4948
#define MCA_BTL_UGNI_DATAGRAM_MASK 0x8000000000000000ull
5049

50+
/** maximum number of supported virtual devices */
51+
#define MCA_BTL_UGNI_MAX_DEV_HANDLES 128
52+
53+
/** number of rdma completion queue items to remove per progress loop */
54+
#define MCA_BTL_UGNI_COMPLETIONS_PER_LOOP 16
55+
56+
/**
57+
* Modex data: carries a GNI NIC address and a base CDM identifier.
58+
*/
59+
struct mca_btl_ugni_modex_t {
60+
/** GNI NIC address */
61+
uint32_t addr;
62+
/** CDM identifier (base) */
63+
int id;
64+
};
65+
typedef struct mca_btl_ugni_modex_t mca_btl_ugni_modex_t;
66+
5167
/* ompi and smsg endpoint attributes */
5268
typedef struct mca_btl_ugni_endpoint_attr_t {
5369
opal_process_name_t proc_name;
@@ -61,12 +77,73 @@ enum {
6177
MCA_BTL_UGNI_RCACHE_GRDMA
6278
};
6379

80+
/** Identifiers for the fragment free lists; the module's frags_lists array
 * is declared with MCA_BTL_UGNI_LIST_MAX entries. */
enum mca_btl_ugni_free_list_id_t {
81+
/* eager fragment list (registered) */
82+
MCA_BTL_UGNI_LIST_EAGER_SEND,
83+
MCA_BTL_UGNI_LIST_EAGER_RECV,
84+
/* SMSG fragment list (unregistered) */
85+
MCA_BTL_UGNI_LIST_SMSG,
86+
/* RDMA fragment list */
87+
MCA_BTL_UGNI_LIST_RDMA,
88+
MCA_BTL_UGNI_LIST_RDMA_INT,
/* count of the list identifiers above; not itself a list */
89+
MCA_BTL_UGNI_LIST_MAX,
90+
};
91+
92+
/** Completion queue state: a uGNI CQ handle together with a count of the
 * completions still expected on it. */
struct mca_btl_ugni_cq_t {
93+
/** ugni CQ handle */
94+
gni_cq_handle_t gni_handle;
95+
/** number of completions expected on the CQ */
96+
int32_t active_operations;
97+
};
98+
typedef struct mca_btl_ugni_cq_t mca_btl_ugni_cq_t;
99+
100+
/**
101+
* GNI virtual device. Groups a communication domain handle, a NIC handle,
* the local completion queues and a lock word. The module keeps an array of
* these (devices[MCA_BTL_UGNI_MAX_DEV_HANDLES]).
102+
*/
103+
struct mca_btl_ugni_device_t {
104+
/** Communication domain handle */
105+
gni_cdm_handle_t dev_cd_handle;
106+
107+
/** protection for ugni access; tested and set to 1 with an atomic swap by
 * mca_btl_ugni_device_trylock() and reset to 0 by mca_btl_ugni_device_unlock() */
108+
volatile int32_t lock;
109+
110+
/** Index of device in module devices array */
111+
int dev_index;
112+
113+
/** number of SMSG connections */
114+
volatile int32_t smsg_connections;
115+
116+
/** uGNI device handle */
117+
gni_nic_handle_t dev_handle;
118+
119+
/** uGNI rdma completion queue */
120+
mca_btl_ugni_cq_t dev_rdma_local_cq;
121+
122+
/** local rdma completion queue (async) */
123+
mca_btl_ugni_cq_t dev_rdma_local_irq_cq;
124+
125+
/** local SMSG completion queue */
126+
mca_btl_ugni_cq_t dev_smsg_local_cq;
127+
128+
/** IRQ memory handle for this device */
129+
gni_mem_handle_t smsg_irq_mhndl;
130+
131+
/** RDMA endpoint free list */
132+
opal_free_list_t endpoints;
133+
134+
/** post descriptors pending resources */
135+
opal_list_t pending_post;
136+
};
137+
typedef struct mca_btl_ugni_device_t mca_btl_ugni_device_t;
138+
139+
typedef intptr_t (*mca_btl_ugni_device_serialize_fn_t) (mca_btl_ugni_device_t *device, void *arg);
140+
64141
typedef struct mca_btl_ugni_module_t {
65142
mca_btl_base_module_t super;
66143

67144
bool initialized;
68145

69-
opal_common_ugni_device_t *device;
146+
mca_btl_ugni_device_t devices[MCA_BTL_UGNI_MAX_DEV_HANDLES];
70147

71148
opal_mutex_t endpoint_lock;
72149
size_t endpoint_count;
@@ -82,9 +159,6 @@ typedef struct mca_btl_ugni_module_t {
82159
opal_mutex_t eager_get_pending_lock;
83160
opal_list_t eager_get_pending;
84161

85-
opal_mutex_t pending_descriptors_lock;
86-
opal_list_t pending_descriptors;
87-
88162
opal_free_list_t post_descriptors;
89163

90164
mca_mpool_base_module_t *mpool;
@@ -95,23 +169,11 @@ typedef struct mca_btl_ugni_module_t {
95169

96170
struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;
97171

98-
gni_cq_handle_t rdma_local_cq;
99172
gni_cq_handle_t smsg_remote_cq;
100-
gni_cq_handle_t smsg_local_cq;
101173
gni_cq_handle_t smsg_remote_irq_cq;
102-
gni_cq_handle_t rdma_local_irq_cq;
103-
104-
/* eager fragment list (registered) */
105-
opal_free_list_t eager_frags_send;
106-
opal_free_list_t eager_frags_recv;
107-
108-
/* SMSG fragment list (unregistered) */
109-
opal_free_list_t smsg_frags;
110-
111-
/* RDMA fragment list */
112-
opal_free_list_t rdma_frags;
113-
opal_free_list_t rdma_int_frags;
114174

175+
/** fragment free lists (see enum mca_btl_ugni_free_list_id_t) */
176+
opal_free_list_t frags_lists[MCA_BTL_UGNI_LIST_MAX];
115177

116178
/* lock for this list */
117179
opal_mutex_t ep_wait_list_lock;
@@ -197,10 +259,62 @@ typedef struct mca_btl_ugni_component_t {
197259
/* Indicate whether progress thread allowed */
198260
bool progress_thread_enabled;
199261

262+
/** Number of ugni device contexts to create per GNI device */
263+
int virtual_device_count;
264+
265+
/** Protection tag */
266+
uint8_t ptag;
267+
268+
/** Unique id for this process assigned by the system */
269+
uint32_t cookie;
270+
271+
/** Starting value of communication identifier */
272+
uint32_t cdm_id_base;
273+
274+
/** GNI CDM flags */
275+
uint32_t cdm_flags;
276+
277+
/** NIC address */
278+
uint32_t dev_addr;
200279
} mca_btl_ugni_component_t;
201280

202-
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
203-
opal_common_ugni_device_t *device);
281+
/* Global structures */
282+
283+
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
284+
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
285+
286+
/**
287+
* Get a virtual device for communication
288+
*/
289+
static inline mca_btl_ugni_device_t *mca_btl_ugni_ep_get_device (mca_btl_ugni_module_t *ugni_module)
290+
{
291+
static volatile uint32_t device_index = (uint32_t) 0;
292+
uint32_t dev_index;
293+
294+
/* don't really care if the device index is atomically updated */
295+
dev_index = (device_index++) & (mca_btl_ugni_component.virtual_device_count - 1);
296+
297+
return ugni_module->devices + dev_index;
298+
}
299+
300+
/**
 * Translate a uGNI return code into an OPAL error code
 *
 * The return code is used as an index into a fixed translation table.
 *
 * @param[in] rc  uGNI return code
 *
 * @returns the table entry at position rc, or OPAL_ERROR when rc does not
 *          fall inside the table
 */
static inline int mca_btl_rc_ugni_to_opal (gni_return_t rc)
{
    static const int codes[] = {OPAL_SUCCESS,
                                OPAL_ERR_RESOURCE_BUSY,
                                OPAL_ERR_BAD_PARAM,
                                OPAL_ERR_OUT_OF_RESOURCE,
                                OPAL_ERR_TIMEOUT,
                                OPAL_ERR_PERM,
                                OPAL_ERROR,
                                OPAL_ERR_BAD_PARAM,
                                OPAL_ERR_BAD_PARAM,
                                OPAL_ERR_NOT_FOUND,
                                OPAL_ERR_VALUE_OUT_OF_BOUNDS,
                                OPAL_ERROR,
                                OPAL_ERR_NOT_SUPPORTED,
                                OPAL_ERR_OUT_OF_RESOURCE};
    const int code_count = (int) (sizeof (codes) / sizeof (codes[0]));

    /* never index outside the table: an unexpected return code maps to the
     * generic error instead of reading adjacent memory */
    if ((int) rc < 0 || (int) rc >= code_count) {
        return OPAL_ERROR;
    }

    return codes[rc];
}
204318

205319
/**
206320
* BML->BTL notification of change in the process list.
@@ -324,10 +438,32 @@ typedef struct mca_btl_ugni_reg_t {
324438
mca_btl_base_registration_handle_t handle;
325439
} mca_btl_ugni_reg_t;
326440

327-
/* Global structures */
441+
/**
442+
* Initialize uGNI support.
443+
*/
444+
int mca_btl_ugni_init (void);
328445

329-
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
330-
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
446+
/**
447+
* Finalize uGNI support.
448+
*/
449+
int mca_btl_ugni_fini (void);
450+
451+
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module);
452+
453+
/**
454+
* Initialize a virtual device for device index 0.
455+
*
456+
* @param[inout] device Device to initialize
457+
* @param[in] virtual_device_id Virtual device identifier (up to max handles)
458+
*/
459+
int mca_btl_ugni_device_init (mca_btl_ugni_device_t *device, int virtual_device_id);
460+
461+
/**
462+
* Finalize a virtual device.
463+
*
464+
* @param[in] dev Device to finalize
465+
*/
466+
int mca_btl_ugni_device_fini (mca_btl_ugni_device_t *dev);
331467

332468
/* Get a unique 64-bit id for the process name */
333469
static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
@@ -338,6 +474,57 @@ static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
338474
int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t* btl);
339475
int mca_btl_ugni_kill_progress_thread(void);
340476

477+
/**
478+
* Try to lock a uGNI device for exclusive access
479+
*/
480+
static inline int mca_btl_ugni_device_trylock (mca_btl_ugni_device_t *device)
481+
{
482+
/* checking the lock non-atomically first can reduce the number of
483+
* unnecessary atomic operations. */
484+
return (device->lock || opal_atomic_swap_32 (&device->lock, 1));
485+
}
486+
487+
/**
488+
* Lock a uGNI device for exclusive access
489+
*/
490+
static inline void mca_btl_ugni_device_lock (mca_btl_ugni_device_t *device)
491+
{
492+
while (mca_btl_ugni_device_trylock (device));
493+
}
494+
495+
/**
496+
* Release exclusive access to the device
497+
*/
498+
static inline void mca_btl_ugni_device_unlock (mca_btl_ugni_device_t *device)
499+
{
500+
opal_atomic_wmb ();
501+
device->lock = 0;
502+
}
503+
504+
/**
505+
* Serialize an operation on a uGNI device
506+
*
507+
* @params[in] device ugni device
508+
* @params[in] fn function to serialize
509+
* @params[in] arg function argument
510+
*/
511+
static inline intptr_t mca_btl_ugni_device_serialize (mca_btl_ugni_device_t *device,
512+
mca_btl_ugni_device_serialize_fn_t fn, void *arg)
513+
{
514+
intptr_t rc;
515+
516+
if (!opal_using_threads ()) {
517+
return fn (device, arg);
518+
}
519+
520+
/* NTH: for now the device is just protected by a spin lock but this will change in the future */
521+
mca_btl_ugni_device_lock (device);
522+
rc = fn (device, arg);
523+
mca_btl_ugni_device_unlock (device);
524+
return rc;
525+
}
526+
527+
341528
/** Number of times the progress thread has woken up */
342529
extern unsigned int mca_btl_ugni_progress_thread_wakeups;
343530

0 commit comments

Comments
 (0)