11/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22/*
3- * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
3+ * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
44 * reserved.
55 * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
66 * Copyright (c) 2014 Research Organization for Information Science
3434#include "opal/mca/btl/base/btl_base_error.h"
3535#include "opal/class/opal_hash_table.h"
3636#include "opal/class/opal_free_list.h"
37- #include "opal/mca/common/ugni/common_ugni.h"
3837
3938#include <errno.h>
4039#include <stdint.h>
4847#define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x8000000000000000ull
4948#define MCA_BTL_UGNI_DATAGRAM_MASK 0x8000000000000000ull
5049
/** upper bound on the number of virtual devices; sizes the per-module devices array */
#define MCA_BTL_UGNI_MAX_DEV_HANDLES 128

/** how many rdma completion queue items are removed in one pass of the progress loop */
#define MCA_BTL_UGNI_COMPLETIONS_PER_LOOP 16
55+
/**
 * Modex data: the NIC address and base CDM identifier stored per process.
 */
typedef struct mca_btl_ugni_modex_t {
    /** address of the GNI NIC */
    uint32_t addr;
    /** base value of the CDM identifier */
    int id;
} mca_btl_ugni_modex_t;
66+
5167/* ompi and smsg endpoint attributes */
5268typedef struct mca_btl_ugni_endpoint_attr_t {
5369 opal_process_name_t proc_name ;
@@ -61,12 +77,73 @@ enum {
6177 MCA_BTL_UGNI_RCACHE_GRDMA
6278};
6379
/**
 * Identifiers for the fragment free lists. MCA_BTL_UGNI_LIST_MAX is the
 * number of lists and is used to size the frags_lists array.
 */
enum mca_btl_ugni_free_list_id_t {
    /* eager fragment lists (registered) */
    MCA_BTL_UGNI_LIST_EAGER_SEND = 0,
    MCA_BTL_UGNI_LIST_EAGER_RECV = 1,
    /* SMSG fragment list (unregistered) */
    MCA_BTL_UGNI_LIST_SMSG       = 2,
    /* RDMA fragment lists */
    MCA_BTL_UGNI_LIST_RDMA       = 3,
    MCA_BTL_UGNI_LIST_RDMA_INT   = 4,
    /* count of lists above; keep last */
    MCA_BTL_UGNI_LIST_MAX        = 5,
};
91+
92+ struct mca_btl_ugni_cq_t {
93+ /** ugni CQ handle */
94+ gni_cq_handle_t gni_handle ;
95+ /** number of completions expected on the CQ */
96+ int32_t active_operations ;
97+ };
98+ typedef struct mca_btl_ugni_cq_t mca_btl_ugni_cq_t ;
99+
100+ /**
101+ * GNI virtual device
102+ */
103+ struct mca_btl_ugni_device_t {
104+ /** Communication domain handle */
105+ gni_cdm_handle_t dev_cd_handle ;
106+
107+ /** protection for ugni access */
108+ volatile int32_t lock ;
109+
110+ /** Index of device in module devices array */
111+ int dev_index ;
112+
113+ /** number of SMSG connections */
114+ volatile int32_t smsg_connections ;
115+
116+ /** uGNI device handle */
117+ gni_nic_handle_t dev_handle ;
118+
119+ /** uGNI rdma completion queue */
120+ mca_btl_ugni_cq_t dev_rdma_local_cq ;
121+
122+ /** local rdma completion queue (async) */
123+ mca_btl_ugni_cq_t dev_rdma_local_irq_cq ;
124+
125+ /** local SMSG completion queue */
126+ mca_btl_ugni_cq_t dev_smsg_local_cq ;
127+
128+ /** IRQ memory handle for this device */
129+ gni_mem_handle_t smsg_irq_mhndl ;
130+
131+ /** RDMA endpoint free list */
132+ opal_free_list_t endpoints ;
133+
134+ /** post descriptors pending resources */
135+ opal_list_t pending_post ;
136+ };
137+ typedef struct mca_btl_ugni_device_t mca_btl_ugni_device_t ;
138+
139+ typedef intptr_t (* mca_btl_ugni_device_serialize_fn_t ) (mca_btl_ugni_device_t * device , void * arg );
140+
64141typedef struct mca_btl_ugni_module_t {
65142 mca_btl_base_module_t super ;
66143
67144 bool initialized ;
68145
69- opal_common_ugni_device_t * device ;
146+ mca_btl_ugni_device_t devices [ MCA_BTL_UGNI_MAX_DEV_HANDLES ] ;
70147
71148 opal_mutex_t endpoint_lock ;
72149 size_t endpoint_count ;
@@ -82,9 +159,6 @@ typedef struct mca_btl_ugni_module_t {
82159 opal_mutex_t eager_get_pending_lock ;
83160 opal_list_t eager_get_pending ;
84161
85- opal_mutex_t pending_descriptors_lock ;
86- opal_list_t pending_descriptors ;
87-
88162 opal_free_list_t post_descriptors ;
89163
90164 mca_mpool_base_module_t * mpool ;
@@ -95,23 +169,11 @@ typedef struct mca_btl_ugni_module_t {
95169
96170 struct mca_btl_ugni_endpoint_attr_t wc_remote_attr , wc_local_attr ;
97171
98- gni_cq_handle_t rdma_local_cq ;
99172 gni_cq_handle_t smsg_remote_cq ;
100- gni_cq_handle_t smsg_local_cq ;
101173 gni_cq_handle_t smsg_remote_irq_cq ;
102- gni_cq_handle_t rdma_local_irq_cq ;
103-
104- /* eager fragment list (registered) */
105- opal_free_list_t eager_frags_send ;
106- opal_free_list_t eager_frags_recv ;
107-
108- /* SMSG fragment list (unregistered) */
109- opal_free_list_t smsg_frags ;
110-
111- /* RDMA fragment list */
112- opal_free_list_t rdma_frags ;
113- opal_free_list_t rdma_int_frags ;
114174
175+ /** fragment free lists (see enum mca_btl_ugni_free_list_id_t) */
176+ opal_free_list_t frags_lists [MCA_BTL_UGNI_LIST_MAX ];
115177
116178 /* lock for this list */
117179 opal_mutex_t ep_wait_list_lock ;
@@ -197,10 +259,62 @@ typedef struct mca_btl_ugni_component_t {
197259 /* Indicate whether progress thread allowed */
198260 bool progress_thread_enabled ;
199261
262+ /** Number of ugni device contexts to create per GNI device */
263+ int virtual_device_count ;
264+
265+ /** Protection tag */
266+ uint8_t ptag ;
267+
268+ /** Unique id for this process assigned by the system */
269+ uint32_t cookie ;
270+
271+ /** Starting value of communication identifier */
272+ uint32_t cdm_id_base ;
273+
274+ /** GNI CDM flags */
275+ uint32_t cdm_flags ;
276+
277+ /** NIC address */
278+ uint32_t dev_addr ;
200279} mca_btl_ugni_component_t ;
201280
202- int mca_btl_ugni_module_init (mca_btl_ugni_module_t * ugni_module ,
203- opal_common_ugni_device_t * device );
281+ /* Global structures */
282+
283+ OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component ;
284+ OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module ;
285+
286+ /**
287+ * Get a virtual device for communication
288+ */
289+ static inline mca_btl_ugni_device_t * mca_btl_ugni_ep_get_device (mca_btl_ugni_module_t * ugni_module )
290+ {
291+ static volatile uint32_t device_index = (uint32_t ) 0 ;
292+ uint32_t dev_index ;
293+
294+ /* don't really care if the device index is atomically updated */
295+ dev_index = (device_index ++ ) & (mca_btl_ugni_component .virtual_device_count - 1 );
296+
297+ return ugni_module -> devices + dev_index ;
298+ }
299+
300+ static inline int mca_btl_rc_ugni_to_opal (gni_return_t rc )
301+ {
302+ static int codes [] = {OPAL_SUCCESS ,
303+ OPAL_ERR_RESOURCE_BUSY ,
304+ OPAL_ERR_BAD_PARAM ,
305+ OPAL_ERR_OUT_OF_RESOURCE ,
306+ OPAL_ERR_TIMEOUT ,
307+ OPAL_ERR_PERM ,
308+ OPAL_ERROR ,
309+ OPAL_ERR_BAD_PARAM ,
310+ OPAL_ERR_BAD_PARAM ,
311+ OPAL_ERR_NOT_FOUND ,
312+ OPAL_ERR_VALUE_OUT_OF_BOUNDS ,
313+ OPAL_ERROR ,
314+ OPAL_ERR_NOT_SUPPORTED ,
315+ OPAL_ERR_OUT_OF_RESOURCE };
316+ return codes [rc ];
317+ }
204318
205319/**
206320 * BML->BTL notification of change in the process list.
@@ -324,10 +438,32 @@ typedef struct mca_btl_ugni_reg_t {
324438 mca_btl_base_registration_handle_t handle ;
325439} mca_btl_ugni_reg_t ;
326440
327- /* Global structures */
441+ /**
442+ * Initialize uGNI support.
443+ */
444+ int mca_btl_ugni_init (void );
328445
329- OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component ;
330- OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module ;
446+ /**
447+ * Finalize uGNI support.
448+ */
449+ int mca_btl_ugni_fini (void );
450+
451+ int mca_btl_ugni_module_init (mca_btl_ugni_module_t * ugni_module );
452+
453+ /**
454+ * Initialize a virtual device for device index 0.
455+ *
456+ * @param[inout] device Device to initialize
457+ * @param[in] virtual_device_id Virtual device identifier (up to max handles)
458+ */
459+ int mca_btl_ugni_device_init (mca_btl_ugni_device_t * device , int virtual_device_id );
460+
461+ /**
462+ * Finalize a virtual device.
463+ *
464+ * @param[in] dev Device to finalize
465+ */
466+ int mca_btl_ugni_device_fini (mca_btl_ugni_device_t * dev );
331467
332468/* Get a unique 64-bit id for the process name */
333469static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name ) {
@@ -338,6 +474,57 @@ static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
338474int mca_btl_ugni_spawn_progress_thread (struct mca_btl_base_module_t * btl );
339475int mca_btl_ugni_kill_progress_thread (void );
340476
477+ /**
478+ * Try to lock a uGNI device for exclusive access
479+ */
480+ static inline int mca_btl_ugni_device_trylock (mca_btl_ugni_device_t * device )
481+ {
482+ /* checking the lock non-atomically first can reduce the number of
483+ * unnecessary atomic operations. */
484+ return (device -> lock || opal_atomic_swap_32 (& device -> lock , 1 ));
485+ }
486+
487+ /**
488+ * Lock a uGNI device for exclusive access
489+ */
490+ static inline void mca_btl_ugni_device_lock (mca_btl_ugni_device_t * device )
491+ {
492+ while (mca_btl_ugni_device_trylock (device ));
493+ }
494+
495+ /**
496+ * Release exclusive access to the device
497+ */
498+ static inline void mca_btl_ugni_device_unlock (mca_btl_ugni_device_t * device )
499+ {
500+ opal_atomic_wmb ();
501+ device -> lock = 0 ;
502+ }
503+
504+ /**
505+ * Serialize an operation on a uGNI device
506+ *
507+ * @params[in] device ugni device
508+ * @params[in] fn function to serialize
509+ * @params[in] arg function argument
510+ */
511+ static inline intptr_t mca_btl_ugni_device_serialize (mca_btl_ugni_device_t * device ,
512+ mca_btl_ugni_device_serialize_fn_t fn , void * arg )
513+ {
514+ intptr_t rc ;
515+
516+ if (!opal_using_threads ()) {
517+ return fn (device , arg );
518+ }
519+
520+ /* NTH: for now the device is just protected by a spin lock but this will change in the future */
521+ mca_btl_ugni_device_lock (device );
522+ rc = fn (device , arg );
523+ mca_btl_ugni_device_unlock (device );
524+ return rc ;
525+ }
526+
527+
341528/** Number of times the progress thread has woken up */
342529extern unsigned int mca_btl_ugni_progress_thread_wakeups ;
343530
0 commit comments