1
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2
2
/*
3
- * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
3
+ * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
4
4
* reserved.
5
5
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
6
6
* Copyright (c) 2014 Research Organization for Information Science
34
34
#include "opal/mca/btl/base/btl_base_error.h"
35
35
#include "opal/class/opal_hash_table.h"
36
36
#include "opal/class/opal_free_list.h"
37
- #include "opal/mca/common/ugni/common_ugni.h"
38
37
39
38
#include <errno.h>
40
39
#include <stdint.h>
48
47
#define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x8000000000000000ull
49
48
#define MCA_BTL_UGNI_DATAGRAM_MASK 0x8000000000000000ull
50
49
50
+ /** maximum number of supported virtual devices */
51
+ #define MCA_BTL_UGNI_MAX_DEV_HANDLES 128
52
+
53
+ /** number of rdma completion queue items to remove per progress loop */
54
+ #define MCA_BTL_UGNI_COMPLETIONS_PER_LOOP 16
55
+
56
/**
 * Modex data
 *
 * Holds the two values this BTL keeps per modex entry: a NIC address and a
 * base communication domain (CDM) identifier.
 */
typedef struct mca_btl_ugni_modex_t {
    /** address of the GNI NIC */
    uint32_t addr;

    /** base CDM identifier */
    int id;
} mca_btl_ugni_modex_t;
66
+
51
67
/* ompi and smsg endpoint attributes */
52
68
typedef struct mca_btl_ugni_endpoint_attr_t {
53
69
opal_process_name_t proc_name ;
@@ -61,12 +77,73 @@ enum {
61
77
MCA_BTL_UGNI_RCACHE_GRDMA
62
78
};
63
79
80
/**
 * Identifiers for the fragment free lists.
 *
 * The values are consecutive starting at 0, so MCA_BTL_UGNI_LIST_MAX equals
 * the number of list identifiers that precede it.
 */
enum mca_btl_ugni_free_list_id_t {
    /* eager fragment lists (registered) */
    MCA_BTL_UGNI_LIST_EAGER_SEND = 0,
    MCA_BTL_UGNI_LIST_EAGER_RECV,

    /* SMSG fragment list (unregistered) */
    MCA_BTL_UGNI_LIST_SMSG,

    /* RDMA fragment lists */
    MCA_BTL_UGNI_LIST_RDMA,
    MCA_BTL_UGNI_LIST_RDMA_INT,

    /* number of list identifiers; keep this entry last */
    MCA_BTL_UGNI_LIST_MAX
};
91
+
92
+ struct mca_btl_ugni_cq_t {
93
+ /** ugni CQ handle */
94
+ gni_cq_handle_t gni_handle ;
95
+ /** number of completions expected on the CQ */
96
+ int32_t active_operations ;
97
+ };
98
+ typedef struct mca_btl_ugni_cq_t mca_btl_ugni_cq_t ;
99
+
100
+ /**
101
+ * GNI virtual device
102
+ */
103
+ struct mca_btl_ugni_device_t {
104
+ /** Communication domain handle */
105
+ gni_cdm_handle_t dev_cd_handle ;
106
+
107
+ /** protection for ugni access */
108
+ volatile int32_t lock ;
109
+
110
+ /** Index of device in module devices array */
111
+ int dev_index ;
112
+
113
+ /** number of SMSG connections */
114
+ volatile int32_t smsg_connections ;
115
+
116
+ /** uGNI device handle */
117
+ gni_nic_handle_t dev_handle ;
118
+
119
+ /** uGNI rdma completion queue */
120
+ mca_btl_ugni_cq_t dev_rdma_local_cq ;
121
+
122
+ /** local rdma completion queue (async) */
123
+ mca_btl_ugni_cq_t dev_rdma_local_irq_cq ;
124
+
125
+ /** local SMSG completion queue */
126
+ mca_btl_ugni_cq_t dev_smsg_local_cq ;
127
+
128
+ /** IRQ memory handle for this device */
129
+ gni_mem_handle_t smsg_irq_mhndl ;
130
+
131
+ /** RDMA endpoint free list */
132
+ opal_free_list_t endpoints ;
133
+
134
+ /** post descriptors pending resources */
135
+ opal_list_t pending_post ;
136
+ };
137
+ typedef struct mca_btl_ugni_device_t mca_btl_ugni_device_t ;
138
+
139
+ typedef intptr_t (* mca_btl_ugni_device_serialize_fn_t ) (mca_btl_ugni_device_t * device , void * arg );
140
+
64
141
typedef struct mca_btl_ugni_module_t {
65
142
mca_btl_base_module_t super ;
66
143
67
144
bool initialized ;
68
145
69
- opal_common_ugni_device_t * device ;
146
+ mca_btl_ugni_device_t devices [ MCA_BTL_UGNI_MAX_DEV_HANDLES ] ;
70
147
71
148
opal_mutex_t endpoint_lock ;
72
149
size_t endpoint_count ;
@@ -82,9 +159,6 @@ typedef struct mca_btl_ugni_module_t {
82
159
opal_mutex_t eager_get_pending_lock ;
83
160
opal_list_t eager_get_pending ;
84
161
85
- opal_mutex_t pending_descriptors_lock ;
86
- opal_list_t pending_descriptors ;
87
-
88
162
opal_free_list_t post_descriptors ;
89
163
90
164
mca_mpool_base_module_t * mpool ;
@@ -95,23 +169,11 @@ typedef struct mca_btl_ugni_module_t {
95
169
96
170
struct mca_btl_ugni_endpoint_attr_t wc_remote_attr , wc_local_attr ;
97
171
98
- gni_cq_handle_t rdma_local_cq ;
99
172
gni_cq_handle_t smsg_remote_cq ;
100
- gni_cq_handle_t smsg_local_cq ;
101
173
gni_cq_handle_t smsg_remote_irq_cq ;
102
- gni_cq_handle_t rdma_local_irq_cq ;
103
-
104
- /* eager fragment list (registered) */
105
- opal_free_list_t eager_frags_send ;
106
- opal_free_list_t eager_frags_recv ;
107
-
108
- /* SMSG fragment list (unregistered) */
109
- opal_free_list_t smsg_frags ;
110
-
111
- /* RDMA fragment list */
112
- opal_free_list_t rdma_frags ;
113
- opal_free_list_t rdma_int_frags ;
114
174
175
+ /** fragment free lists (see enum mca_btl_ugni_free_list_id_t) */
176
+ opal_free_list_t frags_lists [MCA_BTL_UGNI_LIST_MAX ];
115
177
116
178
/* lock for this list */
117
179
opal_mutex_t ep_wait_list_lock ;
@@ -197,10 +259,62 @@ typedef struct mca_btl_ugni_component_t {
197
259
/* Indicate whether progress thread allowed */
198
260
bool progress_thread_enabled ;
199
261
262
+ /** Number of ugni device contexts to create per GNI device */
263
+ int virtual_device_count ;
264
+
265
+ /** Protection tag */
266
+ uint8_t ptag ;
267
+
268
+ /** Unique id for this process assigned by the system */
269
+ uint32_t cookie ;
270
+
271
+ /** Starting value of communication identifier */
272
+ uint32_t cdm_id_base ;
273
+
274
+ /** GNI CDM flags */
275
+ uint32_t cdm_flags ;
276
+
277
+ /** NIC address */
278
+ uint32_t dev_addr ;
200
279
} mca_btl_ugni_component_t ;
201
280
202
- int mca_btl_ugni_module_init (mca_btl_ugni_module_t * ugni_module ,
203
- opal_common_ugni_device_t * device );
281
+ /* Global structures */
282
+
283
+ OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component ;
284
+ OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module ;
285
+
286
+ /**
287
+ * Get a virtual device for communication
288
+ */
289
+ static inline mca_btl_ugni_device_t * mca_btl_ugni_ep_get_device (mca_btl_ugni_module_t * ugni_module )
290
+ {
291
+ static volatile uint32_t device_index = (uint32_t ) 0 ;
292
+ uint32_t dev_index ;
293
+
294
+ /* don't really care if the device index is atomically updated */
295
+ dev_index = (device_index ++ ) & (mca_btl_ugni_component .virtual_device_count - 1 );
296
+
297
+ return ugni_module -> devices + dev_index ;
298
+ }
299
+
300
+ static inline int mca_btl_rc_ugni_to_opal (gni_return_t rc )
301
+ {
302
+ static int codes [] = {OPAL_SUCCESS ,
303
+ OPAL_ERR_RESOURCE_BUSY ,
304
+ OPAL_ERR_BAD_PARAM ,
305
+ OPAL_ERR_OUT_OF_RESOURCE ,
306
+ OPAL_ERR_TIMEOUT ,
307
+ OPAL_ERR_PERM ,
308
+ OPAL_ERROR ,
309
+ OPAL_ERR_BAD_PARAM ,
310
+ OPAL_ERR_BAD_PARAM ,
311
+ OPAL_ERR_NOT_FOUND ,
312
+ OPAL_ERR_VALUE_OUT_OF_BOUNDS ,
313
+ OPAL_ERROR ,
314
+ OPAL_ERR_NOT_SUPPORTED ,
315
+ OPAL_ERR_OUT_OF_RESOURCE };
316
+ return codes [rc ];
317
+ }
204
318
205
319
/**
206
320
* BML->BTL notification of change in the process list.
@@ -324,10 +438,32 @@ typedef struct mca_btl_ugni_reg_t {
324
438
mca_btl_base_registration_handle_t handle ;
325
439
} mca_btl_ugni_reg_t ;
326
440
327
- /* Global structures */
441
+ /**
442
+ * Initialize uGNI support.
443
+ */
444
+ int mca_btl_ugni_init (void );
328
445
329
- OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component ;
330
- OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module ;
446
+ /**
447
+ * Finalize uGNI support.
448
+ */
449
+ int mca_btl_ugni_fini (void );
450
+
451
+ int mca_btl_ugni_module_init (mca_btl_ugni_module_t * ugni_module );
452
+
453
+ /**
454
+ * Initialize a virtual device for device index 0.
455
+ *
456
+ * @param[inout] device Device to initialize
457
+ * @param[in] virtual_device_id Virtual device identifier (up to max handles)
458
+ */
459
+ int mca_btl_ugni_device_init (mca_btl_ugni_device_t * device , int virtual_device_id );
460
+
461
+ /**
462
+ * Finalize a virtual device.
463
+ *
464
+ * @param[in] dev Device to finalize
465
+ */
466
+ int mca_btl_ugni_device_fini (mca_btl_ugni_device_t * dev );
331
467
332
468
/* Get a unique 64-bit id for the process name */
333
469
static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name ) {
@@ -338,6 +474,57 @@ static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
338
474
int mca_btl_ugni_spawn_progress_thread (struct mca_btl_base_module_t * btl );
339
475
int mca_btl_ugni_kill_progress_thread (void );
340
476
477
+ /**
478
+ * Try to lock a uGNI device for exclusive access
479
+ */
480
+ static inline int mca_btl_ugni_device_trylock (mca_btl_ugni_device_t * device )
481
+ {
482
+ /* checking the lock non-atomically first can reduce the number of
483
+ * unnecessary atomic operations. */
484
+ return (device -> lock || opal_atomic_swap_32 (& device -> lock , 1 ));
485
+ }
486
+
487
+ /**
488
+ * Lock a uGNI device for exclusive access
489
+ */
490
+ static inline void mca_btl_ugni_device_lock (mca_btl_ugni_device_t * device )
491
+ {
492
+ while (mca_btl_ugni_device_trylock (device ));
493
+ }
494
+
495
+ /**
496
+ * Release exclusive access to the device
497
+ */
498
+ static inline void mca_btl_ugni_device_unlock (mca_btl_ugni_device_t * device )
499
+ {
500
+ opal_atomic_wmb ();
501
+ device -> lock = 0 ;
502
+ }
503
+
504
+ /**
505
+ * Serialize an operation on a uGNI device
506
+ *
507
+ * @params[in] device ugni device
508
+ * @params[in] fn function to serialize
509
+ * @params[in] arg function argument
510
+ */
511
+ static inline intptr_t mca_btl_ugni_device_serialize (mca_btl_ugni_device_t * device ,
512
+ mca_btl_ugni_device_serialize_fn_t fn , void * arg )
513
+ {
514
+ intptr_t rc ;
515
+
516
+ if (!opal_using_threads ()) {
517
+ return fn (device , arg );
518
+ }
519
+
520
+ /* NTH: for now the device is just protected by a spin lock but this will change in the future */
521
+ mca_btl_ugni_device_lock (device );
522
+ rc = fn (device , arg );
523
+ mca_btl_ugni_device_unlock (device );
524
+ return rc ;
525
+ }
526
+
527
+
341
528
/** Number of times the progress thread has woken up */
342
529
extern unsigned int mca_btl_ugni_progress_thread_wakeups ;
343
530
0 commit comments