Skip to content

Commit 93ef7fe

Browse files
hjelm authored and bwbarrett committed
osc/rdma: bring bug/threading fixes into v3.1.x from master
This commit contains the contents of: 45db363 7f4872d to the v3.1.x branch. These commits fix a couple of bugs and improve the threading support (reference #2530). To keep the code mostly in sync with master I added code to osc_rdma_types.h to convert between the atomics support on master and v3.1.x. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 0d4d986 commit 93ef7fe

16 files changed

+1275
-906
lines changed

ompi/mca/osc/rdma/osc_rdma.h

Lines changed: 107 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,11 @@
88
* University of Stuttgart. All rights reserved.
99
* Copyright (c) 2004-2005 The Regents of the University of California.
1010
* All rights reserved.
11-
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
11+
* Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights
1212
* reserved.
1313
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
15-
* Copyright (c) 2016 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -50,6 +50,11 @@
5050

5151
#include "opal_stdint.h"
5252

53+
/**
 * @brief locking modes
 *
 * One of these values is stored in the locking_mode field of both the
 * component (default for all windows) and each module.
 */
enum {
    /** two-level locking (NOTE(review): semantics are not visible in this header -- confirm) */
    OMPI_OSC_RDMA_LOCKING_TWO_LEVEL,
    /** on-demand locking: a peer that is not yet demand locked is locked with
     *  ompi_osc_rdma_demand_lock_peer() when it is looked up in a lock_all epoch */
    OMPI_OSC_RDMA_LOCKING_ON_DEMAND,
};
57+
5358
/**
5459
* @brief osc rdma component structure
5560
*/
@@ -87,6 +92,9 @@ struct ompi_osc_rdma_component_t {
8792
/** Default value of the no_locks info key for new windows */
8893
bool no_locks;
8994

95+
/** Locking mode to use as the default for all windows */
96+
int locking_mode;
97+
9098
/** Accumulate operations will only operate on a single intrinsic datatype */
9199
bool acc_single_intrinsic;
92100

@@ -119,6 +127,8 @@ struct ompi_osc_rdma_module_t {
119127
/** Mutex lock protecting module data */
120128
opal_mutex_t lock;
121129

130+
/** locking mode to use */
131+
int locking_mode;
122132

123133
/* window configuration */
124134

@@ -147,10 +157,12 @@ struct ompi_osc_rdma_module_t {
147157
/** Local displacement unit. */
148158
int disp_unit;
149159

150-
151160
/** global leader */
152161
ompi_osc_rdma_peer_t *leader;
153162

163+
/** my peer structure */
164+
ompi_osc_rdma_peer_t *my_peer;
165+
154166
/** pointer to free on cleanup (may be NULL) */
155167
void *free_after;
156168

@@ -276,6 +288,16 @@ int ompi_osc_rdma_free (struct ompi_win_t *win);
276288
*/
277289
int ompi_osc_module_add_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer);
278290

291+
/**
292+
* @brief demand lock a peer
293+
*
294+
* @param[in] module osc rdma module
295+
* @param[in] peer peer to lock
296+
*
297+
* @returns OMPI_SUCCESS on success
298+
*/
299+
int ompi_osc_rdma_demand_lock_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer);
300+
279301
/**
280302
* @brief check if a peer object is cached for a remote rank
281303
*
@@ -449,10 +471,18 @@ static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_r
449471
}
450472

451473
return NULL;
452-
case OMPI_OSC_RDMA_SYNC_TYPE_FENCE:
453474
case OMPI_OSC_RDMA_SYNC_TYPE_LOCK:
454-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found fence/lock_all access epoch for target %d", target);
475+
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found lock_all access epoch for target %d", target);
476+
477+
*peer = ompi_osc_rdma_module_peer (module, target);
478+
if (OPAL_UNLIKELY(OMPI_OSC_RDMA_LOCKING_ON_DEMAND == module->locking_mode &&
479+
!ompi_osc_rdma_peer_is_demand_locked (*peer))) {
480+
ompi_osc_rdma_demand_lock_peer (module, *peer);
481+
}
455482

483+
return &module->all_sync;
484+
case OMPI_OSC_RDMA_SYNC_TYPE_FENCE:
485+
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found fence access epoch for target %d", target);
456486
/* fence epoch is now active */
457487
module->all_sync.epoch_active = true;
458488
*peer = ompi_osc_rdma_module_peer (module, target);
@@ -470,25 +500,94 @@ static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_r
470500
return NULL;
471501
}
472502

503+
/**
 * @brief check whether the btl flush function should be used for completion
 *
 * @param[in] module         osc rdma module
 *
 * @returns true if BTL_VERSION is defined and at least 310 and the module's
 *          selected btl has a non-NULL btl_flush function
 * @returns false otherwise
 */
static inline bool ompi_osc_rdma_use_btl_flush (ompi_osc_rdma_module_t *module)
{
#if defined(BTL_VERSION) && (BTL_VERSION >= 310)
    return !!(module->selected_btl->btl_flush);
#else
    /* btl_flush is never referenced for older btl interfaces */
    (void) module;
    return false;
#endif
}
511+
512+
/**
513+
* @brief increment the outstanding rdma operation counter (atomic)
514+
*
515+
* @param[in] rdma_sync osc rdma synchronization object
516+
*/
517+
static inline void ompi_osc_rdma_sync_rdma_inc_always (ompi_osc_rdma_sync_t *rdma_sync)
518+
{
519+
ompi_osc_rdma_counter_add (&rdma_sync->outstanding_rdma.counter, 1);
520+
521+
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "inc: there are %ld outstanding rdma operations",
522+
(unsigned long) rdma_sync->outstanding_rdma.counter);
523+
}
524+
525+
/**
 * @brief increment the outstanding rdma operation counter unless btl flush is in use
 *
 * @param[in] rdma_sync         osc rdma synchronization object
 *
 * When ompi_osc_rdma_use_btl_flush() reports true for the sync object's module
 * the counter is left untouched. Otherwise this is equivalent to
 * ompi_osc_rdma_sync_rdma_inc_always().
 */
static inline void ompi_osc_rdma_sync_rdma_inc (ompi_osc_rdma_sync_t *rdma_sync)
{
#if defined(BTL_VERSION) && (BTL_VERSION >= 310)
    if (ompi_osc_rdma_use_btl_flush (rdma_sync->module)) {
        return;
    }
#endif

    ompi_osc_rdma_sync_rdma_inc_always (rdma_sync);
}
534+
535+
/**
536+
* @brief decrement the outstanding rdma operation counter (atomic)
537+
*
538+
* @param[in] rdma_sync osc rdma synchronization object
539+
*/
540+
static inline void ompi_osc_rdma_sync_rdma_dec_always (ompi_osc_rdma_sync_t *rdma_sync)
541+
{
542+
opal_atomic_wmb ();
543+
ompi_osc_rdma_counter_add (&rdma_sync->outstanding_rdma.counter, -1);
544+
545+
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "dec: there are %ld outstanding rdma operations",
546+
(unsigned long) rdma_sync->outstanding_rdma.counter);
547+
}
548+
549+
/**
 * @brief decrement the outstanding rdma operation counter unless btl flush is in use
 *
 * @param[in] rdma_sync         osc rdma synchronization object
 *
 * When ompi_osc_rdma_use_btl_flush() reports true for the sync object's module
 * the counter is left untouched. Otherwise this is equivalent to
 * ompi_osc_rdma_sync_rdma_dec_always().
 */
static inline void ompi_osc_rdma_sync_rdma_dec (ompi_osc_rdma_sync_t *rdma_sync)
{
#if defined(BTL_VERSION) && (BTL_VERSION >= 310)
    if (ompi_osc_rdma_use_btl_flush (rdma_sync->module)) {
        return;
    }
#endif

    ompi_osc_rdma_sync_rdma_dec_always (rdma_sync);
}
558+
473559
/**
474560
* @brief complete all outstanding rdma operations to all peers
475561
*
476562
* @param[in] module osc rdma module
477563
*/
478564
static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync)
479565
{
480-
ompi_osc_rdma_aggregation_t *aggregation, *next;
481-
482566
if (opal_list_get_size (&sync->aggregations)) {
567+
ompi_osc_rdma_aggregation_t *aggregation, *next;
568+
483569
OPAL_THREAD_SCOPED_LOCK(&sync->lock,
484570
OPAL_LIST_FOREACH_SAFE(aggregation, next, &sync->aggregations, ompi_osc_rdma_aggregation_t) {
571+
fprintf (stderr, "Flushing aggregation %p, peer %p\n", (void*)aggregation, (void*)aggregation->peer);
485572
ompi_osc_rdma_peer_aggregate_flush (aggregation->peer);
486573
});
487574
}
488575

576+
#if !defined(BTL_VERSION) || (BTL_VERSION < 310)
489577
do {
490578
opal_progress ();
491-
} while (sync->outstanding_rdma);
579+
} while (ompi_osc_rdma_sync_get_count (sync));
580+
#else
581+
mca_btl_base_module_t *btl_module = sync->module->selected_btl;
582+
583+
do {
584+
if (!ompi_osc_rdma_use_btl_flush (sync->module)) {
585+
opal_progress ();
586+
} else {
587+
btl_module->btl_flush (btl_module, NULL);
588+
}
589+
} while (ompi_osc_rdma_sync_get_count (sync) || (sync->module->rdma_frag && (sync->module->rdma_frag->pending > 1)));
590+
#endif
492591
}
493592

494593
/**

0 commit comments

Comments
 (0)