88 * University of Stuttgart. All rights reserved.
99 * Copyright (c) 2004-2005 The Regents of the University of California.
1010 * All rights reserved.
11- * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
11+ * Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights
1212 * reserved.
1313 * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
1414 * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
15- * Copyright (c) 2016 Intel, Inc. All rights reserved.
15+ * Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
1616 * $COPYRIGHT$
1717 *
1818 * Additional copyrights may follow
5050
5151#include "opal_stdint.h"
5252
/**
 * @brief locking modes for osc rdma windows
 *
 * One of these values is stored in the component as the default locking
 * mode for all windows and in each module as the locking mode to use.
 */
enum {
    /** two-level locking */
    OMPI_OSC_RDMA_LOCKING_TWO_LEVEL = 0,
    /** on-demand locking: while looking up the synchronization object for
     * a lock_all access epoch, a peer that is not yet demand locked is
     * locked with ompi_osc_rdma_demand_lock_peer() */
    OMPI_OSC_RDMA_LOCKING_ON_DEMAND = 1,
};
57+
5358/**
5459 * @brief osc rdma component structure
5560 */
@@ -87,6 +92,9 @@ struct ompi_osc_rdma_component_t {
8792 /** Default value of the no_locks info key for new windows */
8893 bool no_locks ;
8994
95+ /** Locking mode to use as the default for all windows */
96+ int locking_mode ;
97+
9098 /** Accumulate operations will only operate on a single intrinsic datatype */
9199 bool acc_single_intrinsic ;
92100
@@ -119,6 +127,8 @@ struct ompi_osc_rdma_module_t {
119127 /** Mutex lock protecting module data */
120128 opal_mutex_t lock ;
121129
130+ /** locking mode to use */
131+ int locking_mode ;
122132
123133 /* window configuration */
124134
@@ -147,10 +157,12 @@ struct ompi_osc_rdma_module_t {
147157 /** Local displacement unit. */
148158 int disp_unit ;
149159
150-
151160 /** global leader */
152161 ompi_osc_rdma_peer_t * leader ;
153162
163+ /** my peer structure */
164+ ompi_osc_rdma_peer_t * my_peer ;
165+
154166 /** pointer to free on cleanup (may be NULL) */
155167 void * free_after ;
156168
@@ -276,6 +288,16 @@ int ompi_osc_rdma_free (struct ompi_win_t *win);
276288 */
277289int ompi_osc_module_add_peer (ompi_osc_rdma_module_t * module , ompi_osc_rdma_peer_t * peer );
278290
291+ /**
292+ * @brief demand lock a peer
293+ *
294+ * @param[in] module osc rdma module
295+ * @param[in] peer peer to lock
296+ *
297+ * @returns OMPI_SUCCESS on success
298+ */
299+ int ompi_osc_rdma_demand_lock_peer (ompi_osc_rdma_module_t * module , ompi_osc_rdma_peer_t * peer );
300+
279301/**
280302 * @brief check if a peer object is cached for a remote rank
281303 *
@@ -449,10 +471,18 @@ static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_r
449471 }
450472
451473 return NULL ;
452- case OMPI_OSC_RDMA_SYNC_TYPE_FENCE :
453474 case OMPI_OSC_RDMA_SYNC_TYPE_LOCK :
454- OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "found fence/lock_all access epoch for target %d" , target );
475+ OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "found lock_all access epoch for target %d" , target );
476+
477+ * peer = ompi_osc_rdma_module_peer (module , target );
478+ if (OPAL_UNLIKELY (OMPI_OSC_RDMA_LOCKING_ON_DEMAND == module -> locking_mode &&
479+ !ompi_osc_rdma_peer_is_demand_locked (* peer ))) {
480+ ompi_osc_rdma_demand_lock_peer (module , * peer );
481+ }
455482
483+ return & module -> all_sync ;
484+ case OMPI_OSC_RDMA_SYNC_TYPE_FENCE :
485+ OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "found fence access epoch for target %d" , target );
456486 /* fence epoch is now active */
457487 module -> all_sync .epoch_active = true;
458488 * peer = ompi_osc_rdma_module_peer (module , target );
@@ -470,25 +500,94 @@ static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_r
470500 return NULL ;
471501}
472502
503+ static inline bool ompi_osc_rdma_use_btl_flush (ompi_osc_rdma_module_t * module )
504+ {
505+ #if defined(BTL_VERSION ) && (BTL_VERSION >= 310 )
506+ return !!(module -> selected_btl -> btl_flush );
507+ #else
508+ return false;
509+ #endif
510+ }
511+
512+ /**
513+ * @brief increment the outstanding rdma operation counter (atomic)
514+ *
515+ * @param[in] rdma_sync osc rdma synchronization object
516+ */
517+ static inline void ompi_osc_rdma_sync_rdma_inc_always (ompi_osc_rdma_sync_t * rdma_sync )
518+ {
519+ ompi_osc_rdma_counter_add (& rdma_sync -> outstanding_rdma .counter , 1 );
520+
521+ OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_INFO , "inc: there are %ld outstanding rdma operations" ,
522+ (unsigned long ) rdma_sync -> outstanding_rdma .counter );
523+ }
524+
525+ static inline void ompi_osc_rdma_sync_rdma_inc (ompi_osc_rdma_sync_t * rdma_sync )
526+ {
527+ #if defined(BTL_VERSION ) && (BTL_VERSION >= 310 )
528+ if (ompi_osc_rdma_use_btl_flush (rdma_sync -> module )) {
529+ return ;
530+ }
531+ #endif
532+ ompi_osc_rdma_sync_rdma_inc_always (rdma_sync );
533+ }
534+
535+ /**
536+ * @brief decrement the outstanding rdma operation counter (atomic)
537+ *
538+ * @param[in] rdma_sync osc rdma synchronization object
539+ */
540+ static inline void ompi_osc_rdma_sync_rdma_dec_always (ompi_osc_rdma_sync_t * rdma_sync )
541+ {
542+ opal_atomic_wmb ();
543+ ompi_osc_rdma_counter_add (& rdma_sync -> outstanding_rdma .counter , -1 );
544+
545+ OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_INFO , "dec: there are %ld outstanding rdma operations" ,
546+ (unsigned long ) rdma_sync -> outstanding_rdma .counter );
547+ }
548+
549+ static inline void ompi_osc_rdma_sync_rdma_dec (ompi_osc_rdma_sync_t * rdma_sync )
550+ {
551+ #if defined(BTL_VERSION ) && (BTL_VERSION >= 310 )
552+ if (ompi_osc_rdma_use_btl_flush (rdma_sync -> module )) {
553+ return ;
554+ }
555+ #endif
556+ ompi_osc_rdma_sync_rdma_dec_always (rdma_sync );
557+ }
558+
473559/**
474560 * @brief complete all outstanding rdma operations to all peers
475561 *
476562 * @param[in] module osc rdma module
477563 */
478564static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t * sync )
479565{
480- ompi_osc_rdma_aggregation_t * aggregation , * next ;
481-
482566 if (opal_list_get_size (& sync -> aggregations )) {
567+ ompi_osc_rdma_aggregation_t * aggregation , * next ;
568+
483569 OPAL_THREAD_SCOPED_LOCK (& sync -> lock ,
484570 OPAL_LIST_FOREACH_SAFE (aggregation , next , & sync -> aggregations , ompi_osc_rdma_aggregation_t ) {
571+ fprintf (stderr , "Flushing aggregation %p, peer %p\n" , (void * )aggregation , (void * )aggregation -> peer );
485572 ompi_osc_rdma_peer_aggregate_flush (aggregation -> peer );
486573 });
487574 }
488575
576+ #if !defined(BTL_VERSION ) || (BTL_VERSION < 310 )
489577 do {
490578 opal_progress ();
491- } while (sync -> outstanding_rdma );
579+ } while (ompi_osc_rdma_sync_get_count (sync ));
580+ #else
581+ mca_btl_base_module_t * btl_module = sync -> module -> selected_btl ;
582+
583+ do {
584+ if (!ompi_osc_rdma_use_btl_flush (sync -> module )) {
585+ opal_progress ();
586+ } else {
587+ btl_module -> btl_flush (btl_module , NULL );
588+ }
589+ } while (ompi_osc_rdma_sync_get_count (sync ) || (sync -> module -> rdma_frag && (sync -> module -> rdma_frag -> pending > 1 )));
590+ #endif
492591}
493592
494593/**
0 commit comments