Skip to content

Commit e16f386

Browse files
authored
UCP/RKEY: Pack sender flush flag where applicable. (#10909)
1 parent 48d5734 commit e16f386

File tree

9 files changed

+89
-49
lines changed

9 files changed

+89
-49
lines changed

src/ucp/core/ucp_mm.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ ucp_mem_dummy_handle_t ucp_mem_dummy_handle = {
4040
.parent = &ucp_mem_dummy_handle.memh,
4141
.mem_type = UCS_MEMORY_TYPE_HOST,
4242
.sys_dev = UCS_SYS_DEVICE_ID_UNKNOWN,
43-
.packed_sys_dev = UCS_SYS_DEVICE_ID_UNKNOWN,
4443
.md_map = 0,
4544
.inv_md_map = 0,
4645
.reg_id = 0,
@@ -727,13 +726,6 @@ static void ucp_memh_init(ucp_mem_h memh, ucp_context_h context,
727726
memh->alloc_method = method;
728727
memh->mem_type = mem_type;
729728
memh->sys_dev = sys_dev;
730-
731-
/* Cache sys_dev in a format packed to rkey to minimize overhead during
732-
* rndv protocols. TODO remove if using another method to mark rkey with
733-
* remote flush requirement. */
734-
memh->packed_sys_dev = (sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) ?
735-
UCS_SYS_DEVICE_ID_UNKNOWN :
736-
ucp_rkey_pack_sys_dev(memh);
737729
}
738730

739731
static ucs_status_t

src/ucp/core/ucp_mm.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,21 @@ enum {
3636
/*
3737
* Memory handle was imported and points to some peer's memory buffer.
3838
*/
39-
UCP_MEMH_FLAG_IMPORTED = UCS_BIT(0),
40-
UCP_MEMH_FLAG_MLOCKED = UCS_BIT(1),
41-
UCP_MEMH_FLAG_HAS_AUTO_GVA = UCS_BIT(2),
39+
UCP_MEMH_FLAG_IMPORTED = UCS_BIT(0),
40+
UCP_MEMH_FLAG_MLOCKED = UCS_BIT(1),
41+
UCP_MEMH_FLAG_HAS_AUTO_GVA = UCS_BIT(2),
4242

4343
/**
4444
* Avoid using registration cache for the particular memory region.
4545
*/
46-
UCP_MEMH_FLAG_NO_RCACHE = UCS_BIT(3)
46+
UCP_MEMH_FLAG_NO_RCACHE = UCS_BIT(3),
47+
48+
/**
49+
* Track whether a sender-side flush is needed. The check is performed only
50+
* when first required, and its result is cached in these flags.
51+
*/
52+
UCP_MEMH_FLAG_SEND_FLUSH_CHECKED = UCS_BIT(4),
53+
UCP_MEMH_FLAG_SEND_FLUSH_NEEDED = UCS_BIT(5)
4754
};
4855

4956

@@ -68,7 +75,6 @@ typedef struct ucp_mem {
6875
ucp_context_h context; /* UCP context that owns a memory handle */
6976
uct_alloc_method_t alloc_method; /* Method used to allocate the memory */
7077
ucs_sys_device_t sys_dev; /* System device index */
71-
ucs_sys_device_t packed_sys_dev; /* System device index */
7278
ucs_memory_type_t mem_type; /* Type of allocated or registered memory */
7379
ucp_md_index_t alloc_md_index; /* Index of MD used to allocate the memory */
7480
uint64_t remote_uuid; /* Remote UUID */

src/ucp/core/ucp_rkey.c

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -150,29 +150,38 @@ ucp_sys_dev_map_t ucp_memh_sys_dev_map(ucp_mem_h memh)
150150
return 0;
151151
}
152152

153-
ucs_sys_device_t ucp_rkey_pack_sys_dev(ucp_mem_h memh)
153+
static int ucp_memh_send_flush_is_needed(ucp_mem_h memh)
154154
{
155-
ucs_sys_device_t sys_dev_packed = memh->sys_dev;
156155
ucp_md_index_t md_index;
157156
ucp_sys_dev_map_t sys_dev_map;
158157
ucs_sys_device_t sys_dev;
159158

160-
ucs_assert(sys_dev_packed <= UCP_SYS_DEVICE_MAX_PACKED);
159+
if (memh->flags & UCP_MEMH_FLAG_SEND_FLUSH_CHECKED) {
160+
return !!(memh->flags & UCP_MEMH_FLAG_SEND_FLUSH_NEEDED);
161+
}
162+
163+
memh->flags |= UCP_MEMH_FLAG_SEND_FLUSH_CHECKED;
161164

162-
ucs_for_each_bit(md_index, memh->md_map) {
163-
sys_dev_map = memh->context->tl_mds[md_index].sys_dev_map;
164-
ucs_for_each_bit(sys_dev, sys_dev_map) {
165-
if (ucs_topo_is_sibling(sys_dev, sys_dev_packed)) {
166-
/* PUT operation on such rkey requires remote flush.
167-
* Set a flag for the peer to recognize it. */
168-
sys_dev_packed |= UCP_SYS_DEVICE_FLUSH_BIT;
169-
goto out;
165+
if (memh->sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN) {
166+
ucs_assert(memh->sys_dev <= UCP_SYS_DEVICE_MAX_PACKED);
167+
168+
ucs_for_each_bit(md_index, memh->md_map) {
169+
sys_dev_map = memh->context->tl_mds[md_index].sys_dev_map;
170+
ucs_for_each_bit(sys_dev, sys_dev_map) {
171+
if (ucs_topo_is_sibling(sys_dev, memh->sys_dev)) {
172+
/*
173+
* PUT operation on such device will require remote flush
174+
* when using network devices.
175+
* Set a flag for the peer to recognize it.
176+
*/
177+
memh->flags |= UCP_MEMH_FLAG_SEND_FLUSH_NEEDED;
178+
return 1;
179+
}
170180
}
171181
}
172182
}
173183

174-
out:
175-
return sys_dev_packed;
184+
return 0;
176185
}
177186

178187
UCS_PROFILE_FUNC(ssize_t, ucp_rkey_pack_memh,
@@ -231,8 +240,14 @@ UCS_PROFILE_FUNC(ssize_t, ucp_rkey_pack_memh,
231240

232241
if (md_map != 0) {
233242
/* Since UCX 1.20: always pack sys_dev for non-empty rkeys. */
234-
ucs_assert(memh != NULL);
235-
*ucs_serialize_next(&p, uint8_t) = memh->packed_sys_dev;
243+
ucs_assert(memh != NULL);
244+
245+
sys_dev = memh->sys_dev;
246+
if (ucp_memh_send_flush_is_needed(memh)) {
247+
sys_dev |= UCP_SYS_DEVICE_FLUSH_BIT;
248+
}
249+
250+
*ucs_serialize_next(&p, uint8_t) = sys_dev;
236251
}
237252

238253
if ((mem_info->sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) || (md_map == 0)) {
@@ -829,15 +844,24 @@ ucp_rkey_unpack_lanes_distance(const ucp_ep_config_key_t *ep_config_key,
829844
}
830845
}
831846

832-
static UCS_F_ALWAYS_INLINE ucs_sys_device_t
847+
static UCS_F_ALWAYS_INLINE void
833848
ucp_rkey_extract_sys_dev(const ucp_ep_config_t *ep_config, ucp_rkey_h rkey,
834-
const void **buffer_p, const void *buffer_end)
849+
const void **buffer_p, const void *buffer_end,
850+
ucp_rkey_config_key_t *rkey_config_key)
835851
{
836852
if ((*buffer_p < buffer_end) ||
837853
((ep_config->key.dst_version > 19) && (rkey->md_map != 0))) {
838-
return *ucs_serialize_next(buffer_p, const uint8_t);
854+
rkey_config_key->sys_dev = *ucs_serialize_next(buffer_p, const uint8_t);
855+
} else {
856+
rkey_config_key->sys_dev = UCS_SYS_DEVICE_ID_UNKNOWN;
857+
}
858+
859+
if ((rkey_config_key->sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN) &&
860+
(rkey_config_key->sys_dev & UCP_SYS_DEVICE_FLUSH_BIT)) {
861+
rkey_config_key->flags = UCP_RKEY_CONFIG_FLAG_FLUSH;
862+
rkey_config_key->sys_dev &= ~UCP_SYS_DEVICE_FLUSH_BIT;
839863
} else {
840-
return UCS_SYS_DEVICE_ID_UNKNOWN;
864+
rkey_config_key->flags = 0;
841865
}
842866
}
843867

@@ -864,8 +888,8 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_rkey_proto_resolve,
864888
rkey_config_key.md_map = rkey->md_map;
865889
rkey_config_key.mem_type = rkey->mem_type;
866890
rkey_config_key.unreachable_md_map = unreachable_md_map;
867-
rkey_config_key.sys_dev = ucp_rkey_extract_sys_dev(
868-
ep_config, rkey, &p, buffer_end);
891+
892+
ucp_rkey_extract_sys_dev(ep_config, rkey, &p, buffer_end, &rkey_config_key);
869893

870894
/* Starting with UCX v1.20, lane distances are always packed if sys_dev is
871895
* not UNKNOWN. Even if the rkey length is not explicitly passed to the API,

src/ucp/core/ucp_rkey.h

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ enum {
4444
};
4545

4646

47+
/**
48+
* Rkey config flags
49+
*/
50+
enum {
51+
UCP_RKEY_CONFIG_FLAG_FLUSH = UCS_BIT(0) /* Put and atomic operations on this rkey
52+
require remote flush */
53+
};
54+
55+
4756
/**
4857
* Rkey configuration key
4958
*/
@@ -57,6 +66,9 @@ struct ucp_rkey_config_key {
5766
/* Remote system device id */
5867
ucs_sys_device_t sys_dev;
5968

69+
/* Rkey-specific flags, such as @a UCP_RKEY_CONFIG_FLAG_FLUSH */
70+
uint8_t flags;
71+
6072
/* Remote memory type */
6173
ucs_memory_type_t mem_type;
6274

@@ -65,8 +77,8 @@ struct ucp_rkey_config_key {
6577
};
6678

6779

68-
#define UCP_SYS_DEVICE_FLUSH_BIT UCS_BIT(7)
69-
#define UCP_SYS_DEVICE_MAX_PACKED UCP_SYS_DEVICE_FLUSH_BIT - 1
80+
#define UCP_SYS_DEVICE_FLUSH_BIT UCS_BIT(7)
81+
#define UCP_SYS_DEVICE_MAX_PACKED (UCP_SYS_DEVICE_FLUSH_BIT - 1)
7082

7183

7284
/**
@@ -243,7 +255,4 @@ void ucp_rkey_proto_select_dump(ucp_worker_h worker,
243255
ucp_worker_cfg_index_t rkey_cfg_index,
244256
ucs_string_buffer_t *strb);
245257

246-
247-
ucs_sys_device_t ucp_rkey_pack_sys_dev(ucp_mem_h memh);
248-
249258
#endif

src/ucp/core/ucp_rkey.inl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ ucp_rkey_config_hash_func(ucp_rkey_config_key_t rkey_config_key)
2020
(rkey_config_key.unreachable_md_map << 32)) ^
2121
(rkey_config_key.ep_cfg_index << 8) ^
2222
(rkey_config_key.sys_dev << 16) ^
23-
(rkey_config_key.mem_type << 24);
23+
(rkey_config_key.mem_type << 24) ^
24+
rkey_config_key.flags;
2425
}
2526

2627
static UCS_F_ALWAYS_INLINE int
@@ -31,6 +32,7 @@ ucp_rkey_config_is_equal(ucp_rkey_config_key_t rkey_config_key1,
3132
(rkey_config_key1.ep_cfg_index == rkey_config_key2.ep_cfg_index) &&
3233
(rkey_config_key1.sys_dev == rkey_config_key2.sys_dev) &&
3334
(rkey_config_key1.mem_type == rkey_config_key2.mem_type) &&
35+
(rkey_config_key1.flags == rkey_config_key2.flags) &&
3436
(rkey_config_key1.unreachable_md_map ==
3537
rkey_config_key2.unreachable_md_map);
3638
}
@@ -67,9 +69,7 @@ ucp_ep_rkey_unpack_reachable(ucp_ep_h ep, const void *buffer, size_t length,
6769
static UCS_F_ALWAYS_INLINE int
6870
ucp_rkey_need_remote_flush(const ucp_rkey_config_key_t *key)
6971
{
70-
return (key->sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN) &&
71-
(key->sys_dev & UCP_SYS_DEVICE_FLUSH_BIT);
72-
72+
return key->flags & UCP_RKEY_CONFIG_FLAG_FLUSH;
7373
}
7474

7575
#endif

src/ucp/proto/proto_multi.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,14 +141,20 @@ ucp_proto_multi_select_bw_lanes(const ucp_proto_init_params_t *params,
141141
* path ratio */
142142
}
143143

144-
static ucp_sys_dev_map_t
145-
ucp_proto_multi_init_flush_sys_dev_mask(const ucp_rkey_config_key_t *key)
144+
static ucp_sys_dev_map_t ucp_proto_multi_init_flush_sys_dev_mask(
145+
const ucp_proto_multi_init_params_t *params, ucp_lane_index_t lane)
146146
{
147-
if (key == NULL || !ucp_rkey_need_remote_flush(key)) {
147+
const ucp_rkey_config_key_t *key = params->super.super.rkey_config_key;
148+
const uct_iface_attr_t *iface_attr =
149+
ucp_proto_common_get_iface_attr(&params->super.super, lane);
150+
151+
if ((key == NULL) || !ucp_rkey_need_remote_flush(key) ||
152+
!(iface_attr->cap.flags & UCT_IFACE_FLAG_GET_BCOPY) ||
153+
!ucp_proto_common_is_net_dev(&params->super.super, lane)) {
148154
return 0;
149155
}
150156

151-
return UCS_BIT(key->sys_dev & ~UCP_SYS_DEVICE_FLUSH_BIT);
157+
return UCS_BIT(key->sys_dev);
152158
}
153159

154160
static ucp_lane_index_t ucp_proto_multi_filter_net_devices(
@@ -456,8 +462,8 @@ ucs_status_t ucp_proto_multi_init(const ucp_proto_multi_init_params_t *params,
456462
lpriv->max_frag_sum = mpriv->max_frag_sum;
457463
lpriv->opt_align = ucp_proto_multi_get_lane_opt_align(params, lane);
458464
mpriv->align_thresh = ucs_max(mpriv->align_thresh, lpriv->opt_align);
459-
lpriv->flush_sys_dev_mask = ucp_proto_multi_init_flush_sys_dev_mask(
460-
params->super.super.rkey_config_key);
465+
lpriv->flush_sys_dev_mask =
466+
ucp_proto_multi_init_flush_sys_dev_mask(params, lane);
461467
}
462468
ucs_assert(mpriv->num_lanes == ucs_popcount(selection.lane_map));
463469

src/ucp/rma/flush.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ static ucs_status_t ucp_ep_flush_mem_start(ucp_request_t *req)
243243

244244
ep->ext->flush_sys_dev_map = 0;
245245

246+
req->send.flush.mem.count = count;
246247
req->send.flush.mem.started = 0;
247248
req->send.uct.func = ucp_ep_flush_mem_progress;
248249
req->send.flush.mem.uct_comp.func = ucp_ep_flush_mem_completion;

src/ucp/rndv/proto_rndv.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ static ucs_status_t ucp_proto_rndv_ctrl_select_remote_proto(
158158
rkey_config_key.ep_cfg_index = ep_cfg_index;
159159
rkey_config_key.sys_dev = params->super.reg_mem_info.sys_dev;
160160
rkey_config_key.mem_type = params->super.reg_mem_info.type;
161+
rkey_config_key.flags = 0;
161162

162163
rkey_config_key.unreachable_md_map = 0;
163164

test/gtest/ucp/test_ucp_proto.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ test_ucp_proto::create_rkey_config_key(ucp_md_map_t md_map)
116116
rkey_config_key.mem_type = UCS_MEMORY_TYPE_HOST;
117117
rkey_config_key.sys_dev = UCS_SYS_DEVICE_ID_UNKNOWN;
118118
rkey_config_key.unreachable_md_map = 0;
119+
rkey_config_key.flags = 0;
119120

120121
return rkey_config_key;
121122
}

0 commit comments

Comments
 (0)