Skip to content

Commit 1abd735

Browse files
committed
Merge remote-tracking branch 'origin' into mem_type
2 parents 7763425 + 5959ce7 commit 1abd735

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+420
-338
lines changed

src/tools/vfs/vfs_main.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,10 +277,12 @@ static int vfs_unlink_socket(int silent_notexist)
277277
}
278278

279279
/* return 0 or the (negative) value of errno in case of error */
280-
static int vfs_listen(int silent_addinuse_err)
280+
static int vfs_listen(int silent_addrinuse_err)
281281
{
282282
int listen_fd, ret;
283283

284+
vfs_log("listening on socket %s", (const char*)g_sockaddr.sun_path);
285+
284286
ret = umask(~S_IRWXU);
285287
if (ret < 0) {
286288
ret = -errno;
@@ -304,7 +306,7 @@ static int vfs_listen(int silent_addinuse_err)
304306
sizeof(g_sockaddr));
305307
if (ret < 0) {
306308
ret = -errno;
307-
if ((errno != EADDRINUSE) || !silent_addinuse_err) {
309+
if ((errno != EADDRINUSE) || !silent_addrinuse_err) {
308310
vfs_error("bind(%s) failed: %m", g_sockaddr.sun_path);
309311
}
310312
goto out_close;

src/ucp/core/ucp_mm.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ ucp_mem_dummy_handle_t ucp_mem_dummy_handle = {
4040
.parent = &ucp_mem_dummy_handle.memh,
4141
.mem_type = UCS_MEMORY_TYPE_HOST,
4242
.sys_dev = UCS_SYS_DEVICE_ID_UNKNOWN,
43-
.packed_sys_dev = UCS_SYS_DEVICE_ID_UNKNOWN,
4443
.md_map = 0,
4544
.inv_md_map = 0,
4645
.reg_id = 0,
@@ -727,13 +726,6 @@ static void ucp_memh_init(ucp_mem_h memh, ucp_context_h context,
727726
memh->alloc_method = method;
728727
memh->mem_type = mem_type;
729728
memh->sys_dev = sys_dev;
730-
731-
/* Cache sys_dev in a format packed to rkey to minimize overhead during
732-
* rndv protocols. TODO remove if using another method to mark rkey with
733-
* remote flush requirement. */
734-
memh->packed_sys_dev = (sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) ?
735-
UCS_SYS_DEVICE_ID_UNKNOWN :
736-
ucp_rkey_pack_sys_dev(memh);
737729
}
738730

739731
static ucs_status_t

src/ucp/core/ucp_mm.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,21 @@ enum {
3636
/*
3737
* Memory handle was imported and points to some peer's memory buffer.
3838
*/
39-
UCP_MEMH_FLAG_IMPORTED = UCS_BIT(0),
40-
UCP_MEMH_FLAG_MLOCKED = UCS_BIT(1),
41-
UCP_MEMH_FLAG_HAS_AUTO_GVA = UCS_BIT(2),
39+
UCP_MEMH_FLAG_IMPORTED = UCS_BIT(0),
40+
UCP_MEMH_FLAG_MLOCKED = UCS_BIT(1),
41+
UCP_MEMH_FLAG_HAS_AUTO_GVA = UCS_BIT(2),
4242

4343
/**
4444
* Avoid using registration cache for the particular memory region.
4545
*/
46-
UCP_MEMH_FLAG_NO_RCACHE = UCS_BIT(3)
46+
UCP_MEMH_FLAG_NO_RCACHE = UCS_BIT(3),
47+
48+
/**
49+
* Track if sender-side flush is needed, check is only done when needed
50+
* and cached.
51+
*/
52+
UCP_MEMH_FLAG_SEND_FLUSH_CHECKED = UCS_BIT(4),
53+
UCP_MEMH_FLAG_SEND_FLUSH_NEEDED = UCS_BIT(5)
4754
};
4855

4956

@@ -68,7 +75,6 @@ typedef struct ucp_mem {
6875
ucp_context_h context; /* UCP context that owns a memory handle */
6976
uct_alloc_method_t alloc_method; /* Method used to allocate the memory */
7077
ucs_sys_device_t sys_dev; /* System device index */
71-
ucs_sys_device_t packed_sys_dev; /* System device index */
7278
ucs_memory_type_t mem_type; /* Type of allocated or registered memory */
7379
ucp_md_index_t alloc_md_index; /* Index of MD used to allocate the memory */
7480
uint64_t remote_uuid; /* Remote UUID */

src/ucp/core/ucp_rkey.c

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -150,29 +150,38 @@ ucp_sys_dev_map_t ucp_memh_sys_dev_map(ucp_mem_h memh)
150150
return 0;
151151
}
152152

153-
ucs_sys_device_t ucp_rkey_pack_sys_dev(ucp_mem_h memh)
153+
static int ucp_memh_send_flush_is_needed(ucp_mem_h memh)
154154
{
155-
ucs_sys_device_t sys_dev_packed = memh->sys_dev;
156155
ucp_md_index_t md_index;
157156
ucp_sys_dev_map_t sys_dev_map;
158157
ucs_sys_device_t sys_dev;
159158

160-
ucs_assert(sys_dev_packed <= UCP_SYS_DEVICE_MAX_PACKED);
159+
if (memh->flags & UCP_MEMH_FLAG_SEND_FLUSH_CHECKED) {
160+
return !!(memh->flags & UCP_MEMH_FLAG_SEND_FLUSH_NEEDED);
161+
}
162+
163+
memh->flags |= UCP_MEMH_FLAG_SEND_FLUSH_CHECKED;
161164

162-
ucs_for_each_bit(md_index, memh->md_map) {
163-
sys_dev_map = memh->context->tl_mds[md_index].sys_dev_map;
164-
ucs_for_each_bit(sys_dev, sys_dev_map) {
165-
if (ucs_topo_is_sibling(sys_dev, sys_dev_packed)) {
166-
/* PUT operation on such rkey requires remote flush.
167-
* Set a flag for the peer to recognize it. */
168-
sys_dev_packed |= UCP_SYS_DEVICE_FLUSH_BIT;
169-
goto out;
165+
if (memh->sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN) {
166+
ucs_assert(memh->sys_dev <= UCP_SYS_DEVICE_MAX_PACKED);
167+
168+
ucs_for_each_bit(md_index, memh->md_map) {
169+
sys_dev_map = memh->context->tl_mds[md_index].sys_dev_map;
170+
ucs_for_each_bit(sys_dev, sys_dev_map) {
171+
if (ucs_topo_is_sibling(sys_dev, memh->sys_dev)) {
172+
/*
173+
* PUT operation on such device will require remote flush
174+
* when using network devices.
175+
* Set a flag for the peer to recognize it.
176+
*/
177+
memh->flags |= UCP_MEMH_FLAG_SEND_FLUSH_NEEDED;
178+
return 1;
179+
}
170180
}
171181
}
172182
}
173183

174-
out:
175-
return sys_dev_packed;
184+
return 0;
176185
}
177186

178187
UCS_PROFILE_FUNC(ssize_t, ucp_rkey_pack_memh,
@@ -231,8 +240,14 @@ UCS_PROFILE_FUNC(ssize_t, ucp_rkey_pack_memh,
231240

232241
if (md_map != 0) {
233242
/* Since UCX 1.20: always pack sys_dev for non-empty rkeys. */
234-
ucs_assert(memh != NULL);
235-
*ucs_serialize_next(&p, uint8_t) = memh->packed_sys_dev;
243+
ucs_assert(memh != NULL);
244+
245+
sys_dev = memh->sys_dev;
246+
if (ucp_memh_send_flush_is_needed(memh)) {
247+
sys_dev |= UCP_SYS_DEVICE_FLUSH_BIT;
248+
}
249+
250+
*ucs_serialize_next(&p, uint8_t) = sys_dev;
236251
}
237252

238253
if ((mem_info->sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) || (md_map == 0)) {
@@ -829,15 +844,24 @@ ucp_rkey_unpack_lanes_distance(const ucp_ep_config_key_t *ep_config_key,
829844
}
830845
}
831846

832-
static UCS_F_ALWAYS_INLINE ucs_sys_device_t
847+
static UCS_F_ALWAYS_INLINE void
833848
ucp_rkey_extract_sys_dev(const ucp_ep_config_t *ep_config, ucp_rkey_h rkey,
834-
const void **buffer_p, const void *buffer_end)
849+
const void **buffer_p, const void *buffer_end,
850+
ucp_rkey_config_key_t *rkey_config_key)
835851
{
836852
if ((*buffer_p < buffer_end) ||
837853
((ep_config->key.dst_version > 19) && (rkey->md_map != 0))) {
838-
return *ucs_serialize_next(buffer_p, const uint8_t);
854+
rkey_config_key->sys_dev = *ucs_serialize_next(buffer_p, const uint8_t);
855+
} else {
856+
rkey_config_key->sys_dev = UCS_SYS_DEVICE_ID_UNKNOWN;
857+
}
858+
859+
if ((rkey_config_key->sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN) &&
860+
(rkey_config_key->sys_dev & UCP_SYS_DEVICE_FLUSH_BIT)) {
861+
rkey_config_key->flags = UCP_RKEY_CONFIG_FLAG_FLUSH;
862+
rkey_config_key->sys_dev &= ~UCP_SYS_DEVICE_FLUSH_BIT;
839863
} else {
840-
return UCS_SYS_DEVICE_ID_UNKNOWN;
864+
rkey_config_key->flags = 0;
841865
}
842866
}
843867

@@ -864,8 +888,8 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_rkey_proto_resolve,
864888
rkey_config_key.md_map = rkey->md_map;
865889
rkey_config_key.mem_type = rkey->mem_type;
866890
rkey_config_key.unreachable_md_map = unreachable_md_map;
867-
rkey_config_key.sys_dev = ucp_rkey_extract_sys_dev(
868-
ep_config, rkey, &p, buffer_end);
891+
892+
ucp_rkey_extract_sys_dev(ep_config, rkey, &p, buffer_end, &rkey_config_key);
869893

870894
/* Starting with UCX v1.20, lane distances are always packed if sys_dev is
871895
* not UNKNOWN. Even if the rkey length is not explicitly passed to the API,

src/ucp/core/ucp_rkey.h

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ enum {
4444
};
4545

4646

47+
/**
48+
* Rkey config flags
49+
*/
50+
enum {
51+
UCP_RKEY_CONFIG_FLAG_FLUSH = UCS_BIT(0) /* Put and atomic operations on this rkey
52+
require remote flush */
53+
};
54+
55+
4756
/**
4857
* Rkey configuration key
4958
*/
@@ -57,6 +66,9 @@ struct ucp_rkey_config_key {
5766
/* Remote system device id */
5867
ucs_sys_device_t sys_dev;
5968

69+
/* Rkey specific flags, like @a UCP_RKEY_CONFIG_FLAG_FLUSH */
70+
uint8_t flags;
71+
6072
/* Remote memory type */
6173
ucs_memory_type_t mem_type;
6274

@@ -65,8 +77,8 @@ struct ucp_rkey_config_key {
6577
};
6678

6779

68-
#define UCP_SYS_DEVICE_FLUSH_BIT UCS_BIT(7)
69-
#define UCP_SYS_DEVICE_MAX_PACKED UCP_SYS_DEVICE_FLUSH_BIT - 1
80+
#define UCP_SYS_DEVICE_FLUSH_BIT UCS_BIT(7)
81+
#define UCP_SYS_DEVICE_MAX_PACKED (UCP_SYS_DEVICE_FLUSH_BIT - 1)
7082

7183

7284
/**
@@ -243,7 +255,4 @@ void ucp_rkey_proto_select_dump(ucp_worker_h worker,
243255
ucp_worker_cfg_index_t rkey_cfg_index,
244256
ucs_string_buffer_t *strb);
245257

246-
247-
ucs_sys_device_t ucp_rkey_pack_sys_dev(ucp_mem_h memh);
248-
249258
#endif

src/ucp/core/ucp_rkey.inl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ ucp_rkey_config_hash_func(ucp_rkey_config_key_t rkey_config_key)
2020
(rkey_config_key.unreachable_md_map << 32)) ^
2121
(rkey_config_key.ep_cfg_index << 8) ^
2222
(rkey_config_key.sys_dev << 16) ^
23-
(rkey_config_key.mem_type << 24);
23+
(rkey_config_key.mem_type << 24) ^
24+
rkey_config_key.flags;
2425
}
2526

2627
static UCS_F_ALWAYS_INLINE int
@@ -31,6 +32,7 @@ ucp_rkey_config_is_equal(ucp_rkey_config_key_t rkey_config_key1,
3132
(rkey_config_key1.ep_cfg_index == rkey_config_key2.ep_cfg_index) &&
3233
(rkey_config_key1.sys_dev == rkey_config_key2.sys_dev) &&
3334
(rkey_config_key1.mem_type == rkey_config_key2.mem_type) &&
35+
(rkey_config_key1.flags == rkey_config_key2.flags) &&
3436
(rkey_config_key1.unreachable_md_map ==
3537
rkey_config_key2.unreachable_md_map);
3638
}
@@ -67,9 +69,7 @@ ucp_ep_rkey_unpack_reachable(ucp_ep_h ep, const void *buffer, size_t length,
6769
static UCS_F_ALWAYS_INLINE int
6870
ucp_rkey_need_remote_flush(const ucp_rkey_config_key_t *key)
6971
{
70-
return (key->sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN) &&
71-
(key->sys_dev & UCP_SYS_DEVICE_FLUSH_BIT);
72-
72+
return key->flags & UCP_RKEY_CONFIG_FLAG_FLUSH;
7373
}
7474

7575
#endif

src/ucp/proto/proto.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@
4646
_macro(ucp_rndv_get_mtype_proto) \
4747
_macro(ucp_rndv_ats_proto) \
4848
_macro(ucp_rndv_rtr_proto) \
49+
_macro(ucp_rndv_put_zcopy_proto) \
4950
_macro(ucp_rndv_rtr_mtype_proto) \
5051
_macro(ucp_rndv_send_ppln_proto) \
5152
_macro(ucp_rndv_recv_ppln_proto) \
52-
_macro(ucp_rndv_put_zcopy_proto) \
5353
_macro(ucp_rndv_put_mtype_proto) \
5454
_macro(ucp_rndv_rkey_ptr_proto) \
5555
_macro(ucp_rndv_rkey_ptr_mtype_proto) \

src/ucp/proto/proto_common.c

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -152,37 +152,6 @@ ucp_proto_common_get_sys_dev(const ucp_proto_init_params_t *params,
152152
return params->worker->context->tl_rscs[rsc_index].tl_rsc.sys_device;
153153
}
154154

155-
int ucp_proto_common_add_unique_sys_dev(ucs_sys_device_t sys_dev,
156-
ucs_sys_device_t *sys_devs,
157-
ucp_lane_index_t *num_sys_devs,
158-
ucp_lane_index_t max_sys_devs)
159-
{
160-
ucp_lane_index_t i;
161-
162-
for (i = 0; i < *num_sys_devs; ++i) {
163-
if (sys_dev == sys_devs[i]) {
164-
return 0; /* Already exists */
165-
}
166-
}
167-
168-
if (*num_sys_devs < max_sys_devs) {
169-
sys_devs[(*num_sys_devs)++] = sys_dev;
170-
return 1; /* Added */
171-
}
172-
173-
return 0; /* No space */
174-
}
175-
176-
ucp_lane_index_t
177-
ucp_proto_common_select_sys_dev_by_node_id(const ucp_proto_init_params_t *params,
178-
ucp_lane_index_t num_sys_devs)
179-
{
180-
if (num_sys_devs == 0) {
181-
return 0;
182-
}
183-
return params->worker->context->config.node_local_id % num_sys_devs;
184-
}
185-
186155
/* Pack/unpack local distance to make it equal to the remote one */
187156
static void
188157
ucp_proto_common_fp8_pack_unpack_distance(ucs_sys_dev_distance_t *distance)

src/ucp/proto/proto_common.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -255,15 +255,6 @@ ucs_sys_device_t
255255
ucp_proto_common_get_sys_dev(const ucp_proto_init_params_t *params,
256256
ucp_lane_index_t lane);
257257

258-
int ucp_proto_common_add_unique_sys_dev(ucs_sys_device_t sys_dev,
259-
ucs_sys_device_t *sys_devs,
260-
ucp_lane_index_t *num_sys_devs,
261-
ucp_lane_index_t max_sys_devs);
262-
263-
ucp_lane_index_t
264-
ucp_proto_common_select_sys_dev_by_node_id(const ucp_proto_init_params_t *params,
265-
ucp_lane_index_t num_sys_devs);
266-
267258

268259
void ucp_proto_common_get_lane_distance(const ucp_proto_init_params_t *params,
269260
ucp_lane_index_t lane,

src/ucp/proto/proto_multi.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -141,14 +141,20 @@ ucp_proto_multi_select_bw_lanes(const ucp_proto_init_params_t *params,
141141
* path ratio */
142142
}
143143

144-
static ucp_sys_dev_map_t
145-
ucp_proto_multi_init_flush_sys_dev_mask(const ucp_rkey_config_key_t *key)
144+
static ucp_sys_dev_map_t ucp_proto_multi_init_flush_sys_dev_mask(
145+
const ucp_proto_multi_init_params_t *params, ucp_lane_index_t lane)
146146
{
147-
if (key == NULL || !ucp_rkey_need_remote_flush(key)) {
147+
const ucp_rkey_config_key_t *key = params->super.super.rkey_config_key;
148+
const uct_iface_attr_t *iface_attr =
149+
ucp_proto_common_get_iface_attr(&params->super.super, lane);
150+
151+
if ((key == NULL) || !ucp_rkey_need_remote_flush(key) ||
152+
!(iface_attr->cap.flags & UCT_IFACE_FLAG_GET_BCOPY) ||
153+
!ucp_proto_common_is_net_dev(&params->super.super, lane)) {
148154
return 0;
149155
}
150156

151-
return UCS_BIT(key->sys_dev & ~UCP_SYS_DEVICE_FLUSH_BIT);
157+
return UCS_BIT(key->sys_dev);
152158
}
153159

154160
static ucp_lane_index_t ucp_proto_multi_filter_net_devices(
@@ -181,16 +187,22 @@ static ucp_lane_index_t ucp_proto_multi_filter_net_devices(
181187
}
182188

183189
sys_dev = ucp_proto_common_get_sys_dev(params, lane);
184-
ucp_proto_common_add_unique_sys_dev(sys_dev, sys_devs, &num_max_bw_devs,
185-
UCP_PROTO_MAX_LANES);
190+
for (i = 0; i < num_max_bw_devs; ++i) {
191+
if (sys_dev == sys_devs[i]) {
192+
break;
193+
}
194+
}
195+
196+
if (i == num_max_bw_devs) {
197+
sys_devs[num_max_bw_devs++] = sys_dev;
198+
}
186199
}
187200

188201
if (num_max_bw_devs == 0) {
189202
return num_lanes;
190203
}
191204

192-
seed = ucp_proto_common_select_sys_dev_by_node_id(params, num_max_bw_devs);
193-
205+
seed = params->worker->context->config.node_local_id % num_max_bw_devs;
194206
for (i = !!fixed_first_lane, num_filtered_lanes = i; i < num_lanes; ++i) {
195207
lane = lanes[i];
196208
tl_rsc = ucp_proto_common_get_tl_rsc(params, lane);
@@ -450,8 +462,8 @@ ucs_status_t ucp_proto_multi_init(const ucp_proto_multi_init_params_t *params,
450462
lpriv->max_frag_sum = mpriv->max_frag_sum;
451463
lpriv->opt_align = ucp_proto_multi_get_lane_opt_align(params, lane);
452464
mpriv->align_thresh = ucs_max(mpriv->align_thresh, lpriv->opt_align);
453-
lpriv->flush_sys_dev_mask = ucp_proto_multi_init_flush_sys_dev_mask(
454-
params->super.super.rkey_config_key);
465+
lpriv->flush_sys_dev_mask =
466+
ucp_proto_multi_init_flush_sys_dev_mask(params, lane);
455467
}
456468
ucs_assert(mpriv->num_lanes == ucs_popcount(selection.lane_map));
457469

0 commit comments

Comments
 (0)