Skip to content

Commit 179a0e7

Browse files
authored
UCP: Fix multi ppn perf estimation (#10937)
1 parent 6c5b307 commit 179a0e7

File tree

4 files changed, with 15 additions and 6 deletions.

src/ucp/proto/proto_common.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -392,10 +392,10 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
392392
tl_perf->send_pre_overhead = perf_attr.send_pre_overhead + params->overhead;
393393
tl_perf->send_post_overhead = perf_attr.send_post_overhead;
394394
tl_perf->recv_overhead = perf_attr.recv_overhead + params->overhead;
395-
tl_perf->bandwidth = ucp_tl_iface_bandwidth(context,
396-
&perf_attr.bandwidth);
397-
tl_perf->path_ratio = ucp_tl_iface_bandwidth(context,
398-
&perf_attr.path_bandwidth) /
395+
tl_perf->bandwidth = ucp_proto_common_iface_bandwidth(
396+
context, &perf_attr.bandwidth);
397+
tl_perf->path_ratio = ucp_proto_common_iface_bandwidth(
398+
context, &perf_attr.path_bandwidth) /
399399
tl_perf->bandwidth;
400400
tl_perf->latency = ucp_tl_iface_latency(context,
401401
&perf_attr.latency) +

src/ucp/proto/proto_common.inl

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -429,4 +429,12 @@ ucp_proto_common_bandwidth_equal(double bw1, double bw2)
429429
return fabs(bw1 - bw2) <= UCP_PROTO_PERF_EPSILON;
430430
}
431431

432+
static UCS_F_ALWAYS_INLINE double
433+
ucp_proto_common_iface_bandwidth(ucp_context_h context,
434+
const uct_ppn_bandwidth_t *bandwidth)
435+
{
436+
return bandwidth->dedicated +
437+
(bandwidth->shared / ucs_min(context->config.est_num_ppn, 8));
438+
}
439+
432440
#endif

src/ucp/proto/proto_init.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
#include "proto_init.h"
1212
#include "proto_debug.h"
1313
#include "proto_select.inl"
14+
#include "proto_common.inl"
1415

1516
#include <ucp/core/ucp_ep.inl>
1617
#include <ucs/datastruct/array.h>
@@ -387,7 +388,7 @@ ucp_proto_init_add_buffer_copy_time(ucp_worker_h worker, const char *title,
387388
perf_factors[buffer_copy_factor_id].c +=
388389
ucp_tl_iface_latency(context, &perf_attr.latency);
389390
perf_factors[buffer_copy_factor_id].m +=
390-
1.0 / ucp_tl_iface_bandwidth(context, &perf_attr.bandwidth);
391+
1.0 / ucp_proto_common_iface_bandwidth(context, &perf_attr.bandwidth);
391392

392393
if ((memtype_op == UCT_EP_OP_GET_SHORT) ||
393394
(memtype_op == UCT_EP_OP_GET_ZCOPY)) {

src/uct/ib/base/ib_iface.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@
5050
* XDR single path ratio for RDMA_READ operations.
5151
* 4 QPs are needed to achieve full bandwidth with PCIe Gen6.
5252
*/
53-
#define UCT_IB_XDR_READ_PATH_RATIO 0.25
53+
#define UCT_IB_XDR_READ_PATH_RATIO 0.4
5454

5555
static UCS_CONFIG_DEFINE_ARRAY(path_bits_spec,
5656
sizeof(ucs_range_spec_t),

0 commit comments

Comments
 (0)