Skip to content

Commit 19615ab

Browse files
authored
UCT: Add XDR perf recognition (#10915)
1 parent a0f89a0 commit 19615ab

File tree

6 files changed

+96
-26
lines changed

6 files changed

+96
-26
lines changed

src/ucp/proto/proto.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@
4646
_macro(ucp_rndv_get_mtype_proto) \
4747
_macro(ucp_rndv_ats_proto) \
4848
_macro(ucp_rndv_rtr_proto) \
49+
_macro(ucp_rndv_put_zcopy_proto) \
4950
_macro(ucp_rndv_rtr_mtype_proto) \
5051
_macro(ucp_rndv_send_ppln_proto) \
5152
_macro(ucp_rndv_recv_ppln_proto) \
52-
_macro(ucp_rndv_put_zcopy_proto) \
5353
_macro(ucp_rndv_put_mtype_proto) \
5454
_macro(ucp_rndv_rkey_ptr_proto) \
5555
_macro(ucp_rndv_rkey_ptr_mtype_proto) \

src/uct/base/uct_iface.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,6 +1092,20 @@ static UCS_F_ALWAYS_INLINE int uct_ep_op_is_zcopy(uct_ep_operation_t op)
10921092
UCS_BIT(UCT_EP_OP_EAGER_ZCOPY));
10931093
}
10941094

1095+
/**
 * Check whether an endpoint operation is one of the GET operations.
 *
 * @param [in] op  Endpoint operation to test.
 *
 * @return UCS_BIT(op) masked with the bits of UCT_EP_OP_GET_SHORT,
 *         UCT_EP_OP_GET_BCOPY and UCT_EP_OP_GET_ZCOPY; non-zero when @a op is
 *         one of them (assuming UCS_BIT() yields a distinct single-bit mask
 *         per operation - TODO confirm against its definition).
 */
static UCS_F_ALWAYS_INLINE int uct_ep_op_is_get(uct_ep_operation_t op)
1096+
{
1097+
return UCS_BIT(op) & (UCS_BIT(UCT_EP_OP_GET_SHORT) |
1098+
UCS_BIT(UCT_EP_OP_GET_BCOPY) |
1099+
UCS_BIT(UCT_EP_OP_GET_ZCOPY));
1100+
}
1101+
1102+
/**
 * Check whether an endpoint operation is one of the PUT operations.
 *
 * @param [in] op  Endpoint operation to test.
 *
 * @return UCS_BIT(op) masked with the bits of UCT_EP_OP_PUT_SHORT,
 *         UCT_EP_OP_PUT_BCOPY and UCT_EP_OP_PUT_ZCOPY; non-zero when @a op is
 *         one of them (assuming UCS_BIT() yields a distinct single-bit mask
 *         per operation - TODO confirm against its definition).
 */
static UCS_F_ALWAYS_INLINE int uct_ep_op_is_put(uct_ep_operation_t op)
1103+
{
1104+
return UCS_BIT(op) & (UCS_BIT(UCT_EP_OP_PUT_SHORT) |
1105+
UCS_BIT(UCT_EP_OP_PUT_BCOPY) |
1106+
UCS_BIT(UCT_EP_OP_PUT_ZCOPY));
1107+
}
1108+
10951109
static UCS_F_ALWAYS_INLINE int uct_ep_op_is_fetch(uct_ep_operation_t op)
10961110
{
10971111
return UCS_BIT(op) & (UCS_BIT(UCT_EP_OP_GET_SHORT) |

src/uct/ib/base/ib_iface.c

Lines changed: 53 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@
3434
/**
3535
* Maximum bandwidth of NDR single path with PCIe Gen5 and RDMA_READ operation.
3636
*/
37-
#define UCT_IB_NDR_PATH_BANDWIDTH 38e9
37+
#define UCT_IB_NDR_READ_PATH_BANDWIDTH 38e9
38+
#define UCT_IB_XDR_READ_PATH_BANDWIDTH 35e9
3839

3940
/**
4041
* Minimal NDR single path ratio.
@@ -43,7 +44,13 @@
4344
* single path still does not consume the full interface bandwidth for RDMA_READ
4445
* operations, but around 95% of it according to measurements.
4546
*/
46-
#define UCT_IB_NDR_PATH_RATIO 0.95
47+
#define UCT_IB_NDR_READ_PATH_RATIO 0.95
48+
49+
/**
50+
* XDR single path ratio for RDMA_READ operations.
51+
* 4 QPs are needed to achieve full bandwidth with PCIe Gen6.
52+
*/
53+
#define UCT_IB_XDR_READ_PATH_RATIO 0.25
4754

4855
static UCS_CONFIG_DEFINE_ARRAY(path_bits_spec,
4956
sizeof(ucs_range_spec_t),
@@ -1391,7 +1398,7 @@ static void uct_ib_iface_set_num_paths(uct_ib_iface_t *iface,
13911398
}
13921399

13931400
if ((iface->num_paths == 1) &&
1394-
(uct_ib_iface_port_attr(iface)->active_speed == UCT_IB_SPEED_NDR)) {
1401+
(uct_ib_iface_port_active_speed(iface) >= UCT_IB_SPEED_NDR)) {
13951402
iface->num_paths = 2;
13961403
}
13971404
} else {
@@ -1878,16 +1885,17 @@ ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
18781885
{[1] = 1, [2] = 4, [4] = 8, [8] = 12, [16] = 2};
18791886
uct_ib_device_t *dev = uct_ib_iface_device(iface);
18801887
uct_ib_md_t *md = uct_ib_iface_md(iface);
1881-
uint8_t active_width, active_speed, active_mtu, width;
1888+
uint8_t active_width, active_mtu, width;
1889+
uint32_t active_speed;
18821890
double encoding, signal_rate, wire_speed;
18831891
size_t mtu, extra_pkt_len;
18841892
unsigned num_path;
18851893

18861894
uct_base_iface_query(&iface->super, iface_attr);
18871895

18881896
active_width = uct_ib_iface_port_attr(iface)->active_width;
1889-
active_speed = uct_ib_iface_port_attr(iface)->active_speed;
18901897
active_mtu = uct_ib_iface_port_attr(iface)->active_mtu;
1898+
active_speed = uct_ib_iface_port_active_speed(iface);
18911899

18921900
/*
18931901
* Parse active width.
@@ -1958,6 +1966,11 @@ ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
19581966
signal_rate = 100e9;
19591967
encoding = 64.0/66.0;
19601968
break;
1969+
case UCT_IB_SPEED_XDR:
1970+
iface_attr->latency.c = 600e-9;
1971+
signal_rate = 200e9;
1972+
encoding = 64.0/66.0;
1973+
break;
19611974
}
19621975

19631976
iface_attr->latency.m = 0;
@@ -1990,6 +2003,32 @@ ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
19902003
return UCS_OK;
19912004
}
19922005

2006+
/**
 * Estimate the shared bandwidth of a single path of an IB interface.
 *
 * The interface shared bandwidth is multiplied by a path ratio and then
 * limited by a maximal per-path bandwidth:
 * - RoCE interface with LAG level above 1: the ratio is
 *   1 / iface_attr->dev_num_paths, with no bandwidth limit.
 * - GET operation on an NDR port: UCT_IB_NDR_READ_PATH_RATIO and
 *   UCT_IB_NDR_READ_PATH_BANDWIDTH are used.
 * - GET operation on an XDR port: UCT_IB_XDR_READ_PATH_RATIO and
 *   UCT_IB_XDR_READ_PATH_BANDWIDTH are used.
 * - Any other case: the ratio stays 1.0 and the limit stays DBL_MAX, so the
 *   interface shared bandwidth is returned as is.
 *
 * @param [in] iface       IB interface whose link type and port speed are
 *                         examined.
 * @param [in] iface_attr  Interface attributes; bandwidth.shared and
 *                         dev_num_paths are read.
 * @param [in] perf_attr   Performance attributes; the operation is obtained
 *                         through UCT_ATTR_VALUE() with UCT_EP_OP_LAST passed
 *                         as the last argument (presumably the fallback when
 *                         the operation field is not set - TODO confirm).
 *
 * @return Estimated shared bandwidth of a single path.
 */
static double
2007+
uct_ib_iface_estimate_path_bw(uct_ib_iface_t *iface,
2008+
uct_iface_attr_t *iface_attr,
2009+
uct_perf_attr_t *perf_attr)
2010+
{
2011+
double max_path_bandwidth = DBL_MAX;
2012+
double path_ratio = 1.0;
2013+
uct_ep_operation_t op = UCT_ATTR_VALUE(PERF, perf_attr, operation,
2014+
OPERATION, UCT_EP_OP_LAST);
2015+
2016+
if (uct_ib_iface_is_roce(iface) &&
2017+
(uct_ib_iface_roce_lag_level(iface) > 1)) {
2018+
path_ratio = 1.0 / iface_attr->dev_num_paths;
2019+
} else if (uct_ep_op_is_get(op)) {
2020+
if (uct_ib_iface_port_is_ndr(iface)) {
2021+
max_path_bandwidth = UCT_IB_NDR_READ_PATH_BANDWIDTH;
2022+
path_ratio = UCT_IB_NDR_READ_PATH_RATIO;
2023+
} else if (uct_ib_iface_port_is_xdr(iface)) {
2024+
max_path_bandwidth = UCT_IB_XDR_READ_PATH_BANDWIDTH;
2025+
path_ratio = UCT_IB_XDR_READ_PATH_RATIO;
2026+
}
2027+
}
2028+
2029+
return ucs_min(iface_attr->bandwidth.shared * path_ratio, max_path_bandwidth);
2030+
}
2031+
19932032
ucs_status_t
19942033
uct_ib_iface_estimate_perf(uct_iface_h iface, uct_perf_attr_t *perf_attr)
19952034
{
@@ -1998,9 +2037,8 @@ uct_ib_iface_estimate_perf(uct_iface_h iface, uct_perf_attr_t *perf_attr)
19982037
OPERATION, UCT_EP_OP_LAST);
19992038
const uct_ib_iface_send_overhead_t *send_overhead =
20002039
&ib_iface->config.send_overhead;
2001-
double max_path_bandwidth = DBL_MAX;
2002-
double path_ratio;
20032040
uct_iface_attr_t iface_attr;
2041+
double max_bandwidth;
20042042
ucs_status_t status;
20052043

20062044
status = uct_iface_query(iface, &iface_attr);
@@ -2028,26 +2066,18 @@ uct_ib_iface_estimate_perf(uct_iface_h iface, uct_perf_attr_t *perf_attr)
20282066

20292067
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_BANDWIDTH) {
20302068
perf_attr->bandwidth = iface_attr.bandwidth;
2069+
if (uct_ep_op_is_get(op) && uct_ib_iface_port_is_xdr(ib_iface)) {
2070+
max_bandwidth = perf_attr->bandwidth.shared *
2071+
iface_attr.dev_num_paths * UCT_IB_XDR_READ_PATH_RATIO;
2072+
perf_attr->bandwidth.shared = ucs_min(perf_attr->bandwidth.shared,
2073+
max_bandwidth);
2074+
}
20312075
}
20322076

20332077
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_PATH_BANDWIDTH) {
2034-
if (uct_ib_iface_is_roce(ib_iface) &&
2035-
(uct_ib_iface_roce_lag_level(ib_iface) > 1)) {
2036-
path_ratio = 1.0 / iface_attr.dev_num_paths;
2037-
} else if (((op == UCT_EP_OP_GET_BCOPY) ||
2038-
(op == UCT_EP_OP_GET_ZCOPY)) &&
2039-
(uct_ib_iface_port_attr(ib_iface)->active_speed ==
2040-
UCT_IB_SPEED_NDR)) {
2041-
max_path_bandwidth = UCT_IB_NDR_PATH_BANDWIDTH;
2042-
path_ratio = UCT_IB_NDR_PATH_RATIO;
2043-
} else {
2044-
path_ratio = 1.0;
2045-
}
2046-
20472078
perf_attr->path_bandwidth.dedicated = 0;
2048-
perf_attr->path_bandwidth.shared =
2049-
ucs_min(iface_attr.bandwidth.shared * path_ratio,
2050-
max_path_bandwidth);
2079+
perf_attr->path_bandwidth.shared = uct_ib_iface_estimate_path_bw(
2080+
ib_iface, &iface_attr, perf_attr);
20512081
}
20522082

20532083
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_LATENCY) {

src/uct/ib/base/ib_iface.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ enum {
5959
UCT_IB_SPEED_EDR = 32,
6060
UCT_IB_SPEED_HDR = 64,
6161
UCT_IB_SPEED_NDR = 128,
62+
UCT_IB_SPEED_XDR = 256,
6263
UCT_IB_SPEED_LAST
6364
};
6465

@@ -804,4 +805,27 @@ uct_ib_wc_to_ucs_status(enum ibv_wc_status status)
804805
}
805806
}
806807

808+
/**
 * Get the active speed of the interface port.
 *
 * When HAVE_STRUCT_IBV_PORT_ATTR_ACTIVE_SPEED_EX is set and the port
 * attribute active_speed_ex is non-zero, that value is returned. Otherwise
 * the active_speed port attribute is returned.
 *
 * @param [in] iface  IB interface whose port attributes are read.
 *
 * @return Active speed of the port, as a 32-bit value.
 */
static UCS_F_ALWAYS_INLINE uint32_t
809+
uct_ib_iface_port_active_speed(uct_ib_iface_t *iface)
810+
{
811+
#if HAVE_STRUCT_IBV_PORT_ATTR_ACTIVE_SPEED_EX
812+
if (uct_ib_iface_port_attr(iface)->active_speed_ex != 0) {
813+
return uct_ib_iface_port_attr(iface)->active_speed_ex;
814+
}
815+
#endif
816+
return uct_ib_iface_port_attr(iface)->active_speed;
817+
}
818+
819+
/**
 * Check whether the interface port runs at NDR speed.
 *
 * @param [in] iface  IB interface to check.
 *
 * @return Non-zero if the value of uct_ib_iface_port_active_speed() equals
 *         UCT_IB_SPEED_NDR, zero otherwise.
 */
static UCS_F_ALWAYS_INLINE int
820+
uct_ib_iface_port_is_ndr(uct_ib_iface_t *iface)
821+
{
822+
return uct_ib_iface_port_active_speed(iface) == UCT_IB_SPEED_NDR;
823+
}
824+
825+
/**
 * Check whether the interface port runs at XDR speed.
 *
 * @param [in] iface  IB interface to check.
 *
 * @return Non-zero if the value of uct_ib_iface_port_active_speed() equals
 *         UCT_IB_SPEED_XDR, zero otherwise.
 */
static UCS_F_ALWAYS_INLINE int
826+
uct_ib_iface_port_is_xdr(uct_ib_iface_t *iface)
827+
{
828+
return uct_ib_iface_port_active_speed(iface) == UCT_IB_SPEED_XDR;
829+
}
830+
807831
#endif

src/uct/ib/configure.m4

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,9 @@ AS_IF([test "x$with_ib" = "xyes"],
233233
struct ibv_device_attr_ex.odp_caps],
234234
[], [], [[#include <infiniband/verbs.h>]])
235235
236+
AC_CHECK_MEMBERS([struct ibv_port_attr.active_speed_ex],
237+
[], [], [[#include <infiniband/verbs.h>]])
238+
236239
AC_CHECK_DECLS([IBV_ACCESS_RELAXED_ORDERING,
237240
IBV_ACCESS_ON_DEMAND,
238241
IBV_QPF_GRH_REQUIRED],

src/uct/ib/mlx5/dc/dc_mlx5.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -914,8 +914,7 @@ static ucs_status_t uct_dc_mlx5_iface_estimate_perf(uct_iface_h tl_iface,
914914
}
915915

916916
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_FLAGS) {
917-
if (uct_ib_iface_port_attr(ib_iface)->active_speed ==
918-
UCT_IB_SPEED_NDR) {
917+
if (uct_ib_iface_port_is_ndr(ib_iface)) {
919918
perf_attr->flags |= UCT_PERF_ATTR_FLAGS_TX_RX_SHARED;
920919
}
921920
}

0 commit comments

Comments
 (0)