 /**
  * Maximum bandwidth of NDR single path with PCIe Gen5 and RDMA_READ operation.
  */
-#define UCT_IB_NDR_PATH_BANDWIDTH 38e9
+#define UCT_IB_NDR_READ_PATH_BANDWIDTH 38e9
+#define UCT_IB_XDR_READ_PATH_BANDWIDTH 35e9
 
 /**
  * Minimal NDR single path ratio.
  * single path still does not consume the full interface bandwidth for RDMA_READ
  * operations, but around 95% of it according to measurements.
  */
-#define UCT_IB_NDR_PATH_RATIO 0.95
+#define UCT_IB_NDR_READ_PATH_RATIO 0.95
+
+/**
+ * XDR single path ratio for RDMA_READ operations.
+ * 4 QPs are needed to achieve full bandwidth with PCIe Gen6.
+ */
+#define UCT_IB_XDR_READ_PATH_RATIO 0.25
 
 static UCS_CONFIG_DEFINE_ARRAY(path_bits_spec,
                                sizeof(ucs_range_spec_t),
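As a side note for reviewers: below is a minimal standalone C sketch of how the per-path ratio and the absolute cap defined above are intended to combine for a single RDMA_READ path (not part of the patch; the shared link bandwidths used here are hypothetical example values).

/* Standalone sketch, not part of the patch: combine the single-path ratio
 * with the absolute per-path cap, as the new constants above suggest.
 * The shared link bandwidths (48e9, 96e9) are hypothetical example values. */
#include <stdio.h>

static double path_bw(double shared_bw, double ratio, double cap)
{
    double bw = shared_bw * ratio;
    return (bw < cap) ? bw : cap;
}

int main(void)
{
    /* NDR: one path reaches ~95% of the link, but never more than 38 GB/s */
    printf("NDR single path: %.1f GB/s\n",
           path_bw(48e9, 0.95, 38e9) / 1e9);   /* -> 38.0, the cap applies */

    /* XDR: one path is assumed to carry ~25% of the link (4 QPs for full
     * bandwidth), capped at 35 GB/s */
    printf("XDR single path: %.1f GB/s\n",
           path_bw(96e9, 0.25, 35e9) / 1e9);   /* -> 24.0, the ratio applies */
    return 0;
}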
@@ -1391,7 +1398,7 @@ static void uct_ib_iface_set_num_paths(uct_ib_iface_t *iface,
         }
 
         if ((iface->num_paths == 1) &&
-            (uct_ib_iface_port_attr(iface)->active_speed == UCT_IB_SPEED_NDR)) {
+            (uct_ib_iface_port_active_speed(iface) >= UCT_IB_SPEED_NDR)) {
             iface->num_paths = 2;
         }
     } else {
@@ -1878,16 +1885,17 @@ ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
             {[1] = 1, [2] = 4, [4] = 8, [8] = 12, [16] = 2};
     uct_ib_device_t *dev = uct_ib_iface_device(iface);
     uct_ib_md_t *md      = uct_ib_iface_md(iface);
-    uint8_t active_width, active_speed, active_mtu, width;
+    uint8_t active_width, active_mtu, width;
+    uint32_t active_speed;
     double encoding, signal_rate, wire_speed;
     size_t mtu, extra_pkt_len;
     unsigned num_path;
 
     uct_base_iface_query(&iface->super, iface_attr);
 
     active_width = uct_ib_iface_port_attr(iface)->active_width;
-    active_speed = uct_ib_iface_port_attr(iface)->active_speed;
     active_mtu   = uct_ib_iface_port_attr(iface)->active_mtu;
+    active_speed = uct_ib_iface_port_active_speed(iface);
 
     /*
      * Parse active width.
@@ -1958,6 +1966,11 @@ ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
         signal_rate           = 100e9;
         encoding              = 64.0/66.0;
         break;
+    case UCT_IB_SPEED_XDR:
+        iface_attr->latency.c = 600e-9;
+        signal_rate           = 200e9;
+        encoding              = 64.0/66.0;
+        break;
     }
 
     iface_attr->latency.m = 0;
@@ -1990,6 +2003,32 @@ ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len,
     return UCS_OK;
 }
 
+static double
+uct_ib_iface_estimate_path_bw(uct_ib_iface_t *iface,
+                              uct_iface_attr_t *iface_attr,
+                              uct_perf_attr_t *perf_attr)
+{
+    double max_path_bandwidth = DBL_MAX;
+    double path_ratio         = 1.0;
+    uct_ep_operation_t op     = UCT_ATTR_VALUE(PERF, perf_attr, operation,
+                                               OPERATION, UCT_EP_OP_LAST);
+
+    if (uct_ib_iface_is_roce(iface) &&
+        (uct_ib_iface_roce_lag_level(iface) > 1)) {
+        path_ratio = 1.0 / iface_attr->dev_num_paths;
+    } else if (uct_ep_op_is_get(op)) {
+        if (uct_ib_iface_port_is_ndr(iface)) {
+            max_path_bandwidth = UCT_IB_NDR_READ_PATH_BANDWIDTH;
+            path_ratio         = UCT_IB_NDR_READ_PATH_RATIO;
+        } else if (uct_ib_iface_port_is_xdr(iface)) {
+            max_path_bandwidth = UCT_IB_XDR_READ_PATH_BANDWIDTH;
+            path_ratio         = UCT_IB_XDR_READ_PATH_RATIO;
+        }
+    }
+
+    return ucs_min(iface_attr->bandwidth.shared * path_ratio, max_path_bandwidth);
+}
+
 ucs_status_t
 uct_ib_iface_estimate_perf(uct_iface_h iface, uct_perf_attr_t *perf_attr)
 {
@@ -1998,9 +2037,8 @@ uct_ib_iface_estimate_perf(uct_iface_h iface, uct_perf_attr_t *perf_attr)
                                              OPERATION, UCT_EP_OP_LAST);
     const uct_ib_iface_send_overhead_t *send_overhead =
             &ib_iface->config.send_overhead;
-    double max_path_bandwidth = DBL_MAX;
-    double path_ratio;
     uct_iface_attr_t iface_attr;
+    double max_bandwidth;
     ucs_status_t status;
 
     status = uct_iface_query(iface, &iface_attr);
@@ -2028,26 +2066,18 @@ uct_ib_iface_estimate_perf(uct_iface_h iface, uct_perf_attr_t *perf_attr)
 
     if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_BANDWIDTH) {
         perf_attr->bandwidth = iface_attr.bandwidth;
+        if (uct_ep_op_is_get(op) && uct_ib_iface_port_is_xdr(ib_iface)) {
+            max_bandwidth = perf_attr->bandwidth.shared *
+                            iface_attr.dev_num_paths * UCT_IB_XDR_READ_PATH_RATIO;
+            perf_attr->bandwidth.shared = ucs_min(perf_attr->bandwidth.shared,
+                                                  max_bandwidth);
+        }
     }
 
     if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_PATH_BANDWIDTH) {
-        if (uct_ib_iface_is_roce(ib_iface) &&
-            (uct_ib_iface_roce_lag_level(ib_iface) > 1)) {
-            path_ratio = 1.0 / iface_attr.dev_num_paths;
-        } else if (((op == UCT_EP_OP_GET_BCOPY) ||
-                    (op == UCT_EP_OP_GET_ZCOPY)) &&
-                   (uct_ib_iface_port_attr(ib_iface)->active_speed ==
-                    UCT_IB_SPEED_NDR)) {
-            max_path_bandwidth = UCT_IB_NDR_PATH_BANDWIDTH;
-            path_ratio         = UCT_IB_NDR_PATH_RATIO;
-        } else {
-            path_ratio = 1.0;
-        }
-
         perf_attr->path_bandwidth.dedicated = 0;
-        perf_attr->path_bandwidth.shared =
-                ucs_min(iface_attr.bandwidth.shared * path_ratio,
-                        max_path_bandwidth);
+        perf_attr->path_bandwidth.shared    = uct_ib_iface_estimate_path_bw(
+                ib_iface, &iface_attr, perf_attr);
     }
 
     if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_LATENCY) {
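For reviewers, a standalone sketch of the new interface-level cap on shared bandwidth for RDMA_READ over XDR (not part of the patch): the cap is shared * dev_num_paths * UCT_IB_XDR_READ_PATH_RATIO, so it only takes effect when fewer than 4 paths are in use; the 96 GB/s link bandwidth below is a hypothetical example value.

/* Standalone sketch, not part of the patch: the XDR RDMA_READ cap on the
 * interface-level shared bandwidth only bites when dev_num_paths * 0.25 < 1,
 * i.e. fewer than 4 paths. The 96e9 figure is a hypothetical example value. */
#include <stdio.h>

static double xdr_get_shared_bw(double shared_bw, unsigned dev_num_paths)
{
    double max_bw = shared_bw * dev_num_paths * 0.25; /* UCT_IB_XDR_READ_PATH_RATIO */
    return (shared_bw < max_bw) ? shared_bw : max_bw; /* same as ucs_min() */
}

int main(void)
{
    printf("1 path : %.1f GB/s\n", xdr_get_shared_bw(96e9, 1) / 1e9); /* 24.0 */
    printf("2 paths: %.1f GB/s\n", xdr_get_shared_bw(96e9, 2) / 1e9); /* 48.0 */
    printf("4 paths: %.1f GB/s\n", xdr_get_shared_bw(96e9, 4) / 1e9); /* 96.0 */
    return 0;
}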