Skip to content

Commit abd477a

Browse files
authored
Merge pull request #10676 from iyastreb/ucp/proto-find-lanes-callback
UCP/PROTO: Find lanes callback to minimize overhead
2 parents cccee45 + 82eaa01 commit abd477a

File tree

5 files changed

+119
-136
lines changed

5 files changed

+119
-136
lines changed

src/ucp/proto/proto_common.c

Lines changed: 97 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -468,13 +468,100 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
468468
return status;
469469
}
470470

471+
/*
 * Probe whether the transport interface of resource 'rsc_index' supports the
 * memory type configured by ucp_proto_common_perf_attr_set_mem_type().
 *
 * @param params     Common protocol init parameters; 'params->super.worker'
 *                   is used to look up the worker interface.
 * @param rsc_index  Resource index passed to ucp_worker_iface().
 *
 * @return Nonzero if uct_iface_estimate_perf() returns UCS_OK, 0 otherwise.
 */
static int ucp_proto_common_find_lanes_check_mem_type(
472+
const ucp_proto_common_init_params_t *params, ucp_rsc_index_t rsc_index)
473+
{
474+
uct_perf_attr_t perf_attr = {0};
475+
ucp_worker_iface_t *wiface = ucp_worker_iface(params->super.worker, rsc_index);
476+
477+
ucp_proto_common_perf_attr_set_mem_type(params, &perf_attr);
478+
/* TODO: Use memory reachability UCT API, when available, to check memory
479+
type support */
480+
/* Only the returned status is used; perf_attr is not read afterwards */
return uct_iface_estimate_perf(wiface->iface, &perf_attr) == UCS_OK;
481+
}
482+
483+
/*
 * Lane filter: decide whether 'lane' is usable for a protocol described by
 * ucp_proto_common_init_params_t.
 *
 * @param params     Protocol init parameters; down-cast with ucs_derived_of()
 *                   to ucp_proto_common_init_params_t.
 * @param lane       Lane index, used to look up the resource, memory domain
 *                   and interface attributes.
 * @param lane_desc  String used only as a prefix of the trace messages.
 *
 * @return 1 if the lane passes all checks below, 0 (after a trace message)
 *         when any of them fails:
 *         - for a known 'reg_mem_info.type': the memory domain can register
 *           that memory type (if it needs a memory handle) or can access it
 *           (if it does not);
 *         - the interface max iov is not less than 'min_iov';
 *         - the minimal fragment is 0, unless the MIN_FRAG flag is set;
 *         - the maximal fragment is larger than 'hdr_size';
 *         - ucp_proto_common_find_lanes_check_mem_type() succeeds.
 */
int
484+
ucp_proto_common_filter_min_frag(const ucp_proto_init_params_t *params,
485+
ucp_lane_index_t lane, const char *lane_desc)
486+
{
487+
const ucp_proto_common_init_params_t *common_params =
488+
ucs_derived_of(params, ucp_proto_common_init_params_t);
489+
ucs_memory_type_t reg_mem_type = common_params->reg_mem_info.type;
490+
unsigned flags = common_params->flags;
491+
ucp_context_h context = params->worker->context;
492+
ucp_rsc_index_t rsc_index = params->ep_config_key->lanes[lane].rsc_index;
493+
ucp_md_index_t md_index = context->tl_rscs[rsc_index].md_index;
494+
const uct_md_attr_v2_t *md_attr = &context->tl_mds[md_index].attr;
495+
const uct_iface_attr_t *iface_attr;
496+
size_t max_iov, tl_min_frag, tl_max_frag;
497+
498+
/* Check memory registration capabilities for zero-copy case */
499+
if (reg_mem_type != UCS_MEMORY_TYPE_UNKNOWN) {
500+
ucs_assertv((reg_mem_type == params->select_param->mem_type) ||
501+
!(flags & UCP_PROTO_COMMON_INIT_FLAG_SEND_ZCOPY),
502+
"flags=0x%x reg_mem_type=%s select_param->mem_type=%s",
503+
flags, ucs_memory_type_names[reg_mem_type],
504+
ucs_memory_type_names[params->select_param->mem_type]);
505+
506+
if (md_attr->flags & UCT_MD_FLAG_NEED_MEMH) {
507+
/* Memory domain must support registration on the relevant memory
508+
* type */
509+
if (!(context->reg_md_map[reg_mem_type] & UCS_BIT(md_index))) {
510+
ucs_trace("%s: md %s cannot register %s memory", lane_desc,
511+
context->tl_mds[md_index].rsc.md_name,
512+
ucs_memory_type_names[reg_mem_type]);
513+
return 0;
514+
}
515+
} else if (!(md_attr->access_mem_types & UCS_BIT(reg_mem_type))) {
516+
/* Memory domain which does not require a registration for zero
517+
* copy operation must be able to access the relevant memory type */
518+
ucs_trace("%s: no access to mem type %s", lane_desc,
519+
ucs_memory_type_names[reg_mem_type]);
520+
return 0;
521+
}
522+
}
523+
524+
/* Reject the lane if the interface cannot carry the required number of iov
 * entries; SIZE_MAX is passed as the fallback value of the field lookup */
iface_attr = ucp_proto_common_get_iface_attr(params, lane);
525+
max_iov = ucp_proto_common_get_iface_attr_field(
526+
iface_attr, common_params->max_iov_offs, SIZE_MAX);
527+
if (max_iov < common_params->min_iov) {
528+
ucs_trace("%s: max iov %zu is less than min iov %zu", lane_desc,
529+
max_iov, common_params->min_iov);
530+
return 0;
531+
}
532+
533+
ucp_proto_common_get_frag_size(common_params, iface_attr, lane,
534+
&tl_min_frag, &tl_max_frag);
535+
536+
/* Minimal fragment size must be 0, unless 'MIN_FRAG' flag is set */
537+
if (!(flags & UCP_PROTO_COMMON_INIT_FLAG_MIN_FRAG) && (tl_min_frag > 0)) {
538+
ucs_trace("%s: minimal fragment %zu is not 0", lane_desc, tl_min_frag);
539+
return 0;
540+
}
541+
542+
/* Maximal fragment size should be larger than header size */
543+
if (tl_max_frag <= common_params->hdr_size) {
544+
ucs_trace("%s: max fragment is too small %zu, need > %zu", lane_desc,
545+
tl_max_frag, common_params->hdr_size);
546+
return 0;
547+
}
548+
549+
/* NOTE(review): this check runs even when reg_mem_type is
 * UCS_MEMORY_TYPE_UNKNOWN, and the trace below prints reg_mem_type rather
 * than the type set by ucp_proto_common_perf_attr_set_mem_type() - confirm
 * the reported name matches the memory type that was actually probed */
if (!ucp_proto_common_find_lanes_check_mem_type(common_params, rsc_index)) {
550+
ucs_trace("%s: mem type %s is not supported", lane_desc,
551+
ucs_memory_type_names[reg_mem_type]);
552+
return 0;
553+
}
554+
555+
return 1;
556+
}
557+
471558
ucp_lane_index_t
472559
ucp_proto_common_find_lanes(const ucp_proto_init_params_t *params,
473-
unsigned flags, ptrdiff_t max_iov_offs,
474-
size_t min_iov, ucp_lane_type_t lane_type,
475-
ucs_memory_type_t reg_mem_type,
560+
unsigned flags, ucp_lane_type_t lane_type,
476561
uint64_t tl_cap_flags, ucp_lane_index_t max_lanes,
477-
ucp_lane_map_t exclude_map, ucp_lane_index_t *lanes)
562+
ucp_lane_map_t exclude_map,
563+
ucp_proto_common_filter_lane_cb_t filter,
564+
ucp_lane_index_t *lanes)
478565
{
479566
UCS_STRING_BUFFER_ONSTACK(sel_param_strb, UCP_PROTO_SELECT_PARAM_STR_MAX);
480567
ucp_context_h context = params->worker->context;
@@ -489,7 +576,6 @@ ucp_proto_common_find_lanes(const ucp_proto_init_params_t *params,
489576
ucp_md_index_t md_index;
490577
ucp_lane_map_t lane_map;
491578
char lane_desc[64];
492-
size_t max_iov;
493579

494580
if (max_lanes == 0) {
495581
return 0;
@@ -551,44 +637,20 @@ ucp_proto_common_find_lanes(const ucp_proto_init_params_t *params,
551637

552638
if ((flags & UCP_PROTO_COMMON_INIT_FLAG_RKEY_PTR) &&
553639
!(cmpt_attr->flags & UCT_COMPONENT_FLAG_RKEY_PTR)) {
554-
ucs_trace("protocol requires rkey ptr but it is not "
555-
"supported by the component");
640+
ucs_trace("%s: protocol requires rkey ptr but it is not "
641+
"supported by the component", lane_desc);
556642
continue;
557643
}
558644

559-
/* Check memory registration capabilities for zero-copy case */
560-
if (reg_mem_type != UCS_MEMORY_TYPE_UNKNOWN) {
561-
ucs_assertv((reg_mem_type == select_param->mem_type) ||
562-
!(flags & UCP_PROTO_COMMON_INIT_FLAG_SEND_ZCOPY),
563-
"flags=0x%x reg_mem_type=%s select_param->mem_type=%s",
564-
flags, ucs_memory_type_names[reg_mem_type],
565-
ucs_memory_type_names[select_param->mem_type]);
566-
567-
if (md_attr->flags & UCT_MD_FLAG_NEED_MEMH) {
568-
/* Memory domain must support registration on the relevant
569-
* memory type */
570-
if (!(context->reg_md_map[reg_mem_type] & UCS_BIT(md_index))) {
571-
ucs_trace("%s: md %s cannot register %s memory", lane_desc,
572-
context->tl_mds[md_index].rsc.md_name,
573-
ucs_memory_type_names[reg_mem_type]);
574-
continue;
575-
}
576-
} else if (!(md_attr->access_mem_types & UCS_BIT(reg_mem_type))) {
577-
/*
578-
* Memory domain which does not require a registration for zero
579-
* copy operation must be able to access the relevant memory type
580-
*/
581-
ucs_trace("%s: no access to mem type %s", lane_desc,
582-
ucs_memory_type_names[reg_mem_type]);
583-
continue;
584-
}
645+
if (filter != NULL && !filter(params, lane, lane_desc)) {
646+
continue;
585647
}
586648

587649
/* Check remote access capabilities */
588650
if (flags & UCP_PROTO_COMMON_INIT_FLAG_REMOTE_ACCESS) {
589651
if (rkey_config_key == NULL) {
590-
ucs_trace("protocol requires remote access but remote key is "
591-
"not present");
652+
ucs_trace("%s: protocol requires remote access but remote key "
653+
"is not present", lane_desc);
592654
goto out;
593655
}
594656

@@ -614,12 +676,6 @@ ucp_proto_common_find_lanes(const ucp_proto_init_params_t *params,
614676
}
615677
}
616678

617-
max_iov = ucp_proto_common_get_iface_attr_field(iface_attr,
618-
max_iov_offs, SIZE_MAX);
619-
if (max_iov < min_iov) {
620-
continue;
621-
}
622-
623679
ucs_trace("%s: added as lane %d", lane_desc, lane);
624680
lanes[num_lanes++] = lane;
625681
}
@@ -663,81 +719,6 @@ ucp_proto_common_reg_md_map(const ucp_proto_common_init_params_t *params,
663719
return reg_md_map;
664720
}
665721

666-
/*
 * Probe whether the transport interface behind 'lane' supports the memory
 * type configured by ucp_proto_common_perf_attr_set_mem_type().
 *
 * @param params  Common protocol init parameters.
 * @param lane    Lane index; translated to a resource index with
 *                ucp_proto_common_get_rsc_index().
 *
 * @return Nonzero if uct_iface_estimate_perf() returns UCS_OK, 0 otherwise.
 */
static int ucp_proto_common_find_lanes_check_mem_type(
667-
const ucp_proto_common_init_params_t *params, ucp_lane_index_t lane)
668-
{
669-
uct_perf_attr_t perf_attr = {0};
670-
ucp_rsc_index_t rsc_index;
671-
ucp_worker_iface_t *wiface;
672-
673-
ucp_proto_common_perf_attr_set_mem_type(params, &perf_attr);
674-
675-
rsc_index = ucp_proto_common_get_rsc_index(&params->super, lane);
676-
wiface = ucp_worker_iface(params->super.worker, rsc_index);
677-
/* TODO: Use memory reachability UCT API, when available, to check memory
678-
type support */
679-
/* Only the returned status is used; perf_attr is not read afterwards */
return uct_iface_estimate_perf(wiface->iface, &perf_attr) == UCS_OK;
680-
}
681-
682-
/*
 * Find lanes with ucp_proto_common_find_lanes() and keep only those that pass
 * the fragment size and memory type checks.
 *
 * Candidates are collected into a temporary array, then copied to 'lanes' one
 * by one until 'max_lanes' valid lanes were stored.
 *
 * @param params        Common protocol init parameters (flags, iov limits,
 *                      registration memory type, header size).
 * @param lane_type     Forwarded to ucp_proto_common_find_lanes().
 * @param tl_cap_flags  Forwarded to ucp_proto_common_find_lanes().
 * @param max_lanes     Maximal number of lanes to store in 'lanes'.
 * @param exclude_map   Forwarded to ucp_proto_common_find_lanes().
 * @param lanes         Output array of the selected lane indexes.
 *
 * @return Number of lanes stored in 'lanes'.
 */
ucp_lane_index_t ucp_proto_common_find_lanes_with_min_frag(
683-
const ucp_proto_common_init_params_t *params, ucp_lane_type_t lane_type,
684-
uint64_t tl_cap_flags, ucp_lane_index_t max_lanes,
685-
ucp_lane_map_t exclude_map, ucp_lane_index_t *lanes)
686-
{
687-
ucp_lane_index_t lane_index, lane, num_lanes, num_valid_lanes;
688-
const uct_iface_attr_t *iface_attr;
689-
size_t tl_min_frag, tl_max_frag;
690-
ucp_lane_index_t tmp_lanes[UCP_PROTO_MAX_LANES];
691-
692-
/* TODO: Request more lanes than needed in order to avoid skipping protocol
693-
* if the first found candidate is filtered out. Refactor this code to pass
694-
* filter callback to ucp_proto_common_find_lanes() */
695-
/* NOTE(review): at least 4 candidates are requested into tmp_lanes, which
 * holds UCP_PROTO_MAX_LANES entries - assumes the request never exceeds
 * that size; confirm against callers */
num_lanes = ucp_proto_common_find_lanes(
696-
&params->super, params->flags, params->max_iov_offs,
697-
params->min_iov, lane_type, params->reg_mem_info.type,
698-
tl_cap_flags, ucs_max(max_lanes, 4), exclude_map, tmp_lanes);
699-
700-
num_valid_lanes = 0;
701-
for (lane_index = 0; lane_index < num_lanes; ++lane_index) {
702-
lane = tmp_lanes[lane_index];
703-
iface_attr = ucp_proto_common_get_iface_attr(&params->super, lane);
704-
705-
ucp_proto_common_get_frag_size(params, iface_attr, lane, &tl_min_frag,
706-
&tl_max_frag);
707-
708-
/* Minimal fragment size must be 0, unless 'MIN_FRAG' flag is set */
709-
if (!(params->flags & UCP_PROTO_COMMON_INIT_FLAG_MIN_FRAG) &&
710-
(tl_min_frag > 0)) {
711-
ucs_trace("lane[%d]: minimal fragment %zu is not 0", lane,
712-
tl_min_frag);
713-
continue;
714-
}
715-
716-
/* Maximal fragment size should be larger than header size */
717-
if (tl_max_frag <= params->hdr_size) {
718-
ucs_trace("lane[%d]: max fragment is too small %zu, need > %zu",
719-
lane, tl_max_frag, params->hdr_size);
720-
continue;
721-
}
722-
723-
/* Lanes failing the memory type probe are dropped without a trace */
if (!ucp_proto_common_find_lanes_check_mem_type(params, lane)) {
724-
continue;
725-
}
726-
727-
lanes[num_valid_lanes++] = lane;
728-
/* Stop once the caller's limit is reached; extra candidates are unused */
if (num_valid_lanes >= max_lanes) {
729-
break;
730-
}
731-
}
732-
733-
if (num_valid_lanes != num_lanes) {
734-
ucs_assert(num_valid_lanes < num_lanes);
735-
ucs_trace("selected %d/%d valid lanes", num_valid_lanes, num_lanes);
736-
}
737-
738-
return num_valid_lanes;
739-
}
740-
741722
void ucp_proto_request_zcopy_completion(uct_completion_t *self)
742723
{
743724
ucp_request_t *req = ucs_container_of(self, ucp_request_t,

src/ucp/proto/proto_common.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -283,20 +283,22 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
283283
ucp_proto_perf_node_t **perf_node_p);
284284

285285

286-
/* @return number of lanes found */
287-
ucp_lane_index_t ucp_proto_common_find_lanes_with_min_frag(
288-
const ucp_proto_common_init_params_t *params, ucp_lane_type_t lane_type,
289-
uint64_t tl_cap_flags, ucp_lane_index_t max_lanes,
290-
ucp_lane_map_t exclude_map, ucp_lane_index_t *lanes);
286+
/**
 * Lane filter callback accepted by ucp_proto_common_find_lanes().
 *
 * @param params     Protocol init parameters passed to the lane search.
 * @param lane       Candidate lane index.
 * @param lane_desc  Lane description string, for use in trace messages.
 *
 * @return Nonzero to keep the lane as a candidate, 0 to make the lane search
 *         skip it.
 */
typedef int (*ucp_proto_common_filter_lane_cb_t)(
287+
const ucp_proto_init_params_t *params,
288+
ucp_lane_index_t lane, const char *lane_desc);
289+
290+
291+
/**
 * Lane filter matching the ucp_proto_common_filter_lane_cb_t signature; it
 * checks memory registration/access, iov count, fragment sizes and memory
 * type support of a lane.
 *
 * @param params     Protocol init parameters; must be embedded in a
 *                   ucp_proto_common_init_params_t, since the implementation
 *                   down-casts it.
 * @param lane       Lane index to check.
 * @param lane_desc  Lane description string used in trace messages.
 *
 * @return 1 if the lane passes all checks, 0 otherwise.
 */
int
292+
ucp_proto_common_filter_min_frag(const ucp_proto_init_params_t *params,
293+
ucp_lane_index_t lane, const char *lane_desc);
291294

292295

293296
ucp_lane_index_t
294297
ucp_proto_common_find_lanes(const ucp_proto_init_params_t *params,
295-
unsigned flags, ptrdiff_t max_iov_offs,
296-
size_t min_iov, ucp_lane_type_t lane_type,
297-
ucs_memory_type_t reg_mem_type,
298+
unsigned flags, ucp_lane_type_t lane_type,
298299
uint64_t tl_cap_flags, ucp_lane_index_t max_lanes,
299300
ucp_lane_map_t exclude_map,
301+
ucp_proto_common_filter_lane_cb_t filter,
300302
ucp_lane_index_t *lanes);
301303

302304

src/ucp/proto/proto_multi.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -181,20 +181,21 @@ ucs_status_t ucp_proto_multi_init(const ucp_proto_multi_init_params_t *params,
181181
}
182182

183183
/* Find first lane */
184-
num_lanes = ucp_proto_common_find_lanes_with_min_frag(
185-
&params->super, params->first.lane_type, params->first.tl_cap_flags,
186-
1, 0, lanes);
184+
num_lanes = ucp_proto_common_find_lanes(
185+
&params->super.super, params->super.flags, params->first.lane_type,
186+
params->first.tl_cap_flags, 1, 0, ucp_proto_common_filter_min_frag,
187+
lanes);
187188
if (num_lanes == 0) {
188189
ucs_trace("no lanes for %s",
189190
ucp_proto_id_field(params->super.super.proto_id, name));
190191
return UCS_ERR_NO_ELEM;
191192
}
192193

193194
/* Find rest of the lanes */
194-
num_lanes += ucp_proto_common_find_lanes_with_min_frag(
195-
&params->super, params->middle.lane_type,
195+
num_lanes += ucp_proto_common_find_lanes(
196+
&params->super.super, params->super.flags, params->middle.lane_type,
196197
params->middle.tl_cap_flags, UCP_PROTO_MAX_LANES - 1,
197-
UCS_BIT(lanes[0]), lanes + 1);
198+
UCS_BIT(lanes[0]), ucp_proto_common_filter_min_frag, lanes + 1);
198199

199200
/* Get bandwidth of all lanes and max_bandwidth */
200201
max_bandwidth = 0;

src/ucp/proto/proto_single.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,10 @@ ucs_status_t ucp_proto_single_init(const ucp_proto_single_init_params_t *params,
3535
return UCS_ERR_UNSUPPORTED;
3636
}
3737

38-
num_lanes = ucp_proto_common_find_lanes_with_min_frag(
39-
&params->super, params->lane_type, params->tl_cap_flags, 1,
40-
params->super.exclude_map, &lane);
38+
num_lanes = ucp_proto_common_find_lanes(
39+
&params->super.super, params->super.flags, params->lane_type,
40+
params->tl_cap_flags, 1, params->super.exclude_map,
41+
ucp_proto_common_filter_min_frag, &lane);
4142
if (num_lanes == 0) {
4243
ucs_trace("no lanes for %s",
4344
ucp_proto_id_field(params->super.super.proto_id, name));

src/ucp/rndv/proto_rndv.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -504,10 +504,8 @@ ucp_proto_rndv_find_ctrl_lane(const ucp_proto_init_params_t *params)
504504

505505
num_lanes = ucp_proto_common_find_lanes(params,
506506
UCP_PROTO_COMMON_INIT_FLAG_HDR_ONLY,
507-
UCP_PROTO_COMMON_OFFSET_INVALID, 1,
508507
UCP_LANE_TYPE_AM,
509-
UCS_MEMORY_TYPE_UNKNOWN,
510-
UCT_IFACE_FLAG_AM_BCOPY, 1, 0,
508+
UCT_IFACE_FLAG_AM_BCOPY, 1, 0, NULL,
511509
&lane);
512510
if (num_lanes == 0) {
513511
ucs_debug("no active message lane for %s",

0 commit comments

Comments
 (0)