Skip to content

Commit 89ec57c

Browse files
authored
UCP/PROTO: Refactoring: move perf node into ucp_proto_common_tl_perf_t (#11101)
UCP/PROTO: Moved perf node into ucp_proto_common_tl_perf_t
1 parent 5f64b52 commit 89ec57c

File tree

6 files changed

+45
-56
lines changed

6 files changed

+45
-56
lines changed

src/ucp/proto/proto_common.c

Lines changed: 18 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -235,7 +235,7 @@ ucp_proto_common_get_frag_size(const ucp_proto_common_init_params_t *params,
235235

236236
/* Update 'perf' with the distance */
237237
static void ucp_proto_common_update_lane_perf_by_distance(
238-
ucp_proto_common_tl_perf_t *perf, ucp_proto_perf_node_t *perf_node,
238+
ucp_proto_common_tl_perf_t *perf,
239239
const ucs_sys_dev_distance_t *distance, const char *perf_name,
240240
const char *perf_fmt, ...)
241241
{
@@ -261,7 +261,7 @@ static void ucp_proto_common_update_lane_perf_by_distance(
261261
sys_perf_node = ucp_proto_perf_node_new_data(perf_name, "%s",
262262
perf_node_desc);
263263
ucp_proto_perf_node_add_data(sys_perf_node, "", distance_func);
264-
ucp_proto_perf_node_own_child(perf_node, &sys_perf_node);
264+
ucp_proto_perf_node_own_child(perf->node, &sys_perf_node);
265265
}
266266

267267
void ucp_proto_common_lane_perf_node(ucp_context_h context,
@@ -317,6 +317,7 @@ static void ucp_proto_common_tl_perf_reset(ucp_proto_common_tl_perf_t *tl_perf)
317317
tl_perf->sys_latency = 0;
318318
tl_perf->min_length = 0;
319319
tl_perf->max_frag = SIZE_MAX;
320+
tl_perf->node = NULL;
320321
}
321322

322323
static void ucp_proto_common_perf_attr_set_mem_type(
@@ -337,15 +338,14 @@ static void ucp_proto_common_perf_attr_set_mem_type(
337338
ucs_status_t
338339
ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
339340
ucp_lane_index_t lane,
340-
ucp_proto_common_tl_perf_t *tl_perf,
341-
ucp_proto_perf_node_t **perf_node_p)
341+
ucp_proto_common_tl_perf_t *tl_perf)
342342
{
343343
ucp_worker_h worker = params->super.worker;
344344
ucp_context_h context = worker->context;
345345
ucp_rsc_index_t rsc_index = ucp_proto_common_get_rsc_index(&params->super,
346346
lane);
347347
ucp_worker_iface_t *wiface = ucp_worker_iface(worker, rsc_index);
348-
ucp_proto_perf_node_t *perf_node, *lane_perf_node;
348+
ucp_proto_perf_node_t *lane_perf_node;
349349
const ucp_rkey_config_t *rkey_config;
350350
ucs_sys_dev_distance_t distance;
351351
size_t tl_min_frag, tl_max_frag;
@@ -356,7 +356,6 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
356356

357357
if (lane == UCP_NULL_LANE) {
358358
ucp_proto_common_tl_perf_reset(tl_perf);
359-
*perf_node_p = NULL;
360359
return UCS_OK;
361360
}
362361

@@ -370,9 +369,9 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
370369
return UCS_ERR_INVALID_PARAM;
371370
}
372371

373-
perf_node = ucp_proto_perf_node_new_data("lane", "%u ppn %u eps",
374-
context->config.est_num_ppn,
375-
context->config.est_num_eps);
372+
tl_perf->node = ucp_proto_perf_node_new_data("lane", "%u ppn %u eps",
373+
context->config.est_num_ppn,
374+
context->config.est_num_eps);
376375

377376
perf_attr.field_mask = UCT_PERF_ATTR_FIELD_OPERATION |
378377
UCT_PERF_ATTR_FIELD_SEND_PRE_OVERHEAD |
@@ -406,7 +405,7 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
406405

407406
ucp_proto_common_lane_perf_node(context, rsc_index, &perf_attr,
408407
&lane_perf_node);
409-
ucp_proto_perf_node_own_child(perf_node, &lane_perf_node);
408+
ucp_proto_perf_node_own_child(tl_perf->node, &lane_perf_node);
410409

411410
/* If reg_mem_info type is not unknown we assume the protocol is going to
412411
* send that mem type in a zero copy fashion. So, need to consider the
@@ -423,7 +422,7 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
423422
ucp_proto_common_get_lane_distance(&params->super, lane, sys_dev,
424423
&distance);
425424
ucp_proto_common_update_lane_perf_by_distance(
426-
tl_perf, perf_node, &distance, "local system", "%s %s",
425+
tl_perf, &distance, "local system", "%s %s",
427426
ucs_topo_sys_device_get_name(sys_dev),
428427
ucs_topo_sys_device_bdf_name(sys_dev, bdf_name,
429428
sizeof(bdf_name)));
@@ -437,7 +436,7 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
437436
rkey_config = &worker->rkey_config[params->super.rkey_cfg_index];
438437
distance = rkey_config->lanes_distance[lane];
439438
ucp_proto_common_update_lane_perf_by_distance(
440-
tl_perf, perf_node, &distance, "remote system", "sys-dev %d %s",
439+
tl_perf, &distance, "remote system", "sys-dev %d %s",
441440
rkey_config->key.sys_dev,
442441
ucs_memory_type_names[rkey_config->key.mem_type]);
443442
}
@@ -451,20 +450,18 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
451450
params->hdr_size);
452451
ucs_assert(tl_perf->sys_latency >= 0);
453452

454-
ucp_proto_perf_node_add_bandwidth(perf_node, "bw", tl_perf->bandwidth);
455-
ucp_proto_perf_node_add_scalar(perf_node, "lat", tl_perf->latency);
456-
ucp_proto_perf_node_add_scalar(perf_node, "sys-lat", tl_perf->sys_latency);
457-
ucp_proto_perf_node_add_scalar(perf_node, "send-pre",
453+
ucp_proto_perf_node_add_bandwidth(tl_perf->node, "bw", tl_perf->bandwidth);
454+
ucp_proto_perf_node_add_scalar(tl_perf->node, "lat", tl_perf->latency);
455+
ucp_proto_perf_node_add_scalar(tl_perf->node, "sys-lat", tl_perf->sys_latency);
456+
ucp_proto_perf_node_add_scalar(tl_perf->node, "send-pre",
458457
tl_perf->send_pre_overhead);
459-
ucp_proto_perf_node_add_scalar(perf_node, "send-post",
458+
ucp_proto_perf_node_add_scalar(tl_perf->node, "send-post",
460459
tl_perf->send_post_overhead);
461-
ucp_proto_perf_node_add_scalar(perf_node, "recv", tl_perf->recv_overhead);
462-
463-
*perf_node_p = perf_node;
460+
ucp_proto_perf_node_add_scalar(tl_perf->node, "recv", tl_perf->recv_overhead);
464461
return UCS_OK;
465462

466463
err_deref_perf_node:
467-
ucp_proto_perf_node_deref(&perf_node);
464+
ucp_proto_perf_node_deref(&tl_perf->node);
468465
return status;
469466
}
470467

src/ucp/proto/proto_common.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -176,6 +176,9 @@ typedef struct {
176176

177177
/* Maximum single message length */
178178
size_t max_frag;
179+
180+
/* Performance selection tree node */
181+
ucp_proto_perf_node_t *node;
179182
} ucp_proto_common_tl_perf_t;
180183

181184

@@ -279,8 +282,7 @@ void ucp_proto_common_lane_perf_node(ucp_context_h context,
279282
ucs_status_t
280283
ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
281284
ucp_lane_index_t lane,
282-
ucp_proto_common_tl_perf_t *perf,
283-
ucp_proto_perf_node_t **perf_node_p);
285+
ucp_proto_common_tl_perf_t *perf);
284286

285287

286288
typedef int (*ucp_proto_common_filter_lane_cb_t)(

src/ucp/proto/proto_init.c

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,6 @@ ucp_proto_init_skip_recv_overhead(const ucp_proto_common_init_params_t *params,
130130
static ucs_status_t
131131
ucp_proto_init_add_tl_perf(const ucp_proto_common_init_params_t *params,
132132
const ucp_proto_common_tl_perf_t *tl_perf,
133-
ucp_proto_perf_node_t *const tl_perf_node,
134133
size_t range_start, size_t range_end,
135134
ucp_proto_perf_t *perf)
136135
{
@@ -186,7 +185,7 @@ ucp_proto_init_add_tl_perf(const ucp_proto_common_init_params_t *params,
186185
return ucp_proto_perf_add_funcs(perf, range_start, range_end, perf_factors,
187186
ucp_proto_perf_node_new_data("transport",
188187
""),
189-
tl_perf_node);
188+
tl_perf->node);
190189
}
191190

192191
/**
@@ -504,7 +503,6 @@ ucp_proto_common_check_mem_access(const ucp_proto_common_init_params_t *params)
504503

505504
ucs_status_t ucp_proto_init_perf(const ucp_proto_common_init_params_t *params,
506505
const ucp_proto_common_tl_perf_t *tl_perf,
507-
ucp_proto_perf_node_t *const tl_perf_node,
508506
ucp_md_map_t reg_md_map, const char *perf_name,
509507
ucp_proto_perf_t **perf_p)
510508
{
@@ -533,8 +531,8 @@ ucs_status_t ucp_proto_init_perf(const ucp_proto_common_init_params_t *params,
533531
return status;
534532
}
535533

536-
status = ucp_proto_init_add_tl_perf(params, tl_perf, tl_perf_node,
537-
range_start, range_end, perf);
534+
status = ucp_proto_init_add_tl_perf(params, tl_perf, range_start, range_end,
535+
perf);
538536
if (status != UCS_OK) {
539537
goto err_cleanup_perf;
540538
}

src/ucp/proto/proto_init.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,6 @@ ucp_proto_init_add_buffer_copy_time(ucp_worker_h worker, const char *title,
6868

6969
ucs_status_t ucp_proto_init_perf(const ucp_proto_common_init_params_t *params,
7070
const ucp_proto_common_tl_perf_t *tl_perf,
71-
ucp_proto_perf_node_t *const tl_perf_node,
7271
ucp_md_map_t reg_md_map, const char *perf_name,
7372
ucp_proto_perf_t **perf_p);
7473

src/ucp/proto/proto_multi.c

Lines changed: 16 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -160,8 +160,8 @@ static ucp_sys_dev_map_t ucp_proto_multi_init_flush_sys_dev_mask(
160160

161161
static ucp_lane_index_t ucp_proto_multi_filter_net_devices(
162162
ucp_lane_index_t num_lanes, const ucp_proto_init_params_t *params,
163-
const ucp_proto_common_tl_perf_t *tl_perfs, int fixed_first_lane,
164-
ucp_lane_index_t *lanes, ucp_proto_perf_node_t **perf_nodes)
163+
ucp_proto_common_tl_perf_t *tl_perfs, int fixed_first_lane,
164+
ucp_lane_index_t *lanes)
165165
{
166166
ucp_lane_index_t num_max_bw_devs = 0;
167167
double max_bandwidth;
@@ -209,7 +209,7 @@ static ucp_lane_index_t ucp_proto_multi_filter_net_devices(
209209
tl_rsc = ucp_proto_common_get_tl_rsc(params, lane);
210210
if ((tl_rsc->dev_type == UCT_DEVICE_TYPE_NET) &&
211211
(tl_rsc->sys_device != sys_devs[seed])) {
212-
ucp_proto_perf_node_deref(&perf_nodes[lane]);
212+
ucp_proto_perf_node_deref(&tl_perfs[lane].node);
213213
ucs_trace("filtered out " UCP_PROTO_LANE_FMT,
214214
UCP_PROTO_LANE_ARG(params, lane, &tl_perfs[lane]));
215215
} else {
@@ -225,16 +225,14 @@ ucs_status_t ucp_proto_multi_init(const ucp_proto_multi_init_params_t *params,
225225
ucp_proto_perf_t **perf_p,
226226
ucp_proto_multi_priv_t *mpriv)
227227
{
228-
ucp_context_h context = params->super.super.worker->context;
229-
const double max_bw_ratio = context->config.ext.multi_lane_max_ratio;
230-
ucp_proto_perf_node_t *lanes_perf_nodes[UCP_PROTO_MAX_LANES];
228+
ucp_context_h context = params->super.super.worker->context;
229+
const double max_bw_ratio = context->config.ext.multi_lane_max_ratio;
231230
ucp_proto_common_tl_perf_t lanes_perf[UCP_PROTO_MAX_LANES];
232231
ucp_proto_common_tl_perf_t *lane_perf, perf;
233232
ucp_lane_index_t lanes[UCP_PROTO_MAX_LANES];
234233
double max_bandwidth, max_frag_ratio, min_bandwidth;
235234
ucp_lane_index_t i, lane, num_lanes, num_fast_lanes;
236235
ucp_proto_multi_lane_priv_t *lpriv;
237-
ucp_proto_perf_node_t *perf_node;
238236
size_t max_frag, min_length, min_end_offset, min_chunk;
239237
ucp_proto_lane_selection_t selection;
240238
ucp_md_map_t reg_md_map;
@@ -282,8 +280,7 @@ ucs_status_t ucp_proto_multi_init(const ucp_proto_multi_init_params_t *params,
282280
lane = lanes[i];
283281
lane_perf = &lanes_perf[lane];
284282

285-
status = ucp_proto_common_get_lane_perf(&params->super, lane, lane_perf,
286-
&lanes_perf_nodes[lane]);
283+
status = ucp_proto_common_get_lane_perf(&params->super, lane, lane_perf);
287284
if (status != UCS_OK) {
288285
return status;
289286
}
@@ -310,7 +307,7 @@ ucs_status_t ucp_proto_multi_init(const ucp_proto_multi_init_params_t *params,
310307
if ((lane_perf->bandwidth * max_bw_ratio) < max_bandwidth) {
311308
/* Bandwidth on this lane is too low compared to the fastest
312309
available lane, so it's not worth using it */
313-
ucp_proto_perf_node_deref(&lanes_perf_nodes[lane]);
310+
ucp_proto_perf_node_deref(&lanes_perf[lane].node);
314311
ucs_trace("drop " UCP_PROTO_LANE_FMT,
315312
UCP_PROTO_LANE_ARG(&params->super.super, lane, lane_perf));
316313
} else {
@@ -325,8 +322,7 @@ ucs_status_t ucp_proto_multi_init(const ucp_proto_multi_init_params_t *params,
325322
num_lanes = ucp_proto_multi_filter_net_devices(num_lanes,
326323
&params->super.super,
327324
lanes_perf,
328-
fixed_first_lane, lanes,
329-
lanes_perf_nodes);
325+
fixed_first_lane, lanes);
330326
}
331327

332328
ucp_proto_multi_select_bw_lanes(&params->super.super, lanes, num_lanes,
@@ -468,27 +464,26 @@ ucs_status_t ucp_proto_multi_init(const ucp_proto_multi_init_params_t *params,
468464
}
469465
ucs_assert(mpriv->num_lanes == ucs_popcount(selection.lane_map));
470466

471-
/* After this block, 'perf_node' and 'lane_perf_nodes[]' have extra ref */
472467
if (mpriv->num_lanes == 1) {
473-
perf_node = lanes_perf_nodes[ucs_ffs64(selection.lane_map)];
474-
ucp_proto_perf_node_ref(perf_node);
468+
perf.node = lanes_perf[ucs_ffs64(selection.lane_map)].node;
469+
ucp_proto_perf_node_ref(perf.node);
475470
} else {
476-
perf_node = ucp_proto_perf_node_new_data("multi", "%u lanes",
471+
perf.node = ucp_proto_perf_node_new_data("multi", "%u lanes",
477472
mpriv->num_lanes);
478473
ucs_for_each_bit(lane, selection.lane_map) {
479474
ucs_assert(lane < UCP_MAX_LANES);
480-
ucp_proto_perf_node_add_child(perf_node, lanes_perf_nodes[lane]);
475+
ucp_proto_perf_node_add_child(perf.node, lanes_perf[lane].node);
481476
}
482477
}
483478

484-
status = ucp_proto_init_perf(&params->super, &perf, perf_node, reg_md_map,
485-
perf_name, perf_p);
479+
status = ucp_proto_init_perf(&params->super, &perf, reg_md_map, perf_name,
480+
perf_p);
486481

487482
/* Deref unused nodes */
488483
for (i = 0; i < num_lanes; ++i) {
489-
ucp_proto_perf_node_deref(&lanes_perf_nodes[lanes[i]]);
484+
ucp_proto_perf_node_deref(&lanes_perf[lanes[i]].node);
490485
}
491-
ucp_proto_perf_node_deref(&perf_node);
486+
ucp_proto_perf_node_deref(&perf.node);
492487

493488
return status;
494489
}

src/ucp/proto/proto_single.c

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@ ucs_status_t ucp_proto_single_init(const ucp_proto_single_init_params_t *params,
2424
{
2525
const char *proto_name = ucp_proto_id_field(params->super.super.proto_id,
2626
name);
27-
ucp_proto_perf_node_t *tl_perf_node;
2827
ucp_proto_common_tl_perf_t tl_perf;
2928
ucp_lane_index_t num_lanes;
3029
ucp_md_map_t reg_md_map;
@@ -57,15 +56,14 @@ ucs_status_t ucp_proto_single_init(const ucp_proto_single_init_params_t *params,
5756

5857
ucp_proto_common_lane_priv_init(&params->super, reg_md_map, lane,
5958
&spriv->super);
60-
status = ucp_proto_common_get_lane_perf(&params->super, lane, &tl_perf,
61-
&tl_perf_node);
59+
status = ucp_proto_common_get_lane_perf(&params->super, lane, &tl_perf);
6260
if (status != UCS_OK) {
6361
return status;
6462
}
6563

66-
status = ucp_proto_init_perf(&params->super, &tl_perf, tl_perf_node,
67-
reg_md_map, proto_name, perf_p);
68-
ucp_proto_perf_node_deref(&tl_perf_node);
64+
status = ucp_proto_init_perf(&params->super, &tl_perf, reg_md_map,
65+
proto_name, perf_p);
66+
ucp_proto_perf_node_deref(&tl_perf.node);
6967

7068
return status;
7169
}

0 commit comments

Comments
 (0)