|
6 | 6 |
|
7 | 7 | #include "tl_cuda_team_topo.h" |
8 | 8 | #include "tl_cuda.h" |
| 9 | +#include "core/ucc_team.h" |
9 | 10 |
|
10 | 11 | #define UCC_TL_CUDA_TEAM_TOPO_SAME_DEVICE ((ucc_rank_t)(UCC_RANK_MAX)) |
11 | 12 |
|
@@ -341,27 +342,32 @@ static ucc_status_t |
341 | 342 | ucc_tl_cuda_team_topo_init_matrix(const ucc_tl_cuda_team_t *team, |
342 | 343 | ucc_rank_t *matrix) |
343 | 344 | { |
344 | | - ucc_tl_cuda_topo_t *topo = UCC_TL_CUDA_TEAM_CTX(team)->topo; |
345 | | - int size = UCC_TL_TEAM_SIZE(team); |
346 | | - ucc_status_t status; |
347 | | - int i, j; |
| 345 | + ucc_topo_t *topo = UCC_TL_CORE_TEAM(team)->topo; |
| 346 | + ucc_proc_info_t *procs = topo->topo->procs; |
| 347 | + ucc_device_id_t *dev_ids = topo->device_map.device_ids; |
| 348 | + int size = UCC_TL_TEAM_SIZE(team); |
| 349 | + int i, j; |
| 350 | + ucc_rank_t ci, cj; |
| 351 | + ucc_device_id_t di, dj; |
| 352 | + const ucc_host_info_t *host_i; |
348 | 353 |
|
349 | 354 | for (i = 0; i < size; i++) { |
350 | | - matrix[i + i*size] = UCC_TL_CUDA_TEAM_TOPO_SAME_DEVICE; |
| 355 | + matrix[i + i * size] = UCC_TL_CUDA_TEAM_TOPO_SAME_DEVICE; |
| 356 | + ci = ucc_ep_map_eval(topo->set.map, i); |
| 357 | + di = dev_ids[i]; |
| 358 | + host_i = &topo->topo->hosts[ci]; |
351 | 359 | for (j = i + 1; j < size; j++) { |
352 | | - if (ucc_tl_cuda_topo_device_id_equal(&team->ids[i].pci_id, |
353 | | - &team->ids[j].pci_id)) { |
354 | | - matrix[i + j*size] = UCC_TL_CUDA_TEAM_TOPO_SAME_DEVICE; |
| 360 | + cj = ucc_ep_map_eval(topo->set.map, j); |
| 361 | + dj = dev_ids[j]; |
| 362 | + if (procs[ci].host_hash != procs[cj].host_hash) { |
| 363 | + /* Cross-node pair: no intra-node NVLink */ |
| 364 | + matrix[i + j * size] = 0; |
| 365 | + } else if (di == dj) { |
| 366 | + matrix[i + j * size] = UCC_TL_CUDA_TEAM_TOPO_SAME_DEVICE; |
355 | 367 | } else { |
356 | | - status = ucc_tl_cuda_topo_num_links(topo, |
357 | | - &team->ids[i].pci_id, |
358 | | - &team->ids[j].pci_id, |
359 | | - &matrix[i + j*size]); |
360 | | - if (status != UCC_OK) { |
361 | | - return status; |
362 | | - } |
| 368 | + matrix[i + j * size] = host_i->nvlink_matrix[di][dj]; |
363 | 369 | } |
364 | | - matrix[j + i*size] = matrix[i +j*size]; |
| 370 | + matrix[j + i * size] = matrix[i + j * size]; |
365 | 371 | } |
366 | 372 | } |
367 | 373 |
|
@@ -394,12 +400,28 @@ ucc_status_t ucc_tl_cuda_team_topo_create(const ucc_tl_team_t *cuda_team, |
394 | 400 | goto free_matrix; |
395 | 401 | } |
396 | 402 |
|
397 | | - status = ucc_tl_cuda_team_topo_init_proxies(team, topo); |
398 | | - if (status != UCC_OK) { |
399 | | - if (status != UCC_ERR_NOT_SUPPORTED) { |
400 | | - tl_error(UCC_TL_TEAM_LIB(team), "failed to init cuda topo proxy"); |
| 403 | + /* Use the authoritative ucc_topo NVLink check to determine full |
| 404 | + * connectivity. This handles NVSwitch, fabric clique, and direct NVLink |
| 405 | + * connections consistently and avoids rescanning the matrix for zeros. */ |
| 406 | + { |
| 407 | + ucc_topo_t *utopo = UCC_TL_CORE_TEAM(team)->topo; |
| 408 | + ucc_sbgp_t *node_sg = ucc_topo_get_sbgp(utopo, UCC_SBGP_NODE); |
| 409 | + topo->is_fully_connected = |
| 410 | + ucc_topo_is_nvlink_fully_connected(utopo, node_sg); |
| 411 | + } |
| 412 | + |
| 413 | + if (topo->is_fully_connected) { |
| 414 | + topo->num_proxies = 0; |
| 415 | + topo->proxy_needed = 0; |
| 416 | + } else { |
| 417 | + status = ucc_tl_cuda_team_topo_init_proxies(team, topo); |
| 418 | + if (status != UCC_OK) { |
| 419 | + if (status != UCC_ERR_NOT_SUPPORTED) { |
| 420 | + tl_error(UCC_TL_TEAM_LIB(team), |
| 421 | + "failed to init cuda topo proxy"); |
| 422 | + } |
| 423 | + goto free_matrix; |
401 | 424 | } |
402 | | - goto free_matrix; |
403 | 425 | } |
404 | 426 |
|
405 | 427 | status = ucc_tl_cuda_team_topo_init_rings(team, topo); |
|
0 commit comments