|
6 | 6 | * reserved. |
7 | 7 | * Copyright (c) 2020-2021 Cisco Systems, Inc. All rights reserved. |
8 | 8 | * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. |
9 | | - * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights |
| 9 | + * Copyright (c) 2021-2025 Amazon.com, Inc. or its affiliates. All rights |
10 | 10 | * reserved. |
11 | 11 | * Copyright (c) 2023 UT-Battelle, LLC. All rights reserved. |
12 | 12 | * $COPYRIGHT$ |
|
42 | 42 | extern opal_accelerator_base_module_t opal_accelerator; |
43 | 43 | opal_common_ofi_module_t opal_common_ofi = {.prov_include = NULL, |
44 | 44 | .prov_exclude = NULL, |
45 | | - .output = -1}; |
| 45 | + .output = -1, |
| 46 | + .fabric = NULL, |
| 47 | + .domain = NULL, |
| 48 | + .fabric_ref_count = 0, |
| 49 | + .domain_ref_count = 0}; |
46 | 50 | static const char default_prov_exclude_list[] = "shm,sockets,tcp,udp,rstream,usnic,net"; |
47 | 51 | static opal_mutex_t opal_common_ofi_mutex = OPAL_MUTEX_STATIC_INIT; |
48 | 52 | static int opal_common_ofi_verbose_level = 0; |
@@ -1257,3 +1261,156 @@ OPAL_DECLSPEC int opal_common_ofi_fi_getname(fid_t fid, void **addr, size_t *add |
1257 | 1261 | } |
1258 | 1262 | return ret; |
1259 | 1263 | } |
| 1264 | + |
| 1265 | +/** |
| 1266 | + * Get or create fabric object |
| 1267 | + * |
| 1268 | + * Reuses existing fabric from fabric_attr->fabric if available, |
| 1269 | + * otherwise creates new fabric using fi_fabric(). |
| 1270 | + * |
| 1271 | + * @param fabric_attr (IN) Fabric attributes |
| 1272 | + * @param fabric (OUT) Fabric object (new or existing) |
| 1273 | + * |
| 1274 | + * @return OPAL_SUCCESS or error code |
| 1275 | + */ |
| 1276 | +int opal_common_ofi_fi_fabric(struct fi_fabric_attr *fabric_attr, |
| 1277 | + struct fid_fabric **fabric) |
| 1278 | +{ |
| 1279 | + int ret; |
| 1280 | + |
| 1281 | + OPAL_THREAD_LOCK(&opal_common_ofi_mutex); |
| 1282 | + |
| 1283 | + if (fabric_attr->fabric) { |
| 1284 | + *fabric = fabric_attr->fabric; |
| 1285 | + opal_common_ofi.fabric_ref_count++; |
| 1286 | + opal_output_verbose(1, opal_common_ofi.output, "Reusing existing fabric: %s", |
| 1287 | + fabric_attr->name); |
| 1288 | + } else { |
| 1289 | + ret = fi_fabric(fabric_attr, fabric, NULL); |
| 1290 | + if (0 != ret) { |
| 1291 | + OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex); |
| 1292 | + return ret; |
| 1293 | + } |
| 1294 | + opal_common_ofi.fabric = *fabric; |
| 1295 | + opal_common_ofi.fabric_ref_count = 1; |
| 1296 | + } |
| 1297 | + |
| 1298 | + OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex); |
| 1299 | + return OPAL_SUCCESS; |
| 1300 | +} |
| 1301 | + |
| 1302 | +/** |
| 1303 | + * Get or create domain object |
| 1304 | + * |
| 1305 | + * Reuses existing domain from info->domain_attr->domain if available, |
| 1306 | + * otherwise creates new domain using fi_domain(). |
| 1307 | + * |
| 1308 | + * @param fabric (IN) Fabric object |
| 1309 | + * @param info (IN) Provider info |
| 1310 | + * @param domain (OUT) Domain object (new or existing) |
| 1311 | + * |
| 1312 | + * @return OPAL_SUCCESS or OPAL error code |
| 1313 | + */ |
| 1314 | +int opal_common_ofi_fi_domain(struct fid_fabric *fabric, struct fi_info *info, |
| 1315 | + struct fid_domain **domain) |
| 1316 | +{ |
| 1317 | + int ret; |
| 1318 | + |
| 1319 | + OPAL_THREAD_LOCK(&opal_common_ofi_mutex); |
| 1320 | + |
| 1321 | + if (info->domain_attr->domain) { |
| 1322 | + *domain = info->domain_attr->domain; |
| 1323 | + opal_common_ofi.domain_ref_count++; |
| 1324 | + opal_output_verbose(1, opal_common_ofi.output, "Reusing existing domain: %s", |
| 1325 | + info->domain_attr->name); |
| 1326 | + } else { |
| 1327 | + ret = fi_domain(fabric, info, domain, NULL); |
| 1328 | + if (0 != ret) { |
| 1329 | + OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex); |
| 1330 | + return ret; |
| 1331 | + } |
| 1332 | + opal_common_ofi.domain = *domain; |
| 1333 | + opal_common_ofi.domain_ref_count = 1; |
| 1334 | + } |
| 1335 | + |
| 1336 | + OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex); |
| 1337 | + return OPAL_SUCCESS; |
| 1338 | +} |
| 1339 | + |
| 1340 | +/** |
| 1341 | + * Release fabric reference |
| 1342 | + * |
| 1343 | + * Decrements fabric reference count and closes fabric if count reaches zero. |
| 1344 | + * |
| 1345 | + * @param fabric (IN) Fabric object to release |
| 1346 | + * |
| 1347 | + * @return OPAL_SUCCESS or error code |
| 1348 | + */ |
| 1349 | +int opal_common_ofi_fabric_release(struct fid_fabric *fabric) |
| 1350 | +{ |
| 1351 | + int ret = OPAL_SUCCESS; |
| 1352 | + |
| 1353 | + OPAL_THREAD_LOCK(&opal_common_ofi_mutex); |
| 1354 | + |
| 1355 | + if (fabric == opal_common_ofi.fabric && opal_common_ofi.fabric_ref_count > 0) { |
| 1356 | + opal_common_ofi.fabric_ref_count--; |
| 1357 | + if (opal_common_ofi.fabric_ref_count == 0) { |
| 1358 | + ret = fi_close(&fabric->fid); |
| 1359 | + if (0 != ret) { |
| 1360 | + opal_output_verbose(1, opal_common_ofi.output, |
| 1361 | + "%s:%d: fi_close failed for fabric: %s (%d)", |
| 1362 | + __FILE__, __LINE__, fi_strerror(-ret), ret); |
| 1363 | + } |
| 1364 | + opal_common_ofi.fabric = NULL; |
| 1365 | + } |
| 1366 | + } else { |
| 1367 | + ret = fi_close(&fabric->fid); |
| 1368 | + if (0 != ret) { |
| 1369 | + opal_output_verbose(1, opal_common_ofi.output, |
| 1370 | + "%s:%d: fi_close failed for fabric: %s (%d)", |
| 1371 | + __FILE__, __LINE__, fi_strerror(-ret), ret); |
| 1372 | + } |
| 1373 | + } |
| 1374 | + |
| 1375 | + OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex); |
| 1376 | + return ret; |
| 1377 | +} |
| 1378 | + |
| 1379 | +/** |
| 1380 | + * Release domain reference |
| 1381 | + * |
| 1382 | + * Decrements domain reference count and closes domain if count reaches zero. |
| 1383 | + * |
| 1384 | + * @param domain (IN) Domain object to release |
| 1385 | + * |
| 1386 | + * @return OPAL_SUCCESS or error code |
| 1387 | + */ |
| 1388 | +int opal_common_ofi_domain_release(struct fid_domain *domain) |
| 1389 | +{ |
| 1390 | + int ret = OPAL_SUCCESS; |
| 1391 | + |
| 1392 | + OPAL_THREAD_LOCK(&opal_common_ofi_mutex); |
| 1393 | + |
| 1394 | + if (domain == opal_common_ofi.domain && opal_common_ofi.domain_ref_count > 0) { |
| 1395 | + opal_common_ofi.domain_ref_count--; |
| 1396 | + if (opal_common_ofi.domain_ref_count == 0) { |
| 1397 | + ret = fi_close(&domain->fid); |
| 1398 | + if (0 != ret) { |
| 1399 | + opal_output_verbose(1, opal_common_ofi.output, |
| 1400 | + "%s:%d: fi_close failed for domain: %s (%d)", |
| 1401 | + __FILE__, __LINE__, fi_strerror(-ret), ret); |
| 1402 | + } |
| 1403 | + opal_common_ofi.domain = NULL; |
| 1404 | + } |
| 1405 | + } else { |
| 1406 | + ret = fi_close(&domain->fid); |
| 1407 | + if (0 != ret) { |
| 1408 | + opal_output_verbose(1, opal_common_ofi.output, |
| 1409 | + "%s:%d: fi_close failed for domain: %s (%d)", |
| 1410 | + __FILE__, __LINE__, fi_strerror(-ret), ret); |
| 1411 | + } |
| 1412 | + } |
| 1413 | + |
| 1414 | + OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex); |
| 1415 | + return ret; |
| 1416 | +} |
0 commit comments