|
5 | 5 | */ |
6 | 6 |
|
7 | 7 | #include "ucp_test.h" |
| 8 | + |
| 9 | +#include <set> |
| 10 | + |
8 | 11 | extern "C" { |
| 12 | +#include <ucp/core/ucp_context.h> |
9 | 13 | #include <ucs/sys/sys.h> |
10 | 14 | } |
11 | 15 |
|
@@ -118,3 +122,272 @@ UCS_TEST_P(test_ucp_version, version_string) { |
118 | 122 | } |
119 | 123 |
|
120 | 124 | UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_version, all, "all") |
| 125 | + |
| 126 | +class test_ucp_net_devices_config : public ucp_test { |
| 127 | +public: |
| 128 | + static void get_test_variants(std::vector<ucp_test_variant> &variants) { |
| 129 | + add_variant(variants, UCP_FEATURE_TAG); |
| 130 | + } |
| 131 | + |
| 132 | +protected: |
| 133 | + /* Iterate over all network devices and apply action to each */ |
| 134 | + template<typename Action> |
| 135 | + static void for_each_net_device(const entity &e, Action action) { |
| 136 | + ucp_context_h ctx = e.ucph(); |
| 137 | + for (ucp_rsc_index_t i = 0; i < ctx->num_tls; ++i) { |
| 138 | + const uct_tl_resource_desc_t *rsc = &ctx->tl_rscs[i].tl_rsc; |
| 139 | + if (rsc->dev_type == UCT_DEVICE_TYPE_NET) { |
| 140 | + action(rsc); |
| 141 | + } |
| 142 | + } |
| 143 | + } |
| 144 | + |
| 145 | + /* Get all mlx5 network device names from the context */ |
| 146 | + static std::set<std::string> get_mlx5_device_names(const entity &e) { |
| 147 | + std::set<std::string> device_names; |
| 148 | + for_each_net_device(e, [&](const uct_tl_resource_desc_t *rsc) { |
| 149 | + if (strncmp(rsc->dev_name, "mlx5_", 5) == 0) { |
| 150 | + device_names.insert(rsc->dev_name); |
| 151 | + } |
| 152 | + }); |
| 153 | + return device_names; |
| 154 | + } |
| 155 | + |
| 156 | + /* Get a list of all mlx5 device base names (without port suffix) */ |
| 157 | + static std::set<std::string> |
| 158 | + get_mlx5_base_names(const std::set<std::string> &mlx5_devices) { |
| 159 | + std::set<std::string> base_names; |
| 160 | + |
| 161 | + for (const std::string &dev_name : mlx5_devices) { |
| 162 | + size_t colon_pos = dev_name.find(':'); |
| 163 | + if (colon_pos != std::string::npos) { |
| 164 | + base_names.insert(dev_name.substr(0, colon_pos)); |
| 165 | + } else { |
| 166 | + base_names.insert(dev_name); |
| 167 | + } |
| 168 | + } |
| 169 | + |
| 170 | + return base_names; |
| 171 | + } |
| 172 | + |
| 173 | + /* Count mlx5 resources matching a device name prefix */ |
| 174 | + static size_t |
| 175 | + count_mlx5_resources_with_prefix(const std::set<std::string> &mlx5_devices, |
| 176 | + const std::string &prefix) { |
| 177 | + size_t count = 0; |
| 178 | + |
| 179 | + for (const std::string &dev_name : mlx5_devices) { |
| 180 | + if (dev_name.compare(0, prefix.length(), prefix) == 0) { |
| 181 | + ++count; |
| 182 | + } |
| 183 | + } |
| 184 | + |
| 185 | + return count; |
| 186 | + } |
| 187 | + |
| 188 | + /* Check if a specific device name exists in the set */ |
| 189 | + static bool has_device(const std::set<std::string> &devices, |
| 190 | + const std::string &dev_name) { |
| 191 | + return devices.find(dev_name) != devices.end(); |
| 192 | + } |
| 193 | +}; |
| 194 | + |
| 195 | +/* |
| 196 | + * Test that when UCX_NET_DEVICES is set to a base name (e.g., "mlx5_0"), |
| 197 | + * devices with the default port suffix ":1" are selected. |
| 198 | + */ |
| 199 | +UCS_TEST_P(test_ucp_net_devices_config, base_name_selects_default_port) |
| 200 | +{ |
| 201 | + entity *e = create_entity(); |
| 202 | + |
| 203 | + std::set<std::string> mlx5_devices = get_mlx5_device_names(*e); |
| 204 | + if (mlx5_devices.empty()) { |
| 205 | + UCS_TEST_SKIP_R("No mlx5 network device available"); |
| 206 | + } |
| 207 | + |
| 208 | + std::set<std::string> base_names = get_mlx5_base_names(mlx5_devices); |
| 209 | + if (base_names.empty()) { |
| 210 | + UCS_TEST_SKIP_R("No mlx5 devices with port suffix found"); |
| 211 | + } |
| 212 | + |
| 213 | + /* Pick the first base name for testing */ |
| 214 | + std::string test_base_name = *base_names.begin(); |
| 215 | + |
| 216 | + m_entities.clear(); |
| 217 | + |
| 218 | + /* Now create a new context with NET_DEVICES set to the base name */ |
| 219 | + modify_config("NET_DEVICES", test_base_name.c_str()); |
| 220 | + e = create_entity(); |
| 221 | + |
| 222 | + /* Verify that devices matching the base name were selected */ |
| 223 | + std::set<std::string> selected_devices = get_mlx5_device_names(*e); |
| 224 | + size_t count = count_mlx5_resources_with_prefix(selected_devices, test_base_name); |
| 225 | + EXPECT_GT(count, 0) << "Expected at least one device with base name '" |
| 226 | + << test_base_name << "' to be selected"; |
| 227 | + |
| 228 | + std::string expected_dev = test_base_name + ":1"; |
| 229 | + EXPECT_TRUE(has_device(selected_devices, expected_dev)) |
| 230 | + << "Device '" << expected_dev << "' should be selected when " |
| 231 | + << "UCX_NET_DEVICES=" << test_base_name; |
| 232 | +} |
| 233 | + |
| 234 | +/* |
| 235 | + * Test that explicit port suffix specification works correctly. |
| 236 | + */ |
| 237 | +UCS_TEST_P(test_ucp_net_devices_config, explicit_port_suffix) |
| 238 | +{ |
| 239 | + entity *e = create_entity(); |
| 240 | + |
| 241 | + std::set<std::string> mlx5_devices = get_mlx5_device_names(*e); |
| 242 | + if (mlx5_devices.empty()) { |
| 243 | + UCS_TEST_SKIP_R("No mlx5 network device available"); |
| 244 | + } |
| 245 | + |
| 246 | + /* Find a device with port suffix (contains ':') */ |
| 247 | + std::string test_dev_name = *mlx5_devices.begin(); |
| 248 | + ASSERT_NE(test_dev_name.find(':'), std::string::npos) |
| 249 | + << "No port suffix found in device name"; |
| 250 | + |
| 251 | + m_entities.clear(); |
| 252 | + |
| 253 | + /* Create context with explicit device:port specification */ |
| 254 | + modify_config("NET_DEVICES", test_dev_name.c_str()); |
| 255 | + e = create_entity(); |
| 256 | + |
| 257 | + /* Verify the specific device was selected */ |
| 258 | + std::set<std::string> selected_devices = get_mlx5_device_names(*e); |
| 259 | + EXPECT_TRUE(has_device(selected_devices, test_dev_name)) |
| 260 | + << "Device '" << test_dev_name << "' should be selected"; |
| 261 | +} |
| 262 | + |
| 263 | +/* |
| 264 | + * Test that device name range specification works with base names. |
| 265 | + * E.g., "mlx5_[0-1]" should match mlx5_0:1 and mlx5_1:1 |
| 266 | + */ |
| 267 | +UCS_TEST_P(test_ucp_net_devices_config, range_with_base_names) |
| 268 | +{ |
| 269 | + entity *e = create_entity(); |
| 270 | + |
| 271 | + std::set<std::string> mlx5_devices = get_mlx5_device_names(*e); |
| 272 | + if (mlx5_devices.empty()) { |
| 273 | + UCS_TEST_SKIP_R("No mlx5 network device available"); |
| 274 | + } |
| 275 | + |
| 276 | + size_t num_mlx5_devices = mlx5_devices.size(); |
| 277 | + if (num_mlx5_devices < 2) { |
| 278 | + UCS_TEST_SKIP_R("Need at least 2 mlx5 devices for range test"); |
| 279 | + } |
| 280 | + |
| 281 | + m_entities.clear(); |
| 282 | + |
| 283 | + /* Use a range that should match all mlx devices */ |
| 284 | + modify_config("NET_DEVICES", "mlx5_[0-99]"); |
| 285 | + e = create_entity(); |
| 286 | + |
| 287 | + /* Verify that mlx5 devices were selected */ |
| 288 | + std::set<std::string> selected_devices = get_mlx5_device_names(*e); |
| 289 | + EXPECT_EQ(selected_devices.size(), num_mlx5_devices) |
| 290 | + << "Expected " << num_mlx5_devices |
| 291 | + << " mlx5 devices to be selected with range"; |
| 292 | +} |
| 293 | + |
| 294 | +/* |
| 295 | + * Test that specifying a device multiple times (e.g., via range and explicit) |
| 296 | + * produces a warning about duplicate device specification. |
| 297 | + */ |
| 298 | +UCS_TEST_P(test_ucp_net_devices_config, duplicate_device_warning) |
| 299 | +{ |
| 300 | + entity *e = create_entity(); |
| 301 | + |
| 302 | + std::set<std::string> mlx5_devices = get_mlx5_device_names(*e); |
| 303 | + if (mlx5_devices.empty()) { |
| 304 | + UCS_TEST_SKIP_R("No mlx5 network device available"); |
| 305 | + } |
| 306 | + |
| 307 | + std::set<std::string> base_names = get_mlx5_base_names(mlx5_devices); |
| 308 | + if (base_names.empty()) { |
| 309 | + UCS_TEST_SKIP_R("No mlx5 devices with port suffix found"); |
| 310 | + } |
| 311 | + |
| 312 | + /* Pick the first base name for testing */ |
| 313 | + std::string test_base_name = *base_names.begin(); |
| 314 | + |
| 315 | + m_entities.clear(); |
| 316 | + |
| 317 | + /* Set NET_DEVICES to include both a range and an explicit device that |
| 318 | + * overlaps with the range, e.g., "mlx5_[0-99],mlx5_0" */ |
| 319 | + std::string devices_config = "mlx5_[0-99]," + test_base_name; |
| 320 | + modify_config("NET_DEVICES", devices_config.c_str()); |
| 321 | + |
| 322 | + size_t warn_count; |
| 323 | + { |
| 324 | + scoped_log_handler slh(hide_warns_logger); |
| 325 | + warn_count = m_warnings.size(); |
| 326 | + create_entity(); |
| 327 | + } |
| 328 | + |
| 329 | + /* Check that a warning about duplicate device was printed */ |
| 330 | + std::string expected_warn = "device '" + test_base_name + |
| 331 | + "' is specified multiple times"; |
| 332 | + bool found_warning = false; |
| 333 | + for (size_t i = warn_count; i < m_warnings.size(); ++i) { |
| 334 | + if (m_warnings[i].find(expected_warn) != std::string::npos) { |
| 335 | + found_warning = true; |
| 336 | + break; |
| 337 | + } |
| 338 | + } |
| 339 | + |
| 340 | + EXPECT_TRUE(found_warning) << "Expected warning about duplicate device '" |
| 341 | + << test_base_name << "'"; |
| 342 | +} |
| 343 | + |
| 344 | +/* |
| 345 | + * Test that non-mlx devices are not affected by the mlx default port logic. |
| 346 | + */ |
| 347 | +UCS_TEST_P(test_ucp_net_devices_config, non_mlx_device_unaffected) |
| 348 | +{ |
| 349 | + std::string devices_list; |
| 350 | + std::set<std::string> non_mlx_devices; |
| 351 | + |
| 352 | + entity *e = create_entity(); |
| 353 | + |
| 354 | + std::set<std::string> mlx5_devices = get_mlx5_device_names(*e); |
| 355 | + |
| 356 | + /* Find all non-mlx network devices */ |
| 357 | + for_each_net_device(*e, [&](const uct_tl_resource_desc_t *rsc) { |
| 358 | + if (!has_device(mlx5_devices, rsc->dev_name)) { |
| 359 | + non_mlx_devices.insert(rsc->dev_name); |
| 360 | + } |
| 361 | + }); |
| 362 | + |
| 363 | + if (non_mlx_devices.empty()) { |
| 364 | + GTEST_SKIP() << "No non-mlx network devices available"; |
| 365 | + } |
| 366 | + |
| 367 | + /* Build comma-separated list of all non-mlx devices */ |
| 368 | + for (const std::string &dev : non_mlx_devices) { |
| 369 | + if (!devices_list.empty()) { |
| 370 | + devices_list += ","; |
| 371 | + } |
| 372 | + devices_list += dev; |
| 373 | + } |
| 374 | + |
| 375 | + m_entities.clear(); |
| 376 | + |
| 377 | + /* Create context with all non-mlx devices */ |
| 378 | + modify_config("NET_DEVICES", devices_list.c_str()); |
| 379 | + e = create_entity(); |
| 380 | + |
| 381 | + /* Verify all devices were selected */ |
| 382 | + std::set<std::string> selected_devices; |
| 383 | + for_each_net_device(*e, [&](const uct_tl_resource_desc_t *rsc) { |
| 384 | + selected_devices.insert(rsc->dev_name); |
| 385 | + }); |
| 386 | + |
| 387 | + for (const std::string &dev : non_mlx_devices) { |
| 388 | + EXPECT_TRUE(has_device(selected_devices, dev)) |
| 389 | + << "Non-mlx device '" << dev << "' should be selected"; |
| 390 | + } |
| 391 | +} |
| 392 | + |
| 393 | +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_net_devices_config, all, "all") |
0 commit comments