Skip to content

Commit 77411b4

Browse files
UCP/CORE: Add unit tests for NET_DEVICES
1 parent db68bd1 commit 77411b4

File tree

2 files changed

+274
-2
lines changed

2 files changed

+274
-2
lines changed

src/ucp/core/ucp_context.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,8 +1031,7 @@ static uint64_t ucp_str_array_search_in_ranges(const char **array,
10311031
n = 0;
10321032
if (sscanf(p, "[%lu-%lu]%n", &range_start, &range_end, &n) != 2 ||
10331033
n == 0 || p[n] != '\0' || range_start > range_end) {
1034-
ucs_warn("invalid device range: %s", array[i]);
1035-
continue;
1034+
continue; /* Invalid range */
10361035
}
10371036

10381037
str_id = strtoul(str + prefix_len, &endptr, 10);

test/gtest/ucp/test_ucp_context.cc

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@
55
*/
66

77
#include "ucp_test.h"
8+
9+
#include <set>
10+
811
extern "C" {
12+
#include <ucp/core/ucp_context.h>
913
#include <ucs/sys/sys.h>
1014
}
1115

@@ -118,3 +122,272 @@ UCS_TEST_P(test_ucp_version, version_string) {
118122
}
119123

120124
/* Instantiate the test_ucp_version suite.
 * NOTE(review): the (all, "all") arguments presumably name the instantiation
 * and select all transports - confirm against the macro definition. */
UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_version, all, "all")
125+
126+
class test_ucp_net_devices_config : public ucp_test {
127+
public:
128+
static void get_test_variants(std::vector<ucp_test_variant> &variants) {
129+
add_variant(variants, UCP_FEATURE_TAG);
130+
}
131+
132+
protected:
133+
/* Iterate over all network devices and apply action to each */
134+
template<typename Action>
135+
static void for_each_net_device(const entity &e, Action action) {
136+
ucp_context_h ctx = e.ucph();
137+
for (ucp_rsc_index_t i = 0; i < ctx->num_tls; ++i) {
138+
const uct_tl_resource_desc_t *rsc = &ctx->tl_rscs[i].tl_rsc;
139+
if (rsc->dev_type == UCT_DEVICE_TYPE_NET) {
140+
action(rsc);
141+
}
142+
}
143+
}
144+
145+
/* Get all mlx5 network device names from the context */
146+
static std::set<std::string> get_mlx5_device_names(const entity &e) {
147+
std::set<std::string> device_names;
148+
for_each_net_device(e, [&](const uct_tl_resource_desc_t *rsc) {
149+
if (strncmp(rsc->dev_name, "mlx5_", 5) == 0) {
150+
device_names.insert(rsc->dev_name);
151+
}
152+
});
153+
return device_names;
154+
}
155+
156+
/* Get a list of all mlx5 device base names (without port suffix) */
157+
static std::set<std::string>
158+
get_mlx5_base_names(const std::set<std::string> &mlx5_devices) {
159+
std::set<std::string> base_names;
160+
161+
for (const std::string &dev_name : mlx5_devices) {
162+
size_t colon_pos = dev_name.find(':');
163+
if (colon_pos != std::string::npos) {
164+
base_names.insert(dev_name.substr(0, colon_pos));
165+
} else {
166+
base_names.insert(dev_name);
167+
}
168+
}
169+
170+
return base_names;
171+
}
172+
173+
/* Count mlx5 resources matching a device name prefix */
174+
static size_t
175+
count_mlx5_resources_with_prefix(const std::set<std::string> &mlx5_devices,
176+
const std::string &prefix) {
177+
size_t count = 0;
178+
179+
for (const std::string &dev_name : mlx5_devices) {
180+
if (dev_name.compare(0, prefix.length(), prefix) == 0) {
181+
++count;
182+
}
183+
}
184+
185+
return count;
186+
}
187+
188+
/* Check if a specific device name exists in the set */
189+
static bool has_device(const std::set<std::string> &devices,
190+
const std::string &dev_name) {
191+
return devices.find(dev_name) != devices.end();
192+
}
193+
};
194+
195+
/*
196+
* Test that when UCX_NET_DEVICES is set to a base name (e.g., "mlx5_0"),
197+
* devices with the default port suffix ":1" are selected.
198+
*/
199+
UCS_TEST_P(test_ucp_net_devices_config, base_name_selects_default_port)
200+
{
201+
entity *e = create_entity();
202+
203+
std::set<std::string> mlx5_devices = get_mlx5_device_names(*e);
204+
if (mlx5_devices.empty()) {
205+
UCS_TEST_SKIP_R("No mlx5 network device available");
206+
}
207+
208+
std::set<std::string> base_names = get_mlx5_base_names(mlx5_devices);
209+
if (base_names.empty()) {
210+
UCS_TEST_SKIP_R("No mlx5 devices with port suffix found");
211+
}
212+
213+
/* Pick the first base name for testing */
214+
std::string test_base_name = *base_names.begin();
215+
216+
m_entities.clear();
217+
218+
/* Now create a new context with NET_DEVICES set to the base name */
219+
modify_config("NET_DEVICES", test_base_name.c_str());
220+
e = create_entity();
221+
222+
/* Verify that devices matching the base name were selected */
223+
std::set<std::string> selected_devices = get_mlx5_device_names(*e);
224+
size_t count = count_mlx5_resources_with_prefix(selected_devices, test_base_name);
225+
EXPECT_GT(count, 0) << "Expected at least one device with base name '"
226+
<< test_base_name << "' to be selected";
227+
228+
std::string expected_dev = test_base_name + ":1";
229+
EXPECT_TRUE(has_device(selected_devices, expected_dev))
230+
<< "Device '" << expected_dev << "' should be selected when "
231+
<< "UCX_NET_DEVICES=" << test_base_name;
232+
}
233+
234+
/*
235+
* Test that explicit port suffix specification works correctly.
236+
*/
237+
UCS_TEST_P(test_ucp_net_devices_config, explicit_port_suffix)
238+
{
239+
entity *e = create_entity();
240+
241+
std::set<std::string> mlx5_devices = get_mlx5_device_names(*e);
242+
if (mlx5_devices.empty()) {
243+
UCS_TEST_SKIP_R("No mlx5 network device available");
244+
}
245+
246+
/* Find a device with port suffix (contains ':') */
247+
std::string test_dev_name = *mlx5_devices.begin();
248+
ASSERT_NE(test_dev_name.find(':'), std::string::npos)
249+
<< "No port suffix found in device name";
250+
251+
m_entities.clear();
252+
253+
/* Create context with explicit device:port specification */
254+
modify_config("NET_DEVICES", test_dev_name.c_str());
255+
e = create_entity();
256+
257+
/* Verify the specific device was selected */
258+
std::set<std::string> selected_devices = get_mlx5_device_names(*e);
259+
EXPECT_TRUE(has_device(selected_devices, test_dev_name))
260+
<< "Device '" << test_dev_name << "' should be selected";
261+
}
262+
263+
/*
264+
* Test that device name range specification works with base names.
265+
* E.g., "mlx5_[0-1]" should match mlx5_0:1 and mlx5_1:1
266+
*/
267+
UCS_TEST_P(test_ucp_net_devices_config, range_with_base_names)
268+
{
269+
entity *e = create_entity();
270+
271+
std::set<std::string> mlx5_devices = get_mlx5_device_names(*e);
272+
if (mlx5_devices.empty()) {
273+
UCS_TEST_SKIP_R("No mlx5 network device available");
274+
}
275+
276+
size_t num_mlx5_devices = mlx5_devices.size();
277+
if (num_mlx5_devices < 2) {
278+
UCS_TEST_SKIP_R("Need at least 2 mlx5 devices for range test");
279+
}
280+
281+
m_entities.clear();
282+
283+
/* Use a range that should match all mlx devices */
284+
modify_config("NET_DEVICES", "mlx5_[0-99]");
285+
e = create_entity();
286+
287+
/* Verify that mlx5 devices were selected */
288+
std::set<std::string> selected_devices = get_mlx5_device_names(*e);
289+
EXPECT_EQ(selected_devices.size(), num_mlx5_devices)
290+
<< "Expected " << num_mlx5_devices
291+
<< " mlx5 devices to be selected with range";
292+
}
293+
294+
/*
295+
* Test that specifying a device multiple times (e.g., via range and explicit)
296+
* produces a warning about duplicate device specification.
297+
*/
298+
UCS_TEST_P(test_ucp_net_devices_config, duplicate_device_warning)
299+
{
300+
entity *e = create_entity();
301+
302+
std::set<std::string> mlx5_devices = get_mlx5_device_names(*e);
303+
if (mlx5_devices.empty()) {
304+
UCS_TEST_SKIP_R("No mlx5 network device available");
305+
}
306+
307+
std::set<std::string> base_names = get_mlx5_base_names(mlx5_devices);
308+
if (base_names.empty()) {
309+
UCS_TEST_SKIP_R("No mlx5 devices with port suffix found");
310+
}
311+
312+
/* Pick the first base name for testing */
313+
std::string test_base_name = *base_names.begin();
314+
315+
m_entities.clear();
316+
317+
/* Set NET_DEVICES to include both a range and an explicit device that
318+
* overlaps with the range, e.g., "mlx5_[0-99],mlx5_0" */
319+
std::string devices_config = "mlx5_[0-99]," + test_base_name;
320+
modify_config("NET_DEVICES", devices_config.c_str());
321+
322+
size_t warn_count;
323+
{
324+
scoped_log_handler slh(hide_warns_logger);
325+
warn_count = m_warnings.size();
326+
create_entity();
327+
}
328+
329+
/* Check that a warning about duplicate device was printed */
330+
std::string expected_warn = "device '" + test_base_name +
331+
"' is specified multiple times";
332+
bool found_warning = false;
333+
for (size_t i = warn_count; i < m_warnings.size(); ++i) {
334+
if (m_warnings[i].find(expected_warn) != std::string::npos) {
335+
found_warning = true;
336+
break;
337+
}
338+
}
339+
340+
EXPECT_TRUE(found_warning) << "Expected warning about duplicate device '"
341+
<< test_base_name << "'";
342+
}
343+
344+
/*
345+
* Test that non-mlx devices are not affected by the mlx default port logic.
346+
*/
347+
UCS_TEST_P(test_ucp_net_devices_config, non_mlx_device_unaffected)
348+
{
349+
std::string devices_list;
350+
std::set<std::string> non_mlx_devices;
351+
352+
entity *e = create_entity();
353+
354+
std::set<std::string> mlx5_devices = get_mlx5_device_names(*e);
355+
356+
/* Find all non-mlx network devices */
357+
for_each_net_device(*e, [&](const uct_tl_resource_desc_t *rsc) {
358+
if (!has_device(mlx5_devices, rsc->dev_name)) {
359+
non_mlx_devices.insert(rsc->dev_name);
360+
}
361+
});
362+
363+
if (non_mlx_devices.empty()) {
364+
GTEST_SKIP() << "No non-mlx network devices available";
365+
}
366+
367+
/* Build comma-separated list of all non-mlx devices */
368+
for (const std::string &dev : non_mlx_devices) {
369+
if (!devices_list.empty()) {
370+
devices_list += ",";
371+
}
372+
devices_list += dev;
373+
}
374+
375+
m_entities.clear();
376+
377+
/* Create context with all non-mlx devices */
378+
modify_config("NET_DEVICES", devices_list.c_str());
379+
e = create_entity();
380+
381+
/* Verify all devices were selected */
382+
std::set<std::string> selected_devices;
383+
for_each_net_device(*e, [&](const uct_tl_resource_desc_t *rsc) {
384+
selected_devices.insert(rsc->dev_name);
385+
});
386+
387+
for (const std::string &dev : non_mlx_devices) {
388+
EXPECT_TRUE(has_device(selected_devices, dev))
389+
<< "Non-mlx device '" << dev << "' should be selected";
390+
}
391+
}
392+
393+
/* Instantiate the test_ucp_net_devices_config suite.
 * NOTE(review): the (all, "all") arguments presumably name the instantiation
 * and select all transports - confirm against the macro definition. */
UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_net_devices_config, all, "all")

0 commit comments

Comments
 (0)