Skip to content

Commit 76b7c00

Browse files
authored
[Lustre] Fix device discovery for older versions of Lustre (<2.15.5) (#21901)
* Add test for non-yaml output of lctl dl
* Implement fallback to non-yaml version of lctl dl
* Add changelog
* Format
* Assert for exact match of devices in test
* Unify device list fixtures for readability
* Improve code based on review
* Remove redundant devices definition
* Format
* Minor syntax improvement
1 parent 81263a9 commit 76b7c00

File tree

5 files changed

+108
-3
lines changed

5 files changed

+108
-3
lines changed

lustre/changelog.d/21901.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix device discovery for older versions of Lustre (<2.15.5)

lustre/datadog_checks/lustre/check.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .constants import (
1616
CURATED_PARAMS,
1717
DEFAULT_STATS,
18+
DEVICE_ATTR_NAMES,
1819
EXTRA_STATS,
1920
FILESYSTEM_DISCOVERY_PARAM_MAPPING,
2021
IGNORED_LNET_GROUPS,
@@ -117,6 +118,7 @@ def __init__(self, name: str, init_config: Dict[str, Any], instances: List[Dict[
117118
self.filesystems: List[str] = self.instance.get('filesystems', [])
118119
# If filesystems were provided by the instance, do not update the filesystem list
119120
self.filesystem_discovery: bool = False if self.filesystems else True
121+
self._use_yaml: bool = True # Older versions of Lustre (<2.15.5) do not support yaml as an output
120122
self.node_type: str = self.instance.get('node_type', self._find_node_type())
121123

122124
self.tags: List[str] = self.instance.get('tags', [])
@@ -172,9 +174,28 @@ def _update_devices(self) -> None:
172174
Find devices using the lctl dl command.
173175
'''
174176
self.log.debug('Updating device list...')
175-
output = self._run_command('lctl', 'dl', '-y')
176-
device_data = yaml.safe_load(output)
177-
self.devices = device_data.get('devices', [])
177+
devices = []
178+
if self._use_yaml:
179+
try:
180+
output = self._run_command('lctl', 'dl', '-y')
181+
device_data = yaml.safe_load(output)
182+
devices = device_data.get('devices', [])
183+
except AttributeError:
184+
self.log.debug('Device update failed with yaml flag, retrying without it.')
185+
self._use_yaml = False
186+
if not self._use_yaml:
187+
output = self._run_command('lctl', 'dl')
188+
for device_line in output.splitlines():
189+
device_attr = device_line.split()
190+
if not len(device_attr) == len(DEVICE_ATTR_NAMES):
191+
self.log.error('Could not parse device info: %s', device_line)
192+
continue
193+
devices.append(dict(zip(DEVICE_ATTR_NAMES, device_attr)))
194+
if not devices:
195+
self.log.error("No devices detected.")
196+
return
197+
self.devices = devices
198+
self.log.debug('Devices successfully updated.')
178199

179200
def _update_filesystems(self) -> None:
180201
'''

lustre/datadog_checks/lustre/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
'client': (r'llite.*.stats', r'(?<=llite\.).*(?=-[^-]*\.stats)'),
1111
}
1212

13+
# Field names, in column order, for one whitespace-separated line of plain
# (non-YAML) `lctl dl` output; the check zips these with the split line.
DEVICE_ATTR_NAMES = ['index', 'status', 'type', 'name', 'uuid', 'refcount']
14+
1315
IGNORED_STATS = {
1416
'snapshot_time',
1517
'start_time',
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
0 UP mgc MGC172.31.16.218@tcp 7d3988a7-145f-444e-9953-58e3e6d97385 5
2+
1 UP lov lustre-clilov-ffff8b904341d000 ac8e54e3-1334-4865-a3f5-4f61ce87bdd1 4
3+
2 UP lmv lustre-clilmv-ffff8b904341d000 ac8e54e3-1334-4865-a3f5-4f61ce87bdd1 5
4+
3 UP mdc lustre-MDT0000-mdc-ffff8b904341d000 ac8e54e3-1334-4865-a3f5-4f61ce87bdd1 5
5+
4 UP osc lustre-OST0001-osc-ffff8b904341d000 ac8e54e3-1334-4865-a3f5-4f61ce87bdd1 5

lustre/tests/test_unit.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,3 +653,79 @@ def test_sanitize_command(bin_path, should_pass):
653653
# Should raise ValueError
654654
with pytest.raises(ValueError):
655655
_sanitize_command(bin_path)
656+
657+
658+
@pytest.mark.parametrize(
    ['yaml_fixture', 'non_yaml_fixture'],
    [
        pytest.param('', '', id='no devices'),
        pytest.param('client_dl_yaml.txt', '', id='devices from yaml'),
        pytest.param('', 'client_dl.txt', id='devices without yaml'),
    ],
)
def test_device_discovery(mock_lustre_commands, yaml_fixture, non_yaml_fixture):
    """Check the device list built at construction for each `lctl dl` output format.

    Three scenarios are covered: no output from either command, output only
    from `lctl dl -y`, and output only from plain `lctl dl`. Both populated
    scenarios are compared against the same expected device list.
    """
    command_fixtures = {
        'lctl get_param -ny version': 'all_version.txt',
        'lctl dl -y': yaml_fixture,
        'lctl dl': non_yaml_fixture,
        'lfs changelog': 'test_changelog',
    }

    with mock_lustre_commands(command_fixtures):
        check = LustreCheck('lustre', {}, [{}])

        # Neither command produced output: the device list must stay empty.
        if not (yaml_fixture or non_yaml_fixture):
            assert check.devices == []
            return

        # One row per expected device, in the same column order as `columns`.
        columns = ('index', 'status', 'type', 'name', 'uuid', 'refcount')
        rows = [
            ('0', 'UP', 'mgc', 'MGC172.31.16.218@tcp', '7d3988a7-145f-444e-9953-58e3e6d97385', '5'),
            ('1', 'UP', 'lov', 'lustre-clilov-ffff8b904341d000', 'ac8e54e3-1334-4865-a3f5-4f61ce87bdd1', '4'),
            ('2', 'UP', 'lmv', 'lustre-clilmv-ffff8b904341d000', 'ac8e54e3-1334-4865-a3f5-4f61ce87bdd1', '5'),
            ('3', 'UP', 'mdc', 'lustre-MDT0000-mdc-ffff8b904341d000', 'ac8e54e3-1334-4865-a3f5-4f61ce87bdd1', '5'),
            ('4', 'UP', 'osc', 'lustre-OST0001-osc-ffff8b904341d000', 'ac8e54e3-1334-4865-a3f5-4f61ce87bdd1', '5'),
        ]
        expected_devices = [dict(zip(columns, row)) for row in rows]

        # Integer values (e.g. from YAML parsing) are stringified so both
        # formats can be compared against the same expectation.
        actual_devices = []
        for device in check.devices:
            normalized = {}
            for key, value in device.items():
                normalized[key] = str(value) if isinstance(value, int) else value
            actual_devices.append(normalized)

        assert actual_devices == expected_devices

0 commit comments

Comments
 (0)