
Commit df4367e

Merge pull request ceph#60395 from guits/replace-osd-enhancement
ceph-volume: support zapping by osd-id for RAW OSDs
2 parents eb6dbee + 4941d09 commit df4367e
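
In practice the change makes the existing zap-by-id path work for OSDs deployed in raw mode as well as for LVM-based ones; before, the device lookup only inspected LVM tags and raised an error when no LVs matched. A minimal usage sketch (the flags shown already existed before this commit; the id is made up):

    ceph-volume lvm zap --destroy --osd-id 1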

File tree

9 files changed: +517 -325 lines changed

src/ceph-volume/ceph_volume/devices/lvm/zap.py

Lines changed: 113 additions & 70 deletions
@@ -10,7 +10,8 @@
 from ceph_volume.util import system, encryption, disk, arg_validators, str_to_int, merge_dict
 from ceph_volume.util.device import Device
 from ceph_volume.systemd import systemctl
-from typing import Any, Dict, List
+from ceph_volume.devices.raw.list import direct_report
+from typing import Any, Dict, List, Set

 logger = logging.getLogger(__name__)
 mlogger = terminal.MultiLogger(__name__)
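
The new direct_report import comes from ceph-volume's raw listing code (the scan behind "ceph-volume raw list"). A minimal sketch of how this diff consumes it, assuming the report is a dict keyed by OSD fsid whose entries carry at least the 'osd_id', 'osd_uuid' and 'device' fields read further down (root privileges and real OSD devices on the host are assumed):

    from ceph_volume.devices.raw.list import direct_report

    report = direct_report()          # {} when no RAW OSDs are present
    for fsid, details in report.items():
        # These are the fields Zap.ensure_associated_raw() reads below.
        print(fsid, details.get('osd_id'), details.get('device'))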
@@ -95,83 +96,126 @@ def zap_data(path):
         'conv=fsync'
     ])

-def find_associated_devices(osd_id: str = '', osd_fsid: str = '') -> List[api.Volume]:
-    """
-    From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the
-    system that match those tag values, further detect if any partitions are
-    part of the OSD, and then return the set of LVs and partitions (if any).
-    """
-    lv_tags = {}
-    lv_tags = {key: value for key, value in {
-        'ceph.osd_id': osd_id,
-        'ceph.osd_fsid': osd_fsid
-    }.items() if value}

-    lvs = api.get_lvs(tags=lv_tags)
+class Zap:
+    help = 'Removes all data and filesystems from a logical volume or partition.'

-    if not lvs:
-        raise RuntimeError('Unable to find any LV for zapping OSD: '
-                           f'{osd_id or osd_fsid}')
-    devices_to_zap = ensure_associated_lvs(lvs, lv_tags)
+    def __init__(self, argv: List[str]) -> None:
+        self.argv = argv
+        self.osd_ids_to_zap: List[str] = []

-    return [Device(path) for path in set(devices_to_zap) if path]
+    def ensure_associated_raw(self, raw_report: Dict[str, Any]) -> List[str]:
+        osd_id: str = self.args.osd_id
+        osd_uuid: str = self.args.osd_fsid
+        raw_devices: Set[str] = set()

-def ensure_associated_lvs(lvs: List[api.Volume],
-                          lv_tags: Dict[str, Any] = {}) -> List[str]:
-    """
-    Go through each LV and ensure if backing devices (journal, wal, block)
-    are LVs or partitions, so that they can be accurately reported.
-    """
-    # look for many LVs for each backing type, because it is possible to
-    # receive a filtering for osd.1, and have multiple failed deployments
-    # leaving many journals with osd.1 - usually, only a single LV will be
-    # returned
-
-    db_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'db'}))
-    wal_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'wal'}))
-    backing_devices = [(db_lvs, 'db'),
-                       (wal_lvs, 'wal')]
-
-    verified_devices = []
-
-    for lv in lvs:
-        # go through each lv and append it, otherwise query `blkid` to find
-        # a physical device. Do this for each type (journal,db,wal) regardless
-        # if they have been processed in the previous LV, so that bad devices
-        # with the same ID can be caught
-        for ceph_lvs, _type in backing_devices:
-            if ceph_lvs:
-                verified_devices.extend([l.lv_path for l in ceph_lvs])
-                continue
-
-            # must be a disk partition, by querying blkid by the uuid we are
-            # ensuring that the device path is always correct
-            try:
-                device_uuid = lv.tags['ceph.%s_uuid' % _type]
-            except KeyError:
-                # Bluestore will not have ceph.journal_uuid, and Filestore
-                # will not not have ceph.db_uuid
-                continue
+        if len([details.get('osd_id') for _, details in raw_report.items() if details.get('osd_id') == osd_id]) > 1:
+            if not osd_uuid:
+                raise RuntimeError(f'Multiple OSDs found with id {osd_id}, pass --osd-fsid')

-            osd_device = disk.get_device_from_partuuid(device_uuid)
-            if not osd_device:
-                # if the osd_device is not found by the partuuid, then it is
-                # not possible to ensure this device exists anymore, so skip it
-                continue
-            verified_devices.append(osd_device)
+        if not osd_uuid:
+            for _, details in raw_report.items():
+                if details.get('osd_id') == int(osd_id):
+                    osd_uuid = details.get('osd_uuid')
+                    break

-        verified_devices.append(lv.lv_path)
+        for osd_uuid, details in raw_report.items():
+            device: str = details.get('device')
+            if details.get('osd_uuid') == osd_uuid:
+                raw_devices.add(device)

-    # reduce the list from all the duplicates that were added
-    return list(set(verified_devices))
+        return list(raw_devices)
+

+    def find_associated_devices(self) -> List[api.Volume]:
+        """From an ``osd_id`` and/or an ``osd_fsid``, filter out all the Logical Volumes (LVs) in the
+        system that match those tag values, further detect if any partitions are
+        part of the OSD, and then return the set of LVs and partitions (if any).

-class Zap:
-    help = 'Removes all data and filesystems from a logical volume or partition.'
+        The function first queries the LVM-based OSDs using the provided `osd_id` or `osd_fsid`.
+        If no matches are found, it then searches the system for RAW-based OSDs.

-    def __init__(self, argv: List[str]) -> None:
-        self.argv = argv
-        self.osd_ids_to_zap: List[str] = []
+        Raises:
+            SystemExit: If no OSDs are found, the function raises a `SystemExit` with an appropriate message.
+
+        Returns:
+            List[api.Volume]: A list of `api.Volume` objects corresponding to the OSD's Logical Volumes (LVs)
+            or partitions that are associated with the given `osd_id` or `osd_fsid`.
+
+        Notes:
+            - If neither `osd_id` nor `osd_fsid` are provided, the function will not be able to find OSDs.
+            - The search proceeds from LVM-based OSDs to RAW-based OSDs if no Logical Volumes are found.
+        """
+        lv_tags = {}
+        lv_tags = {key: value for key, value in {
+            'ceph.osd_id': self.args.osd_id,
+            'ceph.osd_fsid': self.args.osd_fsid
+        }.items() if value}
+        devices_to_zap: List[str] = []
+        lvs = api.get_lvs(tags=lv_tags)
+
+        if lvs:
+            devices_to_zap = self.ensure_associated_lvs(lvs, lv_tags)
+        else:
+            mlogger.debug(f'No OSD identified by "{self.args.osd_id or self.args.osd_fsid}" was found among LVM-based OSDs.')
+            mlogger.debug('Proceeding to check RAW-based OSDs.')
+            raw_osds: Dict[str, Any] = direct_report()
+            if raw_osds:
+                devices_to_zap = self.ensure_associated_raw(raw_osds)
+        if not devices_to_zap:
+            raise SystemExit('No OSD were found.')
+
+        return [Device(path) for path in set(devices_to_zap) if path]
+
+    def ensure_associated_lvs(self,
+                              lvs: List[api.Volume],
+                              lv_tags: Dict[str, Any] = {}) -> List[str]:
+        """
+        Go through each LV and ensure if backing devices (journal, wal, block)
+        are LVs or partitions, so that they can be accurately reported.
+        """
+        # look for many LVs for each backing type, because it is possible to
+        # receive a filtering for osd.1, and have multiple failed deployments
+        # leaving many journals with osd.1 - usually, only a single LV will be
+        # returned
+
+        db_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'db'}))
+        wal_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'wal'}))
+        backing_devices = [(db_lvs, 'db'),
+                           (wal_lvs, 'wal')]
+
+        verified_devices = []
+
+        for lv in lvs:
+            # go through each lv and append it, otherwise query `blkid` to find
+            # a physical device. Do this for each type (journal,db,wal) regardless
+            # if they have been processed in the previous LV, so that bad devices
+            # with the same ID can be caught
+            for ceph_lvs, _type in backing_devices:
+                if ceph_lvs:
+                    verified_devices.extend([l.lv_path for l in ceph_lvs])
+                    continue
+
+                # must be a disk partition, by querying blkid by the uuid we are
+                # ensuring that the device path is always correct
+                try:
+                    device_uuid = lv.tags['ceph.%s_uuid' % _type]
+                except KeyError:
+                    # Bluestore will not have ceph.journal_uuid, and Filestore
+                    # will not not have ceph.db_uuid
+                    continue
+
+                osd_device = disk.get_device_from_partuuid(device_uuid)
+                if not osd_device:
+                    # if the osd_device is not found by the partuuid, then it is
+                    # not possible to ensure this device exists anymore, so skip it
+                    continue
+                verified_devices.append(osd_device)
+
+            verified_devices.append(lv.lv_path)
+
+        # reduce the list from all the duplicates that were added
+        return list(set(verified_devices))

     def unmount_lv(self, lv: api.Volume) -> None:
         if lv.tags.get('ceph.cluster_name') and lv.tags.get('ceph.osd_id'):
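
To make the osd_uuid resolution step in ensure_associated_raw() concrete: when only --osd-id is given, the method recovers the fsid by scanning the RAW report, casting the id because argparse delivers it as a string while the report (as the cast suggests) stores integers. A small standalone sketch with a hypothetical one-entry report (uuid and device path are made up):

    osd_id = '1'            # from the CLI, a string
    osd_uuid = ''           # no --osd-fsid passed
    raw_report = {
        'aaaa-1111': {'osd_id': 1, 'osd_uuid': 'aaaa-1111', 'device': '/dev/sdb'},
    }
    if not osd_uuid:
        for _, details in raw_report.items():
            if details.get('osd_id') == int(osd_id):
                osd_uuid = details.get('osd_uuid')
                break
    # osd_uuid is now 'aaaa-1111' and the matching device can be collected for zapping.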
@@ -355,7 +399,6 @@ def zap(self) -> None:
             SystemExit: When the device is a mapper and not a mpath device.
         """
         devices = self.args.devices
-
         for device in devices:
             mlogger.info("Zapping: %s", device.path)
             if device.is_mapper and not device.is_mpath:
@@ -388,7 +431,7 @@ def zap_osd(self) -> None:
             mlogger.error("OSD ID %s is running, stop it with:" % self.args.osd_id)
             mlogger.error("systemctl stop ceph-osd@%s" % self.args.osd_id)
             raise SystemExit("Unable to zap devices associated with OSD ID: %s" % self.args.osd_id)
-        self.args.devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid)
+        self.args.devices = self.find_associated_devices()
         self.zap()

     def dmcrypt_close(self, dmcrypt_uuid: str) -> None:
