Commit 7dfecee

ceph-volume: support splitting db even on collocated scenario
This change enables ceph-volume to create OSDs where the DB is explicitly placed on a separate LVM logical volume, even in collocated scenarios (i.e., block and DB on the same device). This helps mitigate BlueStore fragmentation issues.

Given that ceph-volume can't automatically predict a proper default size for the DB device, the idea is to rely on the `--block-db-size` parameter.

Passing `--block-db-size` together with `--db-devices` makes ceph-volume create DB devices on dedicated devices (current implementation):

```
Total OSDs: 2

  Type          Path          LV Size       % of device
----------------------------------------------------------------------------------------------------
  data          /dev/vdb      200.00 GB     100.00%
  block_db      /dev/vdd      4.00 GB       2.00%
----------------------------------------------------------------------------------------------------
  data          /dev/vdc      200.00 GB     100.00%
  block_db      /dev/vdd      4.00 GB       2.00%
```

Passing `--block-db-size` without `--db-devices` makes ceph-volume create a separate LV for the DB device on the same device (new behavior):

```
Total OSDs: 2

  Type          Path          LV Size       % of device
----------------------------------------------------------------------------------------------------
  data          /dev/vdb      196.00 GB     98.00%
  block_db      /dev/vdb      4.00 GB       2.00%
----------------------------------------------------------------------------------------------------
  data          /dev/vdc      196.00 GB     98.00%
  block_db      /dev/vdc      4.00 GB       2.00%
```

This new behavior also works in combination with the `--osds-per-device` parameter:

```
Total OSDs: 4

  Type          Path          LV Size       % of device
----------------------------------------------------------------------------------------------------
  data          /dev/vdb      96.00 GB      48.00%
  block_db      /dev/vdb      4.00 GB       2.00%
----------------------------------------------------------------------------------------------------
  data          /dev/vdb      96.00 GB      48.00%
  block_db      /dev/vdb      4.00 GB       2.00%
----------------------------------------------------------------------------------------------------
  data          /dev/vdc      96.00 GB      48.00%
  block_db      /dev/vdc      4.00 GB       2.00%
----------------------------------------------------------------------------------------------------
  data          /dev/vdc      96.00 GB      48.00%
  block_db      /dev/vdc      4.00 GB       2.00%
```

Fixes: https://tracker.ceph.com/issues/69996

Signed-off-by: Guillaume Abrioux <[email protected]>
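For context, a minimal sketch (plain Python, not ceph-volume's actual implementation; `plan_device` is a hypothetical helper) of the sizing arithmetic behind the reports above: the DB space for every OSD slot on a device is reserved first, and the data LVs are sized from the remainder.

```python
GB = 1024 ** 3

def plan_device(dev_size_b: int, block_db_size_b: int,
                osds_per_device: int = 1,
                data_allocate_fraction: float = 1.0) -> dict:
    """Per-OSD data/DB LV sizes for one device with a collocated DB."""
    rel_data_size = data_allocate_fraction / osds_per_device
    # Reserve DB space for every OSD slot before sizing the data LVs.
    usable = dev_size_b - block_db_size_b * osds_per_device
    data_size = int(usable * rel_data_size)
    return {'data_gb': data_size / GB,
            'block_db_gb': block_db_size_b / GB,
            'data_pct': round(100 * data_size / dev_size_b, 2)}

# 200 GB device, 4 GB DB, 1 OSD  -> 196 GB data (98.00%), as in the report above
print(plan_device(200 * GB, 4 * GB, osds_per_device=1))
# 200 GB device, 4 GB DB, 2 OSDs -> 96 GB data each (48.00%)
print(plan_device(200 * GB, 4 * GB, osds_per_device=2))
```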
1 parent 8d8f203 commit 7dfecee

File tree

3 files changed: +113 −65 lines

src/ceph-volume/ceph_volume/devices/lvm/batch.py

Lines changed: 39 additions & 10 deletions

```diff
@@ -19,9 +19,9 @@
     * {path: <25} {size: <10} {state}"""


-def ensure_disjoint_device_lists(data: List[str],
-                                 db: Optional[List[str]] = None,
-                                 wal: Optional[List[str]] = None) -> None:
+def ensure_disjoint_device_lists(data: List[device.Device],
+                                 db: Optional[List[device.Device]] = None,
+                                 wal: Optional[List[device.Device]] = None) -> None:
     if db is None:
         db = []
     if wal is None:
@@ -341,6 +341,10 @@ def main(self) -> None:
             self.parser.print_help()
             raise SystemExit(0)

+        self.args.has_block_db_size_without_db_devices = (
+            self.args.block_db_size is not None and not self.args.db_devices
+        )
+
         if (self.args.auto and not self.args.db_devices and not
                 self.args.wal_devices):
             self._sort_rotational_disks()
@@ -393,7 +397,10 @@ def get_deployment_layout(self) -> List["OSD"]:
         functions.
         '''
         devices = self.args.devices
-        fast_devices = self.args.db_devices
+        if self.args.block_db_size is not None:
+            fast_devices = self.args.db_devices or self.args.devices
+        else:
+            fast_devices = self.args.db_devices
         very_fast_devices = self.args.wal_devices
         plan = []
         phys_devs, lvm_devs = separate_devices_from_lvs(devices)
@@ -436,10 +443,20 @@ def get_deployment_layout(self) -> List["OSD"]:
                     len(very_fast_allocations), num_osds))
                 exit(1)

-        for osd in plan:
-            if fast_devices:
-                osd.add_fast_device(*fast_allocations.pop(),
-                                    type_=fast_type)
+        if fast_devices:
+            fast_alloc: Optional[tuple[str, float, disk.Size, int]] = None
+            for osd in plan:
+                if self.args.has_block_db_size_without_db_devices:
+                    for i, _fast_alloc in enumerate(fast_allocations):
+                        if osd.data.path == _fast_alloc[0]:
+                            fast_alloc = fast_allocations.pop(i)
+                            break
+                else:
+                    fast_alloc = fast_allocations.pop() if fast_allocations else None
+
+                if fast_alloc:
+                    osd.add_fast_device(*fast_alloc, type_=fast_type)
+
         if very_fast_devices and self.args.objectstore == 'bluestore':
             osd.add_very_fast_device(*very_fast_allocations.pop())
         return plan
@@ -586,13 +603,25 @@ def get_physical_osds(devices: List[device.Device], args: argparse.Namespace) ->
         data_slots = args.osds_per_device
     if args.data_slots:
         data_slots = max(args.data_slots, args.osds_per_device)
-    rel_data_size = args.data_allocate_fraction / data_slots
-    mlogger.debug('relative data size: {}'.format(rel_data_size))
+    #rel_data_size = args.data_allocate_fraction / data_slots
+    #mlogger.debug('relative data size: {}'.format(rel_data_size))
     ret = []
     for dev in devices:
+        rel_data_size = args.data_allocate_fraction / data_slots
         if dev.available_lvm:
+            total_dev_size = dev.vg_size[0]
             dev_size = dev.vg_size[0]
+
+            if args.has_block_db_size_without_db_devices:
+                all_db_space = args.block_db_size * data_slots
+                dev_size -= all_db_space.b.as_int()
+
             abs_size = disk.Size(b=int(dev_size * rel_data_size))
+            mlogger.error(f'{dev_size} {abs_size} {rel_data_size}')
+
+            if args.has_block_db_size_without_db_devices:
+                rel_data_size = abs_size / disk.Size(b=total_dev_size)
+
             free_size = dev.vg_free[0]
             for _ in range(args.osds_per_device):
                 if abs_size > free_size:
```
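The pairing step in `get_deployment_layout` above is the subtle part: when the DB LV is collocated, each OSD must receive the fast allocation carved from its *own* data device, so allocations are matched by device path rather than popped in arbitrary order. A simplified, runnable sketch of that logic (names and tuple layout are illustrative, not the exact ceph-volume structures):

```python
from typing import List, Optional, Tuple

# (device path, relative size, absolute size in GB, slots) -- a stand-in for
# the allocation tuples ceph-volume tracks in fast_allocations.
Alloc = Tuple[str, float, float, int]

def pick_fast_alloc(data_path: str,
                    fast_allocations: List[Alloc],
                    collocated_db: bool) -> Optional[Alloc]:
    if collocated_db:
        # DB shares the data device: match the allocation by path so the OSD
        # gets the DB LV carved out of its own device.
        for i, alloc in enumerate(fast_allocations):
            if alloc[0] == data_path:
                return fast_allocations.pop(i)
        return None
    # Dedicated --db-devices: any remaining allocation will do.
    return fast_allocations.pop() if fast_allocations else None

allocs = [('/dev/vdb', 0.02, 4.0, 1), ('/dev/vdc', 0.02, 4.0, 1)]
assert pick_fast_alloc('/dev/vdc', allocs, collocated_db=True)[0] == '/dev/vdc'
assert pick_fast_alloc('/dev/vdb', allocs, collocated_db=True)[0] == '/dev/vdb'
assert pick_fast_alloc('/dev/vdb', allocs, collocated_db=True) is None
```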

src/ceph-volume/ceph_volume/tests/conftest.py

Lines changed: 27 additions & 26 deletions

```diff
@@ -1,6 +1,7 @@
 import os
 import pytest
-from mock.mock import patch, PropertyMock, create_autospec, Mock
+import argparse
+from mock.mock import patch, PropertyMock, create_autospec, Mock, MagicMock
 from ceph_volume.api import lvm
 from ceph_volume.util import disk
 from ceph_volume.util import device
@@ -29,14 +30,14 @@ def __call__(self, *a, **kw):

 class Factory(object):

-    def __init__(self, **kw):
+    def __init__(self, **kw: Any) -> None:
         for k, v in kw.items():
             setattr(self, k, v)


 @pytest.fixture
-def factory():
-    return Factory
+def factory() -> Callable[..., argparse.Namespace]:
+    return argparse.Namespace

 def objectstore_bluestore_factory(**kw):
     o = objectstore.bluestore.BlueStore([])
@@ -70,29 +71,29 @@ def mock_lv():
         return dev
     return mock_lv

-def mock_device(name='foo',
-                vg_name='vg_foo',
-                vg_size=None,
-                lv_name='lv_foo',
-                lv_size=None,
-                path='foo',
-                lv_path='',
-                number_lvs=0):
+def mock_device(**kw: Any) -> MagicMock:
+    number_lvs = kw.get('number_lvs', 0)
+    default_values = {
+        'vg_size': [21474836480],
+        'lv_size': kw.get('vg_size', [21474836480]),
+        'path': f"/dev/{kw.get('path', 'foo')}",
+        'vg_name': 'vg_foo',
+        'lv_name': 'lv_foo',
+        'symlink': None,
+        'available_lvm': True,
+        'vg_free': kw.get('vg_size', [21474836480]),
+        'lvs': [],
+        'lv_path': f"/dev/{kw.get('vg_name', 'vg_foo')}/{kw.get('lv_name', 'lv_foo')}",
+        'vgs': [lvm.VolumeGroup(vg_name=kw.get('vg_name', 'vg_foo'), lv_name=kw.get('lv_name', 'lv_foo'))],
+    }
+    for key, value in default_values.items():
+        kw.setdefault(key, value)
+
     dev = create_autospec(device.Device)
-    if vg_size is None:
-        dev.vg_size = [21474836480]
-    if lv_size is None:
-        lv_size = dev.vg_size
-    dev.lv_size = lv_size
-    dev.path = f'/dev/{path}'
-    dev.vg_name = f'{vg_name}'
-    dev.lv_name = f'{lv_name}'
-    dev.lv_path = lv_path if lv_path else f'/dev/{dev.vg_name}/{dev.lv_name}'
-    dev.symlink = None
-    dev.vgs = [lvm.VolumeGroup(vg_name=dev.vg_name, lv_name=dev.lv_name)]
-    dev.available_lvm = True
-    dev.vg_free = dev.vg_size
-    dev.lvs = []
+
+    for k, v in kw.items():
+        dev.__dict__[k] = v
+
     for n in range(0, number_lvs):
         dev.lvs.append(lvm.Volume(vg_name=f'{dev.vg_name}{n}',
                                   lv_name=f'{dev.lv_name}-{n}',
```
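The refactored helper accepts arbitrary keyword overrides and back-fills the remaining `Device` attributes from `default_values`. A hedged usage sketch (values illustrative; in the test suite the helper is normally reached through fixtures such as `mock_device_generator`):

```python
from ceph_volume.tests.conftest import mock_device  # assuming the package is importable

# Override just the attributes a test cares about; everything else
# (vg_name, lv_name, availability, ...) falls back to the defaults above.
dev = mock_device(vg_size=[214748364800])   # a 200 GiB device

assert dev.path == '/dev/foo'               # default name 'foo', /dev/-prefixed
assert dev.vg_free == [214748364800]        # vg_free mirrors the vg_size passed in
assert dev.available_lvm is True            # default: usable for LVM deployment
```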

src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py

Lines changed: 47 additions & 29 deletions

```diff
@@ -2,11 +2,13 @@
 import json
 import random

-from argparse import ArgumentError
+from argparse import ArgumentError, Namespace
 from mock import MagicMock, patch

 from ceph_volume.devices.lvm import batch
-from ceph_volume.util import arg_validators
+from ceph_volume.util import arg_validators, disk, device
+from ceph_volume.configuration import Conf
+from typing import List, Callable


 class TestBatch(object):
@@ -20,9 +22,9 @@ def test_invalid_osd_ids_passed(self):
         with pytest.raises(SystemExit):
             batch.Batch(argv=['--osd-ids', '1', 'foo']).main()

-    def test_disjoint_device_lists(self, factory):
-        device1 = factory(used_by_ceph=False, available=True, abspath="/dev/sda")
-        device2 = factory(used_by_ceph=False, available=True, abspath="/dev/sdb")
+    def test_disjoint_device_lists(self, mock_device_generator: Callable) -> None:
+        device1 = mock_device_generator(used_by_ceph=False, available=True, abspath='/dev/sda')
+        device2 = mock_device_generator(used_by_ceph=False, available=True, abspath='/dev/sdb')
         devices = [device1, device2]
         db_devices = [device2]
         with pytest.raises(Exception) as disjoint_ex:
@@ -55,7 +57,8 @@ def test_report(self, format_, factory, conf_ceph_stub, mock_device_generator):
             db_devices=[],
             wal_devices=[],
             objectstore='bluestore',
-            block_db_size="1G",
+            block_db_size=disk.Size(gb=1),
+            block_db_slots=1,
             dmcrypt=True,
             data_allocate_fraction=1.0,
         )
@@ -174,38 +177,53 @@ def test_batch_sort_mixed(self, factory, objectstore):
         assert len(b.args.devices) == 2
         assert len(b.args.db_devices) == 1

-    def test_get_physical_osds_return_len(self, factory,
-                                          mock_devices_available,
-                                          conf_ceph_stub,
-                                          osds_per_device):
+    def test_get_physical_osds_return_len(self,
+                                          factory: Callable[..., Namespace],
+                                          mock_devices_available: List[device.Device],
+                                          conf_ceph_stub: Callable[[str], Conf],
+                                          osds_per_device: int) -> None:
         conf_ceph_stub('[global]\nfsid=asdf-lkjh')
-        args = factory(data_slots=1, osds_per_device=osds_per_device,
-                       osd_ids=[], dmcrypt=False,
-                       data_allocate_fraction=1.0)
+        args = factory(data_slots=1,
+                       osds_per_device=osds_per_device,
+                       osd_ids=[],
+                       dmcrypt=False,
+                       data_allocate_fraction=1.0,
+                       block_db_size=None,
+                       db_devices=[])
         osds = batch.get_physical_osds(mock_devices_available, args)
         assert len(osds) == len(mock_devices_available) * osds_per_device

-    def test_get_physical_osds_rel_size(self, factory,
-                                        mock_devices_available,
-                                        conf_ceph_stub,
-                                        osds_per_device,
-                                        data_allocate_fraction):
-        args = factory(data_slots=1, osds_per_device=osds_per_device,
-                       osd_ids=[], dmcrypt=False,
-                       data_allocate_fraction=data_allocate_fraction)
+    def test_get_physical_osds_rel_size(self,
+                                        factory: Callable[..., Namespace],
+                                        mock_devices_available: List[device.Device],
+                                        conf_ceph_stub: Callable[[str], Conf],
+                                        osds_per_device: int,
+                                        data_allocate_fraction: float) -> None:
+        args = factory(data_slots=1,
+                       osds_per_device=osds_per_device,
+                       osd_ids=[],
+                       dmcrypt=False,
+                       data_allocate_fraction=data_allocate_fraction,
+                       block_db_size=None,
+                       db_devices=[])
         osds = batch.get_physical_osds(mock_devices_available, args)
         for osd in osds:
             assert osd.data[1] == data_allocate_fraction / osds_per_device

-    def test_get_physical_osds_abs_size(self, factory,
-                                        mock_devices_available,
-                                        conf_ceph_stub,
-                                        osds_per_device,
-                                        data_allocate_fraction):
+    def test_get_physical_osds_abs_size(self,
+                                        factory: Callable[..., Namespace],
+                                        mock_devices_available: List[device.Device],
+                                        conf_ceph_stub: Callable[[str], Conf],
+                                        osds_per_device: int,
+                                        data_allocate_fraction: float) -> None:
         conf_ceph_stub('[global]\nfsid=asdf-lkjh')
-        args = factory(data_slots=1, osds_per_device=osds_per_device,
-                       osd_ids=[], dmcrypt=False,
-                       data_allocate_fraction=data_allocate_fraction)
+        args = factory(data_slots=1,
+                       osds_per_device=osds_per_device,
+                       osd_ids=[],
+                       dmcrypt=False,
+                       data_allocate_fraction=data_allocate_fraction,
+                       block_db_size=None,
+                       db_devices=[])
         osds = batch.get_physical_osds(mock_devices_available, args)
         for osd, dev in zip(osds, mock_devices_available):
             assert osd.data[2] == int(dev.vg_size[0] * (data_allocate_fraction / osds_per_device))
```
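Why swap `Factory` for `argparse.Namespace` in the `factory` fixture these tests use? A standalone illustration (not part of the patch), assuming nothing beyond the standard library:

```python
import argparse

# Namespace sets attributes exactly like the old Factory's setattr loop, but it
# also compares by value and has a readable repr, which makes failed asserts on
# the new block_db_size/db_devices attributes easier to diagnose.
args = argparse.Namespace(data_slots=1,
                          osds_per_device=2,
                          osd_ids=[],
                          dmcrypt=False,
                          data_allocate_fraction=1.0,
                          block_db_size=None,   # the new attributes every
                          db_devices=[])        # get_physical_osds test must set
assert args.osds_per_device == 2
assert args == argparse.Namespace(**vars(args))  # value equality, unlike Factory
```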
