Skip to content

Commit a3f69e4

Browse files
committed
mgr/cephadm: additional debug logging for autotuner
This came from trying to debug the behavior of the autotuner as reported on the upstream mailing list. The test case being added reproduces what that user was seeing. The debug logging being added was useful in getting a full understanding of how the autotuner arrived at the result it did. Therefore, add the logging to the actual codebase so we can make use of it to debug autotuner issues in the future. Signed-off-by: Adam King <[email protected]>
1 parent 4774249 commit a3f69e4

File tree

3 files changed

+50
-4
lines changed

3 files changed

+50
-4
lines changed

src/pybind/mgr/cephadm/autotune.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,24 +32,38 @@ def __init__(
3232
def tune(self) -> Tuple[Optional[int], List[str]]:
3333
tuned_osds: List[str] = []
3434
total = self.total_mem
35+
logger.debug('Autotuning OSD memory with given parameters:\n'
36+
f'Total memory: {total}\nDaemons: {self.daemons}')
3537
for d in self.daemons:
3638
if d.daemon_type == 'mds':
37-
total -= self.config_get(d.name(), 'mds_cache_memory_limit')
39+
mds_mem = self.config_get(d.name(), 'mds_cache_memory_limit')
40+
logger.debug(f'Subtracting {mds_mem} from total for mds daemon')
41+
total -= mds_mem
42+
logger.debug(f'new total: {total}')
3843
continue
3944
if d.daemon_type != 'osd':
4045
assert d.daemon_type
41-
total -= max(
46+
daemon_mem = max(
4247
self.min_size_by_type.get(d.daemon_type, self.default_size),
4348
d.memory_usage or 0
4449
)
50+
logger.debug(f'Subtracting {daemon_mem} from total for {d.daemon_type} daemon')
51+
total -= daemon_mem
52+
logger.debug(f'new total: {total}')
4553
continue
4654
if not self.config_get(d.name(), 'osd_memory_target_autotune'):
47-
total -= self.config_get(d.name(), 'osd_memory_target')
55+
osd_mem = self.config_get(d.name(), 'osd_memory_target')
56+
logger.debug('osd_memory_target_autotune disabled. '
57+
f'Subtracting {osd_mem} from total for osd daemon')
58+
total -= osd_mem
59+
logger.debug(f'new total: {total}')
4860
continue
4961
tuned_osds.append(d.name())
5062
if total < 0:
5163
return None, []
5264
if not tuned_osds:
5365
return None, []
66+
logger.debug(f'Final total is {total} to be split among {len(tuned_osds)} OSDs')
5467
per = total // len(tuned_osds)
68+
logger.debug(f'Result is {per} per OSD')
5569
return int(per), tuned_osds

src/pybind/mgr/cephadm/serve.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,9 @@ def _autotune_host_memory(self, host: str) -> None:
195195
val = None
196196
else:
197197
total_mem *= 1024 # kb -> bytes
198+
self.log.debug(f'Autotuning memory for host {host} with '
199+
f'{total_mem} total bytes of memory and '
200+
f'{self.mgr.autotune_memory_target_ratio} target ratio')
198201
total_mem *= self.mgr.autotune_memory_target_ratio
199202
a = MemoryAutotuner(
200203
daemons=self.mgr.cache.get_daemons_by_host(host),
@@ -231,6 +234,9 @@ def _autotune_host_memory(self, host: str) -> None:
231234
# options as users may be using them. Since there is no way to set autotuning
232235
# on/off at a host level, best we can do is check if it is globally on.
233236
if self.mgr.get_foreign_ceph_option('osd', 'osd_memory_target_autotune'):
237+
self.mgr.log.debug(f'Removing osd_memory_target for OSDs on {host}'
238+
' as either there were no OSDs to tune or the '
239+
' per OSD memory calculation result was <= 0')
234240
self.mgr.check_mon_command({
235241
'prefix': 'config rm',
236242
'who': f'osd/host:{host.split(".")[0]}',

src/pybind/mgr/cephadm/tests/test_autotune.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,31 @@
5757
],
5858
{},
5959
60 * 1024 * 1024 * 1024,
60-
)
60+
),
61+
( # Taken from an actual user case
62+
int(32827840 * 1024 * 0.7),
63+
[
64+
DaemonDescription('crash', 'a', 'host1'),
65+
DaemonDescription('grafana', 'a', 'host1'),
66+
DaemonDescription('mds', 'a', 'host1'),
67+
DaemonDescription('mds', 'b', 'host1'),
68+
DaemonDescription('mds', 'c', 'host1'),
69+
DaemonDescription('mgr', 'a', 'host1'),
70+
DaemonDescription('mon', 'a', 'host1'),
71+
DaemonDescription('node-exporter', 'a', 'host1'),
72+
DaemonDescription('osd', '1', 'host1'),
73+
DaemonDescription('osd', '2', 'host1'),
74+
DaemonDescription('osd', '3', 'host1'),
75+
DaemonDescription('osd', '4', 'host1'),
76+
DaemonDescription('prometheus', 'a', 'host1'),
77+
],
78+
{
79+
'mds.a': 4 * 1024 * 1024 * 1024, # 4294967296
80+
'mds.b': 4 * 1024 * 1024 * 1024,
81+
'mds.c': 4 * 1024 * 1024 * 1024,
82+
},
83+
480485376,
84+
),
6185
])
6286
def test_autotune(total, daemons, config, result):
6387
def fake_getter(who, opt):
@@ -69,6 +93,8 @@ def fake_getter(who, opt):
6993
if opt == 'osd_memory_target':
7094
return config.get(who, 4 * 1024 * 1024 * 1024)
7195
if opt == 'mds_cache_memory_limit':
96+
if who in config:
97+
return config.get(who, 16 * 1024 * 1024 * 1024)
7298
return 16 * 1024 * 1024 * 1024
7399

74100
a = MemoryAutotuner(

0 commit comments

Comments
 (0)