Skip to content

Commit c405e70

Browse files
cephadm: refactor the infer_local_ceph_image function
Refactor the infer_local_ceph_image function to use the low level daemons_matching function and avoid ever looping over the list of running daemons multiple times. Clean up the logic in infer_local_ceph_image such that it better matches the behavior in the original doc string as well as cleaning up said doc string. Fixes: https://tracker.ceph.com/issues/69278 Signed-off-by: John Mulligan <[email protected]>
1 parent 5b34735 commit c405e70

File tree

3 files changed

+83
-35
lines changed

3 files changed

+83
-35
lines changed

src/cephadm/cephadm.py

Lines changed: 69 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
concurrent_tasks,
8686
)
8787
from cephadmlib.container_engines import (
88+
ImageInfo,
8889
Podman,
8990
check_container_engine,
9091
find_container_engine,
@@ -202,7 +203,6 @@
202203
MemUsageStatusUpdater,
203204
VersionStatusUpdater,
204205
)
205-
from cephadmlib.container_lookup import get_container_info
206206

207207

208208
FuncT = TypeVar('FuncT', bound=Callable)
@@ -466,42 +466,82 @@ def update_default_image(ctx: CephadmContext) -> None:
466466
ctx.image = _get_default_image(ctx)
467467

468468

469-
def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
470-
"""
471-
Infer the local ceph image based on the following priority criteria:
472-
1- the image specified by --image arg (if provided).
473-
2- the same image as the daemon container specified by --name arg (if provided).
474-
3- image used by any ceph container running on the host. In this case we use daemon types.
475-
4- if no container is found then we use the most ceph recent image on the host.
476-
477-
Note: any selected container must have the same fsid inferred previously.
469+
def infer_local_ceph_image(
470+
ctx: CephadmContext, container_path: str = ''
471+
) -> Optional[str]:
472+
"""Infer the best ceph image to use based on the following criteria:
473+
Out of all images labeled as ceph that are non-dangling, prefer
474+
1. the same image as the daemon container specified by -name arg (if provided).
475+
2. the image used by any ceph container running on the host
476+
3. the most ceph recent image on the host
478477
479-
:return: The most recent local ceph image (already pulled)
478+
:return: An image name or none
480479
"""
480+
from operator import itemgetter
481+
482+
# enumerate ceph images on the system
481483
images = parsed_container_image_list(
482484
ctx,
483485
filters=['dangling=false', 'label=ceph=True'],
484486
container_path=container_path,
485487
)
488+
if not images:
489+
logger.warning('No non-dangling ceph images found')
490+
return None # no images at all cached on host
491+
492+
# find running ceph daemons
493+
_daemons = ceph_daemons()
494+
daemon_name = getattr(ctx, 'name', '')
495+
_cinfo_key = '_container_info'
496+
_updater = CoreStatusUpdater(keep_container_info=_cinfo_key)
497+
matching_daemons = [
498+
itemgetter(_cinfo_key, 'name')(_updater.expand(ctx, entry))
499+
for entry in daemons_matching(
500+
ctx, fsid=ctx.fsid, daemon_type_predicate=lambda t: t in _daemons
501+
)
502+
]
503+
# collect the running ceph daemon image ids
504+
images_in_use_by_daemon = set(
505+
d.image_id for d, n in matching_daemons if n == daemon_name
506+
)
507+
images_in_use = set(d.image_id for d, _ in matching_daemons)
508+
509+
# prioritize images
510+
def _keyfunc(image: ImageInfo) -> Tuple[bool, bool, str]:
511+
return (
512+
bool(
513+
image.digest
514+
and any(
515+
v.startswith(image.image_id)
516+
for v in images_in_use_by_daemon
517+
)
518+
),
519+
bool(
520+
image.digest
521+
and any(v.startswith(image.image_id) for v in images_in_use)
522+
),
523+
image.created,
524+
)
486525

487-
container_info = None
488-
daemon_name = ctx.name if ('name' in ctx and ctx.name and '.' in ctx.name) else None
489-
daemons_ls = [daemon_name] if daemon_name is not None else ceph_daemons() # daemon types: 'mon', 'mgr', etc
490-
for daemon in daemons_ls:
491-
container_info = get_container_info(ctx, daemon, daemon_name is not None)
492-
if container_info is not None:
493-
logger.debug(f"Using container info for daemon '{daemon}'")
494-
break
495-
496-
for image in images:
497-
if container_info is not None and image.image_id not in container_info.image_id:
498-
continue
499-
if image.digest:
500-
logger.info(f"Using ceph image with id '{image.image_id}' and tag '{image.tag}' created on {image.created}\n{image.name}")
501-
return image.name
502-
if container_info is not None:
503-
logger.warning(f"Not using image '{container_info.image_id}' as it's not in list of non-dangling images with ceph=True label")
504-
return None
526+
images.sort(key=_keyfunc, reverse=True)
527+
best_image = images[0]
528+
name_match, ceph_match, _ = _keyfunc(best_image)
529+
reason = 'not in the list of non-dangling images with ceph=True label'
530+
if images_in_use_by_daemon and not name_match:
531+
expected = list(images_in_use_by_daemon)[0]
532+
logger.warning(
533+
'Not using image %r of named daemon: %s',
534+
expected,
535+
reason,
536+
)
537+
if images_in_use and not ceph_match:
538+
expected = list(images_in_use)[0]
539+
logger.warning(
540+
'Not using image %r of ceph daemon: %s',
541+
expected,
542+
reason,
543+
)
544+
return best_image.name
505545

506546

507547
def get_log_dir(fsid, log_dir):

src/cephadm/tests/fixtures.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def with_cephadm_ctx(
169169
with contextlib.ExitStack() as stack:
170170
stack.enter_context(mock.patch('cephadmlib.net_utils.attempt_bind'))
171171
stack.enter_context(mock.patch('cephadmlib.exe_utils.find_executable', return_value='foo'))
172-
stack.enter_context(mock.patch('cephadm.get_container_info', return_value=None))
172+
stack.enter_context(mock.patch('cephadmlib.container_lookup.get_container_info', return_value=None))
173173
stack.enter_context(mock.patch('cephadm.is_available', return_value=True))
174174
stack.enter_context(mock.patch('cephadm.json_loads_retry', return_value={'epoch' : 1}))
175175
stack.enter_context(mock.patch('cephadm.logger'))

src/cephadm/tests/test_cephadm.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,7 @@ def test_dict_get_join(self):
644644
'expected': 'quay.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508',
645645
},
646646
# ceph images in local store do not match running instance
647+
# return a valid ceph image
647648
{
648649
'container_info': _container_info(
649650
'a1bb549714b8f007c6a4e29c758689cf9e8e69f2e0f51180506492974b90a972',
@@ -657,7 +658,7 @@ def test_dict_get_join(self):
657658
quay.ceph.io/ceph-ci/ceph@sha256:eeddcc536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|pacific|2022-03-23 16:29:19 +0000 UTC
658659
'''
659660
),
660-
'expected': None,
661+
'expected': 'quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185',
661662
},
662663
# ceph image in store have been pulled (new image) since daemon started
663664
{
@@ -686,7 +687,11 @@ def test_infer_local_ceph_image(self, params, funkypatch):
686687
out = params.get('images_output', '')
687688
expected = params.get('expected', None)
688689
funkypatch.patch('cephadmlib.call_wrappers.call').return_value = out, '', 0
689-
funkypatch.patch('cephadm.get_container_info').return_value = cinfo
690+
funkypatch.patch('cephadmlib.listing_updaters.CoreStatusUpdater')().expand.return_value = {
691+
'_container_info': cinfo,
692+
'name': 'mon.foo',
693+
}
694+
funkypatch.patch('cephadmlib.listing.daemons_matching').return_value = [0] if cinfo else []
690695
image = _cephadm.infer_local_ceph_image(
691696
ctx, ctx.container_engine
692697
)
@@ -840,6 +845,7 @@ def test_get_container_info(
840845
):
841846
import cephadmlib.listing
842847
import cephadmlib.daemon_identity
848+
from cephadmlib.container_lookup import get_container_info
843849

844850
ctx = _cephadm.CephadmContext()
845851
ctx.fsid = '00000000-0000-0000-0000-0000deadbeef'
@@ -873,12 +879,14 @@ def _dms(*args, **kwargs):
873879
'cephadmlib.container_types.get_container_stats'
874880
).return_value = cinfo
875881
assert (
876-
_cephadm.get_container_info(ctx, daemon_filter, by_name) == output
882+
get_container_info(ctx, daemon_filter, by_name) == output
877883
)
878884

879885
def test_get_container_info_daemon_down(self, funkypatch):
880886
import cephadmlib.listing
881887
import cephadmlib.daemon_identity
888+
from cephadmlib.container_lookup import get_container_info
889+
882890
funkypatch.patch('cephadmlib.systemd.check_unit').return_value = (True, 'stopped', '')
883891
funkypatch.patch('cephadmlib.call_wrappers.call').side_effect = ValueError
884892
_get_stats_by_name = funkypatch.patch('cephadmlib.container_engines.parsed_container_image_stats')
@@ -947,7 +955,7 @@ def _dms(*args, **kwargs):
947955
# redundant
948956
_get_stats_by_name.return_value = expected_container_info
949957

950-
assert _cephadm.get_container_info(ctx, 'osd.2', by_name=True) == expected_container_info
958+
assert get_container_info(ctx, 'osd.2', by_name=True) == expected_container_info
951959
assert not _get_stats.called, 'only get_container_stats_by_image_name should have been called'
952960

953961
# If there is one down and one up daemon of the same name, it should use the up one
@@ -966,7 +974,7 @@ def _dms(*args, **kwargs):
966974
start='the_past',
967975
version='')
968976

969-
assert _cephadm.get_container_info(ctx, 'osd.2', by_name=True) == expected_container_info
977+
assert get_container_info(ctx, 'osd.2', by_name=True) == expected_container_info
970978

971979
def test_should_log_to_journald(self):
972980
from cephadmlib import context_getters

0 commit comments

Comments
 (0)