Skip to content

Commit 6d6063a

Browse files
committed
mgr/cephadm: don't remove and deploy new daemon if ports change during upgrade
If we're not mid-upgrade, a port/ip change is most likely a user changing the configuration for the daemon and starting a new daemon with the new ports can be justified. During upgrade, there are a number of different upgrade paths (especially considering our N+2 upgrade support) where something internal to cephadm has made the ip we're binding to or the ports being used change. In these cases, the process of upgrading the daemon will resolve the issue. However, by having the scheduler unilaterally remove and deploy fresh versions of daemons when it sees port changes, we may effectively "upgrade" some daemons out of the intended order just to make the ports match up. This was seen with nvmeof which needs to be upgraded after the mon daemons, but was being removed and redeployed after the mgr upgrade once cephadm saw the set of expected ports had changed. This patch adds a new "upgrade_in_progress" attribute to the HostAssignment class in the scheduler to make it aware of an ongoing upgrade. It also changes the behavior specifically around whether a daemon matches another if the ports match up when "upgrade_in_progress" is set to True. Signed-off-by: Adam King <[email protected]>
1 parent ebf9c99 commit 6d6063a

File tree

2 files changed

+16
-8
lines changed

2 files changed

+16
-8
lines changed

src/pybind/mgr/cephadm/schedule.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -97,19 +97,24 @@ def assign_rank_generation(
9797
gen,
9898
)
9999

100-
def matches_daemon(self, dd: DaemonDescription) -> bool:
100+
def matches_daemon(self, dd: DaemonDescription, upgrade_in_progress: bool = False) -> bool:
101101
if self.daemon_type != dd.daemon_type:
102102
return False
103103
if self.hostname != dd.hostname:
104104
return False
105105
# fixme: how to match against network?
106106
if self.name and self.name != dd.daemon_id:
107107
return False
108-
if self.ports:
109-
if self.ports != dd.ports and dd.ports:
110-
return False
111-
if self.ip != dd.ip and dd.ip:
112-
return False
108+
# only consider daemon "not matching" on port/ip
109+
# differences if we're not mid upgrade. During upgrade
110+
# it's very likely we'll deploy the daemon with the
111+
# new port/ips as part of the upgrade process
112+
if not upgrade_in_progress:
113+
if self.ports:
114+
if self.ports != dd.ports and dd.ports:
115+
return False
116+
if self.ip != dd.ip and dd.ip:
117+
return False
113118
return True
114119

115120
def matches_rank_map(
@@ -154,6 +159,7 @@ def __init__(self,
154159
per_host_daemon_type: Optional[str] = None,
155160
rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] = None,
156161
blocking_daemon_hosts: Optional[List[orchestrator.HostSpec]] = None,
162+
upgrade_in_progress: bool = False
157163
):
158164
assert spec
159165
self.spec = spec # type: ServiceSpec
@@ -171,6 +177,7 @@ def __init__(self,
171177
self.per_host_daemon_type = per_host_daemon_type
172178
self.ports_start = spec.get_port_start()
173179
self.rank_map = rank_map
180+
self.upgrade_in_progress = upgrade_in_progress
174181

175182
def hosts_by_label(self, label: str) -> List[orchestrator.HostSpec]:
176183
return [h for h in self.hosts if label in h.labels]
@@ -234,7 +241,7 @@ def place_per_host_daemons(
234241
for dd in existing:
235242
found = False
236243
for p in host_slots:
237-
if p.matches_daemon(dd):
244+
if p.matches_daemon(dd, self.upgrade_in_progress):
238245
host_slots.remove(p)
239246
found = True
240247
break
@@ -311,7 +318,7 @@ def expand_candidates(ls: List[DaemonPlacement], num: int) -> List[DaemonPlaceme
311318
for dd in daemons:
312319
found = False
313320
for p in others:
314-
if p.matches_daemon(dd) and p.matches_rank_map(dd, self.rank_map, ranks):
321+
if p.matches_daemon(dd, self.upgrade_in_progress) and p.matches_rank_map(dd, self.rank_map, ranks):
315322
others.remove(p)
316323
if dd.is_active:
317324
existing_active.append(dd)

src/pybind/mgr/cephadm/serve.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,7 @@ def has_interface_for_vip(host: str, sspec: ServiceSpec) -> bool:
826826
primary_daemon_type=svc.primary_daemon_type(spec),
827827
per_host_daemon_type=svc.per_host_daemon_type(spec),
828828
rank_map=rank_map,
829+
upgrade_in_progress=(self.mgr.upgrade.upgrade_state is not None)
829830
)
830831

831832
try:

0 commit comments

Comments
 (0)