Skip to content

Commit 4c43f85

Browse files
authored
[Feature] OptionalReplace Upgrade Mode (#1939)
1 parent 9cd2858 commit 4c43f85

File tree

7 files changed

+72
-13
lines changed

7 files changed

+72
-13
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
- (Feature) (Platform) Enable HTTP to HTTPS Redirect
1616
- (Bugfix) (Platform) Fix NoAuth Mode
1717
- (Feature) Define OptionalReplace Upgrade Mode option
18+
- (Feature) OptionalReplace Upgrade Mode
1819

1920
## [1.2.50](https://github.com/arangodb/kube-arangodb/tree/1.2.50) (2025-07-04)
2021
- (Feature) (Platform) MetaV1 Integration Service

pkg/deployment/reconcile/action_compact_member.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ func (a *actionCompactMember) Start(ctx context.Context) (bool, error) {
5454
}
5555

5656
switch g {
57-
case api.ServerGroupDBServers:
57+
case api.ServerGroupDBServers, api.ServerGroupSingle:
5858
dbc, err := a.actionCtx.GetServerAsyncClient(m.ID)
5959
if err != nil {
6060
return false, errors.Wrapf(err, "Unable to create client")
@@ -91,7 +91,7 @@ func (a actionCompactMember) CheckProgress(ctx context.Context) (bool, bool, err
9191
}
9292

9393
switch g {
94-
case api.ServerGroupDBServers:
94+
case api.ServerGroupDBServers, api.ServerGroupSingle:
9595
dbc, err := a.actionCtx.GetServerAsyncClient(m.ID)
9696
if err != nil {
9797
return false, false, errors.Wrapf(err, "Unable to create client")

pkg/deployment/reconcile/plan_builder_rotate_upgrade.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,11 @@ func (r *Reconciler) createUpgradePlanInternal(apiObject k8sutil.APIObject, spec
287287
return nil, false
288288
}
289289

290+
if m.Member.Conditions.IsTrue(api.ConditionTypeMarkedToRemove) {
291+
// Server is supposed to be removed, let's not continue
292+
return nil, false
293+
}
294+
290295
if decision.AreGroupsPendingUpgrade(upgradeOrder[:id]...) {
291296
// Some groups earlier in the upgrade order are still pending upgrade
292297
return nil, false
@@ -313,20 +318,19 @@ func (r *Reconciler) createUpgradePlanInternal(apiObject k8sutil.APIObject, spec
313318
switch group {
314319
case api.ServerGroupDBServers:
315320
// Members are supposed to be replaced
316-
if !m.Member.Conditions.IsTrue(api.ConditionTypeMarkedToRemove) {
317-
return api.Plan{actions.NewAction(api.ActionTypeMarkToRemoveMember, m.Group, m.Member, "Replace by Upgrade")}, false
318-
}
319-
return nil, false
321+
return api.Plan{actions.NewAction(api.ActionTypeMarkToRemoveMember, m.Group, m.Member, "Replace by Upgrade")}, false
320322
}
321323
}
322324

325+
compact := um.Get() == api.ServerGroupUpgradeModeOptionalReplace
326+
323327
if d.updateAllowed {
324328
// We are fine, group is alive so we can proceed
325329
r.planLogger.Str("member", m.Member.ID).Str("Reason", d.updateMessage).Info("Upgrade allowed")
326-
return r.createUpgradeMemberPlan(m.Member, m.Group, "Version upgrade", spec, status, !d.upgradeDecision.AutoUpgradeNeeded, agencyCache), false
330+
return r.createUpgradeMemberPlan(m.Member, m.Group, "Version upgrade", spec, status, !d.upgradeDecision.AutoUpgradeNeeded, compact, agencyCache), false
327331
} else if d.unsafeUpdateAllowed {
328332
r.planLogger.Str("member", m.Member.ID).Str("Reason", d.updateMessage).Info("Pod needs upgrade but cluster is not ready. Either some shards are not in sync or some member is not ready, but unsafe upgrade is allowed")
329-
return r.createUpgradeMemberPlan(m.Member, m.Group, "Version upgrade", spec, status, !d.upgradeDecision.AutoUpgradeNeeded, agencyCache), false
333+
return r.createUpgradeMemberPlan(m.Member, m.Group, "Version upgrade", spec, status, !d.upgradeDecision.AutoUpgradeNeeded, compact, agencyCache), false
330334
} else {
331335
r.planLogger.Str("member", m.Member.ID).Str("Reason", d.updateMessage).Info("Pod needs upgrade but cluster is not ready. Either some shards are not in sync or some member is not ready.")
332336
return nil, true
@@ -596,7 +600,7 @@ func (r *Reconciler) createMemberAllowUpgradeConditionPlan(ctx context.Context,
596600
// createUpgradeMemberPlan creates a plan to upgrade (stop-recreateWithAutoUpgrade-stop-start) an existing
597601
// member.
598602
func (r *Reconciler) createUpgradeMemberPlan(member api.MemberStatus,
599-
group api.ServerGroup, reason string, spec api.DeploymentSpec, status api.DeploymentStatus, rotateStatefull bool, agencyCache state.State) api.Plan {
603+
group api.ServerGroup, reason string, spec api.DeploymentSpec, status api.DeploymentStatus, rotateStatefull, compact bool, agencyCache state.State) api.Plan {
600604
upgradeAction := api.ActionTypeUpgradeMember
601605
if rotateStatefull || group.IsStateless() {
602606
upgradeAction = api.ActionTypeRotateMember
@@ -608,7 +612,7 @@ func (r *Reconciler) createUpgradeMemberPlan(member api.MemberStatus,
608612
Str("action", string(upgradeAction)).
609613
Info("Creating upgrade plan")
610614

611-
plan := createRotateMemberPlanWithAction(member, group, upgradeAction, spec, reason, util.CheckConditionalP1Nil(agencyCache.GetRebootID, state.Server(member.ID)))
615+
plan := createRotateMemberPlanWithAction(member, group, upgradeAction, spec, reason, util.CheckConditionalP1Nil(agencyCache.GetRebootID, state.Server(member.ID)), compact)
612616

613617
if member.Image == nil || member.Image.Image != spec.GetImage() {
614618
plan = plan.Before(actions.NewAction(api.ActionTypeSetMemberCurrentImage, group, member, reason).SetImage(spec.GetImage()))

pkg/deployment/reconcile/plan_builder_utils.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,25 @@ func (r *Reconciler) createRotateMemberPlan(member api.MemberStatus,
3737
Str("role", group.AsRole()).
3838
Str("reason", reason).
3939
Debug("Creating rotation plan")
40-
return createRotateMemberPlanWithAction(member, group, api.ActionTypeRotateMember, spec, reason, rebootId)
40+
return createRotateMemberPlanWithAction(member, group, api.ActionTypeRotateMember, spec, reason, rebootId, false)
4141
}
4242

4343
// createRotateMemberPlanWithAction creates a plan to rotate (stop-<action>-start) an existing
4444
// member.
4545
func createRotateMemberPlanWithAction(member api.MemberStatus,
46-
group api.ServerGroup, action api.ActionType, spec api.DeploymentSpec, reason string, rebootId *int) api.Plan {
46+
group api.ServerGroup, action api.ActionType, spec api.DeploymentSpec, reason string, rebootId *int, compact bool) api.Plan {
4747

4848
var plan = api.Plan{
4949
actions.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, group, member, "Remove server keyfile and enforce renewal/recreation"),
5050
}
5151
plan = withSecureWrap(member, group, spec, rebootId, plan...)
5252

53+
if compact {
54+
plan = plan.After(
55+
actions.NewAction(api.ActionTypeCompactMember, group, member, reason),
56+
)
57+
}
58+
5359
plan = plan.After(
5460
actions.NewAction(api.ActionTypeKillMemberPod, group, member, reason),
5561
actions.NewAction(action, group, member, reason),

pkg/deployment/resources/pod_creator_arangod.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,13 @@ func (a *ArangoUpgradeContainer) GetCommand() ([]string, error) {
638638
pod.UpgradeDebug().Args(a.input).Sort().AsArgs()...,
639639
)
640640

641+
if a.input.Group == api.ServerGroupDBServers || a.input.Group == api.ServerGroupSingle {
642+
if a.input.GroupSpec.UpgradeMode.Get() == api.ServerGroupUpgradeModeOptionalReplace ||
643+
(a.input.GroupSpec.UpgradeMode.Get() == api.ServerGroupUpgradeModeManual && a.input.GroupSpec.ManualUpgradeMode.Get() == api.ServerGroupUpgradeModeOptionalReplace) {
644+
upgradeArgs = append(upgradeArgs, "--database.auto-upgrade-full-compaction")
645+
}
646+
}
647+
641648
return append(args, upgradeArgs...), nil
642649
}
643650

pkg/deployment/resources/pod_inspector.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ package resources
2323
import (
2424
"context"
2525
"fmt"
26+
"slices"
2627
goStrings "strings"
2728
"time"
2829

@@ -136,13 +137,24 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
136137
switch containers[id] {
137138
case api.ServerGroupReservedInitContainerNameVersionCheck:
138139
if c, ok := kresources.GetAnyContainerStatusByName(pod.Status.InitContainerStatuses, containers[id]); ok {
139-
if t := c.State.Terminated; t != nil && t.ExitCode == 11 {
140+
if t := c.State.Terminated; t != nil && t.ExitCode == constants.ArangoDBExitCodeUpgradeRequired {
140141
memberStatus.Upgrade = true
141142
updateMemberStatusNeeded = true
142143
}
143144
}
144145
case api.ServerGroupReservedInitContainerNameUpgrade:
145146
memberStatus.Conditions.Update(api.ConditionTypeUpgradeFailed, true, "Upgrade Failed", "")
147+
if group == api.ServerGroupDBServers {
148+
if c, ok := kresources.GetAnyContainerStatusByName(pod.Status.InitContainerStatuses, containers[id]); ok {
149+
if t := c.State.Terminated; t != nil && slices.Contains([]int32{
150+
constants.ArangoDBExitCodeUpgradeFailedCompaction,
151+
//constants.ContainerExitCodeSegmentationFault, // Also in case of Segv
152+
constants.ArangoDBExitCodeInvalidArgument, // If Arg is not known
153+
}, t.ExitCode) {
154+
memberStatus.Conditions.Update(api.ConditionTypeMarkedToRemove, true, "Replace Required due to the mismatch", "")
155+
}
156+
}
157+
}
146158
}
147159
}
148160
}

pkg/util/constants/adb_codes.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2025 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
21+
package constants
22+
23+
const (
24+
ArangoDBExitCodeInvalidArgument = 3
25+
ArangoDBExitCodeUpgradeRequired = 11
26+
ArangoDBExitCodeUpgradeFailedCompaction = 30
27+
28+
ContainerExitCodeSegmentationFault = 139
29+
)

0 commit comments

Comments
 (0)