Skip to content

Commit 2cffa5f

Browse files
authored
[Bugfix] Fix update decision maker (#890)
1 parent 7f233d1 commit 2cffa5f

File tree

3 files changed

+182
-102
lines changed

3 files changed

+182
-102
lines changed

pkg/deployment/reconcile/action_runtime_container_image_update.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -267,11 +267,7 @@ func (a actionRuntimeContainerImageUpdate) CheckProgress(ctx context.Context) (b
267267
return false, false, nil
268268
}
269269

270-
if k8sutil.IsPodReady(pod) {
271-
// Pod is alive again
272-
return true, false, nil
273-
}
274-
return false, false, nil
270+
return true, false, nil
275271
} else {
276272
// Unknown state
277273
return false, false, nil

pkg/deployment/reconcile/plan_builder_rotate_upgrade.go

Lines changed: 81 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ import (
3333
upgraderules "github.com/arangodb/go-upgrade-rules"
3434
"github.com/arangodb/kube-arangodb/pkg/apis/deployment"
3535
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
36-
"github.com/arangodb/kube-arangodb/pkg/util"
3736
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
3837
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
3938
"github.com/rs/zerolog"
@@ -117,116 +116,101 @@ func createMarkToRemovePlan(ctx context.Context,
117116
}
118117

119118
func createRotateOrUpgradePlanInternal(log zerolog.Logger, apiObject k8sutil.APIObject, spec api.DeploymentSpec, status api.DeploymentStatus, cachedStatus inspectorInterface.Inspector, context PlanBuilderContext) (api.Plan, bool) {
120-
member, group, decision, update := createRotateOrUpgradeDecision(log, spec, status, context)
121-
if !update {
122-
// Nothing to do
123-
return nil, false
124-
}
119+
decision := createRotateOrUpgradeDecision(log, spec, status, context)
125120

126-
if decision != nil {
121+
if decision.IsUpgrade() {
127122
// Upgrade phase
128-
if decision.Hold {
129-
// Holding upgrade
130-
return nil, false
131-
}
132-
133-
if !decision.UpgradeNeeded {
134-
// In upgrade scenario but upgrade is not needed
135-
return nil, false
136-
}
137-
138-
if !decision.UpgradeAllowed {
139-
context.CreateEvent(k8sutil.NewUpgradeNotAllowedEvent(apiObject, decision.FromVersion, decision.ToVersion, decision.FromLicense, decision.ToLicense))
140-
return nil, false
141-
}
142-
143-
if groupReadyForRestart(context, spec, status, member, group) {
144-
return createUpgradeMemberPlan(log, member, group, "Version upgrade", spec, status, !decision.AutoUpgradeNeeded), false
145-
} else if util.BoolOrDefault(spec.AllowUnsafeUpgrade, false) {
146-
log.Info().Msg("Pod needs upgrade but cluster is not ready. Either some shards are not in sync or some member is not ready, but unsafe upgrade is allowed")
147-
return createUpgradeMemberPlan(log, member, group, "Version upgrade", spec, status, !decision.AutoUpgradeNeeded), false
148-
} else {
149-
log.Info().Msg("Pod needs upgrade but cluster is not ready. Either some shards are not in sync or some member is not ready.")
150-
return nil, true
151-
}
152-
}
123+
// During upgrade always get first member which needs to be upgraded
124+
for _, m := range status.Members.AsList() {
125+
d := decision[m.Member.ID]
126+
if !d.upgrade {
127+
continue
128+
}
153129

154-
// Rotate phase
155-
if !rotation.CheckPossible(member) {
156-
return nil, false
157-
}
130+
// We have member to upgrade
131+
if d.upgradeDecision.Hold {
132+
// Holding upgrade
133+
return nil, false
134+
}
158135

159-
if member.Conditions.IsTrue(api.ConditionTypeRestart) {
160-
return createRotateMemberPlan(log, member, group, "Restart flag present"), false
161-
}
136+
if !d.upgradeDecision.UpgradeNeeded {
137+
// In upgrade scenario but upgrade is not needed
138+
return nil, false
139+
}
162140

163-
if member.Conditions.IsTrue(api.ConditionTypePendingUpdate) {
164-
arangoMember, ok := cachedStatus.ArangoMember(member.ArangoMemberName(apiObject.GetName(), group))
165-
if !ok {
166-
return nil, false
167-
}
168-
p, ok := cachedStatus.Pod(member.PodName)
169-
if !ok {
170-
p = nil
171-
}
141+
if !d.upgradeDecision.UpgradeAllowed {
142+
context.CreateEvent(k8sutil.NewUpgradeNotAllowedEvent(apiObject, d.upgradeDecision.FromVersion, d.upgradeDecision.ToVersion, d.upgradeDecision.FromLicense, d.upgradeDecision.ToLicense))
143+
return nil, false
144+
}
172145

173-
if mode, p, reason, err := rotation.IsRotationRequired(log, cachedStatus, spec, member, group, p, arangoMember.Spec.Template, arangoMember.Status.Template); err != nil {
174-
log.Err(err).Msgf("Error while generating update plan")
175-
return nil, false
176-
} else if mode != rotation.InPlaceRotation {
177-
return api.Plan{api.NewAction(api.ActionTypeSetMemberCondition, group, member.ID, "Cleaning update").
178-
AddParam(api.ConditionTypePendingUpdate.String(), "").
179-
AddParam(api.ConditionTypeUpdating.String(), "T")}, false
180-
} else {
181-
p = p.After(
182-
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID),
183-
api.NewAction(api.ActionTypeWaitForMemberInSync, group, member.ID))
184-
185-
p = p.Wrap(api.NewAction(api.ActionTypeSetMemberCondition, group, member.ID, reason).
186-
AddParam(api.ConditionTypePendingUpdate.String(), "").AddParam(api.ConditionTypeUpdating.String(), "T"),
187-
api.NewAction(api.ActionTypeSetMemberCondition, group, member.ID, reason).
188-
AddParam(api.ConditionTypeUpdating.String(), ""))
189-
190-
return p, false
146+
if d.updateAllowed {
147+
// We are fine, group is alive so we can proceed
148+
return createUpgradeMemberPlan(log, m.Member, m.Group, "Version upgrade", spec, status, !d.upgradeDecision.AutoUpgradeNeeded), false
149+
} else if d.unsafeUpdateAllowed {
150+
log.Info().Str("member", m.Member.ID).Msg("Pod needs upgrade but cluster is not ready. Either some shards are not in sync or some member is not ready, but unsafe upgrade is allowed")
151+
return createUpgradeMemberPlan(log, m.Member, m.Group, "Version upgrade", spec, status, !d.upgradeDecision.AutoUpgradeNeeded), false
152+
} else {
153+
log.Info().Str("member", m.Member.ID).Msg("Pod needs upgrade but cluster is not ready. Either some shards are not in sync or some member is not ready.")
154+
return nil, true
155+
}
191156
}
192-
}
193-
return nil, false
194-
}
195157

196-
func createRotateOrUpgradeDecision(log zerolog.Logger, spec api.DeploymentSpec, status api.DeploymentStatus, context PlanBuilderContext) (api.MemberStatus, api.ServerGroup, *upgradeDecision, bool) {
197-
// Upgrade phase
198-
for _, m := range status.Members.AsList() {
199-
if m.Member.Phase != api.MemberPhaseCreated || m.Member.PodName == "" {
200-
// Only rotate when phase is created
201-
continue
202-
}
158+
log.Warn().Msg("Pod upgrade plan has been made, but it has been dropped due to missing flag")
159+
return nil, false
160+
} else if decision.IsUpdate() {
161+
// Update phase
162+
for _, m := range status.Members.AsList() {
163+
d := decision[m.Member.ID]
164+
if !d.update {
165+
continue
166+
}
203167

204-
// Got pod, compare it with what it should be
205-
decision := podNeedsUpgrading(log, m.Member, spec, status.Images)
168+
if !d.updateAllowed {
169+
// Update is not allowed due to constraint
170+
if !d.unsafeUpdateAllowed {
171+
log.Info().Str("member", m.Member.ID).Msg("Pod needs restart but cluster is not ready. Either some shards are not in sync or some member is not ready.")
172+
continue
173+
}
174+
log.Info().Str("member", m.Member.ID).Msg("Pod needs restart but cluster is not ready. Either some shards are not in sync or some member is not ready, but unsafe upgrade is allowed")
175+
}
206176

207-
if decision.UpgradeNeeded || decision.Hold {
208-
return m.Member, m.Group, &decision, true
209-
}
210-
}
177+
if m.Member.Conditions.IsTrue(api.ConditionTypeRestart) {
178+
return createRotateMemberPlan(log, m.Member, m.Group, "Restart flag present"), false
179+
}
180+
arangoMember, ok := cachedStatus.ArangoMember(m.Member.ArangoMemberName(apiObject.GetName(), m.Group))
181+
if !ok {
182+
continue
183+
}
211184

212-
// Update phase
213-
for _, m := range status.Members.AsList() {
214-
if !groupReadyForRestart(context, spec, status, m.Member, m.Group) {
215-
continue
216-
}
185+
p, ok := cachedStatus.Pod(m.Member.PodName)
186+
if !ok {
187+
p = nil
188+
}
217189

218-
if rotation.CheckPossible(m.Member) {
219-
if m.Member.Conditions.IsTrue(api.ConditionTypeRestart) {
220-
return m.Member, m.Group, nil, true
221-
} else if m.Member.Conditions.IsTrue(api.ConditionTypePendingUpdate) {
222-
if !m.Member.Conditions.IsTrue(api.ConditionTypeUpdating) && !m.Member.Conditions.IsTrue(api.ConditionTypeUpdateFailed) {
223-
return m.Member, m.Group, nil, true
224-
}
190+
if mode, p, reason, err := rotation.IsRotationRequired(log, cachedStatus, spec, m.Member, m.Group, p, arangoMember.Spec.Template, arangoMember.Status.Template); err != nil {
191+
log.Err(err).Str("member", m.Member.ID).Msgf("Error while generating update plan")
192+
continue
193+
} else if mode != rotation.InPlaceRotation {
194+
return api.Plan{api.NewAction(api.ActionTypeSetMemberCondition, m.Group, m.Member.ID, "Cleaning update").
195+
AddParam(api.ConditionTypePendingUpdate.String(), "").
196+
AddParam(api.ConditionTypeUpdating.String(), "T")}, false
197+
} else {
198+
p = p.After(
199+
api.NewAction(api.ActionTypeWaitForMemberUp, m.Group, m.Member.ID),
200+
api.NewAction(api.ActionTypeWaitForMemberInSync, m.Group, m.Member.ID))
201+
202+
p = p.Wrap(api.NewAction(api.ActionTypeSetMemberCondition, m.Group, m.Member.ID, reason).
203+
AddParam(api.ConditionTypePendingUpdate.String(), "").AddParam(api.ConditionTypeUpdating.String(), "T"),
204+
api.NewAction(api.ActionTypeSetMemberCondition, m.Group, m.Member.ID, reason).
205+
AddParam(api.ConditionTypeUpdating.String(), ""))
206+
207+
return p, false
225208
}
226209
}
210+
return nil, true
227211
}
228212

229-
return api.MemberStatus{}, api.ServerGroupUnknown, nil, false
213+
return nil, false
230214
}
231215

232216
// podNeedsUpgrading decides if an upgrade of the pod is needed (to comply with
@@ -374,8 +358,8 @@ func arangoMemberPodTemplateNeedsUpdate(ctx context.Context, log zerolog.Logger,
374358
// clusterReadyForUpgrade returns true if the cluster is ready for the next update, that is:
375359
// - all shards are in sync
376360
// - all members are ready and fine
377-
func groupReadyForRestart(context PlanBuilderContext, spec api.DeploymentSpec, status api.DeploymentStatus, member api.MemberStatus, group api.ServerGroup) bool {
378-
if util.BoolOrDefault(spec.AllowUnsafeUpgrade, false) {
361+
func groupReadyForRestart(context PlanBuilderContext, status api.DeploymentStatus, member api.MemberStatus, group api.ServerGroup) bool {
362+
if group == api.ServerGroupSingle {
379363
return true
380364
}
381365

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
21+
package reconcile
22+
23+
import (
24+
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
25+
"github.com/arangodb/kube-arangodb/pkg/deployment/rotation"
26+
"github.com/arangodb/kube-arangodb/pkg/util"
27+
"github.com/rs/zerolog"
28+
)
29+
30+
type updateUpgradeDecisionMap map[string]updateUpgradeDecision
31+
32+
func (u updateUpgradeDecisionMap) IsUpgrade() bool {
33+
for _, k := range u {
34+
if k.upgrade {
35+
return true
36+
}
37+
}
38+
39+
return false
40+
}
41+
42+
func (u updateUpgradeDecisionMap) IsUpdate() bool {
43+
for _, k := range u {
44+
if k.update {
45+
return true
46+
}
47+
}
48+
49+
return false
50+
}
51+
52+
type updateUpgradeDecision struct {
53+
upgrade bool
54+
upgradeDecision upgradeDecision
55+
56+
unsafeUpdateAllowed bool
57+
updateAllowed bool
58+
update bool
59+
restartRequired bool
60+
}
61+
62+
func createRotateOrUpgradeDecision(log zerolog.Logger, spec api.DeploymentSpec, status api.DeploymentStatus, context PlanBuilderContext) updateUpgradeDecisionMap {
63+
d := updateUpgradeDecisionMap{}
64+
65+
// Init phase
66+
for _, m := range status.Members.AsList() {
67+
d[m.Member.ID] = createRotateOrUpgradeDecisionMember(log, spec, status, context, m)
68+
}
69+
70+
return d
71+
}
72+
73+
func createRotateOrUpgradeDecisionMember(log zerolog.Logger, spec api.DeploymentSpec, status api.DeploymentStatus, context PlanBuilderContext, element api.DeploymentStatusMemberElement) (d updateUpgradeDecision) {
74+
if element.Member.Phase == api.MemberPhaseCreated && element.Member.PodName != "" {
75+
// Only upgrade when phase is created
76+
77+
// Got pod, compare it with what it should be
78+
decision := podNeedsUpgrading(log, element.Member, spec, status.Images)
79+
80+
if decision.UpgradeNeeded || decision.Hold {
81+
d.upgrade = true
82+
d.upgradeDecision = decision
83+
}
84+
}
85+
86+
d.updateAllowed = groupReadyForRestart(context, status, element.Member, element.Group)
87+
d.unsafeUpdateAllowed = util.BoolOrDefault(spec.AllowUnsafeUpgrade, false)
88+
89+
if rotation.CheckPossible(element.Member) {
90+
if element.Member.Conditions.IsTrue(api.ConditionTypeRestart) {
91+
d.update = true
92+
d.restartRequired = true
93+
} else if element.Member.Conditions.IsTrue(api.ConditionTypePendingUpdate) {
94+
if !element.Member.Conditions.IsTrue(api.ConditionTypeUpdating) && !element.Member.Conditions.IsTrue(api.ConditionTypeUpdateFailed) {
95+
d.update = true
96+
}
97+
}
98+
}
99+
return
100+
}

0 commit comments

Comments
 (0)