Skip to content

Commit aaa7042

Browse files
authored
[Feature] Advanced sync check (#549)
1 parent ea615a3 commit aaa7042

File tree

8 files changed

+120
-0
lines changed

8 files changed

+120
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Change Log
22

33
## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A)
4+
- Added extended Rotation check for Cluster mode
45
- Removed old rotation logic (rotation of ArangoDeployment may be enforced after Operator upgrade)
56
- Added UpToDate condition in ArangoDeployment Status
67

pkg/apis/deployment/v1/deployment_mode.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ func (m DeploymentMode) Validate() error {
4949
}
5050
}
5151

52+
// Get returns the mode m points to, or DeploymentModeCluster when m is nil.
53+
func (m *DeploymentMode) Get() DeploymentMode {
54+
if m == nil {
55+
return DeploymentModeCluster
56+
}
57+
58+
return *m
59+
}
60+
5261
// HasSingleServers returns true when the given mode is "Single" or "ActiveFailover".
5362
func (m DeploymentMode) HasSingleServers() bool {
5463
return m == DeploymentModeSingle || m == DeploymentModeActiveFailover

pkg/apis/deployment/v1/plan.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ const (
5353
ActionTypeUpgradeMember ActionType = "UpgradeMember"
5454
// ActionTypeWaitForMemberUp causes the plan to wait until the member is considered "up".
5555
ActionTypeWaitForMemberUp ActionType = "WaitForMemberUp"
56+
// ActionTypeWaitForMemberInSync causes the plan to wait until the member is considered "up" and the cluster is healthy (all shards in sync).
57+
ActionTypeWaitForMemberInSync ActionType = "WaitForMemberInSync"
5658
// ActionTypeRenewTLSCertificate causes the TLS certificate of a member to be renewed.
5759
ActionTypeRenewTLSCertificate ActionType = "RenewTLSCertificate"
5860
// ActionTypeRenewTLSCACertificate causes the TLS CA certificate of the entire deployment to be renewed.

pkg/apis/deployment/v1/server_group_spec.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ type ServerGroupSpec struct {
8484
Volumes ServerGroupSpecVolumes `json:"volumes,omitempty"`
8585
// VolumeMounts define list of volume mounts mounted into server container
8686
VolumeMounts ServerGroupSpecVolumeMounts `json:"volumeMounts,omitempty"`
87+
// ExtendedRotationCheck enables extended checks during member rotation
88+
ExtendedRotationCheck *bool `json:"extendedRotationCheck,omitempty"`
8789
}
8890

8991
// ServerGroupSpecSecurityContext contains specification for pod security context

pkg/deployment/reconcile/action_context.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ type ActionContext interface {
101101
SetCurrentImage(imageInfo api.ImageInfo) error
102102
// GetDeploymentHealth returns a copy of the latest known state of cluster health
103103
GetDeploymentHealth() (driver.ClusterHealth, error)
104+
// GetShardSyncStatus returns true if all shards are in sync
105+
GetShardSyncStatus() bool
104106
// InvalidateSyncStatus resets the sync state to false and triggers an inspection
105107
InvalidateSyncStatus()
106108
// GetSpec returns a copy of the spec
@@ -127,6 +129,10 @@ type actionContext struct {
127129
context Context
128130
}
129131

132+
// GetShardSyncStatus returns the shard sync status reported by the wrapped context.
func (ac *actionContext) GetShardSyncStatus() bool {
133+
return ac.context.GetShardSyncStatus()
134+
}
135+
130136
func (ac *actionContext) UpdateClusterCondition(conditionType api.ConditionType, status bool, reason, message string) error {
131137
return ac.context.WithStatusUpdate(func(s *api.DeploymentStatus) bool {
132138
return s.Conditions.Update(conditionType, status, reason, message)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2020 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
// Author Adam Janikowski
21+
//
22+
23+
package reconcile
24+
25+
import (
26+
"context"
27+
28+
"github.com/arangodb/kube-arangodb/pkg/util"
29+
30+
"github.com/rs/zerolog"
31+
32+
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
33+
)
34+
35+
// init registers newWaitForMemberInSync as the factory for
// api.ActionTypeWaitForMemberInSync actions.
func init() {
36+
registerAction(api.ActionTypeWaitForMemberInSync, newWaitForMemberInSync)
37+
}
38+
39+
// newWaitForMemberInSync creates a new Action that implements the given
40+
// planned WaitForMemberInSync action. The action is built with waitForMemberUpTimeout.
41+
func newWaitForMemberInSync(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action {
42+
a := &actionWaitForMemberInSync{}
43+
44+
a.actionImpl = newActionImplDefRef(log, action, actionCtx, waitForMemberUpTimeout)
45+
46+
return a
47+
}
48+
49+
// actionWaitForMemberInSync implements a WaitForMemberInSync action.
50+
type actionWaitForMemberInSync struct {
51+
// actionImpl implements timeout and member id functions
52+
actionImpl
53+
}
54+
55+
// Start performs the start of the action by running CheckProgress once.
56+
// Returns true if the action is completely finished, false in case
57+
// the start time needs to be recorded and a ready condition needs to be checked.
// The second result of CheckProgress is discarded.
58+
func (a *actionWaitForMemberInSync) Start(ctx context.Context) (bool, error) {
59+
ready, _, err := a.CheckProgress(ctx)
60+
return ready, err
61+
}
62+
63+
// CheckProgress checks the progress of the action.
64+
// Returns true as the first result if the action is completely finished, false otherwise.
// The second result is always false here (presumably an abort flag; confirm against the Action interface).
// Any error from the underlying check is returned unchanged.
65+
func (a *actionWaitForMemberInSync) CheckProgress(ctx context.Context) (bool, bool, error) {
66+
ready, err := a.check(ctx)
67+
if err != nil {
68+
return false, false, err
69+
}
70+
71+
return ready, false, nil
72+
}
73+
74+
// check reports whether the action can be considered finished.
// It returns true immediately when ExtendedRotationCheck is unset or false for
// the action's server group, and for every deployment mode other than Cluster.
// In Cluster mode the result comes from checkCluster.
func (a *actionWaitForMemberInSync) check(ctx context.Context) (bool, error) {
75+
spec := a.actionCtx.GetSpec()
76+
77+
groupSpec := spec.GetServerGroupSpec(a.action.Group)
78+
79+
if !util.BoolOrDefault(groupSpec.ExtendedRotationCheck, false) {
80+
return true, nil
81+
}
82+
83+
switch spec.Mode.Get() {
84+
case api.DeploymentModeCluster:
85+
return a.checkCluster(ctx, spec, groupSpec)
86+
default:
87+
return true, nil
88+
}
89+
}
90+
91+
// checkCluster reports whether the shard sync status from the action context is true.
// When it is not, an info message is logged and (false, nil) is returned.
// The ctx, spec and groupSpec parameters are not used by this implementation.
func (a *actionWaitForMemberInSync) checkCluster(ctx context.Context, spec api.DeploymentSpec, groupSpec api.ServerGroupSpec) (bool, error) {
92+
if !a.actionCtx.GetShardSyncStatus() {
93+
a.log.Info().Str("mode", "cluster").Msgf("Shards are not in sync")
94+
return false, nil
95+
}
96+
97+
return true, nil
98+
}

pkg/deployment/reconcile/plan_builder.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ func createRotateMemberPlan(log zerolog.Logger, member api.MemberStatus,
234234
plan := api.Plan{
235235
api.NewAction(api.ActionTypeRotateMember, group, member.ID, reason),
236236
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID),
237+
api.NewAction(api.ActionTypeWaitForMemberInSync, group, member.ID),
237238
}
238239
return plan
239240
}

pkg/deployment/reconcile/plan_builder_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,7 @@ func TestCreatePlan(t *testing.T) {
682682
ExpectedPlan: []api.Action{
683683
api.NewAction(api.ActionTypeRotateMember, api.ServerGroupAgents, ""),
684684
api.NewAction(api.ActionTypeWaitForMemberUp, api.ServerGroupAgents, ""),
685+
api.NewAction(api.ActionTypeWaitForMemberInSync, api.ServerGroupAgents, ""),
685686
},
686687
ExpectedLog: "Creating rotation plan",
687688
},

0 commit comments

Comments
 (0)