2323package reconcile
2424
2525import (
26+ goContext "context"
27+ "fmt"
28+ "github.com/arangodb/kube-arangodb/pkg/deployment/agency"
29+ "time"
30+
2631 driver "github.com/arangodb/go-driver"
2732 upgraderules "github.com/arangodb/go-upgrade-rules"
2833 "github.com/rs/zerolog"
@@ -78,6 +83,27 @@ func (d *Reconciler) CreatePlan() error {
7883 return nil
7984}
8085
86+ func fetchAgency (log zerolog.Logger ,
87+ spec api.DeploymentSpec , status api.DeploymentStatus ,
88+ context PlanBuilderContext ) (* agency.ArangoPlanDatabases , error ) {
89+ if spec .GetMode () != api .DeploymentModeCluster && spec .GetMode () != api .DeploymentModeActiveFailover {
90+ return nil , nil
91+ } else if status .Members .Agents .MembersReady () > 0 {
92+ agencyCtx , agencyCancel := goContext .WithTimeout (goContext .Background (), time .Minute )
93+ defer agencyCancel ()
94+
95+ ret := & agency.ArangoPlanDatabases {}
96+
97+ if err := context .GetAgencyData (agencyCtx , ret , agency .ArangoKey , agency .PlanKey , agency .PlanCollectionsKey ); err != nil {
98+ return nil , err
99+ }
100+
101+ return ret , nil
102+ } else {
103+ return nil , fmt .Errorf ("not able to read from agency when agency is down" )
104+ }
105+ }
106+
81107// createPlan considers the given specification & status and creates a plan to get the status in line with the specification.
82108// If a plan already exists, the given plan is returned with false.
83109// Otherwise the new plan is returned with a boolean true.
@@ -90,30 +116,63 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject,
90116 return currentPlan , false
91117 }
92118
119+ // Fetch agency plan
120+ agencyPlan , agencyErr := fetchAgency (log , spec , status , context )
121+
93122 // Check for various scenario's
94123 var plan api.Plan
95124
96125 // Check for members in failed state
97126 status .Members .ForeachServerGroup (func (group api.ServerGroup , members api.MemberStatusList ) error {
98127 for _ , m := range members {
99- if m .Phase == api .MemberPhaseFailed && plan .IsEmpty () {
100- log .Debug ().
101- Str ("id" , m .ID ).
102- Str ("role" , group .AsRole ()).
103- Msg ("Creating member replacement plan because member has failed" )
104- newID := ""
105- if group == api .ServerGroupAgents {
106- newID = m .ID // Agents cannot (yet) be replaced with new IDs
128+ if m .Phase != api .MemberPhaseFailed || len (plan ) > 0 {
129+ continue
130+ }
131+
132+ memberLog := log .Info ().Str ("id" , m .ID ).Str ("role" , group .AsRole ())
133+
134+ if group == api .ServerGroupDBServers && spec .GetMode () == api .DeploymentModeCluster {
135+ // Do pre check for DBServers. If agency is down DBServers should not be touch
136+ if agencyErr != nil {
137+ memberLog .Msg ("Error in agency" )
138+ continue
107139 }
108- plan = append (plan ,
109- api .NewAction (api .ActionTypeRemoveMember , group , m .ID ),
110- api .NewAction (api .ActionTypeAddMember , group , newID ),
111- )
140+
141+ if agencyPlan == nil {
142+ memberLog .Msg ("AgencyPlan is nil" )
143+ continue
144+ }
145+
146+ if agencyPlan .IsDBServerInDatabases (m .ID ) {
147+ // DBServer still exists in agency plan! Will not be removed, but needs to be recreated
148+ memberLog .Msg ("Recreating DBServer - it cannot be removed gracefully" )
149+ plan = append (plan ,
150+ api .NewAction (api .ActionTypeRecreateMember , group , m .ID ))
151+ continue
152+ }
153+
154+ // Everything is fine, proceed
112155 }
156+
157+ memberLog .Msg ("Creating member replacement plan because member has failed" )
158+ newID := ""
159+ if group == api .ServerGroupAgents {
160+ newID = m .ID // Agents cannot (yet) be replaced with new IDs
161+ }
162+ plan = append (plan ,
163+ api .NewAction (api .ActionTypeRemoveMember , group , m .ID ),
164+ api .NewAction (api .ActionTypeAddMember , group , newID ),
165+ )
113166 }
114167 return nil
115168 })
116169
170+ // Ensure that we were able to get agency info
171+ if len (plan ) == 0 && agencyErr != nil {
172+ log .Err (agencyErr ).Msg ("unable to build further plan without access to agency" )
173+ return plan , false
174+ }
175+
117176 // Check for cleaned out dbserver in created state
118177 for _ , m := range status .Members .DBServers {
119178 if plan .IsEmpty () && m .Phase .IsCreatedOrDrain () && m .Conditions .IsTrue (api .ConditionTypeCleanedOut ) {
0 commit comments