Skip to content

Commit e87c2e0

Browse files
authored
MGMT-19762: Don't use a disconnected Agent (#170)
Previously when selecting an Agent for the AgentMachine, there were no checks for whether or not the Agent was still connected. This is a problem if an Agent becomes disconnected and the controller selects it to be installed into the hosted cluster since the Agent cannot be contacted. This modifies the valid agent function so that an Agent is valid only if it meets the following criteria: 1. It is not already bound to a ClusterDeployment 2. It is connected 3. It is approved 4. Its validations are passing
1 parent 6cf9df6 commit e87c2e0

File tree

3 files changed

+44
-35
lines changed

3 files changed

+44
-35
lines changed

config/crd/bases/capi-provider.agent-install.openshift.io_agentmachines.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@ spec:
105105
description: The machine address.
106106
type: string
107107
type:
108-
description: Machine address type, one of Hostname, ExternalIP
109-
or InternalIP.
108+
description: Machine address type, one of Hostname, ExternalIP,
109+
InternalIP, ExternalDNS or InternalDNS.
110110
type: string
111111
required:
112112
- address
@@ -310,8 +310,8 @@ spec:
310310
description: The machine address.
311311
type: string
312312
type:
313-
description: Machine address type, one of Hostname, ExternalIP
314-
or InternalIP.
313+
description: Machine address type, one of Hostname, ExternalIP,
314+
InternalIP, ExternalDNS or InternalDNS.
315315
type: string
316316
required:
317317
- address

controllers/agentmachine_controller.go

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,8 @@ func (r *AgentMachineReconciler) findAgent(ctx context.Context, log logrus.Field
323323
clusterDeploymentRef capiproviderv1.ClusterDeploymentReference, machineConfigPool string,
324324
ignitionTokenSecretRef *aiv1beta1.IgnitionEndpointTokenReference, ignitionEndpointHTTPHeaders map[string]string) (*aiv1beta1.Agent, error) {
325325

326+
// In the event this is a restored hub cluster, there will already be Agents
327+
// that have been attached to this AgentMachine, so we want to reassociate them
326328
foundAgent, err := r.findAgentWithAgentMachineLabel(ctx, log, agentMachine)
327329
if err != nil {
328330
log.WithError(err).Error("failed while finding agents")
@@ -521,20 +523,25 @@ func (r *AgentMachineReconciler) processBootstrapDataSecret(ctx context.Context,
521523
return machineConfigPool, ignitionTokenSecretRef, ignitionEndpointHTTPHeaders, nil
522524
}
523525

526+
// An Agent is considered valid only if it meets all of the following:
527+
// 1. it is not bound to a ClusterDeployment
528+
// 2. it is still connected (contacting the hub)
529+
// 3. it is approved
530+
// 4. its validations are passing
524531
func isValidAgent(agent *aiv1beta1.Agent) bool {
525-
if !agent.Spec.Approved {
526-
return false
527-
}
532+
var valid, notBound, connected bool
533+
528534
for _, condition := range agent.Status.Conditions {
529-
if condition.Type == aiv1beta1.BoundCondition && condition.Status != "False" {
530-
return false
531-
}
532-
if condition.Type == aiv1beta1.ValidatedCondition && condition.Status != "True" {
533-
return false
535+
switch condition.Type {
536+
case aiv1beta1.BoundCondition:
537+
notBound = condition.Status == "False"
538+
case aiv1beta1.ConnectedCondition:
539+
connected = condition.Status == "True"
540+
case aiv1beta1.ValidatedCondition:
541+
valid = condition.Status == "True"
534542
}
535543
}
536-
537-
return true
544+
return agent.Spec.Approved && valid && connected && notBound
538545
}
539546

540547
func (r *AgentMachineReconciler) updateStatus(ctx context.Context, log logrus.FieldLogger, agentMachine *capiproviderv1.AgentMachine, err error) error {

controllers/agentmachine_controller_test.go

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -189,10 +189,15 @@ func boolToConditionStatus(b bool) corev1.ConditionStatus {
189189
return corev1.ConditionFalse
190190
}
191191

192-
func newAgentWithProperties(name, namespace string, approved, bound, validated bool, labels map[string]string) *aiv1beta1.Agent {
192+
func newValidAgent(name, namespace string, labels map[string]string) *aiv1beta1.Agent {
193+
return newAgentWithProperties(name, namespace, true, false, true, true, labels)
194+
}
195+
196+
func newAgentWithProperties(name, namespace string, approved, bound, validated, connected bool, labels map[string]string) *aiv1beta1.Agent {
193197
agent := newAgent(name, namespace, aiv1beta1.AgentSpec{Approved: approved})
194198
agent.Status.Conditions = append(agent.Status.Conditions, v1.Condition{Type: aiv1beta1.BoundCondition, Status: boolToConditionStatus(bound)})
195199
agent.Status.Conditions = append(agent.Status.Conditions, v1.Condition{Type: aiv1beta1.ValidatedCondition, Status: boolToConditionStatus(validated)})
200+
agent.Status.Conditions = append(agent.Status.Conditions, v1.Condition{Type: aiv1beta1.ConnectedCondition, Status: boolToConditionStatus(connected)})
196201
agent.Status.Conditions = append(agent.Status.Conditions, v1.Condition{Type: aiv1beta1.SpecSyncedCondition, Status: boolToConditionStatus(true)})
197202
agent.Status.Conditions = append(agent.Status.Conditions, v1.Condition{Type: aiv1beta1.RequirementsMetCondition, Status: boolToConditionStatus(validated)})
198203
agent.Status.Conditions = append(agent.Status.Conditions, v1.Condition{Type: aiv1beta1.InstalledCondition, Status: boolToConditionStatus(false), Reason: aiv1beta1.InstallationNotStartedMsg, Message: ""})
@@ -313,16 +318,17 @@ var _ = Describe("agentmachine reconcile", func() {
313318
badLabels4 := map[string]string{"location": "datacenter2"}
314319
badLabels5 := map[string]string{"hasGpu": "true"}
315320

316-
Expect(c.Create(ctx, newAgentWithProperties("agent-0", testNamespace, false, false, true, goodLabels))).To(BeNil()) // Agent0: not approved
317-
Expect(c.Create(ctx, newAgentWithProperties("agent-1", testNamespace, true, true, true, goodLabels))).To(BeNil()) // Agent1: already bound
318-
Expect(c.Create(ctx, newAgentWithProperties("agent-2", testNamespace, true, true, true, badLabels1))).To(BeNil()) // Agent2: bad labels
319-
Expect(c.Create(ctx, newAgentWithProperties("agent-3", testNamespace, true, true, false, goodLabels))).To(BeNil()) // Agent3: validations are failing
320-
Expect(c.Create(ctx, newAgentWithProperties("agent-4", testNamespace, true, false, true, badLabels2))).To(BeNil()) // Agent4: bad labels
321-
Expect(c.Create(ctx, newAgentWithProperties("agent-5", testNamespace, true, false, true, badLabels3))).To(BeNil()) // Agent5: bad labels
322-
Expect(c.Create(ctx, newAgentWithProperties("agent-6", testNamespace, true, false, true, badLabels4))).To(BeNil()) // Agent6: bad labels
323-
Expect(c.Create(ctx, newAgentWithProperties("agent-7", testNamespace, true, false, true, badLabels5))).To(BeNil()) // Agent7: bad labels
324-
Expect(c.Create(ctx, newAgentWithProperties("agent-8", testNamespace, true, false, true, otherAgentMachineLabel))).To(BeNil()) // Agent8: should be skipped
325-
Expect(c.Create(ctx, newAgentWithProperties("agent-9", testNamespace, true, false, true, goodLabels))).To(BeNil()) // Agent9: the chosen one
321+
Expect(c.Create(ctx, newAgentWithProperties("agent-0", testNamespace, false, false, true, true, goodLabels))).To(BeNil()) // Agent0: not approved
322+
Expect(c.Create(ctx, newAgentWithProperties("agent-1", testNamespace, true, true, true, true, goodLabels))).To(BeNil()) // Agent1: already bound
323+
Expect(c.Create(ctx, newAgentWithProperties("agent-2", testNamespace, true, true, true, true, badLabels1))).To(BeNil()) // Agent2: bad labels
324+
Expect(c.Create(ctx, newAgentWithProperties("agent-3", testNamespace, true, true, false, true, goodLabels))).To(BeNil()) // Agent3: validations are failing
325+
Expect(c.Create(ctx, newAgentWithProperties("agent-4", testNamespace, true, false, true, true, badLabels2))).To(BeNil()) // Agent4: bad labels
326+
Expect(c.Create(ctx, newAgentWithProperties("agent-5", testNamespace, true, false, true, true, badLabels3))).To(BeNil()) // Agent5: bad labels
327+
Expect(c.Create(ctx, newAgentWithProperties("agent-6", testNamespace, true, false, true, true, badLabels4))).To(BeNil()) // Agent6: bad labels
328+
Expect(c.Create(ctx, newAgentWithProperties("agent-7", testNamespace, true, false, true, true, badLabels5))).To(BeNil()) // Agent7: bad labels
329+
Expect(c.Create(ctx, newAgentWithProperties("agent-8", testNamespace, true, false, true, true, otherAgentMachineLabel))).To(BeNil()) // Agent8: should be skipped
330+
Expect(c.Create(ctx, newAgentWithProperties("agent-9", testNamespace, true, false, true, false, goodLabels))).To(BeNil()) // Agent9: disconnected
331+
Expect(c.Create(ctx, newValidAgent("agent-10", testNamespace, goodLabels))).To(BeNil()) // Agent10: the chosen one
326332

327333
// find agent
328334
result, err := amr.Reconcile(ctx, agentMachineRequest)
@@ -338,7 +344,7 @@ var _ = Describe("agentmachine reconcile", func() {
338344
Expect(conditions.Get(agentMachine, capiproviderv1.InstalledCondition).Severity).To(BeEquivalentTo(clusterv1.ConditionSeverityInfo))
339345
Expect(conditions.Get(agentMachine, clusterv1.ReadyCondition).Status).To(BeEquivalentTo("False"))
340346

341-
Expect(agentMachine.Status.AgentRef.Name).To(BeEquivalentTo("agent-9"))
347+
Expect(agentMachine.Status.AgentRef.Name).To(BeEquivalentTo("agent-10"))
342348
Expect(len(agentMachine.Status.Addresses)).To(BeEquivalentTo(4))
343349
expectedAddresses := []string{"1.2.3.4", "2.3.4.5", "3.4.5.6", "agent-hostname"}
344350
expectedTypes := []string{string(clusterv1.MachineExternalIP), string(clusterv1.MachineInternalDNS)}
@@ -348,7 +354,7 @@ var _ = Describe("agentmachine reconcile", func() {
348354
}
349355

350356
agent := &aiv1beta1.Agent{}
351-
Expect(c.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "agent-9"}, agent)).To(BeNil())
357+
Expect(c.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "agent-10"}, agent)).To(BeNil())
352358
Expect(agent.Spec.ClusterDeploymentName.Name).To(BeEquivalentTo("cluster-deployment-agentMachine"))
353359
Expect(agent.Spec.IgnitionEndpointTokenReference.Name).To(BeEquivalentTo("agent-userdata-secret-agentMachine"))
354360
Expect(agent.Spec.IgnitionEndpointTokenReference.Namespace).To(BeEquivalentTo(testNamespace))
@@ -364,7 +370,7 @@ var _ = Describe("agentmachine reconcile", func() {
364370
Expect(c.Create(ctx, agentMachine)).To(BeNil())
365371
agentMachineRequest := newAgentMachineRequest(agentMachine)
366372

367-
Expect(c.Create(ctx, newAgentWithProperties("agent", testNamespace, true, false, true, map[string]string{}))).To(BeNil())
373+
Expect(c.Create(ctx, newAgentWithProperties("agent", testNamespace, true, false, true, true, map[string]string{}))).To(BeNil())
368374

369375
originalAgent := &aiv1beta1.Agent{}
370376
Expect(c.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "agent"}, originalAgent)).To(BeNil())
@@ -405,7 +411,7 @@ var _ = Describe("agentmachine reconcile", func() {
405411
Expect(c.Create(ctx, agentMachine)).To(BeNil())
406412
agentMachineRequest := newAgentMachineRequest(agentMachine)
407413

408-
agent := newAgentWithProperties("agent-1", testNamespace, false, false, true, map[string]string{})
414+
agent := newAgentWithProperties("agent-1", testNamespace, false, false, true, true, map[string]string{})
409415
Expect(c.Create(ctx, agent)).To(BeNil())
410416

411417
clusterDepRef := capiproviderv1.ClusterDeploymentReference{Namespace: testNamespace, Name: "my-cd"}
@@ -856,9 +862,7 @@ var _ = Describe("mapAgentToAgentMachine", func() {
856862
})
857863

858864
It("returns nothing if the agent isn't valid", func() {
859-
agent := newAgent("agent", testNamespace, aiv1beta1.AgentSpec{Approved: true})
860-
agent.Status.Conditions = append(agent.Status.Conditions, v1.Condition{Type: aiv1beta1.BoundCondition, Status: boolToConditionStatus(false)})
861-
agent.Status.Conditions = append(agent.Status.Conditions, v1.Condition{Type: aiv1beta1.ValidatedCondition, Status: boolToConditionStatus(false)})
865+
agent := newAgentWithProperties("agent", testNamespace, true, false, false, true, map[string]string{})
862866
Expect(c.Create(ctx, agent)).To(BeNil())
863867
agentMachine1, _ := newAgentMachine("agentMachine-1", testNamespace, capiproviderv1.AgentMachineSpec{}, ctx, c)
864868
Expect(c.Create(ctx, agentMachine1)).To(Succeed())
@@ -870,9 +874,7 @@ var _ = Describe("mapAgentToAgentMachine", func() {
870874
})
871875

872876
It("returns unmatched agent machines if no match is found", func() {
873-
agent := newAgent("agent", testNamespace, aiv1beta1.AgentSpec{Approved: true})
874-
agent.Status.Conditions = append(agent.Status.Conditions, v1.Condition{Type: aiv1beta1.BoundCondition, Status: boolToConditionStatus(false)})
875-
agent.Status.Conditions = append(agent.Status.Conditions, v1.Condition{Type: aiv1beta1.ValidatedCondition, Status: boolToConditionStatus(true)})
877+
agent := newValidAgent("agent", testNamespace, map[string]string{})
876878
Expect(c.Create(ctx, agent)).To(BeNil())
877879
agentMachine1, _ := newAgentMachine("agentMachine-1", testNamespace, capiproviderv1.AgentMachineSpec{}, ctx, c)
878880
Expect(c.Create(ctx, agentMachine1)).To(Succeed())

0 commit comments

Comments
 (0)