Skip to content

Commit 5ed5e4d

Browse files
committed
fix: build the mc status correctly
Signed-off-by: Zhiying Lin <[email protected]>
1 parent 6f3e972 commit 5ed5e4d

File tree

3 files changed

+260
-48
lines changed

3 files changed

+260
-48
lines changed

apis/cluster/v1beta1/membercluster_types.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,17 @@ func (m *MemberCluster) GetAgentStatus(agentType AgentType) *AgentStatus {
244244
return nil
245245
}
246246

247+
// SetAgentStatus is used to set the agentStatus for a given agentType.
248+
func (m *MemberCluster) SetAgentStatus(agentType AgentType, status AgentStatus) {
249+
for i, s := range m.Status.AgentStatus {
250+
if s.Type == agentType {
251+
m.Status.AgentStatus[i] = status
252+
return
253+
}
254+
}
255+
m.Status.AgentStatus = append(m.Status.AgentStatus, status)
256+
}
257+
247258
// GetAgentCondition queries the conditions in an agent status for a specific condition type.
248259
func (m *MemberCluster) GetAgentCondition(agentType AgentType, conditionType AgentConditionType) *metav1.Condition {
249260
if s := m.GetAgentStatus(agentType); s != nil {

pkg/controllers/membercluster/v1beta1/membercluster_controller.go

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -515,25 +515,46 @@ func (r *Reconciler) syncInternalMemberClusterStatus(imc *clusterv1beta1.Interna
515515
}
516516

517517
// TODO: We didn't handle condition type: clusterv1beta1.ConditionTypeMemberClusterHealthy.
518-
// Copy Agent status and set ObservedGeneration for agent conditions.
519-
if len(imc.Status.AgentStatus) > 0 {
520-
mc.Status.AgentStatus = make([]clusterv1beta1.AgentStatus, len(imc.Status.AgentStatus))
521-
}
518+
var latestAgentStatus []clusterv1beta1.AgentStatus
519+
var memberAgentStatusUpdated bool
522520
for i := range imc.Status.AgentStatus {
523-
mc.Status.AgentStatus[i] = *imc.Status.AgentStatus[i].DeepCopy()
524-
// Set ObservedGeneration for each agent condition.
525-
for j := range mc.Status.AgentStatus[i].Conditions {
526-
mc.Status.AgentStatus[i].Conditions[j].ObservedGeneration = mc.GetGeneration()
521+
if imc.Status.AgentStatus[i].Conditions == nil {
522+
err := controller.NewUnexpectedBehaviorError(fmt.Errorf("find an unexpected agent status for member cluster %s", mc.Name))
523+
klog.ErrorS(controller.NewUnexpectedBehaviorError(err), "Skipping invalid agent status", "memberCluster", klog.KObj(mc), "agentStatus", imc.Status.AgentStatus[i].Type)
524+
continue // skip this invalid agent status
525+
}
526+
527+
if imc.Status.AgentStatus[i].Conditions[0].ObservedGeneration < imc.GetGeneration() { // assuming the agent status conditions are always updated together
528+
klog.V(2).InfoS("Skipping stale agent status", "memberCluster", klog.KObj(mc), "agentType", imc.Status.AgentStatus[i].Type)
529+
continue // skip stale agent status
527530
}
531+
agentType := imc.Status.AgentStatus[i].Type
532+
if agentType == clusterv1beta1.MemberAgent {
533+
memberAgentStatusUpdated = true
534+
}
535+
536+
mcAgentStatus := *imc.Status.AgentStatus[i].DeepCopy()
537+
for j := range mcAgentStatus.Conditions {
538+
mcAgentStatus.Conditions[j].ObservedGeneration = mc.GetGeneration() // using the mc generation
539+
}
540+
latestAgentStatus = append(latestAgentStatus, mcAgentStatus)
541+
mc.SetAgentStatus(agentType, mcAgentStatus)
542+
}
543+
544+
r.aggregateJoinedCondition(latestAgentStatus, mc)
545+
if !memberAgentStatusUpdated {
546+
klog.V(2).InfoS("Member agent status not found in internal member cluster status, skip updating member cluster status related fields", "memberCluster", klog.KObj(mc))
547+
return
528548
}
529549

530-
r.aggregateJoinedCondition(mc)
550+
// The remaining fields are only updated when member-agent is updated.
531551
// Copy resource usages.
532552
mc.Status.ResourceUsage = imc.Status.ResourceUsage
533553
// Copy additional conditions.
554+
// Right now all the additional conditions are reported by the member-agent.
534555
for idx := range imc.Status.Conditions {
535556
cond := imc.Status.Conditions[idx]
536-
cond.ObservedGeneration = mc.GetGeneration()
557+
cond.ObservedGeneration = mc.GetGeneration() // using the mc generation
537558
meta.SetStatusCondition(&mc.Status.Conditions, cond)
538559
}
539560
// Copy the cluster properties.
@@ -564,32 +585,33 @@ func (r *Reconciler) updateMemberClusterStatus(ctx context.Context, mc *clusterv
564585
}
565586

566587
// aggregateJoinedCondition is used to calculate and mark the joined or left status for member cluster based on join conditions from all agents.
567-
func (r *Reconciler) aggregateJoinedCondition(mc *clusterv1beta1.MemberCluster) {
588+
// agentStatus contains the latest status reported by the agents based on the latest mc spec.
589+
func (r *Reconciler) aggregateJoinedCondition(agentStatus []clusterv1beta1.AgentStatus, mc *clusterv1beta1.MemberCluster) {
568590
klog.V(4).InfoS("Aggregate joined condition from all agents", "memberCluster", klog.KObj(mc))
569591
var unknownMessage string
570-
if len(mc.Status.AgentStatus) < len(r.agents) {
571-
unknownMessage = fmt.Sprintf("Member cluster %s has not reported all the expected agents, expected %d, got %d", mc.Name, len(r.agents), len(mc.Status.AgentStatus))
592+
if len(agentStatus) < len(r.agents) {
593+
unknownMessage = fmt.Sprintf("Member cluster %s has not reported all the expected agents, expected %d, got %d", mc.Name, len(r.agents), len(agentStatus))
572594
markMemberClusterUnknown(r.recorder, mc, unknownMessage)
573595
return
574596
}
575597
joined := true
576598
left := true
577599
reportedAgents := make(map[clusterv1beta1.AgentType]bool)
578-
for _, agentStatus := range mc.Status.AgentStatus {
579-
if !r.agents[agentStatus.Type] {
580-
_ = controller.NewUnexpectedBehaviorError(fmt.Errorf("find an unexpected agent type %s for member cluster %s", agentStatus.Type, mc.Name))
600+
for _, status := range agentStatus {
601+
if !r.agents[status.Type] {
602+
_ = controller.NewUnexpectedBehaviorError(fmt.Errorf("find an unexpected agent type %s for member cluster %s", status.Type, mc.Name))
581603
continue // ignore any unexpected agent type
582604
}
583-
condition := meta.FindStatusCondition(agentStatus.Conditions, string(clusterv1beta1.AgentJoined))
605+
condition := meta.FindStatusCondition(status.Conditions, string(clusterv1beta1.AgentJoined))
584606
if condition == nil {
585-
unknownMessage = fmt.Sprintf("Member cluster %s has not reported the join condition for agent %s", mc.Name, agentStatus.Type)
607+
unknownMessage = fmt.Sprintf("Member cluster %s has not reported the join condition for agent %s", mc.Name, status.Type)
586608
markMemberClusterUnknown(r.recorder, mc, unknownMessage)
587609
return
588610
}
589611

590612
joined = joined && condition.Status == metav1.ConditionTrue
591613
left = left && condition.Status == metav1.ConditionFalse
592-
reportedAgents[agentStatus.Type] = true
614+
reportedAgents[status.Type] = true
593615
}
594616

595617
if len(reportedAgents) < len(r.agents) {

0 commit comments

Comments
 (0)