Skip to content

Commit e9afeeb

Browse files
author
Arvind Thirumurugan
committed
fix new member cluster helm install
Signed-off-by: Arvind Thirumurugan <[email protected]>
1 parent 5ad42da commit e9afeeb

File tree

9 files changed

+186
-38
lines changed

9 files changed

+186
-38
lines changed

charts/member-agent/templates/crds/placement.kubernetes-fleet.io_metriccollectors.yaml

Lines changed: 0 additions & 1 deletion
This file was deleted.

cmd/memberagent/main.go

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ import (
5151
clusterv1beta1 "github.com/kubefleet-dev/kubefleet/apis/cluster/v1beta1"
5252
placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1"
5353
imcv1beta1 "github.com/kubefleet-dev/kubefleet/pkg/controllers/internalmembercluster/v1beta1"
54-
"github.com/kubefleet-dev/kubefleet/pkg/controllers/metriccollector"
5554
"github.com/kubefleet-dev/kubefleet/pkg/controllers/workapplier"
5655
"github.com/kubefleet-dev/kubefleet/pkg/propertyprovider"
5756
"github.com/kubefleet-dev/kubefleet/pkg/propertyprovider/azure"
@@ -465,15 +464,15 @@ func Start(ctx context.Context, hubCfg, memberConfig *rest.Config, hubOpts, memb
465464
return fmt.Errorf("failed to set up InternalMemberCluster v1beta1 controller with the controller manager: %w", err)
466465
}
467466

468-
// Set up the MetricCollector controller.
469-
mcReconciler := &metriccollector.Reconciler{
470-
MemberClient: memberMgr.GetClient(),
471-
HubClient: hubMgr.GetClient(),
472-
}
473-
if err := mcReconciler.SetupWithManager(memberMgr); err != nil {
474-
klog.ErrorS(err, "Failed to set up MetricCollector controller with the controller manager")
475-
return fmt.Errorf("failed to set up MetricCollector controller with the controller manager: %w", err)
476-
}
467+
// // Set up the MetricCollector controller.
468+
// mcReconciler := &metriccollector.Reconciler{
469+
// MemberClient: memberMgr.GetClient(),
470+
// HubClient: hubMgr.GetClient(),
471+
// }
472+
// if err := mcReconciler.SetupWithManager(memberMgr); err != nil {
473+
// klog.ErrorS(err, "Failed to set up MetricCollector controller with the controller manager")
474+
// return fmt.Errorf("failed to set up MetricCollector controller with the controller manager: %w", err)
475+
// }
477476
}
478477

479478
klog.InfoS("starting hub manager")

pkg/controllers/approvalrequest/controller.go

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,13 @@ func (r *Reconciler) reconcileApprovalRequestObj(ctx context.Context, approvalRe
109109
return r.handleDelete(ctx, approvalReqObj, isClusterScoped)
110110
}
111111

112+
// Stop reconciling early if the approval request has already been approved (Approved condition is True).
113+
approvedCond := meta.FindStatusCondition(approvalReqObj.GetApprovalRequestStatus().Conditions, string(placementv1beta1.ApprovalRequestConditionApproved))
114+
if approvedCond != nil && approvedCond.Status == metav1.ConditionTrue {
115+
klog.V(2).InfoS("ApprovalRequest has been approved, stopping reconciliation", "approvalRequest", approvalReqRef)
116+
return ctrl.Result{}, nil
117+
}
118+
112119
// Add finalizer if not present
113120
if !controllerutil.ContainsFinalizer(obj, metricCollectorFinalizer) {
114121
controllerutil.AddFinalizer(obj, metricCollectorFinalizer)
@@ -119,13 +126,6 @@ func (r *Reconciler) reconcileApprovalRequestObj(ctx context.Context, approvalRe
119126
klog.V(2).InfoS("Added finalizer to ApprovalRequest", "approvalRequest", approvalReqRef)
120127
}
121128

122-
// Check if the approval request is approved
123-
approvedCond := meta.FindStatusCondition(approvalReqObj.GetApprovalRequestStatus().Conditions, string(placementv1beta1.ApprovalRequestConditionApproved))
124-
if approvedCond != nil && approvedCond.Status == metav1.ConditionTrue {
125-
klog.V(2).InfoS("ApprovalRequest has been approved, skipping", "approvalRequest", approvalReqRef)
126-
return ctrl.Result{}, nil
127-
}
128-
129129
// Get the UpdateRun (ClusterStagedUpdateRun or StagedUpdateRun)
130130
spec := approvalReqObj.GetApprovalRequestSpec()
131131
updateRunName := spec.TargetUpdateRun
@@ -190,13 +190,13 @@ func (r *Reconciler) reconcileApprovalRequestObj(ctx context.Context, approvalRe
190190
klog.V(2).InfoS("Successfully ensured MetricCollector resources", "approvalRequest", approvalReqRef, "clusters", clusterNames)
191191

192192
// Check workload health and approve if all workloads are healthy
193-
result, err := r.checkWorkloadHealthAndApprove(ctx, approvalReqObj, clusterNames, updateRunName, stageName)
194-
if err != nil {
193+
if err := r.checkWorkloadHealthAndApprove(ctx, approvalReqObj, clusterNames, updateRunName, stageName); err != nil {
195194
klog.ErrorS(err, "Failed to check workload health", "approvalRequest", approvalReqRef)
196-
return ctrl.Result{RequeueAfter: 30 * time.Second}, err
195+
return ctrl.Result{RequeueAfter: 15 * time.Second}, err
197196
}
198197

199-
return result, nil
198+
// Requeue after 15 seconds to check again (will stop if approved in next reconciliation)
199+
return ctrl.Result{RequeueAfter: 15 * time.Second}, nil
200200
}
201201

202202
// ensureMetricCollectorResources creates the Namespace, MetricCollector, CRP, and ResourceOverrides
@@ -360,7 +360,7 @@ func (r *Reconciler) checkWorkloadHealthAndApprove(
360360
approvalReqObj placementv1beta1.ApprovalRequestObj,
361361
clusterNames []string,
362362
updateRunName, stageName string,
363-
) (ctrl.Result, error) {
363+
) error {
364364
obj := approvalReqObj.(client.Object)
365365
approvalReqRef := klog.KObj(obj)
366366

@@ -370,12 +370,12 @@ func (r *Reconciler) checkWorkloadHealthAndApprove(
370370
workloadTrackerList := &placementv1beta1.WorkloadTrackerList{}
371371
if err := r.Client.List(ctx, workloadTrackerList); err != nil {
372372
klog.ErrorS(err, "Failed to list WorkloadTracker", "approvalRequest", approvalReqRef)
373-
return ctrl.Result{}, fmt.Errorf("failed to list WorkloadTracker: %w", err)
373+
return fmt.Errorf("failed to list WorkloadTracker: %w", err)
374374
}
375375

376376
if len(workloadTrackerList.Items) == 0 {
377377
klog.V(2).InfoS("No WorkloadTracker found, skipping health check", "approvalRequest", approvalReqRef)
378-
return ctrl.Result{}, nil
378+
return nil
379379
}
380380

381381
// Use the first WorkloadTracker (assuming there's only one)
@@ -384,7 +384,7 @@ func (r *Reconciler) checkWorkloadHealthAndApprove(
384384

385385
if len(workloadTracker.Workloads) == 0 {
386386
klog.V(2).InfoS("WorkloadTracker has no workloads defined, skipping health check", "approvalRequest", approvalReqRef)
387-
return ctrl.Result{}, nil
387+
return nil
388388
}
389389

390390
// MetricCollectorReport name is same as MetricCollector name
@@ -426,7 +426,7 @@ func (r *Reconciler) checkWorkloadHealthAndApprove(
426426
"cluster", clusterName,
427427
"report", metricCollectorName,
428428
"namespace", reportNamespace)
429-
return ctrl.Result{}, fmt.Errorf("failed to get MetricCollectorReport for cluster %s: %w", clusterName, err)
429+
return fmt.Errorf("failed to get MetricCollectorReport for cluster %s: %w", clusterName, err)
430430
}
431431

432432
klog.V(2).InfoS("Found MetricCollectorReport",
@@ -500,7 +500,7 @@ func (r *Reconciler) checkWorkloadHealthAndApprove(
500500
approvalReqObj.SetApprovalRequestStatus(*status)
501501
if err := r.Client.Status().Update(ctx, obj); err != nil {
502502
klog.ErrorS(err, "Failed to approve ApprovalRequest", "approvalRequest", approvalReqRef)
503-
return ctrl.Result{}, fmt.Errorf("failed to approve ApprovalRequest: %w", err)
503+
return fmt.Errorf("failed to approve ApprovalRequest: %w", err)
504504
}
505505

506506
klog.InfoS("Successfully approved ApprovalRequest", "approvalRequest", approvalReqRef)
@@ -509,17 +509,16 @@ func (r *Reconciler) checkWorkloadHealthAndApprove(
509509
klog.V(2).InfoS("ApprovalRequest already approved", "approvalRequest", approvalReqRef)
510510
}
511511

512-
// Stop reconciliation since we're approved
513-
return ctrl.Result{}, nil
512+
// Approval successful or already approved
513+
return nil
514514
}
515515

516-
// Not all workloads are healthy yet, requeue
517-
klog.V(2).InfoS("Not all workloads are healthy yet, will requeue",
516+
// Not all workloads are healthy yet, log details and return nil (reconcile will requeue)
517+
klog.V(2).InfoS("Not all workloads are healthy yet",
518518
"approvalRequest", approvalReqRef,
519519
"unhealthyDetails", unhealthyDetails)
520520

521-
// Requeue after 30 seconds to check again
522-
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
521+
return nil
523522
}
524523

525524
// handleDelete handles the deletion of an ApprovalRequest or ClusterApprovalRequest

standalone-metric-collector/charts/metric-collector/templates/rbac-member.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ rules:
99
# MetricCollector CRD access on member cluster
1010
- apiGroups: ["placement.kubernetes-fleet.io"]
1111
resources: ["metriccollectors"]
12-
verbs: ["get", "list", "watch"]
12+
verbs: ["get", "list", "watch", "update", "patch"]
1313
- apiGroups: ["placement.kubernetes-fleet.io"]
1414
resources: ["metriccollectors/status"]
1515
verbs: ["update", "patch"]

standalone-metric-collector/charts/metric-collector/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Controller image configuration
66
image:
7-
repository: ghcr.io/kubefleet-dev/metric-collector
7+
repository: metric-collector
88
pullPolicy: IfNotPresent
99
tag: "latest"
1010

standalone-metric-collector/cmd/metriccollector/main.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,14 @@ func buildHubConfig() (*rest.Config, error) {
8585
// Check for custom headers
8686
customHeader := os.Getenv("HUB_KUBE_HEADER")
8787

88+
// Check TLS insecure flag
89+
tlsInsecure := os.Getenv("TLS_INSECURE") == "true"
90+
8891
// Initialize hub config
8992
hubConfig := &rest.Config{
9093
Host: hubURL,
9194
TLSClientConfig: rest.TLSClientConfig{
92-
Insecure: false,
95+
Insecure: tlsInsecure,
9396
},
9497
WrapTransport: func(rt http.RoundTripper) http.RoundTripper {
9598
if customHeader != "" {

standalone-metric-collector/docker/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM golang:1.21 AS builder
1+
FROM golang:1.24 AS builder
22
WORKDIR /workspace
33

44
# Copy go mod files
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: placement.kubernetes-fleet.io/v1beta1
2+
kind: MetricCollector
3+
metadata:
4+
name: mc-example-run-staging
5+
spec:
6+
prometheusURL: "http://prometheus.test-ns:9090"
7+
promQLQuery: "workload_health"
8+
reportNamespace: "fleet-member-cluster-1"
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
#!/bin/bash
#
# Builds the metric-collector image, loads it into a kind member cluster, grants
# the collector access to the hub cluster through a ServiceAccount token, and
# installs the metric-collector helm chart on the member cluster.
#
# The script uses the relative paths docker/Dockerfile and ./charts/metric-collector,
# so it must be run from the directory that contains both.
#
# Every configuration value below can be overridden from the environment; the
# defaults match a local kind setup with a "hub" and a "cluster-1" cluster.
set -euo pipefail

# Configuration
HUB_CONTEXT="${HUB_CONTEXT:-kind-hub}"
MEMBER_CONTEXT="${MEMBER_CONTEXT:-kind-cluster-1}"
MEMBER_CLUSTER_NAME="${MEMBER_CLUSTER_NAME:-cluster-1}"
MEMBER_NAMESPACE="${MEMBER_NAMESPACE:-default}"
HUB_NAMESPACE="${HUB_NAMESPACE:-fleet-member-${MEMBER_CLUSTER_NAME}}"
PROMETHEUS_URL="${PROMETHEUS_URL:-http://prometheus.test-ns:9090}"
IMAGE_NAME="${IMAGE_NAME:-metric-collector}"
IMAGE_TAG="${IMAGE_TAG:-latest}"

# kind names kubeconfig contexts "kind-<cluster>" and the control-plane node
# container "<cluster>-control-plane", so both names are derived from the contexts.
HUB_KIND_CLUSTER="${HUB_CONTEXT#kind-}"
MEMBER_KIND_CLUSTER="${MEMBER_CONTEXT#kind-}"
HUB_NODE_CONTAINER="${HUB_KIND_CLUSTER}-control-plane"

# Get hub cluster API server URL dynamically using docker inspect (following kubefleet pattern)
HUB_NODE_IP="$(docker inspect "${HUB_NODE_CONTAINER}" --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}')"
if [ -z "${HUB_NODE_IP}" ]; then
  echo "Error: Failed to determine the IP address of ${HUB_NODE_CONTAINER}"
  exit 1
fi
HUB_API_SERVER="https://${HUB_NODE_IP}:6443"

# Map the kernel architecture name to the Go/Docker one once, for both flags below.
TARGET_ARCH="$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/')"

echo "=== Installing MetricCollector on member cluster ==="
echo "Hub cluster: ${HUB_CONTEXT}"
echo "Hub API server: ${HUB_API_SERVER}"
echo "Member cluster: ${MEMBER_CONTEXT}"
echo "Member cluster name: ${MEMBER_CLUSTER_NAME}"
echo ""

# Step 0: Build and load Docker image
echo "Step 0: Building and loading Docker image..."
docker buildx build \
  --file docker/Dockerfile \
  --output=type=docker \
  --platform="linux/${TARGET_ARCH}" \
  --tag "${IMAGE_NAME}:${IMAGE_TAG}" \
  --build-arg GOARCH="${TARGET_ARCH}" \
  --build-arg GOOS=linux \
  .
kind load docker-image "${IMAGE_NAME}:${IMAGE_TAG}" --name "${MEMBER_KIND_CLUSTER}"
echo "✓ Docker image built and loaded into kind cluster"
echo ""

# Step 1: Setup RBAC on hub cluster
# "create --dry-run=client | apply" keeps every step idempotent across re-runs.
echo "Step 1: Setting up RBAC on hub cluster..."
kubectl --context="${HUB_CONTEXT}" create namespace "${HUB_NAMESPACE}" --dry-run=client -o yaml | kubectl --context="${HUB_CONTEXT}" apply -f -
kubectl --context="${HUB_CONTEXT}" create serviceaccount metric-collector-sa -n "${HUB_NAMESPACE}" --dry-run=client -o yaml | kubectl --context="${HUB_CONTEXT}" apply -f -

cat <<EOF | kubectl --context="${HUB_CONTEXT}" apply -f -
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: metric-collector-hub-access
rules:
- apiGroups: ["placement.kubernetes-fleet.io"]
  resources: ["metriccollectorreports"]
  verbs: ["get", "list", "create", "update", "patch", "delete"]
- apiGroups: [""]
  resources: ["namespaces"]
  verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: metric-collector-${MEMBER_CLUSTER_NAME}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: metric-collector-hub-access
subjects:
- kind: ServiceAccount
  name: metric-collector-sa
  namespace: ${HUB_NAMESPACE}
EOF

echo "✓ RBAC configured on hub cluster"
echo ""

# Step 2: Create token secret on hub cluster
echo "Step 2: Creating token secret on hub cluster..."
cat <<EOF | kubectl --context="${HUB_CONTEXT}" apply -f -
apiVersion: v1
kind: Secret
metadata:
  name: metric-collector-token
  namespace: ${HUB_NAMESPACE}
  annotations:
    kubernetes.io/service-account.name: metric-collector-sa
type: kubernetes.io/service-account-token
EOF

# The token is filled in asynchronously after the Secret is created, so poll for
# it (up to ~30s) instead of sleeping for a fixed interval and hoping it is there.
echo "Waiting for token to be created..."
TOKEN=""
for _ in $(seq 1 30); do
  TOKEN="$(kubectl --context="${HUB_CONTEXT}" get secret metric-collector-token -n "${HUB_NAMESPACE}" -o jsonpath='{.data.token}' 2>/dev/null | base64 -d || true)"
  if [ -n "${TOKEN}" ]; then
    break
  fi
  sleep 1
done

if [ -z "${TOKEN}" ]; then
  echo "Error: Failed to get token from hub cluster"
  exit 1
fi

echo "✓ Token created on hub cluster"
echo ""

# Step 3: Create namespace and secrets on member cluster
echo "Step 3: Creating secrets on member cluster..."

# Make sure the target namespace exists when MEMBER_NAMESPACE is not "default".
kubectl --context="${MEMBER_CONTEXT}" create namespace "${MEMBER_NAMESPACE}" --dry-run=client -o yaml | kubectl --context="${MEMBER_CONTEXT}" apply -f -

kubectl --context="${MEMBER_CONTEXT}" create secret generic hub-token \
  --from-literal=token="${TOKEN}" \
  -n "${MEMBER_NAMESPACE}" \
  --dry-run=client -o yaml | kubectl --context="${MEMBER_CONTEXT}" apply -f -

echo "✓ Secrets created on member cluster"
echo ""

# Step 4: Install helm chart on member cluster (includes CRD)
# hubCluster.tls.insecure=true skips hub certificate verification; this is only
# appropriate for local kind clusters.
echo "Step 4: Installing helm chart on member cluster..."
helm upgrade --install metric-collector ./charts/metric-collector \
  --kube-context="${MEMBER_CONTEXT}" \
  --namespace "${MEMBER_NAMESPACE}" \
  --set memberCluster.name="${MEMBER_CLUSTER_NAME}" \
  --set hubCluster.url="${HUB_API_SERVER}" \
  --set hubCluster.tls.insecure=true \
  --set prometheus.url="${PROMETHEUS_URL}" \
  --set image.repository="${IMAGE_NAME}" \
  --set image.tag="${IMAGE_TAG}" \
  --set image.pullPolicy=IfNotPresent

echo "✓ Helm chart installed on member cluster"
echo ""

# Step 5: Verify installation
echo "Step 5: Verifying installation..."
echo "Checking pods on member cluster..."
kubectl --context="${MEMBER_CONTEXT}" get pods -n "${MEMBER_NAMESPACE}"

echo ""
echo "=== Installation Complete ==="
echo ""
echo "To check logs:"
echo "  kubectl --context=${MEMBER_CONTEXT} logs -n ${MEMBER_NAMESPACE} -l app.kubernetes.io/name=metric-collector -f"
echo ""
echo "To check MetricCollectorReports on hub:"
echo "  kubectl --context=${HUB_CONTEXT} get metriccollectorreports -n ${HUB_NAMESPACE}"
echo ""

0 commit comments

Comments
 (0)