Skip to content

Commit b46de17

Browse files
Merge pull request #30307 from wangke19/fix/hypershift-disruption-test
CNTRLPLANE-1485: fix(disruption): Use the correct internal API server LB for the monitor test on ARO and bare metal HyperShift
2 parents a91d3d0 + ed55852 commit b46de17

File tree

2 files changed

+167
-12
lines changed

2 files changed

+167
-12
lines changed

pkg/monitortests/kubeapiserver/disruptioninclusterapiserver/monitortest.go

Lines changed: 129 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,30 @@ var (
6767
rbacMonitorCRBName string
6868
)
6969

70+
// HostedClusterType represents the type of cluster hosting model.
type HostedClusterType string

// Known hosting models. Each value is the literal string stored in the
// HostedClusterType.
const (
	// HostedClusterTypeStandalone represents a standard OpenShift cluster with self-hosted control plane.
	HostedClusterTypeStandalone HostedClusterType = "Standalone"
	// HostedClusterTypeAROHCP represents an ARO HCP (Azure Red Hat OpenShift Hosted Control Plane) cluster.
	HostedClusterTypeAROHCP HostedClusterType = "AROHCP"
	// HostedClusterTypeBareMetal represents a bare metal HyperShift hosted cluster.
	HostedClusterTypeBareMetal HostedClusterType = "BareMetal"
	// HostedClusterTypeOther represents other HyperShift hosted cluster types.
	HostedClusterTypeOther HostedClusterType = "Other"
)
83+
7084
type InvariantInClusterDisruption struct {
7185
namespaceName string
7286
openshiftTestsImagePullSpec string
7387
payloadImagePullSpec string
7488
notSupportedReason string
7589
replicas int32
7690
controlPlaneNodes int32
77-
78-
adminRESTConfig *rest.Config
79-
kubeClient kubernetes.Interface
91+
hostedClusterType HostedClusterType
92+
adminRESTConfig *rest.Config
93+
kubeClient kubernetes.Interface
8094
}
8195

8296
func NewInvariantInClusterDisruption(info monitortestframework.MonitorTestInitializationInfo) monitortestframework.MonitorTest {
@@ -85,6 +99,69 @@ func NewInvariantInClusterDisruption(info monitortestframework.MonitorTestInitia
8599
}
86100
}
87101

102+
// parseAdminRESTConfigHost parses the adminRESTConfig.Host URL and returns hostname and port
103+
func (i *InvariantInClusterDisruption) parseAdminRESTConfigHost() (hostname, port string, err error) {
104+
parsedURL, err := url.Parse(i.adminRESTConfig.Host)
105+
if err != nil {
106+
return "", "", fmt.Errorf("failed to parse adminRESTConfig.Host %q: %v", i.adminRESTConfig.Host, err)
107+
}
108+
109+
hostname = parsedURL.Hostname()
110+
if hostname == "" {
111+
return "", "", fmt.Errorf("no hostname found in adminRESTConfig.Host %q", i.adminRESTConfig.Host)
112+
}
113+
114+
port = parsedURL.Port()
115+
if port == "" {
116+
port = "6443" // default port
117+
}
118+
119+
return hostname, port, nil
120+
}
121+
122+
// setKubernetesServiceEnvVars sets the KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT environment variables
123+
// based on the hosted cluster type
124+
func (i *InvariantInClusterDisruption) setKubernetesServiceEnvVars(envVars []corev1.EnvVar, apiIntHost, apiIntPort string) []corev1.EnvVar {
125+
// Parse adminRESTConfig.Host once for bare metal HyperShift
126+
var bareMetalHost, bareMetalPort string
127+
var bareMetalErr error
128+
if i.hostedClusterType == HostedClusterTypeBareMetal {
129+
bareMetalHost, bareMetalPort, bareMetalErr = i.parseAdminRESTConfigHost()
130+
if bareMetalErr != nil {
131+
logrus.WithError(bareMetalErr).Errorf("Failed to parse adminRESTConfig.Host for bare metal HyperShift")
132+
}
133+
}
134+
135+
for j, env := range envVars {
136+
switch env.Name {
137+
case "KUBERNETES_SERVICE_HOST":
138+
if i.hostedClusterType == HostedClusterTypeBareMetal {
139+
if bareMetalErr != nil {
140+
envVars[j].Value = apiIntHost
141+
} else {
142+
envVars[j].Value = bareMetalHost
143+
}
144+
} else {
145+
envVars[j].Value = apiIntHost
146+
}
147+
case "KUBERNETES_SERVICE_PORT":
148+
if i.hostedClusterType == HostedClusterTypeAROHCP {
149+
// ARO HCP uses port 7443 for the internal API server load balancer
150+
envVars[j].Value = "7443"
151+
} else if i.hostedClusterType == HostedClusterTypeBareMetal {
152+
if bareMetalErr != nil {
153+
envVars[j].Value = apiIntPort
154+
} else {
155+
envVars[j].Value = bareMetalPort
156+
}
157+
} else {
158+
envVars[j].Value = apiIntPort
159+
}
160+
}
161+
}
162+
return envVars
163+
}
164+
88165
func (i *InvariantInClusterDisruption) createDeploymentAndWaitToRollout(ctx context.Context, deploymentObj *appsv1.Deployment) error {
89166
deploymentID := uuid.New().String()
90167
deploymentObj = disruptionlibrary.UpdateDeploymentENVs(deploymentObj, deploymentID, "")
@@ -113,15 +190,18 @@ func (i *InvariantInClusterDisruption) createDeploymentAndWaitToRollout(ctx cont
113190
return nil
114191
}
115192

116-
func (i *InvariantInClusterDisruption) createInternalLBDeployment(ctx context.Context, apiIntHost string) error {
193+
func (i *InvariantInClusterDisruption) createInternalLBDeployment(ctx context.Context, apiIntHost, apiIntPort string) error {
117194
deploymentObj := resourceread.ReadDeploymentV1OrDie(internalLBDeploymentYaml)
118195
deploymentObj.SetNamespace(i.namespaceName)
119-
deploymentObj.Spec.Template.Spec.Containers[0].Env[0].Value = apiIntHost
120196
// set amount of deployment replicas to make sure it runs on all nodes
121197
deploymentObj.Spec.Replicas = &i.replicas
122198
// we need to use the openshift-tests image of the destination during an upgrade.
123199
deploymentObj.Spec.Template.Spec.Containers[0].Image = i.openshiftTestsImagePullSpec
124200

201+
// Set the correct host and port for internal API server based on cluster type
202+
deploymentObj.Spec.Template.Spec.Containers[0].Env = i.setKubernetesServiceEnvVars(
203+
deploymentObj.Spec.Template.Spec.Containers[0].Env, apiIntHost, apiIntPort)
204+
125205
err := i.createDeploymentAndWaitToRollout(ctx, deploymentObj)
126206
if err != nil {
127207
return err
@@ -304,9 +384,10 @@ func (i *InvariantInClusterDisruption) StartCollection(ctx context.Context, admi
304384
var err error
305385
log := logrus.WithField("monitorTest", "apiserver-incluster-availability").WithField("namespace", i.namespaceName).WithField("func", "StartCollection")
306386

307-
// Check for ARO HCP and skip if detected
387+
// Determine hosted cluster type
308388
oc := exutil.NewCLI("apiserver-incluster-availability").AsAdmin()
309-
var isAROHCPcluster bool
389+
i.hostedClusterType = HostedClusterTypeStandalone // Default to standalone
390+
310391
isHypershift, _ := exutil.IsHypershift(ctx, oc.AdminConfigClient())
311392
if isHypershift {
312393
_, hcpNamespace, err := exutil.GetHypershiftManagementClusterConfigAndNamespace()
@@ -318,12 +399,29 @@ func (i *InvariantInClusterDisruption) StartCollection(ctx context.Context, admi
318399
// For Hypershift, only skip if it's specifically ARO HCP
319400
// Use management cluster client to check the control-plane-operator deployment
320401
managementOC := exutil.NewHypershiftManagementCLI(hcpNamespace)
402+
var isAROHCPcluster bool
321403
if isAROHCPcluster, err = exutil.IsAroHCP(ctx, hcpNamespace, managementOC.AdminKubeClient()); err != nil {
322404
logrus.WithError(err).Warning("Failed to check if ARO HCP, assuming it's not")
323405
} else if isAROHCPcluster {
324406
i.notSupportedReason = "platform Hypershift - ARO HCP not supported"
325407
return nil
326408
}
409+
410+
// Determine the specific HyperShift variant
411+
if isAROHCPcluster {
412+
i.hostedClusterType = HostedClusterTypeAROHCP
413+
} else {
414+
// Check if this is a bare metal HyperShift cluster
415+
isBareMetalHypershift, err := exutil.IsBareMetalHyperShiftCluster(ctx, managementOC)
416+
if err != nil {
417+
logrus.WithError(err).Warning("Failed to check if bare metal HyperShift, assuming other HyperShift type")
418+
i.hostedClusterType = HostedClusterTypeOther
419+
} else if isBareMetalHypershift {
420+
i.hostedClusterType = HostedClusterTypeBareMetal
421+
} else {
422+
i.hostedClusterType = HostedClusterTypeOther
423+
}
424+
}
327425
}
328426

329427
if len(i.payloadImagePullSpec) == 0 {
@@ -378,11 +476,30 @@ func (i *InvariantInClusterDisruption) StartCollection(ctx context.Context, admi
378476
return fmt.Errorf("error getting openshift infrastructure: %v", err)
379477
}
380478

381-
internalAPI, err := url.Parse(infra.Status.APIServerInternalURL)
382-
if err != nil {
383-
return fmt.Errorf("error parsing api int url: %v", err)
479+
var apiIntHost string
480+
var apiIntPort string
481+
// Hosted clusters use adminRESTConfig.Host, standalone clusters use APIServerInternalURL
482+
isHostedCluster := i.hostedClusterType == HostedClusterTypeAROHCP ||
483+
i.hostedClusterType == HostedClusterTypeBareMetal ||
484+
i.hostedClusterType == HostedClusterTypeOther
485+
486+
if isHostedCluster {
487+
apiIntHost, apiIntPort, err = i.parseAdminRESTConfigHost()
488+
if err != nil {
489+
return fmt.Errorf("failed to parse adminRESTConfig.Host: %v", err)
490+
}
491+
} else {
492+
internalAPI, err := url.Parse(infra.Status.APIServerInternalURL)
493+
if err != nil {
494+
return fmt.Errorf("error parsing api int url: %v", err)
495+
}
496+
apiIntHost = internalAPI.Hostname()
497+
if internalAPI.Port() != "" {
498+
apiIntPort = internalAPI.Port()
499+
} else {
500+
apiIntPort = "6443" // default port
501+
}
384502
}
385-
apiIntHost := internalAPI.Hostname()
386503

387504
allNodes, err := i.kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
388505
if err != nil {
@@ -438,7 +555,7 @@ func (i *InvariantInClusterDisruption) StartCollection(ctx context.Context, admi
438555
if err != nil {
439556
return fmt.Errorf("error creating localhost: %v", err)
440557
}
441-
err = i.createInternalLBDeployment(ctx, apiIntHost)
558+
err = i.createInternalLBDeployment(ctx, apiIntHost, apiIntPort)
442559
if err != nil {
443560
return fmt.Errorf("error creating internal LB: %v", err)
444561
}

test/extended/util/managed_services.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package util
22

33
import (
44
"context"
5+
"fmt"
6+
"strings"
57

68
"github.com/sirupsen/logrus"
79
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -95,3 +97,39 @@ func IsAroHCP(ctx context.Context, namespace string, kubeClient kubernetes.Inter
9597
logrus.Infof("No deployment found with control-plane-operator container in namespace %s", namespace)
9698
return false, nil
9799
}
100+
101+
// IsBareMetalHyperShiftCluster checks if the HyperShift cluster is running on bare metal
102+
// by checking the platform type of the hosted cluster. It uses kubectl commands to query
103+
// the hosted cluster's platform type and returns true if it's "None" or "Agent".
104+
func IsBareMetalHyperShiftCluster(ctx context.Context, managementOC *CLI) (bool, error) {
105+
// Get the hosted cluster namespace
106+
_, hcpNamespace, err := GetHypershiftManagementClusterConfigAndNamespace()
107+
if err != nil {
108+
return false, fmt.Errorf("failed to get hypershift management cluster config and namespace: %v", err)
109+
}
110+
111+
// Get the first hosted cluster name
112+
clusterNames, err := managementOC.AsAdmin().WithoutNamespace().Run("get").Args(
113+
"-n", hcpNamespace, "hostedclusters", "-o=jsonpath={.items[*].metadata.name}").Output()
114+
if err != nil {
115+
return false, fmt.Errorf("failed to get hosted cluster names: %v", err)
116+
}
117+
118+
if len(clusterNames) == 0 {
119+
return false, fmt.Errorf("no hosted clusters found")
120+
}
121+
122+
// Get the first hosted cluster name
123+
clusterName := strings.Split(strings.TrimSpace(clusterNames), " ")[0]
124+
125+
// Get the platform type of the hosted cluster
126+
platformType, err := managementOC.AsAdmin().WithoutNamespace().Run("get").Args(
127+
"hostedcluster", clusterName, "-n", hcpNamespace, `-ojsonpath={.spec.platform.type}`).Output()
128+
if err != nil {
129+
return false, fmt.Errorf("failed to get hosted cluster platform type: %v", err)
130+
}
131+
132+
// Check if it's bare metal (None or Agent platform)
133+
platformTypeStr := strings.TrimSpace(platformType)
134+
return platformTypeStr == "None" || platformTypeStr == "Agent", nil
135+
}

0 commit comments

Comments
 (0)