
Commit 0ce6df7

vsan max wldi automation (#3371)
1 parent 9705c6b commit 0ce6df7

File tree (4 files changed: +320 −7 lines)

  tests/e2e/e2e_common.go
  tests/e2e/mgmt_wrkld_domain_isolation_utils.go
  tests/e2e/nimbus_utils.go
  tests/e2e/vsan_max_tkg_wldi.go

tests/e2e/e2e_common.go

Lines changed: 2 additions & 0 deletions
```diff
@@ -504,6 +504,8 @@ var (
 	envZone2DatastoreUrl = "ZONE2_DATASTORE_URL"
 	envIsolationSharedStoragePolicyNameLateBidning = "WORKLOAD_ISOLATION_SHARED_STORAGE_POLICY_WFFC"
 	envSharedZone1Zone2Zone3StoragePolicyName = "SHARED_ZONE1_ZONE2_ZONE3_STORAGE_POLICY_IMM"
+	nimbusWorkerIp = "NIMBUS_WORKER_IP"
+	vsanMaxFaultDomainName = "VSAN_MAX_FD_NAME"
 )

 // storage policy usages for storage quota validation
```
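The two new keys are consumed later in this change set through the suite's env-var helpers. A minimal sketch of that usage, assuming (as the rest of the suite suggests) that GetAndExpectStringEnvVar fails the spec when a required variable is unset while the nimbus worker IP is optional; the wrapper name readVsanMaxWldiEnv is hypothetical and not part of the commit:

```go
// Sketch only: illustrates how the new env-var keys are read elsewhere in this commit.
func readVsanMaxWldiEnv() (fdName, workerIp string) {
	fdName = GetAndExpectStringEnvVar(vsanMaxFaultDomainName) // required by the new vSAN Max test
	workerIp = os.Getenv(nimbusWorkerIp)                      // empty => nimbus-ctl runs locally
	if workerIp != "" {
		framework.Logf("nimbus-ctl commands will run over SSH on %s", workerIp)
	}
	return fdName, workerIp
}
```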

tests/e2e/mgmt_wrkld_domain_isolation_utils.go

Lines changed: 46 additions & 0 deletions
```diff
@@ -490,3 +490,49 @@ func passZonesToStayInMap(allowedTopologyMap map[string][]string,
 	}
 	return allowedTopologyMap
 }
+
+// Power off hosts from the given zone.
+// clusterDown: if true, powers off all hosts in the zone's cluster and ignores the numberOfHost param.
+// clusterDown: if false, powers off only numberOfHost hosts.
+func powerOffHostsFromZone(ctx context.Context, zone string, clusterDown bool, numberOfHost int) []string {
+	var hostIpsToPowerOff []string
+	clusterName := getClusterNameFromZone(ctx, zone)
+	// Get all hosts of the given zone's cluster
+	nodes := getHostsByClusterName(ctx, clusterComputeResource, clusterName)
+	gomega.Expect(len(nodes) > 0).To(gomega.BeTrue())
+	for i, node := range nodes {
+		host := node.Common.InventoryPath
+		hostIpString := strings.Split(host, "/")
+		hostIp := hostIpString[len(hostIpString)-1]
+		hostIpsToPowerOff = append(hostIpsToPowerOff, hostIp)
+		if !clusterDown {
+			if i+1 == numberOfHost {
+				break
+			}
+		}
+	}
+	// Power off the hosts
+	powerOffHostParallel(ctx, hostIpsToPowerOff)
+	return hostIpsToPowerOff
+}
+
+// Power off hosts from the given fault domain.
+// fdDown: if true, powers off all hosts in the fault domain and ignores the numberOfHost param.
+// fdDown: if false, powers off only numberOfHost hosts.
+func powerOffHostsFromFaultDomain(ctx context.Context, fdName string, fdMap map[string]string, fdDown bool,
+	numberOfHost int) []string {
+	var hostIpsToPowerOff []string
+	for hostIp, site := range fdMap {
+		if strings.Contains(site, fdName) {
+			hostIpsToPowerOff = append(hostIpsToPowerOff, hostIp)
+			if !fdDown {
+				if len(hostIpsToPowerOff) == numberOfHost {
+					break
+				}
+			}
+		}
+	}
+	// Power off the hosts
+	powerOffHostParallel(ctx, hostIpsToPowerOff)
+	return hostIpsToPowerOff
+}
```
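Both helpers return the IPs they powered off so the caller can power them back on later. Note that powerOffHostsFromFaultDomain does not query vCenter itself; it only filters the host-to-fault-domain map it is given. A hedged usage sketch follows, where the fdMap literal, the fault-domain names, and the wrapper name examplePowerCycle are assumptions meant to illustrate the host-IP-to-site-name shape that the strings.Contains filter implies:

```go
// Sketch only, not part of the commit: drives the two new helpers end to end.
func examplePowerCycle(ctx context.Context) {
	// Power off a single host from zone-2's cluster (clusterDown=false honors numberOfHost).
	downZoneHosts := powerOffHostsFromZone(ctx, "zone-2", false, 1)

	// Assumed shape: host IP -> fault-domain (site) name, as produced by createFaultDomainMap.
	fdMap := map[string]string{
		"10.0.0.11": "vsan-max-fd-1",
		"10.0.0.12": "vsan-max-fd-1",
		"10.0.0.21": "vsan-max-fd-2",
	}
	// Power off every host in fault domain 1 (fdDown=true ignores numberOfHost).
	downFdHosts := powerOffHostsFromFaultDomain(ctx, "vsan-max-fd-1", fdMap, true, 0)

	// Callers are responsible for restoring the hosts, as the new test does.
	powerOnHostParallel(downZoneHosts)
	powerOnHostParallel(downFdHosts)
}
```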

tests/e2e/nimbus_utils.go

Lines changed: 26 additions & 7 deletions
```diff
@@ -24,7 +24,9 @@ import (
 
 	"github.com/davecgh/go-spew/spew"
 	"github.com/onsi/gomega"
+	"golang.org/x/crypto/ssh"
 	"k8s.io/kubernetes/test/e2e/framework"
+	fssh "k8s.io/kubernetes/test/e2e/framework/ssh"
 )
 
 type TestbedBasicInfo struct {
@@ -50,14 +52,31 @@ func vMPowerMgmt(user string, location string, podname string, hostList string,
 	nimbusCmd := fmt.Sprintf("USER=%s /mts/git/bin/nimbus-ctl --nimbusLocation %s --nimbus %s %s %s", user,
 		location, podname, op, hostList)
 	framework.Logf("Running command: %s", nimbusCmd)
-	cmd := exec.Command("/bin/bash", "-c", nimbusCmd)
-	err = cmd.Start()
-	if err != nil {
-		return err
-	}
-	err = cmd.Wait()
 
-	framework.Logf("stdout:\n%v\nstderr:\n%v\n", cmd.Stdout, cmd.Stderr)
+	// nimbus-ctl cannot run from a local Mac, so run it over SSH on the nimbus worker when NIMBUS_WORKER_IP is set.
+	if os.Getenv(nimbusWorkerIp) != "" {
+		sshClientConfig := &ssh.ClientConfig{
+			User: "worker",
+			Auth: []ssh.AuthMethod{
+				ssh.Password(GetAndExpectStringEnvVar(vcUIPwd)),
+			},
+			HostKeyCallback: ssh.InsecureIgnoreHostKey(),
+		}
+		powerOffVm, er := sshExec(sshClientConfig, GetAndExpectStringEnvVar(nimbusWorkerIp), nimbusCmd)
+		if er != nil && powerOffVm.Code != 0 {
+			fssh.LogResult(powerOffVm)
+			err = er
+		}
+	} else {
+		cmd := exec.Command("/bin/bash", "-c", nimbusCmd)
+		err = cmd.Start()
+		if err != nil {
+			return err
+		}
+		err = cmd.Wait()
+
+		framework.Logf("stdout:\n%v\nstderr:\n%v\n", cmd.Stdout, cmd.Stderr)
+	}
 	return err
 }
```
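With this change vMPowerMgmt only decides where the nimbus-ctl command runs: locally through /bin/bash when NIMBUS_WORKER_IP is unset, otherwise over SSH on the nimbus worker. The suite's sshExec helper is not part of this diff; the sketch below shows an equivalent SSH round trip with golang.org/x/crypto/ssh, assuming password auth and an ignored host key are acceptable for test infrastructure. runOverSSH is a hypothetical stand-in, not the helper's real signature:

```go
package main

import (
	"fmt"

	"golang.org/x/crypto/ssh"
)

// runOverSSH is a hypothetical stand-in for the suite's sshExec helper:
// dial the worker, open a session, and return the command's combined output.
func runOverSSH(host, user, password, cmd string) (string, error) {
	config := &ssh.ClientConfig{
		User:            user,
		Auth:            []ssh.AuthMethod{ssh.Password(password)},
		HostKeyCallback: ssh.InsecureIgnoreHostKey(), // test-only, as in the diff above
	}
	client, err := ssh.Dial("tcp", host+":22", config)
	if err != nil {
		return "", fmt.Errorf("dial %s: %w", host, err)
	}
	defer client.Close()

	session, err := client.NewSession()
	if err != nil {
		return "", fmt.Errorf("new session: %w", err)
	}
	defer session.Close()

	out, err := session.CombinedOutput(cmd)
	return string(out), err
}
```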

tests/e2e/vsan_max_tkg_wldi.go

Lines changed: 246 additions & 0 deletions
```go
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
	"context"
	"fmt"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/test/e2e/framework"
	fnodes "k8s.io/kubernetes/test/e2e/framework/node"
	fss "k8s.io/kubernetes/test/e2e/framework/statefulset"
	admissionapi "k8s.io/pod-security-admission/api"
)

var _ bool = ginkgo.Describe("[tkg-domain-isolation-vsan-max] TKG-WLDI-Vsan-Max", func() {

	f := framework.NewDefaultFramework("tkg-domain-isolation-vsan-max")
	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
	var (
		client                      clientset.Interface
		namespace                   string
		vcRestSessionId             string
		allowedTopologies           []v1.TopologySelectorLabelRequirement
		replicas                    int32
		topologyAffinityDetails     map[string][]string
		topologyCategories          []string
		labelsMap                   map[string]string
		labels_ns                   map[string]string
		sharedStoragePolicyNameWffc string
	)

	ginkgo.BeforeEach(func() {
		namespace = getNamespaceToRunTests(f)
		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()

		// making vc connection
		client = f.ClientSet
		bootstrap()

		// reading vc session id
		if vcRestSessionId == "" {
			vcRestSessionId = createVcSession4RestApis(ctx)
		}

		// reading topology map set for management domain and workload domain
		topologyMap := GetAndExpectStringEnvVar(envTopologyMap)
		allowedTopologies = createAllowedTopolgies(topologyMap)
		topologyAffinityDetails, topologyCategories = createTopologyMapLevel5(topologyMap)

		// required for pod creation
		labels_ns = map[string]string{}
		labels_ns[admissionapi.EnforceLevelLabel] = string(admissionapi.LevelPrivileged)
		labels_ns["e2e-framework"] = f.BaseName

		// setting map values
		labelsMap = make(map[string]string)
		labelsMap["app"] = "test"

		// reading shared storage policy
		sharedStoragePolicyNameWffc = GetAndExpectStringEnvVar(envIsolationSharedStoragePolicyNameLateBidning)

		svcNamespace = GetAndExpectStringEnvVar(envSupervisorClusterNamespace)

		// Read testbedInfo.json and populate tbinfo
		readVcEsxIpsViaTestbedInfoJson(GetAndExpectStringEnvVar(envTestbedInfoJsonPath))
	})

	ginkgo.AfterEach(func() {
		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()

		framework.Logf("Power on the hosts")
		if len(fds.hostsDown) > 0 && fds.hostsDown != nil {
			powerOnHostParallel(fds.hostsDown)
			fds.hostsDown = nil
		}

		ginkgo.By(fmt.Sprintf("Deleting service nginx in namespace: %v", namespace))
		err := client.CoreV1().Services(namespace).Delete(ctx, servicename, *metav1.NewDeleteOptions(0))
		if !apierrors.IsNotFound(err) {
			gomega.Expect(err).NotTo(gomega.HaveOccurred())
		}

		dumpSvcNsEventsOnTestFailure(client, namespace)

		framework.Logf("Collecting supervisor PVC events before performing PV/PVC cleanup")
		eventList, err := client.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{})
		gomega.Expect(err).NotTo(gomega.HaveOccurred())
		for _, item := range eventList.Items {
			framework.Logf("%q", item.Message)
		}
	})

	/*
		Testcase-5 & 6
		vSAN Max with Fault Domains with HCI mounted datastore - Block & File Volume

		Steps:
		1. Deploy statefulsets with 3 replicas on namespace-4 in the TKG cluster
		   1.1 Use the vsan-shared policy with a WFFC volume-binding-mode storageclass
		   1.2 Access modes ReadWriteOnce and ReadWriteMany
		2. Bring down one of the hosts from the vSphere cluster in zone-2
		3. Bring down one of the hosts from vSAN Max fault domain-1
		4. Bring up the host from zone-2 and the host from fault domain-1
		5. Bring down both hosts from the vSphere cluster in zone-3
		6. Bring down both hosts from vSAN Max fault domain-2
		7. Bring up all hosts from zone-3 and all hosts from fault domain-2
	*/

	ginkgo.It("vSAN Max with Fault Domains with HCI mounted datastore", ginkgo.Label(p0, wldi, vc90), func() {
		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()

		// get the vsan-max FD name
		vsanMaxFdName := GetAndExpectStringEnvVar(vsanMaxFaultDomainName)

		// statefulset replica count
		replicas = 3

		// Get the fault domain and host map
		fdMap := createFaultDomainMap(ctx, &e2eVSphere)

		// Flags to track the status of hosts
		isHostDown := false
		isFaultDomainHostDown := false
		isClusterDown := false
		isFaultDomainDown := false

		ginkgo.By("Read shared storage policy tagged to wcp namespace")
		storageclass, err := client.StorageV1().StorageClasses().Get(ctx, sharedStoragePolicyNameWffc, metav1.GetOptions{})
		if !apierrors.IsNotFound(err) {
			gomega.Expect(err).NotTo(gomega.HaveOccurred())
		}

		ginkgo.By("Creating service")
		service := CreateService(namespace, client)
		defer func() {
			deleteService(namespace, client, service)
		}()

		ginkgo.By("Creating statefulset with ReadWriteOnce")
		statefulsetRwo := createCustomisedStatefulSets(ctx, client, namespace, true, replicas, true, allowedTopologies,
			true, true, "", "", storageclass, storageclass.Name)
		defer func() {
			fss.DeleteAllStatefulSets(ctx, client, namespace)
		}()

		ginkgo.By("Verify svc pv affinity, pvc annotation and pod node affinity")
		err = verifyPvcAnnotationPvAffinityPodAnnotationInSvc(ctx, client, statefulsetRwo, nil, nil, namespace,
			allowedTopologies)
		gomega.Expect(err).NotTo(gomega.HaveOccurred())

		ginkgo.By("Creating statefulset with ReadWriteMany")
		statefulsetRwm := createCustomisedStatefulSets(ctx, client, namespace, true, replicas, true, allowedTopologies,
			true, true, "", v1.ReadWriteMany, storageclass, storageclass.Name)
		defer func() {
			fss.DeleteAllStatefulSets(ctx, client, namespace)
		}()

		ginkgo.By("Verify svc pv affinity, pvc annotation and pod node affinity")
		err = verifyPvcAnnotationPvAffinityPodAnnotationInSvc(ctx, client, statefulsetRwm, nil, nil, namespace,
			allowedTopologies)
		gomega.Expect(err).NotTo(gomega.HaveOccurred())

		// Bring down a host from zone-2
		zone2 := topologyAffinityDetails[topologyCategories[0]][1]
		poweredOffHostIps := powerOffHostsFromZone(ctx, zone2, false, 1)
		isHostDown = true
		defer func() {
			if isHostDown {
				powerOnHostParallel(poweredOffHostIps)
			}
		}()

		// Bring down one of the hosts from vSAN Max fault domain-1
		poweredOffFdHostIps := powerOffHostsFromFaultDomain(ctx, vsanMaxFdName, fdMap, false, 1)
		isFaultDomainHostDown = true
		defer func() {
			if isFaultDomainHostDown {
				powerOnHostParallel(poweredOffFdHostIps)
			}
		}()

		// Check TKG VM and STS pod status
		nodeList, err := fnodes.GetReadySchedulableNodes(ctx, f.ClientSet)
		framework.ExpectNoError(err, "Unable to find ready and schedulable Node")
		gomega.Expect(len(nodeList.Items) == 6).To(gomega.BeTrue())
		fss.WaitForStatusReadyReplicas(ctx, client, statefulsetRwo, replicas)
		fss.WaitForStatusReadyReplicas(ctx, client, statefulsetRwm, replicas)

		// Bring up the host from zone-2 and the host from fault domain-1
		powerOnHostParallel(poweredOffHostIps)
		isHostDown = false
		powerOnHostParallel(poweredOffFdHostIps)
		isFaultDomainHostDown = false

		// Bring down all hosts from the vSphere cluster in zone-3
		zone3 := topologyAffinityDetails[topologyCategories[0]][2]
		poweredOffHostIps = powerOffHostsFromZone(ctx, zone3, true, 0)
		isClusterDown = true
		defer func() {
			if isClusterDown {
				powerOnHostParallel(poweredOffHostIps)
			}
		}()

		// Bring down all hosts from vSAN Max fault domain-2
		poweredOffFdHostIps = powerOffHostsFromFaultDomain(ctx, vsanMaxFdName, fdMap, true, 0)
		isFaultDomainDown = true
		defer func() {
			if isFaultDomainDown {
				powerOnHostParallel(poweredOffFdHostIps)
			}
		}()

		// Check TKG VM and STS pod status
		nodeList, err = fnodes.GetReadySchedulableNodes(ctx, f.ClientSet)
		framework.ExpectNoError(err, "Unable to find ready and schedulable Node")
		gomega.Expect(len(nodeList.Items) > 0).To(gomega.BeTrue())
		fss.WaitForStatusReadyReplicas(ctx, client, statefulsetRwo, replicas)
		fss.WaitForStatusReadyReplicas(ctx, client, statefulsetRwm, replicas)
	})
})
```
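Every disruption step in the test follows the same bookkeeping idiom: record the powered-off IPs, set a flag, register a defer that powers the hosts back on only while the flag is still set, and clear the flag after the explicit power-on, so hosts are restored exactly once even if an assertion fails while they are down. A condensed sketch of that idiom, lifted from the test above (zone2 and the helper calls are the same names used there):

```go
// Sketch only: the flag+defer restore pattern used throughout the test above.
poweredOffHostIps := powerOffHostsFromZone(ctx, zone2, false, 1)
isHostDown := true
defer func() {
	// Fires only if the test exits before the explicit power-on below,
	// e.g. when a readiness check fails while the host is still down.
	if isHostDown {
		powerOnHostParallel(poweredOffHostIps)
	}
}()

// ... verify node readiness and statefulset replicas while the host is down ...

powerOnHostParallel(poweredOffHostIps)
isHostDown = false // the deferred restore becomes a no-op
```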
