Skip to content

Commit e5d8249

Browse files
committed
Updates for code consolidation, remove unneeded files
1 parent bfd77e2 commit e5d8249

File tree

5 files changed

+260
-155
lines changed

5 files changed

+260
-155
lines changed

tests/interop/test_subscription_status_edge.py

Lines changed: 0 additions & 25 deletions
This file was deleted.

tests/interop/test_subscription_status_hub.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ def test_subscription_status_hub(openshift_dyn_client):
1313
# These are the operator subscriptions and their associated namespaces
1414
expected_subs = {
1515
"openshift-gitops-operator": ["openshift-operators"],
16-
"advanced-cluster-management": ["open-cluster-management"],
17-
"multicluster-engine": ["multicluster-engine"],
16+
"prometheus": ["llm-monitoring"],
17+
"grafana-operator": ["llm-monitoring"],
18+
"nfd": ["openshift-nfd"],
19+
"gpu-operator-certified": ["nvidia-gpu-operator"],
1820
}
1921

2022
err_msg = subscription.subscription_status(

tests/interop/test_validate_edge_site_components.py

Lines changed: 0 additions & 74 deletions
This file was deleted.
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
import os
import re
import subprocess
import pytest
import logging
import yaml
from ocp_resources.machine_set import MachineSet
from ocp_resources.node import Node
from ocp_resources.pod import Pod
from . import __loggername__
from openshift.dynamic.exceptions import NotFoundError

# Module-level logger; __loggername__ is defined by the test package so all
# interop tests log under one name.
logger = logging.getLogger(__loggername__)

# Path to the `oc` CLI binary used for raw queries that the dynamic client
# does not cover (e.g. field-selector pod listings per node).
# NOTE(review): raises KeyError if HOME is unset — presumably guaranteed by
# the CI environment; confirm.
oc = os.environ["HOME"] + "/oc_client/oc"
16+
17+
18+
@pytest.mark.validate_gpu_machineset
def test_validate_gpu_nodes(openshift_dyn_client):
    """
    Validate the cluster's GPU machineset.

    Checks, in order, failing the test at the first missing piece:
      1. A machineset whose name contains "gpu" exists in
         openshift-machine-api.
      2. Its pod template defines taints.
      3. Its pod template defines node labels.
      4. Its providerSpec defines an instanceType.

    :param openshift_dyn_client: dynamic OpenShift API client fixture
    """
    logger.info("Checking GPU machineset")
    machinesets = MachineSet.get(
        dyn_client=openshift_dyn_client, namespace="openshift-machine-api"
    )

    # Find the first machineset whose name contains "gpu".
    gpu_machineset = None
    for machineset in machinesets:
        logger.info(machineset.instance.metadata.name)
        if re.search("gpu", machineset.instance.metadata.name):
            gpu_machineset = machineset
            break

    err_msg = "GPU machineset not found"
    if gpu_machineset is not None:
        logger.info(
            f"PASS: Found GPU machineset: {gpu_machineset.instance.metadata.name}"
        )
    else:
        logger.error(f"FAIL: {err_msg}")
        assert False, err_msg

    # Check for the existence of the GPU machineset taint
    logger.info("Checking GPU machineset taint")

    err_msg = "No taints found for GPU machineset"
    try:
        logger.info(gpu_machineset.instance.spec.template.spec.taints)
    except AttributeError:
        logger.error(f"FAIL: {err_msg}")
        assert False, err_msg

    # Bug fix: compare via str() as the labels/instanceType checks do — the
    # attribute yields a None object, which the original `== "None"` string
    # comparison could never match.
    if str(gpu_machineset.instance.spec.template.spec.taints) == "None":
        logger.error(f"FAIL: {err_msg}")
        assert False, err_msg

    logger.info(
        f"PASS: Found GPU machineset taint: {gpu_machineset.instance.spec.template.spec.taints}"
    )

    # Check for the existence of the GPU machineset label
    logger.info("Checking GPU machineset label")

    err_msg = "No label found for GPU machineset"
    try:
        logger.info(gpu_machineset.instance.spec.template.spec.metadata.labels)
    except AttributeError:
        logger.error(f"FAIL: {err_msg}")
        assert False, err_msg

    labels = str(gpu_machineset.instance.spec.template.spec.metadata.labels)
    if labels == "None":
        logger.error(f"FAIL: {err_msg}")
        assert False, err_msg

    logger.info(f"PASS: Found GPU machineset labels: {labels}")

    # Check for the existence of the GPU machineset instance type
    logger.info("Checking GPU machineset instance type")

    err_msg = "No instanceType found for GPU machineset"
    try:
        # Bug fix: read the GPU machineset found above, not the stale loop
        # variable `machineset`.
        logger.info(
            gpu_machineset.instance.spec.template.spec.providerSpec.value.instanceType
        )
    except AttributeError:
        logger.error(f"FAIL: {err_msg}")
        assert False, err_msg

    instance_type = str(
        gpu_machineset.instance.spec.template.spec.providerSpec.value.instanceType
    )
    if instance_type == "None":
        logger.error(f"FAIL: {err_msg}")
        assert False, err_msg

    logger.info(f"PASS: Found GPU machineset instance type: {instance_type}")
106+
107+
108+
@pytest.mark.validate_gpu_node_role_labels_pods
def test_validate_gpu_node_role_labels_pods(openshift_dyn_client):
    """
    Validate GPU worker nodes and their workloads.

    1. Exactly 3 nodes must carry both the 'worker' and 'odh-notebook'
       node-role labels.
    2. Each of those nodes must be running at least 20 pods that are not in
       the Completed state (queried via the `oc` CLI with a field selector).

    :param openshift_dyn_client: dynamic OpenShift API client fixture
    """
    logger.info("Checking GPU node-role labels")

    nodes = Node.get(dyn_client=openshift_dyn_client)
    gpu_nodes = []

    # Labels are matched as substrings of the stringified label dict.
    odh_label = "'node-role.kubernetes.io/odh-notebook': ''"
    worker_label = "'node-role.kubernetes.io/worker': ''"

    for node in nodes:
        logger.info(node.instance.metadata.name)
        labels = node.instance.metadata.labels
        logger.info(labels)
        label_str = str(labels)

        if odh_label in label_str and worker_label in label_str:
            gpu_nodes.append(node)

    # NOTE(review): the expected node count of 3 is deployment-specific.
    if len(gpu_nodes) == 3:
        logger.info("PASS: Found 'worker' and 'odh-notebook' GPU node-role labels")
    else:
        err_msg = "Could not find 'worker' and 'odh-notebook' GPU node-role label"
        logger.error(f"FAIL: {err_msg}")
        assert False, err_msg

    # Check for the expected number of pods deployed on GPU nodes
    logger.info("Checking pod count on GPU nodes")

    expected_count = 20
    # Bug fix: accumulate failures across ALL nodes — the original reset this
    # list inside the loop, discarding every failure but the last node's.
    failed_nodes = []

    for gpu_node in gpu_nodes:
        name = gpu_node.instance.metadata.name
        field_select = "--field-selector=spec.host=" + name
        pod_count = 0
        cmd_out = subprocess.run(
            [oc, "get", "pod", "-A", field_select, "--no-headers"],
            capture_output=True,
        )

        if cmd_out.stdout:
            out_decoded = cmd_out.stdout.decode("utf-8")
            # Bug fix: log the node currently being checked — the original
            # read the stale `node` loop variable from the labels loop above.
            logger.info(name + "\n" + out_decoded)

            for line in out_decoded.splitlines():
                # Completed pods are finished workloads; count only live pods.
                if "Completed" not in line:
                    pod_count += 1

            if pod_count < expected_count:
                failed_nodes.append(name)
        else:
            assert False, cmd_out.stderr

    if failed_nodes:
        err_msg = f"Did not find the expected pod count on: {failed_nodes}"
        logger.error(f"FAIL: {err_msg}")
        assert False, err_msg
    else:
        logger.info("PASS: Found the expected pod count for GPU nodes")

0 commit comments

Comments
 (0)