Skip to content

Commit 8663b5d

Browse files
authored
Merge branch 'main' into hammerdb_workload
2 parents 3dc2ab5 + 98c98b7 commit 8663b5d

File tree

259 files changed

+24209
-2254
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

259 files changed

+24209
-2254
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[bumpversion]
22
commit = False
33
tag = True
4-
current_version = 1.0.960
4+
current_version = 1.0.965
55
tag_name = v{current_version}
66
message = GitHub Actions Build {current_version}
77

README.md

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ Choose one from the following list:
7474

7575
Not mandatory:
7676

77-
**auto:** NAMESPACE=benchmark-operator [ The default namespace is benchmark-operator ]
77+
**auto:** NAMESPACE=benchmark-runner [ The default namespace is benchmark-runner ]
7878

7979
**auto:** ODF_PVC=True [ True=ODF PVC storage, False=Ephemeral storage, default True ]
8080

@@ -84,8 +84,6 @@ Not mandatory:
8484

8585
**auto:** RUNNER_PATH=/tmp [ The default work space is /tmp ]
8686

87-
**optional:** PIN_NODE_BENCHMARK_OPERATOR=$PIN_NODE_BENCHMARK_OPERATOR [node selector for benchmark operator pod]
88-
8987
**optional:** PIN_NODE1=$PIN_NODE1 [node1 selector for running the workload]
9088

9189
**optional:** PIN_NODE2=$PIN_NODE2 [node2 selector for running the workload, i.e. uperf server and client, hammerdb database and workload]
@@ -111,17 +109,16 @@ Not mandatory:
111109
For example:
112110

113111
```sh
114-
podman run --rm -e WORKLOAD="hammerdb_pod_mariadb" -e KUBEADMIN_PASSWORD="1234" -e PIN_NODE_BENCHMARK_OPERATOR="node_name-0" -e PIN_NODE1="node_name-1" -e PIN_NODE2="node_name-2" -e log_level=INFO -v /root/.kube/config:/root/.kube/config --privileged quay.io/benchmark-runner/benchmark-runner:latest
112+
podman run --rm -e WORKLOAD="hammerdb_pod_mariadb" -e KUBEADMIN_PASSWORD="1234" -e PIN_NODE1="node_name-1" -e PIN_NODE2="node_name-2" -e log_level=INFO -v /root/.kube/config:/root/.kube/config --privileged quay.io/benchmark-runner/benchmark-runner:latest
115113
```
116114
or
117115
```sh
118-
docker run --rm -e WORKLOAD="hammerdb_vm_mariadb" -e KUBEADMIN_PASSWORD="1234" -e PIN_NODE_BENCHMARK_OPERATOR="node_name-0" -e PIN_NODE1="node_name-1" -e PIN_NODE2="node_name-2" -e log_level=INFO -v /root/.kube/config:/root/.kube/config --privileged quay.io/benchmark-runner/benchmark-runner:latest
116+
docker run --rm -e WORKLOAD="hammerdb_vm_mariadb" -e KUBEADMIN_PASSWORD="1234" -e PIN_NODE1="node_name-1" -e PIN_NODE2="node_name-2" -e log_level=INFO -v /root/.kube/config:/root/.kube/config --privileged quay.io/benchmark-runner/benchmark-runner:latest
119117
```
120118

121119
SAVE RUN ARTIFACTS LOCAL:
122120
1. add `-e SAVE_ARTIFACTS_LOCAL='True'` or `--save-artifacts-local=true`
123121
2. add `-v /tmp/benchmark-runner-run-artifacts:/tmp/benchmark-runner-run-artifacts`
124-
3. git clone -b v1.0.3 https://github.com/cloud-bulldozer/benchmark-operator /tmp/benchmark-operator
125122

126123
### Run vdbench workload in Pod using OpenShift
127124
![](media/benchmark-runner-demo.gif)

benchmark_runner/common/oc/oc.py

Lines changed: 111 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -802,18 +802,27 @@ def collect_events(self):
802802

803803
@typechecked
804804
@logger_time_stamp
805-
def get_pod(self, label: str, database: str = '', namespace: str = environment_variables.environment_variables_dict['namespace']):
805+
def get_pod(self, label: str = '', database: str = '', namespace: str = environment_variables.environment_variables_dict['namespace'], label_selector: str = ''):
806806
"""
807-
This method gets pods according to label
808-
:param label:
807+
This method gets pod name by name pattern or label selector
808+
:param label: pod name pattern (grep match)
809809
:param database:
810810
:param namespace:
811-
:return:
811+
:param label_selector: Kubernetes label selector (e.g. 'app=stressng_workload-<uuid>')
812+
:return: pod name
812813
"""
813814
if database:
814815
return self.run(
815816
f"{self._cli} get pods -n '{database}-db'" + " --no-headers | awk '{ print $1; }' | grep " + database,
816817
is_check=True).rstrip().decode('ascii')
818+
elif label_selector:
819+
namespace_opt = f'-n {namespace}' if namespace else ''
820+
result = self.run(
821+
f"{self._cli} get pods {namespace_opt} -l '{label_selector}' -o jsonpath='{{.items[0].metadata.name}}'",
822+
is_check=True)
823+
if isinstance(result, bytes):
824+
return result.decode('utf-8').strip().strip("'")
825+
return str(result).strip().strip("'") if result else ''
817826
else:
818827
namespace = f'-n {namespace}' if namespace else ''
819828
return self.run(f"{self._cli} get pods {namespace} --no-headers | awk '{{ print $1; }}' | grep -w '{label}'", is_check=True).rstrip().decode('ascii')
@@ -862,26 +871,27 @@ def get_pods(self):
862871

863872
@typechecked
864873
@logger_time_stamp
865-
def wait_for_pod_create(self, pod_name: str,
874+
def wait_for_pod_create(self, pod_name: str = '', label: str = '',
866875
namespace: str = environment_variables.environment_variables_dict['namespace'],
867876
timeout: int = int(environment_variables.environment_variables_dict['timeout'])):
868877
"""
869-
This method waits till pod name is creating or throw exception after timeout
878+
This method waits till pod is created or throws exception after timeout.
879+
Can match by pod_name or label selector (for Job pods with random suffixes).
880+
:param pod_name: Pod name to match
881+
:param label: Label selector to match (e.g. 'app=stressng_workload-<uuid>')
870882
:param namespace:
871-
:param pod_name:
872883
:param timeout:
873-
:return: True if getting pod name or raise PodNameError
884+
:return: True if pod found or raise PodNotCreateTimeout
874885
"""
875886
current_wait_time = 0
876887
while timeout <= 0 or current_wait_time <= timeout:
877-
if self.pod_exists(pod_name=pod_name, namespace=namespace):
878-
self.describe_pod(pod_name=pod_name, namespace=namespace)
888+
if label and self.pod_label_exists(label_name=label, namespace=namespace):
889+
return True
890+
elif pod_name and self.pod_exists(pod_name=pod_name, namespace=namespace):
879891
return True
880-
# sleep for x seconds
881892
time.sleep(OC.SLEEP_TIME)
882893
current_wait_time += OC.SLEEP_TIME
883-
self.describe_pod(pod_name=pod_name, namespace=namespace)
884-
raise PodNotCreateTimeout(pod_name)
894+
raise PodNotCreateTimeout(pod_name or label)
885895

886896
@typechecked
887897
@logger_time_stamp
@@ -1163,13 +1173,23 @@ def wait_for_pod_completed(self, label: str, workload: str = '', label_uuid: boo
11631173
f"{self._cli} {namespace} wait --for=condition=failed -l {label}-{self.__get_short_uuid(workload=workload)} jobs --timeout={OC.SLEEP_TIME}s")
11641174
if 'met' in result:
11651175
return False
1166-
if not job:
1176+
elif job:
1177+
# Handle job=True with label_uuid=False (direct pod workloads)
1178+
result = self.run(
1179+
f"{self._cli} {namespace} wait --for=condition=complete -l {label} jobs --timeout={OC.SHORT_TIMEOUT}s")
1180+
if 'met' in result:
1181+
return True
1182+
result = self.run(
1183+
f"{self._cli} {namespace} wait --for=condition=failed -l {label} jobs --timeout={OC.SLEEP_TIME}s")
1184+
if 'met' in result:
1185+
return False
1186+
elif not job:
11671187
result = self.run(f"{self._cli} get pod -l {label}" + " -n benchmark-runner --no-headers | awk '{ print $3; }'")
11681188
if 'Completed' in result:
11691189
return True
1170-
# sleep for x seconds
1171-
time.sleep(OC.SLEEP_TIME)
1172-
current_wait_time += OC.SLEEP_TIME
1190+
# sleep for x seconds
1191+
time.sleep(OC.SLEEP_TIME)
1192+
current_wait_time += OC.SLEEP_TIME
11731193
except Exception as err:
11741194
raise PodNotCompletedTimeout(workload=workload)
11751195

@@ -1245,6 +1265,55 @@ def get_vm(self, label: str = '', namespace: str = environment_variables.environ
12451265
else:
12461266
return self.run(f'{self._cli} get vmi', is_check=True)
12471267

1268+
def _get_pod_field(self, field: str, label: str = '', pod_name: str = '', namespace: str = '') -> str:
1269+
"""
1270+
Get a pod field via jsonpath, by label selector or pod name
1271+
"""
1272+
namespace = namespace or environment_variables.environment_variables_dict.get('namespace', '')
1273+
try:
1274+
if label:
1275+
result = self.run(
1276+
cmd=f"{self._cli} get pods -n {namespace} -l {label} -o jsonpath='{{.items[0].{field}}}'")
1277+
else:
1278+
result = self.run(
1279+
cmd=f"{self._cli} get pod -n {namespace} {pod_name} -o jsonpath='{{.{field}}}'")
1280+
return result.strip().strip(b"'").decode('ascii') if isinstance(result, bytes) else str(result).strip().strip("'")
1281+
except Exception:
1282+
return ''
1283+
1284+
def get_pod_ip(self, label: str = '', pod_name: str = '', namespace: str = '') -> str:
1285+
return self._get_pod_field('status.podIP', label=label, pod_name=pod_name, namespace=namespace)
1286+
1287+
def get_pod_node(self, label: str = '', pod_name: str = '', namespace: str = '') -> str:
1288+
return self._get_pod_field('spec.nodeName', label=label, pod_name=pod_name, namespace=namespace)
1289+
1290+
def get_vmi_ip(self, namespace: str, vm_name: str, retries: int = 30) -> str:
1291+
"""
1292+
Get the IP address of a VirtualMachineInstance, retrying until available
1293+
"""
1294+
for attempt in range(retries):
1295+
try:
1296+
result = self.run(
1297+
cmd=f"{self._cli} get vmi -n {namespace} {vm_name} -o jsonpath='{{.status.interfaces[0].ipAddress}}'")
1298+
ip = result.strip().strip(b"'").decode('ascii') if isinstance(result, bytes) else str(result).strip().strip("'")
1299+
if ip and ip != '<none>':
1300+
return ip
1301+
except Exception:
1302+
pass
1303+
time.sleep(2)
1304+
return ''
1305+
1306+
def get_cluster_name(self) -> str:
1307+
"""
1308+
Get the cluster name/ID
1309+
"""
1310+
try:
1311+
result = self.run(cmd=f"{self._cli} get infrastructure cluster -o jsonpath='{{.status.infrastructureName}}'")
1312+
return result.strip().strip(b"'").decode('ascii') if isinstance(result, bytes) else str(result).strip().strip("'")
1313+
except Exception:
1314+
return ''
1315+
1316+
12481317
@logger_time_stamp
12491318
def __verify_vm_log_complete(self, vm_name: str, timeout: int = int(environment_variables.environment_variables_dict['timeout'])):
12501319
"""
@@ -1452,19 +1521,26 @@ def create_vm_sync(self, yaml: str, vm_name: str,
14521521

14531522
@typechecked
14541523
@logger_time_stamp
1455-
def delete_vm_sync(self, yaml: str, vm_name: str,
1524+
def delete_vm_sync(self, yaml: str = '', vm_name: str = '',
14561525
namespace: str = environment_variables.environment_variables_dict['namespace'],
14571526
timeout: int = int(environment_variables.environment_variables_dict['timeout'])):
14581527
"""
1459-
This method deletes specified VM synchronously; return False if it does not exist
1528+
This method deletes specified VM synchronously; return False if it does not exist.
1529+
Can delete by YAML file or by VM name directly.
1530+
:param yaml: YAML file to delete (deletes all resources in YAML)
1531+
:param vm_name: VM name to delete (used when no YAML, or to delete specific VM)
14601532
:param namespace:
14611533
:param timeout:
1462-
:param vm_name:
1463-
:param yaml:
14641534
:return: return False if vm does not exist
14651535
"""
14661536
if self.vm_exists(vm_name=vm_name, namespace=namespace):
1467-
self.delete_async(yaml)
1537+
if yaml:
1538+
self.delete_async(yaml)
1539+
else:
1540+
try:
1541+
self.run(f"{self._cli} delete vm {vm_name} -n {namespace} --ignore-not-found")
1542+
except Exception:
1543+
pass
14681544
return self.wait_for_vm_delete(vm_name=vm_name, namespace=namespace, timeout=timeout)
14691545
else:
14701546
return False
@@ -1492,9 +1568,19 @@ def wait_for_vm_completed(self, workload: str = '', vm_name: str = '',
14921568
current_wait_time = 0
14931569
namespace = f'-n {namespace}' if namespace else ''
14941570
while timeout <= 0 or current_wait_time <= timeout:
1495-
if self.run(
1496-
f"{self._cli} {namespace} get benchmark {workload} -o jsonpath={{.status.complete}}") == 'true':
1497-
return True
1571+
# Check VMI phase for direct VM workloads
1572+
if vm_name:
1573+
vmi_phase = self.run(
1574+
f"{self._cli} {namespace} get vmi {vm_name} -o jsonpath={{.status.phase}}")
1575+
if vmi_phase == 'Succeeded':
1576+
return True
1577+
elif vmi_phase == 'Failed':
1578+
return False
1579+
else:
1580+
# Fallback to benchmark CR for operator-based workloads
1581+
if self.run(
1582+
f"{self._cli} {namespace} get benchmark {workload} -o jsonpath={{.status.complete}}") == 'true':
1583+
return True
14981584
# sleep for x seconds
14991585
time.sleep(OC.SLEEP_TIME)
15001586
current_wait_time += OC.SLEEP_TIME

0 commit comments

Comments (0)