Skip to content

Commit 8663b5d

Browse files
authored
Merge branch 'main' into hammerdb_workload
2 parents 3dc2ab5 + 98c98b7 commit 8663b5d

File tree

259 files changed

+24209
-2254
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

259 files changed

+24209
-2254
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[bumpversion]
22
commit = False
33
tag = True
4-
current_version = 1.0.960
4+
current_version = 1.0.965
55
tag_name = v{current_version}
66
message = GitHub Actions Build {current_version}
77

README.md

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ Choose one from the following list:
7474

7575
Not mandatory:
7676

77-
**auto:** NAMESPACE=benchmark-operator [ The default namespace is benchmark-operator ]
77+
**auto:** NAMESPACE=benchmark-runner [ The default namespace is benchmark-runner ]
7878

7979
**auto:** ODF_PVC=True [ True=ODF PVC storage, False=Ephemeral storage, default True ]
8080

@@ -84,8 +84,6 @@ Not mandatory:
8484

8585
**auto:** RUNNER_PATH=/tmp [ The default work space is /tmp ]
8686

87-
**optional:** PIN_NODE_BENCHMARK_OPERATOR=$PIN_NODE_BENCHMARK_OPERATOR [node selector for benchmark operator pod]
88-
8987
**optional:** PIN_NODE1=$PIN_NODE1 [node1 selector for running the workload]
9088

9189
**optional:** PIN_NODE2=$PIN_NODE2 [node2 selector for running the workload, i.e. uperf server and client, hammerdb database and workload]
@@ -111,17 +109,16 @@ Not mandatory:
111109
For example:
112110

113111
```sh
114-
podman run --rm -e WORKLOAD="hammerdb_pod_mariadb" -e KUBEADMIN_PASSWORD="1234" -e PIN_NODE_BENCHMARK_OPERATOR="node_name-0" -e PIN_NODE1="node_name-1" -e PIN_NODE2="node_name-2" -e log_level=INFO -v /root/.kube/config:/root/.kube/config --privileged quay.io/benchmark-runner/benchmark-runner:latest
112+
podman run --rm -e WORKLOAD="hammerdb_pod_mariadb" -e KUBEADMIN_PASSWORD="1234" -e PIN_NODE1="node_name-1" -e PIN_NODE2="node_name-2" -e log_level=INFO -v /root/.kube/config:/root/.kube/config --privileged quay.io/benchmark-runner/benchmark-runner:latest
115113
```
116114
or
117115
```sh
118-
docker run --rm -e WORKLOAD="hammerdb_vm_mariadb" -e KUBEADMIN_PASSWORD="1234" -e PIN_NODE_BENCHMARK_OPERATOR="node_name-0" -e PIN_NODE1="node_name-1" -e PIN_NODE2="node_name-2" -e log_level=INFO -v /root/.kube/config:/root/.kube/config --privileged quay.io/benchmark-runner/benchmark-runner:latest
116+
docker run --rm -e WORKLOAD="hammerdb_vm_mariadb" -e KUBEADMIN_PASSWORD="1234" -e PIN_NODE1="node_name-1" -e PIN_NODE2="node_name-2" -e log_level=INFO -v /root/.kube/config:/root/.kube/config --privileged quay.io/benchmark-runner/benchmark-runner:latest
119117
```
120118

121119
SAVE RUN ARTIFACTS LOCAL:
122120
1. add `-e SAVE_ARTIFACTS_LOCAL='True'` or `--save-artifacts-local=true`
123121
2. add `-v /tmp/benchmark-runner-run-artifacts:/tmp/benchmark-runner-run-artifacts`
124-
3. git clone -b v1.0.3 https://github.com/cloud-bulldozer/benchmark-operator /tmp/benchmark-operator
125122

126123
### Run vdbench workload in Pod using OpenShift
127124
![](media/benchmark-runner-demo.gif)

benchmark_runner/common/oc/oc.py

Lines changed: 111 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -802,18 +802,27 @@ def collect_events(self):
802802

803803
@typechecked
804804
@logger_time_stamp
805-
def get_pod(self, label: str, database: str = '', namespace: str = environment_variables.environment_variables_dict['namespace']):
805+
def get_pod(self, label: str = '', database: str = '', namespace: str = environment_variables.environment_variables_dict['namespace'], label_selector: str = ''):
806806
"""
807-
This method gets pods according to label
808-
:param label:
807+
This method gets pod name by name pattern or label selector
808+
:param label: pod name pattern (grep match)
809809
:param database:
810810
:param namespace:
811-
:return:
811+
:param label_selector: Kubernetes label selector (e.g. 'app=stressng_workload-<uuid>')
812+
:return: pod name
812813
"""
813814
if database:
814815
return self.run(
815816
f"{self._cli} get pods -n '{database}-db'" + " --no-headers | awk '{ print $1; }' | grep " + database,
816817
is_check=True).rstrip().decode('ascii')
818+
elif label_selector:
819+
namespace_opt = f'-n {namespace}' if namespace else ''
820+
result = self.run(
821+
f"{self._cli} get pods {namespace_opt} -l '{label_selector}' -o jsonpath='{{.items[0].metadata.name}}'",
822+
is_check=True)
823+
if isinstance(result, bytes):
824+
return result.decode('utf-8').strip().strip("'")
825+
return str(result).strip().strip("'") if result else ''
817826
else:
818827
namespace = f'-n {namespace}' if namespace else ''
819828
return self.run(f"{self._cli} get pods {namespace} --no-headers | awk '{{ print $1; }}' | grep -w '{label}'", is_check=True).rstrip().decode('ascii')
@@ -862,26 +871,27 @@ def get_pods(self):
862871

863872
@typechecked
864873
@logger_time_stamp
865-
def wait_for_pod_create(self, pod_name: str,
874+
def wait_for_pod_create(self, pod_name: str = '', label: str = '',
866875
namespace: str = environment_variables.environment_variables_dict['namespace'],
867876
timeout: int = int(environment_variables.environment_variables_dict['timeout'])):
868877
"""
869-
This method waits till pod name is creating or throw exception after timeout
878+
This method waits till pod is created or throws exception after timeout.
879+
Can match by pod_name or label selector (for Job pods with random suffixes).
880+
:param pod_name: Pod name to match
881+
:param label: Label selector to match (e.g. 'app=stressng_workload-<uuid>')
870882
:param namespace:
871-
:param pod_name:
872883
:param timeout:
873-
:return: True if getting pod name or raise PodNameError
884+
:return: True if pod found or raise PodNotCreateTimeout
874885
"""
875886
current_wait_time = 0
876887
while timeout <= 0 or current_wait_time <= timeout:
877-
if self.pod_exists(pod_name=pod_name, namespace=namespace):
878-
self.describe_pod(pod_name=pod_name, namespace=namespace)
888+
if label and self.pod_label_exists(label_name=label, namespace=namespace):
889+
return True
890+
elif pod_name and self.pod_exists(pod_name=pod_name, namespace=namespace):
879891
return True
880-
# sleep for x seconds
881892
time.sleep(OC.SLEEP_TIME)
882893
current_wait_time += OC.SLEEP_TIME
883-
self.describe_pod(pod_name=pod_name, namespace=namespace)
884-
raise PodNotCreateTimeout(pod_name)
894+
raise PodNotCreateTimeout(pod_name or label)
885895

886896
@typechecked
887897
@logger_time_stamp
@@ -1163,13 +1173,23 @@ def wait_for_pod_completed(self, label: str, workload: str = '', label_uuid: boo
11631173
f"{self._cli} {namespace} wait --for=condition=failed -l {label}-{self.__get_short_uuid(workload=workload)} jobs --timeout={OC.SLEEP_TIME}s")
11641174
if 'met' in result:
11651175
return False
1166-
if not job:
1176+
elif job:
1177+
# Handle job=True with label_uuid=False (direct pod workloads)
1178+
result = self.run(
1179+
f"{self._cli} {namespace} wait --for=condition=complete -l {label} jobs --timeout={OC.SHORT_TIMEOUT}s")
1180+
if 'met' in result:
1181+
return True
1182+
result = self.run(
1183+
f"{self._cli} {namespace} wait --for=condition=failed -l {label} jobs --timeout={OC.SLEEP_TIME}s")
1184+
if 'met' in result:
1185+
return False
1186+
elif not job:
11671187
result = self.run(f"{self._cli} get pod -l {label}" + " -n benchmark-runner --no-headers | awk '{ print $3; }'")
11681188
if 'Completed' in result:
11691189
return True
1170-
# sleep for x seconds
1171-
time.sleep(OC.SLEEP_TIME)
1172-
current_wait_time += OC.SLEEP_TIME
1190+
# sleep for x seconds
1191+
time.sleep(OC.SLEEP_TIME)
1192+
current_wait_time += OC.SLEEP_TIME
11731193
except Exception as err:
11741194
raise PodNotCompletedTimeout(workload=workload)
11751195

@@ -1245,6 +1265,55 @@ def get_vm(self, label: str = '', namespace: str = environment_variables.environ
12451265
else:
12461266
return self.run(f'{self._cli} get vmi', is_check=True)
12471267

1268+
def _get_pod_field(self, field: str, label: str = '', pod_name: str = '', namespace: str = '') -> str:
1269+
"""
1270+
Get a pod field via jsonpath, by label selector or pod name
1271+
"""
1272+
namespace = namespace or environment_variables.environment_variables_dict.get('namespace', '')
1273+
try:
1274+
if label:
1275+
result = self.run(
1276+
cmd=f"{self._cli} get pods -n {namespace} -l {label} -o jsonpath='{{.items[0].{field}}}'")
1277+
else:
1278+
result = self.run(
1279+
cmd=f"{self._cli} get pod -n {namespace} {pod_name} -o jsonpath='{{.{field}}}'")
1280+
return result.strip().strip(b"'").decode('ascii') if isinstance(result, bytes) else str(result).strip().strip("'")
1281+
except Exception:
1282+
return ''
1283+
1284+
def get_pod_ip(self, label: str = '', pod_name: str = '', namespace: str = '') -> str:
1285+
return self._get_pod_field('status.podIP', label=label, pod_name=pod_name, namespace=namespace)
1286+
1287+
def get_pod_node(self, label: str = '', pod_name: str = '', namespace: str = '') -> str:
1288+
return self._get_pod_field('spec.nodeName', label=label, pod_name=pod_name, namespace=namespace)
1289+
1290+
def get_vmi_ip(self, namespace: str, vm_name: str, retries: int = 30) -> str:
1291+
"""
1292+
Get the IP address of a VirtualMachineInstance, retrying until available
1293+
"""
1294+
for attempt in range(retries):
1295+
try:
1296+
result = self.run(
1297+
cmd=f"{self._cli} get vmi -n {namespace} {vm_name} -o jsonpath='{{.status.interfaces[0].ipAddress}}'")
1298+
ip = result.strip().strip(b"'").decode('ascii') if isinstance(result, bytes) else str(result).strip().strip("'")
1299+
if ip and ip != '<none>':
1300+
return ip
1301+
except Exception:
1302+
pass
1303+
time.sleep(2)
1304+
return ''
1305+
1306+
def get_cluster_name(self) -> str:
1307+
"""
1308+
Get the cluster name/ID
1309+
"""
1310+
try:
1311+
result = self.run(cmd=f"{self._cli} get infrastructure cluster -o jsonpath='{{.status.infrastructureName}}'")
1312+
return result.strip().strip(b"'").decode('ascii') if isinstance(result, bytes) else str(result).strip().strip("'")
1313+
except Exception:
1314+
return ''
1315+
1316+
12481317
@logger_time_stamp
12491318
def __verify_vm_log_complete(self, vm_name: str, timeout: int = int(environment_variables.environment_variables_dict['timeout'])):
12501319
"""
@@ -1452,19 +1521,26 @@ def create_vm_sync(self, yaml: str, vm_name: str,
14521521

14531522
@typechecked
14541523
@logger_time_stamp
1455-
def delete_vm_sync(self, yaml: str, vm_name: str,
1524+
def delete_vm_sync(self, yaml: str = '', vm_name: str = '',
14561525
namespace: str = environment_variables.environment_variables_dict['namespace'],
14571526
timeout: int = int(environment_variables.environment_variables_dict['timeout'])):
14581527
"""
1459-
This method deletes specified VM synchronously; return False if it does not exist
1528+
This method deletes specified VM synchronously; return False if it does not exist.
1529+
Can delete by YAML file or by VM name directly.
1530+
:param yaml: YAML file to delete (deletes all resources in YAML)
1531+
:param vm_name: VM name to delete (used when no YAML, or to delete specific VM)
14601532
:param namespace:
14611533
:param timeout:
1462-
:param vm_name:
1463-
:param yaml:
14641534
:return: return False if vm does not exist
14651535
"""
14661536
if self.vm_exists(vm_name=vm_name, namespace=namespace):
1467-
self.delete_async(yaml)
1537+
if yaml:
1538+
self.delete_async(yaml)
1539+
else:
1540+
try:
1541+
self.run(f"{self._cli} delete vm {vm_name} -n {namespace} --ignore-not-found")
1542+
except Exception:
1543+
pass
14681544
return self.wait_for_vm_delete(vm_name=vm_name, namespace=namespace, timeout=timeout)
14691545
else:
14701546
return False
@@ -1492,9 +1568,19 @@ def wait_for_vm_completed(self, workload: str = '', vm_name: str = '',
14921568
current_wait_time = 0
14931569
namespace = f'-n {namespace}' if namespace else ''
14941570
while timeout <= 0 or current_wait_time <= timeout:
1495-
if self.run(
1496-
f"{self._cli} {namespace} get benchmark {workload} -o jsonpath={{.status.complete}}") == 'true':
1497-
return True
1571+
# Check VMI phase for direct VM workloads
1572+
if vm_name:
1573+
vmi_phase = self.run(
1574+
f"{self._cli} {namespace} get vmi {vm_name} -o jsonpath={{.status.phase}}")
1575+
if vmi_phase == 'Succeeded':
1576+
return True
1577+
elif vmi_phase == 'Failed':
1578+
return False
1579+
else:
1580+
# Fallback to benchmark CR for operator-based workloads
1581+
if self.run(
1582+
f"{self._cli} {namespace} get benchmark {workload} -o jsonpath={{.status.complete}}") == 'true':
1583+
return True
14981584
# sleep for x seconds
14991585
time.sleep(OC.SLEEP_TIME)
15001586
current_wait_time += OC.SLEEP_TIME

0 commit comments

Comments (0)