Migrate stressng and uperf workloads from benchmark-operator/snafu to native resource creation#1186
Migrate stressng and uperf workloads from benchmark-operator/snafu to native resource creation#1186
Conversation
|
Skipping CI for Draft Pull Request. |
594eece to
950a41c
Compare
|
/test all |
|
@arpsharm, |
98c26bb to
fe01110
Compare
|
/test all |
fe01110 to
360e410
Compare
|
/test all |
fb7a63b to
8012f77
Compare
|
[APPROVALNOTIFIER] This PR is NOT APPROVED. This pull-request has been approved by: arpsharm. The full list of commands accepted by this bot can be found here. Details: Needs approval from an approver in each of these files. Approvers can indicate their approval by writing |
8012f77 to
66fb387
Compare
| 'node_range': self._environment_variables_dict.get('node_range', ''), | ||
| 'pod_id': '', | ||
| 'hostnetwork': self._environment_variables_dict.get('hostnetwork', 'False') | ||
| } |
There was a problem hiding this comment.
Try to create uperf_data.yaml with default values
| self._environment_variables_dict['test_user'] = os.environ.get('TEST_USER', 'ripsaw') | ||
| self._environment_variables_dict['port'] = os.environ.get('PORT', '30000') | ||
| self._environment_variables_dict['run_id'] = os.environ.get('RUN_ID', 'NA') | ||
|
|
There was a problem hiding this comment.
Add it to init and put it inside the yaml.
Please check in elastic whether we need all the fields, and remove any that are not needed, for example:
self._environment_variables_dict['test_user'] = os.environ.get('TEST_USER', 'ripsaw')
| for _ in range(30): | ||
| if not self._oc.vm_exists(vm_name=self.__client_vm_name): | ||
| break | ||
| time.sleep(1) |
There was a problem hiding this comment.
Please use the existing method delete_vm_sync.
Go over all the places that call subprocess.run, check whether an existing method in the oc class covers them, and always use the sync method.
| yaml_path = os.path.join(f'{self._run_artifacts_path}', f'{self.__name}.yaml') | ||
| apply_cmd = f"oc apply -f {yaml_path}" | ||
| result = subprocess.run(apply_cmd, shell=True, capture_output=True, text=True) | ||
| if result.returncode != 0: |
|
|
||
| # Wait for client workload to complete by polling for signal file via guest agent | ||
| logger.info("Waiting for uperf client workload to complete...") | ||
| max_wait = 600 # 10 minutes timeout |
There was a problem hiding this comment.
Take the timeout from an environment variable or from the oc class.
| self._environment_variables_dict['clustername'] = cluster_name | ||
| self._environment_variables_dict['test_user'] = os.environ.get('TEST_USER', 'ripsaw') | ||
| self._environment_variables_dict['port'] = os.environ.get('PORT', '30000') | ||
| self._environment_variables_dict['run_id'] = os.environ.get('RUN_ID', 'NA') |
| time.sleep(5) | ||
|
|
||
| # Re-generate client YAML with server IP (template needs it) | ||
| from benchmark_runner.common.template_operations.template_operations import TemplateOperations |
| # Re-generate client YAML with server IP (template needs it) | ||
| from benchmark_runner.common.template_operations.template_operations import TemplateOperations | ||
| template_ops = TemplateOperations(workload=self._workload) | ||
| template_ops.set_environment_variables(self._environment_variables_dict) |
| logger.info(f"Client IP: {client_ip}") | ||
|
|
||
| # Get pod logs using oc command | ||
| logs_cmd = f"oc logs -n {self._environment_variables_dict['namespace']} {client_pod}" |
| self.__server_job_name = '' | ||
| self.__client_job_name = '' | ||
|
|
||
| def _parse_uperf_pod_logs(self, pod_logs, server_ip, server_node, client_node, pod_id, client_ip): |
There was a problem hiding this comment.
Please add data types for pod_logs, server_ip, server_node, client_node, pod_id and client_ip, and also enforce them by using
@typechecked
@logger_time_stamp
66fb387 to
ddb6980
Compare
7922313 to
15cb446
Compare
15cb446 to
c86e5fd
Compare
| logger.info("Server VM is ready, getting server IP") | ||
|
|
||
| # Get server VMI IP - retry until IP is assigned | ||
| namespace = self._environment_variables_dict['namespace'] |
benchmark_runner/common/oc/oc.py
Outdated
| logger.warning(f"virtctl ssh error: {e}") | ||
| return None | ||
|
|
||
| def wait_for_virtctl_ssh(self, vm_name: str, namespace: str = '', key_path: str = '', username: str = 'fedora', timeout: int = 180) -> bool: |
There was a problem hiding this comment.
username: str = 'fedora' => make this an environment variable
| self.__server_vm_name = f'uperf-server-{self._trunc_uuid}' | ||
| self.__client_vm_name = f'uperf-client-{self._trunc_uuid}' | ||
| self.__template_ops = TemplateOperations(workload=self._workload) | ||
| self.__ssh_key_path = self._environment_variables_dict.get('ssh_key_path', '/tmp/benchmark-runner-ssh-key') |
There was a problem hiding this comment.
We need a dynamic key that is generated for every workload
|
|
||
| # Wait for SSH to be ready on client VM | ||
| logger.info("Waiting for SSH on client VM...") | ||
| self._oc.wait_for_virtctl_ssh(vm_name=self.__client_vm_name, namespace=namespace, key_path=self.__ssh_key_path, username='fedora', timeout=180) |
There was a problem hiding this comment.
username should be an environment variable
| workload_complete = False | ||
|
|
||
| for elapsed in range(0, max_wait, poll_interval): | ||
| check_result = self._oc.virtctl_ssh(vm_name=self.__client_vm_name, command='test -f /opt/uperf/workload_complete.signal && echo done', namespace=namespace, key_path=self.__ssh_key_path, username='fedora') |
There was a problem hiding this comment.
Virtctl class
def wait_for_vm_workload_completed(file_path, local_path) => should be in the virtctl dir
- def ssh ready
- def wait for file created
- def scp the file to local
** do not use a hard-coded pem secret
Uperf_vm.py
def parse uperf vm result
workload_operation.py => if the uperf and stressng log parser is the same
| @@ -0,0 +1,43 @@ | |||
| apiVersion: v1 | |||
| check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) | ||
| with open(f'{ssh_key_path}.pub', 'r') as f: | ||
| self._environment_variables_dict['ssh_public_key'] = f.read().strip() | ||
| self._environment_variables_dict['ssh_key_path'] = ssh_key_path |
There was a problem hiding this comment.
self._ssh_key_path= generate_ssh_key()
| - mkdir -p /opt/uperf && chmod 777 /opt/uperf | ||
| - systemctl enable --now qemu-guest-agent | ||
| - for nic in $(ls /sys/class/net/ | grep -v lo); do ethtool -L $nic combined $(nproc) 2>/dev/null; done || true | ||
| - uperf -s -P 30000 > /opt/uperf/server.log 2>&1 & |
There was a problem hiding this comment.
- export HOME=/root
- export TMP=/tmp
- export TEMP=/tmp
- /tmp/uperf.log
- python3 uperf_parser.py /tmp/uperf.log => should generate /tmp/uperf.json (create a configmap with uperf_parser.py in the same yaml as the cloudinit)
uperf_vm.py
-- so we need to wait for /tmp/uperf.json
-- copy /tmp/uperf.json to local
c86e5fd to
c6ea527
Compare
|
/test all |
c6ea527 to
cbde4b2
Compare
|
|
||
| @typechecked | ||
| def wait_for_file_created(self, vm_name: str, file_path: str, namespace: str = '', key_path: str = '', username: str = '', timeout: int = 3600) -> bool: | ||
| """ |
There was a problem hiding this comment.
timeout: int = 3600 => please use the timeout from the env variable, because there are workloads that run for more than an hour
cbde4b2 to
acc8202
Compare
What changed
- oc.py for pod/VM introspection and guest-agent operations
- subprocess.run calls with oc.py methods
- @typechecked annotations, moved initializations to __init__
- stressng_timeout variable collision with general timeout env var