diff --git a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Benchmark.py b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Benchmark.py
index 19d67508..35540c35 100644
--- a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Benchmark.py
+++ b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Benchmark.py
@@ -6,29 +6,40 @@
 import unittest
 import subprocess
 import requests
+import shutil
+import re
+
+from dataclasses import dataclass
+from typing import Optional
 
 unittest.TestLoader.sortTestMethodsUsing = None
 # unittest.TestLoader.sortTestMethodsUsing = lambda self, a, b: (a < b) - (a > b)
 
-class RunCmd:
-    def run(self, cmd, env_vars=None):
-        # Ensure cmd is a list of arguments
-        if isinstance(cmd, str):
-            import shlex
-            cmd = shlex.split(cmd)
-
-        # Print the command and environment variables for debugging
-        print("Running command:", cmd)
-        if env_vars:
-            print("With environment variables:", env_vars)
-
-        # Execute the command with the environment variables
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False, env=env_vars)
-        output, err = p.communicate()
-        p_status = p.wait()
-
-        # Return the status and output
-        return p_status, output
+def run_cmd(cmd, env_vars=None):
+    # Ensure cmd is a list of arguments
+    if isinstance(cmd, str):
+        import shlex
+        cmd = shlex.split(cmd)
+
+    # Print the command and environment variables for debugging
+    print("Running command:", cmd)
+    if env_vars:
+        print("With environment variables:", env_vars)
+
+    env = os.environ.copy()
+    if env_vars:
+        env.update(env_vars)
+
+    # Execute the command with the environment variables
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False, env=env)
+    output, err = p.communicate()
+    p_status = p.wait()
+
+    output = output.decode("utf-8") if output else ""
+    err = err.decode("utf-8") if err else ""
+
+    # Return the status and output
+    return p_status, output, err
 
 
 class PerfUtility:
@@ -72,12 +83,9 @@ def model_test(self, data, perf_report):
         status = 0
         if hqt_output_exist is False:
             # 0.1 .Run the tensor measurement instruction
-            import shutil
-            import os
             cmd = data["run_cmd"]
             env = data["env_vars"]
-            status, output = RunCmd().run(cmd, env)
-            output = output.decode("utf-8")
+            status, output, err = run_cmd(cmd, env)
             print(cmd)
             # 0.1.1 copy generated hqt_output(dst) to HQT folder(src)
             if os.path.exists(dst):
@@ -95,16 +103,19 @@ def model_test(self, data, perf_report):
         env = data["env_vars"]
         #print(cmd)
         #return 0
-        status, output = RunCmd().run(cmd, env)
-        output = output.decode("utf-8")
+        status, output, err = run_cmd(cmd, env)
 
         # 2.Parsing the run log
         filename = data["model"] + "_" + data["input_len"] + "_" + data["output_len"] + "_" + data["num_cards"] + 'c' + "_log.txt"
-        perf_report.dump_log_to_file(output, filename)
-        throughput, mem_allocated, max_mem_allocated, graph_compile = perf_report.parse_run_log(output)
+        perf_report.dump_log_to_file(output + "\n" + err, filename)
+        parsed = perf_report.parse_run_log(output)
+
+        throughput = parsed.throughput or '0'
+        mem_allocated = parsed.mem_allocated or '0'
+        max_mem_allocated = parsed.max_mem_allocated or '0'
+        graph_compile = parsed.graph_compile or 'N/A'
 
         # 3.Add new row into report
-        #throughput = '0'
         new_row = {}
         perf_ratio = float(throughput) / float(data["ref_perf"])
         if perf_report.report_level >= 3:
@@ -117,6 +128,14 @@ def model_test(self, data, perf_report):
         return status
 
 
+@dataclass
+class RunLogData:
+    throughput: Optional[str] = None
+    mem_allocated: Optional[str] = None
+    max_mem_allocated: Optional[str] = None
+    graph_compile: Optional[str] = None
+
+
 class PerfReport:
     def __init__(self, name, report_level):
         self.name = name
@@ -151,28 +170,29 @@ def init_perf_report(self):
         self.perf_report_df = df
 
     def dump_log_to_file(self, output, filename):
-        filepath = self.result_folder_name + os.sep + filename
-        fd = open(filepath, "w") # append mode
-        fd.write(output)
-        fd.close()
-        return
+        filepath = os.path.join(self.result_folder_name, filename)
+        with open(filepath, "w", encoding="utf-8") as f:
+            f.write(output)
 
     def parse_run_log(self, log):
-        throughput = ''
-        mem_allocated = ''
-        max_mem_allocated = ''
-        graph_compile = ''
+        result = RunLogData()
+
+        patterns = {
+            "throughput": re.compile(r"Throughput.*?=\s*([\d.eE+-]+)\s+\S+"),
+            "mem_allocated": re.compile(r"Memory allocated\s*=\s*([\d.eE+-]+)\s+\S+"),
+            "max_mem_allocated": re.compile(r"Max memory allocated\s*=\s*([\d.eE+-]+)\s+\S+"),
+            "graph_compile": re.compile(r"Graph compilation duration\s*=\s*([\d.eE+-]+)\s+\S+"),
+        }
+
         for line in log.splitlines():
-            if line.find("Throughput") != -1:
-                throughput = line.split('=')[1].split(' ')[1]
-            elif line.find("Memory") != -1:
-                mem_allocated = line.split('=')[1].split(' ')[1]
-            elif line.find("Max") != -1:
-                max_mem_allocated = line.split('=')[1].split(' ')[1]
-            elif line.find("Graph") != -1:
-                graph_compile = line.split('=')[1].split(' ')[1]
-        return throughput, mem_allocated, max_mem_allocated, graph_compile
+            line = line.strip()
+            for key, pattern in patterns.items():
+                if getattr(result, key) is None:
+                    match = pattern.search(line)
+                    if match:
+                        setattr(result, key, match.group(1))
+        return result
 
     def generate_perf_report(self):
         import os
@@ -228,7 +248,6 @@ def generate_perf_report(self):
         )
 
         print("\nReport File is : " + report_path)
-        import shutil
         shutil.make_archive(self.result_folder_name, "zip", self.result_folder_name)
 
         return
@@ -293,7 +312,7 @@ def test_1_perfspect(self):
         output, err = p.communicate()
         status = p.wait()
         cmd = './perfspect/perfspect report --gaudi --output ' + self.perf_report.result_folder_name
-        status, output = RunCmd().run(cmd)
+        status, output, err = run_cmd(cmd)
         import socket
         hostname = socket.gethostname()
         xlsx_file = self.perf_report.result_folder_name + os.sep + hostname + '.xlsx'
@@ -305,95 +324,39 @@ def test_1_perfspect(self):
         self.perf_report.gaudi_info_df = df
         self.assertEqual(False, False)
 
-    @unittest.skipIf(skip_llama2_70b == 1 , "Skip over this routine")
-    def test_2_llama2_70b(self):
-
-        model_name = "Llama2_70b"
+    def run_model_test(self, model_name):
         # Get configs/data
         data = self.utils.load_input_data(model_name)
-        #print(data)
         self.assertNotEqual(data, None)
 
         # Testing
-        for i in data:
-            try:
-                response_status_code = self.utils.model_test(i, perf_report)
-            except:
-                response_status_code=-1
-                continue
-            self.assertEqual(response_status_code, 0)
+        for item in data:
+            with self.subTest(input=item):
+                response_status_code = self.utils.model_test(item, perf_report)
+                self.assertEqual(
+                    response_status_code, 0,
+                    f"Model test failed with status code {response_status_code} for input {item}"
+                )
+
+    @unittest.skipIf(skip_llama2_70b == 1 , "Skip over this routine")
+    def test_2_llama2_70b(self):
+        self.run_model_test("Llama2_70b")
 
     @unittest.skipIf(skip_llama31_8b == 1 , "Skip over this routine")
     def test_3_llama3_1_8b(self):
-
-        model_name = "Llama3.1_8b"
-        # Get configs/data
-        data = self.utils.load_input_data(model_name)
-        #print(data)
-        self.assertNotEqual(data, None)
-
-        # Testing
-        for i in data:
-            try:
-                response_status_code = self.utils.model_test(i, perf_report)
-            except:
-                response_status_code=-1
-                continue
-            self.assertEqual(response_status_code, 0)
+        self.run_model_test("Llama3.1_8b")
 
     @unittest.skipIf(skip_llama31_70b == 1 , "Skip over this routine")
     def test_4_llama3_1_70b(self):
-
-        model_name = "Llama3.1_70b"
-        # Get configs/data
-        data = self.utils.load_input_data(model_name)
-        #print(data)
-        self.assertNotEqual(data, None)
-
-        # Testing
-        for i in data:
-            try:
-                response_status_code = self.utils.model_test(i, perf_report)
-            except:
-                response_status_code=-1
-                continue
-            self.assertEqual(response_status_code, 0)
+        self.run_model_test("Llama3.1_70b")
 
     @unittest.skipIf(skip_llama33_70b == 1 , "Skip over this routine")
     def test_5_llama3_3_70b(self):
-
-        model_name = "Llama3.3_70b"
-        # Get configs/data
-        data = self.utils.load_input_data(model_name)
-        #print(data)
-        self.assertNotEqual(data, None)
-
-        # Testing
-        for i in data:
-            try:
-                response_status_code = self.utils.model_test(i, perf_report)
-            except:
-                response_status_code=-1
-                continue
-            self.assertEqual(response_status_code, 0)
+        self.run_model_test("Llama3.3_70b")
 
     @unittest.skipIf(skip_llama31_405b == 1 , "Skip over this routine")
     def test_6_llama3_1_405b(self):
-
-        model_name = "Llama3.1_405b"
-        # Get configs/data
-        data = self.utils.load_input_data(model_name)
-        #print(data)
-        self.assertNotEqual(data, None)
-
-        # Testing
-        for i in data:
-            try:
-                response_status_code = self.utils.model_test(i, perf_report)
-            except:
-                response_status_code=-1
-                continue
-            self.assertEqual(response_status_code, 0)
+        self.run_model_test("Llama3.1_405b")
 
 if __name__ == "__main__":
     import sys
diff --git a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile
index 987eefc5..9b06d111 100644
--- a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile
+++ b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile
@@ -11,15 +11,21 @@
 RUN npm install n -g && \
     n latest
 RUN python3 -m pip install --no-cache-dir --upgrade pip
-RUN python3 -m pip install --upgrade-strategy eager optimum[habana]
 RUN python3 -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.20.0
 RUN mkdir -p /workspace
 WORKDIR /workspace
-RUN git clone https://github.com/huggingface/optimum-habana && cd optimum-habana && git checkout v1.16.0
+RUN git clone -b v1.18.0 https://github.com/huggingface/optimum-habana && \
+cd optimum-habana && \
+python3 -m pip install --no-cache-dir .
 
 WORKDIR /workspace/optimum-habana/examples/text-generation
 RUN python3 -m pip install -r requirements.txt
 RUN python3 -m pip install -r requirements_lm_eval.txt
+
+# Installing datasets version compatible with PiQA
+# TODO: Remove this once optimum-habana is updated
+RUN python3 -m pip install datasets==3.6.0
+
 COPY . .
 COPY Gaudi_1-20.json Gaudi.json
 COPY HQT_1-20.zip HQT.zip
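
Reviewer note (not part of the patch): a minimal sketch of how the refactored helpers are expected to behave, assuming the patched Benchmark.py can be imported from the working directory without heavyweight side effects. The file name, the echo command, the EXAMPLE_VAR variable, and the sample log text below are illustrative assumptions, not taken from a real optimum-habana run.

    # sketch.py - standalone exercise of run_cmd() and PerfReport.parse_run_log()
    from Benchmark import run_cmd, PerfReport

    # run_cmd() now returns decoded stdout/stderr alongside the exit status and
    # merges env_vars into a copy of os.environ instead of replacing it.
    status, out, err = run_cmd("echo hello", env_vars={"EXAMPLE_VAR": "1"})
    assert status == 0 and out.strip() == "hello"

    # parse_run_log() returns a RunLogData dataclass instead of a 4-tuple; fields
    # that never appear in the log stay None, and model_test() falls back to
    # '0' / 'N/A' for them.  The log lines below are invented for illustration.
    sample_log = (
        "Throughput (including tokenization) = 1234.5 tokens/second\n"
        "Memory allocated = 10.2 GB\n"
        "Max memory allocated = 12.8 GB\n"
        "Graph compilation duration = 15.3 seconds\n"
    )
    parsed = PerfReport.parse_run_log(None, sample_log)  # parse_run_log() does not touch self
    print(parsed.throughput, parsed.mem_allocated, parsed.max_mem_allocated, parsed.graph_compile)
    # -> 1234.5 10.2 12.8 15.3

The regex-based parser is anchored on the metric names rather than on token positions, so reordered or extra words in a log line no longer shift the captured value the way the old split('=')/split(' ') indexing did.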