diff --git a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Benchmark.py b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Benchmark.py
index 19d67508..35540c35 100644
--- a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Benchmark.py
+++ b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Benchmark.py
@@ -6,29 +6,40 @@
 import unittest
 import subprocess
 import requests
+import shutil
+import re
+
+from dataclasses import dataclass
+from typing import Optional
 
 unittest.TestLoader.sortTestMethodsUsing = None
 # unittest.TestLoader.sortTestMethodsUsing = lambda self, a, b: (a < b) - (a > b)
 
-class RunCmd:
-    def run(self, cmd, env_vars=None):
-        # Ensure cmd is a list of arguments
-        if isinstance(cmd, str):
-            import shlex
-            cmd = shlex.split(cmd)
-
-        # Print the command and environment variables for debugging
-        print("Running command:", cmd)
-        if env_vars:
-            print("With environment variables:", env_vars)
-
-        # Execute the command with the environment variables
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False, env=env_vars)
-        output, err = p.communicate()
-        p_status = p.wait()
-
-        # Return the status and output
-        return p_status, output
+def run_cmd(cmd, env_vars=None):
+    # Ensure cmd is a list of arguments
+    if isinstance(cmd, str):
+        import shlex
+        cmd = shlex.split(cmd)
+
+    # Print the command and environment variables for debugging
+    print("Running command:", cmd)
+    if env_vars:
+        print("With environment variables:", env_vars)
+
+    env = os.environ.copy()
+    if env_vars:
+        env.update(env_vars)
+
+    # Execute the command with the environment variables
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False, env=env)
+    output, err = p.communicate()
+    p_status = p.wait()
+
+    output = output.decode("utf-8") if output else ""
+    err = err.decode("utf-8") if err else ""
+
+    # Return the status and output
+    return p_status, output, err
 
 
 class PerfUtility:
@@ -72,12 +83,9 @@ def model_test(self, data, perf_report):
         status = 0
         if hqt_output_exist is False:
             # 0.1 .Run the tensor measurement instruction
-            import shutil
-            import os
             cmd = data["run_cmd"]
             env = data["env_vars"]
-            status, output = RunCmd().run(cmd, env)
-            output = output.decode("utf-8")
+            status, output, err = run_cmd(cmd, env)
             print(cmd)
             # 0.1.1 copy generated hqt_output(dst) to HQT folder(src)
             if os.path.exists(dst):
@@ -95,16 +103,19 @@ def model_test(self, data, perf_report):
         env = data["env_vars"]
         #print(cmd)
         #return 0
-        status, output = RunCmd().run(cmd, env)
-        output = output.decode("utf-8")
+        status, output, err = run_cmd(cmd, env)
 
         # 2.Parsing the run log
         filename = data["model"] + "_" + data["input_len"] + "_" + data["output_len"] + "_" + data["num_cards"] + 'c' + "_log.txt"
-        perf_report.dump_log_to_file(output, filename)
-        throughput, mem_allocated, max_mem_allocated, graph_compile = perf_report.parse_run_log(output)
+        perf_report.dump_log_to_file(output + "\n" + err, filename)
+        parsed = perf_report.parse_run_log(output)
+
+        throughput = parsed.throughput or '0'
+        mem_allocated = parsed.mem_allocated or '0'
+        max_mem_allocated = parsed.max_mem_allocated or '0'
+        graph_compile = parsed.graph_compile or 'N/A'
 
         # 3.Add new row into report
-        #throughput = '0'
         new_row = {}
         perf_ratio = float(throughput) / float(data["ref_perf"])
         if perf_report.report_level >= 3:
@@ -117,6 +128,14 @@ def model_test(self, data, perf_report):
         return status
 
 
+@dataclass
+class RunLogData:
+    throughput: Optional[str] = None
+    mem_allocated: Optional[str] = None
+    max_mem_allocated: Optional[str] = None
+    graph_compile: Optional[str] = None
+
+
 class PerfReport:
     def __init__(self, name, report_level):
         self.name = name
@@ -151,28 +170,29 @@ def init_perf_report(self):
         self.perf_report_df = df
 
     def dump_log_to_file(self, output, filename):
-        filepath = self.result_folder_name + os.sep + filename
-        fd = open(filepath, "w") # append mode
-        fd.write(output)
-        fd.close()
-        return
+        filepath = os.path.join(self.result_folder_name, filename)
+        with open(filepath, "w", encoding="utf-8") as f:
+            f.write(output)
 
     def parse_run_log(self, log):
-        throughput = ''
-        mem_allocated = ''
-        max_mem_allocated = ''
-        graph_compile = ''
+        result = RunLogData()
+
+        patterns = {
+            "throughput": re.compile(r"Throughput.*?=\s*([\d.eE+-]+)\s+\S+"),
+            "mem_allocated": re.compile(r"Memory allocated\s*=\s*([\d.eE+-]+)\s+\S+"),
+            "max_mem_allocated": re.compile(r"Max memory allocated\s*=\s*([\d.eE+-]+)\s+\S+"),
+            "graph_compile": re.compile(r"Graph compilation duration\s*=\s*([\d.eE+-]+)\s+\S+"),
+        }
+
         for line in log.splitlines():
-            if line.find("Throughput") != -1:
-                throughput = line.split('=')[1].split(' ')[1]
-            elif line.find("Memory") != -1:
-                mem_allocated = line.split('=')[1].split(' ')[1]
-            elif line.find("Max") != -1:
-                max_mem_allocated = line.split('=')[1].split(' ')[1]
-            elif line.find("Graph") != -1:
-                graph_compile = line.split('=')[1].split(' ')[1]
-        return throughput, mem_allocated, max_mem_allocated, graph_compile
+            line = line.strip()
+            for key, pattern in patterns.items():
+                if getattr(result, key) is None:
+                    match = pattern.search(line)
+                    if match:
+                        setattr(result, key, match.group(1))
+        return result
 
     def generate_perf_report(self):
         import os
@@ -228,7 +248,6 @@ def generate_perf_report(self):
         )
 
         print("\nReport File is : " + report_path)
-        import shutil
         shutil.make_archive(self.result_folder_name, "zip", self.result_folder_name)
 
         return
@@ -293,7 +312,7 @@ def test_1_perfspect(self):
         output, err = p.communicate()
         status = p.wait()
         cmd = './perfspect/perfspect report --gaudi --output ' + self.perf_report.result_folder_name
-        status, output = RunCmd().run(cmd)
+        status, output, err = run_cmd(cmd)
         import socket
         hostname = socket.gethostname()
         xlsx_file = self.perf_report.result_folder_name + os.sep + hostname + '.xlsx'
@@ -305,95 +324,39 @@ def test_1_perfspect(self):
         self.perf_report.gaudi_info_df = df
         self.assertEqual(False, False)
 
-    @unittest.skipIf(skip_llama2_70b == 1 , "Skip over this routine")
-    def test_2_llama2_70b(self):
-
-        model_name = "Llama2_70b"
+    def run_model_test(self, model_name):
         # Get configs/data
         data = self.utils.load_input_data(model_name)
-        #print(data)
         self.assertNotEqual(data, None)
 
         # Testing
-        for i in data:
-            try:
-                response_status_code = self.utils.model_test(i, perf_report)
-            except:
-                response_status_code=-1
-                continue
-            self.assertEqual(response_status_code, 0)
+        for item in data:
+            with self.subTest(input=item):
+                response_status_code = self.utils.model_test(item, perf_report)
+                self.assertEqual(
+                    response_status_code, 0,
+                    f"Model test failed with status code {response_status_code} for input {item}"
+                )
+
+    @unittest.skipIf(skip_llama2_70b == 1 , "Skip over this routine")
+    def test_2_llama2_70b(self):
+        self.run_model_test("Llama2_70b")
 
     @unittest.skipIf(skip_llama31_8b == 1 , "Skip over this routine")
     def test_3_llama3_1_8b(self):
-
-        model_name = "Llama3.1_8b"
-        # Get configs/data
-        data = self.utils.load_input_data(model_name)
-        #print(data)
-        self.assertNotEqual(data, None)
-
-        # Testing
-        for i in data:
-            try:
-                response_status_code = self.utils.model_test(i, perf_report)
-            except:
-                response_status_code=-1
-                continue
-            self.assertEqual(response_status_code, 0)
+        self.run_model_test("Llama3.1_8b")
 
     @unittest.skipIf(skip_llama31_70b == 1 , "Skip over this routine")
     def test_4_llama3_1_70b(self):
-
-        model_name = "Llama3.1_70b"
-        # Get configs/data
-        data = self.utils.load_input_data(model_name)
-        #print(data)
-        self.assertNotEqual(data, None)
-
-        # Testing
-        for i in data:
-            try:
-                response_status_code = self.utils.model_test(i, perf_report)
-            except:
-                response_status_code=-1
-                continue
-            self.assertEqual(response_status_code, 0)
+        self.run_model_test("Llama3.1_70b")
 
     @unittest.skipIf(skip_llama33_70b == 1 , "Skip over this routine")
     def test_5_llama3_3_70b(self):
-
-        model_name = "Llama3.3_70b"
-        # Get configs/data
-        data = self.utils.load_input_data(model_name)
-        #print(data)
-        self.assertNotEqual(data, None)
-
-        # Testing
-        for i in data:
-            try:
-                response_status_code = self.utils.model_test(i, perf_report)
-            except:
-                response_status_code=-1
-                continue
-            self.assertEqual(response_status_code, 0)
+        self.run_model_test("Llama3.3_70b")
 
     @unittest.skipIf(skip_llama31_405b == 1 , "Skip over this routine")
     def test_6_llama3_1_405b(self):
-
-        model_name = "Llama3.1_405b"
-        # Get configs/data
-        data = self.utils.load_input_data(model_name)
-        #print(data)
-        self.assertNotEqual(data, None)
-
-        # Testing
-        for i in data:
-            try:
-                response_status_code = self.utils.model_test(i, perf_report)
-            except:
-                response_status_code=-1
-                continue
-            self.assertEqual(response_status_code, 0)
+        self.run_model_test("Llama3.1_405b")
 
 if __name__ == "__main__":
     import sys
diff --git a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile
index 987eefc5..9b06d111 100644
--- a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile
+++ b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile
@@ -11,15 +11,21 @@
 RUN npm install n -g && \
     n latest
 RUN python3 -m pip install --no-cache-dir --upgrade pip
-RUN python3 -m pip install --upgrade-strategy eager optimum[habana]
 RUN python3 -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.20.0
 RUN mkdir -p /workspace
 WORKDIR /workspace
-RUN git clone https://github.com/huggingface/optimum-habana && cd optimum-habana && git checkout v1.16.0
+RUN git clone -b v1.18.0 https://github.com/huggingface/optimum-habana && \
+cd optimum-habana && \
+python3 -m pip install --no-cache-dir .
 
 WORKDIR /workspace/optimum-habana/examples/text-generation
 RUN python3 -m pip install -r requirements.txt
 RUN python3 -m pip install -r requirements_lm_eval.txt
+
+# Installing datasets version compatible with PiQA
+# TODO: Remove this once optimum-habana is updated
+RUN python3 -m pip install datasets==3.6.0
+
 COPY . .
 COPY Gaudi_1-20.json Gaudi.json
 COPY HQT_1-20.zip HQT.zip
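
Reviewer note (not part of the patch): a minimal sketch of how the refactored helpers are expected to behave, assuming the patched Benchmark.py can be imported from the working directory without heavyweight side effects. The file name, the echo command, the EXAMPLE_VAR variable, and the sample log text below are illustrative assumptions, not taken from a real optimum-habana run.

    # sketch.py - standalone exercise of run_cmd() and PerfReport.parse_run_log()
    from Benchmark import run_cmd, PerfReport

    # run_cmd() now returns decoded stdout/stderr alongside the exit status and
    # merges env_vars into a copy of os.environ instead of replacing it.
    status, out, err = run_cmd("echo hello", env_vars={"EXAMPLE_VAR": "1"})
    assert status == 0 and out.strip() == "hello"

    # parse_run_log() returns a RunLogData dataclass instead of a 4-tuple; fields
    # that never appear in the log stay None, and model_test() falls back to
    # '0' / 'N/A' for them.  The log lines below are invented for illustration.
    sample_log = (
        "Throughput (including tokenization) = 1234.5 tokens/second\n"
        "Memory allocated = 10.2 GB\n"
        "Max memory allocated = 12.8 GB\n"
        "Graph compilation duration = 15.3 seconds\n"
    )
    parsed = PerfReport.parse_run_log(None, sample_log)  # parse_run_log() does not touch self
    print(parsed.throughput, parsed.mem_allocated, parsed.max_mem_allocated, parsed.graph_compile)
    # -> 1234.5 10.2 12.8 15.3

The regex-based parser is anchored on the metric names rather than on token positions, so reordered or extra words in a log line no longer shift the captured value the way the old split('=')/split(' ') indexing did.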