diff --git a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile
index 987eefc5..97785937 100644
--- a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile
+++ b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile
@@ -1,8 +1,7 @@
 # Copyright (c) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-FROM vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:1.20.0-543
-
+FROM vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:1.21.0-555
 # Need node to build doc HTML. Taken from https://stackoverflow.com/a/67491580
 RUN apt-get update && apt-get install -y --no-install-recommends \
     software-properties-common \
@@ -12,15 +11,20 @@ RUN npm install n -g && \
 
 RUN python3 -m pip install --no-cache-dir --upgrade pip
 RUN python3 -m pip install --upgrade-strategy eager optimum[habana]
-RUN python3 -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.20.0
+# Keep the HabanaAI DeepSpeed tag in lockstep with the 1.21.0 base image above.
+RUN python3 -m pip install --no-cache-dir git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
 RUN mkdir -p /workspace
 WORKDIR /workspace
-RUN git clone https://github.com/huggingface/optimum-habana && cd optimum-habana && git checkout v1.16.0
+# Fetch only the v1.18.0 tag (shallow clone) to keep the layer small.
+RUN git clone --depth 1 --branch v1.18.0 https://github.com/huggingface/optimum-habana
 
 WORKDIR /workspace/optimum-habana/examples/text-generation
 RUN python3 -m pip install -r requirements.txt
 RUN python3 -m pip install -r requirements_lm_eval.txt
 COPY . .
-COPY Gaudi_1-20.json Gaudi.json
-COPY HQT_1-20.zip HQT.zip
+COPY Gaudi_1-21.json Gaudi.json
+COPY HQT_1-21.zip HQT.zip
 RUN python3 -m pip install -r requirements_bm.txt
+# NOTE(review): presumably read by the benchmark scripts to skip the Llama2_70b
+# entries still listed in Gaudi.json - confirm against the consumer of this variable.
+ENV skip_llama2_70b=1
diff --git a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile.1-20 b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile.1-20
new file mode 100644
index 00000000..987eefc5
--- /dev/null
+++ b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Dockerfile.1-20
@@ -0,0 +1,34 @@
+# Copyright (c) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Image for the Gaudi 1.20.0 stack: the base image, the HabanaAI DeepSpeed tag,
+# the benchmark config (Gaudi_1-20.json) and the HQT archive below all target 1.20.
+FROM vault.habana.ai/gaudi-docker/1.20.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:1.20.0-543
+
+# Need node to build doc HTML. Taken from https://stackoverflow.com/a/67491580
+# The apt package index is removed in the same layer so it does not bloat the image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    npm \
+    software-properties-common \
+    && rm -rf /var/lib/apt/lists/*
+RUN npm install n -g && \
+    n latest
+
+RUN python3 -m pip install --no-cache-dir --upgrade pip
+RUN python3 -m pip install --no-cache-dir --upgrade-strategy eager optimum[habana]
+RUN python3 -m pip install --no-cache-dir git+https://github.com/HabanaAI/DeepSpeed.git@1.20.0
+
+# WORKDIR creates the directory when it is missing, so no separate mkdir is needed.
+WORKDIR /workspace
+# Fetch only the v1.16.0 tag (shallow clone) to keep the layer small.
+RUN git clone --depth 1 --branch v1.16.0 https://github.com/huggingface/optimum-habana
+
+WORKDIR /workspace/optimum-habana/examples/text-generation
+RUN python3 -m pip install --no-cache-dir -r requirements.txt
+RUN python3 -m pip install --no-cache-dir -r requirements_lm_eval.txt
+# Copy the build context, then place the 1.20 benchmark config and HQT archive
+# under the version-independent names Gaudi.json / HQT.zip.
+COPY . .
+COPY Gaudi_1-20.json Gaudi.json
+COPY HQT_1-20.zip HQT.zip
+RUN python3 -m pip install --no-cache-dir -r requirements_bm.txt
diff --git a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Gaudi_1-21.json b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Gaudi_1-21.json
new file mode 100644
index 00000000..7e82cdc3
--- /dev/null
+++ b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/Gaudi_1-21.json
@@ -0,0 +1,636 @@
+{
+  "Gaudi3": [
+    {
+      "model": "Llama3.1_8b",
+      "num_cards": "1",
+      "input_len": "",
+      "output_len": "",
+      "bs": "",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 1 run_lm_eval.py -o acc_llama_quant.json --model_name_or_path meta-llama/Llama-3.1-8B-Instruct --warmup 0 --use_hpu_graphs --use_kv_cache --trim_logits --batch_size 1 --bucket_size=128 --bucket_internal --trust_remote_code --tasks hellaswag lambada_openai piqa winogrande --bf16 --attn_softmax_bf16 --use_flash_attention --flash_attention_recompute --flash_attention_causal_mask",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_measure.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": ""
+    },
+    {
+      "model": "Llama3.1_8b",
+      "num_cards": "1",
+      "input_len": "128",
+      "output_len": "128",
+      "bs": "1536",
+      "run_cmd": "python3 run_generation.py --model_name_or_path meta-llama/Llama-3.1-8B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 128 --bf16 --batch_size 1536 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "24631"
+    },
+    {
+      "model": "Llama3.1_8b",
+      "num_cards": "1",
+      "input_len": "128",
+      "output_len": "2048",
+      "bs": "768",
+      "run_cmd": "python3 run_generation.py --model_name_or_path meta-llama/Llama-3.1-8B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 128 --bf16 --batch_size 768 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "21793"
+    },
+    {
+      "model": "Llama3.1_8b",
+      "num_cards": "1",
+      "input_len": "2048",
+      "output_len": "128",
+      "bs": "256",
+      "run_cmd": "python3 run_generation.py --model_name_or_path meta-llama/Llama-3.1-8B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 2048 --bf16 --batch_size 256 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "2883"
+    },
+    {
+      "model": "Llama3.1_8b",
+      "num_cards": "1",
+      "input_len": "2048",
+      "output_len": "2048",
+      "bs": "256",
+      "run_cmd": "python3 run_generation.py --model_name_or_path meta-llama/Llama-3.1-8B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 2048 --bf16 --batch_size 256 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "9125"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "2",
+      "input_len": "",
+      "output_len": "",
+      "bs": "",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_lm_eval.py -o acc_llama_quant.json --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --warmup 0 --use_hpu_graphs --use_kv_cache --trim_logits --batch_size 1 --bucket_size=128 --bucket_internal --trust_remote_code --tasks hellaswag lambada_openai piqa winogrande --bf16 --attn_softmax_bf16 --use_flash_attention --flash_attention_recompute --flash_attention_causal_mask",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_measure.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": ""
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "2",
+      "input_len": "128",
+      "output_len": "128",
+      "bs": "2048",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --attn_batch_split 2 --max_new_tokens 128 --max_input_tokens 128 --bf16 --batch_size 2048 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "5471"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "2",
+      "input_len": "128",
+      "output_len": "2048",
+      "bs": "450",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --attn_batch_split 2 --max_new_tokens 2048 --max_input_tokens 128 --bf16 --batch_size 450 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "6484"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "2",
+      "input_len": "2048",
+      "output_len": "128",
+      "bs": "223",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --attn_batch_split 2 --max_new_tokens 128 --max_input_tokens 2048 --bf16 --batch_size 223 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "682"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "2",
+      "input_len": "2048",
+      "output_len": "2048",
+      "bs": "175",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --attn_batch_split 2 --max_new_tokens 2048 --max_input_tokens 2048 --bf16 --batch_size 175 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "2931"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "8",
+      "input_len": "",
+      "output_len": "",
+      "bs": "",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_lm_eval.py -o acc_llama_quant.json --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --warmup 0 --use_hpu_graphs --use_kv_cache --trim_logits --batch_size 1 --bucket_size=128 --bucket_internal --trust_remote_code --tasks hellaswag lambada_openai piqa winogrande --bf16 --attn_softmax_bf16 --use_flash_attention --flash_attention_recompute --flash_attention_causal_mask",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_measure.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": ""
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "8",
+      "input_len": "128",
+      "output_len": "128",
+      "bs": "4000",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --attn_batch_split 2 --max_new_tokens 128 --max_input_tokens 128 --bf16 --batch_size 4000 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "18317"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "8",
+      "input_len": "128",
+      "output_len": "2048",
+      "bs": "768",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --attn_batch_split 2 --max_new_tokens 2048 --max_input_tokens 128 --bf16 --batch_size 768 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "20943"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "8",
+      "input_len": "2048",
+      "output_len": "128",
+      "bs": "512",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --attn_batch_split 2 --max_new_tokens 128 --max_input_tokens 2048 --bf16 --batch_size 512 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "2303"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "8",
+      "input_len": "2048",
+      "output_len": "2048",
+      "bs": "600",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --attn_batch_split 2 --max_new_tokens 2048 --max_input_tokens 2048 --bf16 --batch_size 600 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "10604"
+    },
+    {
+      "model": "Llama3.3_70b",
+      "num_cards": "8",
+      "input_len": "",
+      "output_len": "",
+      "bs": "",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_lm_eval.py -o acc_llama_quant.json --model_name_or_path meta-llama/Llama-3.3-70B-Instruct --warmup 0 --use_hpu_graphs --use_kv_cache --trim_logits --batch_size 1 --bucket_size=128 --bucket_internal --trust_remote_code --tasks hellaswag lambada_openai piqa winogrande --bf16 --attn_softmax_bf16 --use_flash_attention --flash_attention_recompute --flash_attention_causal_mask",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_measure.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": ""
+    },
+    {
+      "model": "Llama3.3_70b",
+      "num_cards": "8",
+      "input_len": "128",
+      "output_len": "128",
+      "bs": "3986",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.3-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 128 --bf16 --batch_size 3986 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "16883"
+    },
+    {
+      "model": "Llama3.3_70b",
+      "num_cards": "8",
+      "input_len": "128",
+      "output_len": "2048",
+      "bs": "2048",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.3-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 128 --bf16 --batch_size 2048 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "25071"
+    },
+    {
+      "model": "Llama3.3_70b",
+      "num_cards": "8",
+      "input_len": "2048",
+      "output_len": "128",
+      "bs": "774",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.3-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 2048 --bf16 --batch_size 774 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "1890"
+    },
+    {
+      "model": "Llama3.3_70b",
+      "num_cards": "8",
+      "input_len": "2048",
+      "output_len": "2048",
+      "bs": "719",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.3-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 2048 --bf16 --batch_size 719 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "11087"
+    },
+    {
+      "model": "Llama3.1_405b",
+      "num_cards": "8",
+      "input_len": "",
+      "output_len": "",
+      "bs": "",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_lm_eval.py -o acc_llama_quant.json --model_name_or_path meta-llama/Llama-3.1-405B-Instruct --warmup 0 --use_hpu_graphs --use_kv_cache --trim_logits --batch_size 1 --bucket_size=128 --bucket_internal --trust_remote_code --tasks hellaswag lambada_openai piqa winogrande --bf16 --attn_softmax_bf16 --use_flash_attention --flash_attention_recompute --flash_attention_causal_mask",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_measure.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": ""
+    },
+    {
+      "model": "Llama3.1_405b",
+      "num_cards": "8",
+      "input_len": "128",
+      "output_len": "128",
+      "bs": "2996",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-405B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 128 --bf16 --batch_size 2996 --flash_attention_causal_mask --book_source --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "3489"
+    },
+    {
+      "model": "Llama3.1_405b",
+      "num_cards": "8",
+      "input_len": "128",
+      "output_len": "2048",
+      "bs": "460",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-405B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 128 --bf16 --batch_size 460 --flash_attention_causal_mask --book_source --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "4974"
+    },
+    {
+      "model": "Llama3.1_405b",
+      "num_cards": "8",
+      "input_len": "2048",
+      "output_len": "128",
+      "bs": "195",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-405B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 2048 --bf16 --batch_size 195 --flash_attention_causal_mask --book_source --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "397"
+    },
+    {
+      "model": "Llama3.1_405b",
+      "num_cards": "8",
+      "input_len": "2048",
+      "output_len": "2048",
+      "bs": "180",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-405B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 2048 --bf16 --batch_size 180 --flash_attention_causal_mask --book_source --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "2212"
+    }
+  ],
+  "Gaudi2": [
+    {
+      "model": "Llama2_70b",
+      "num_cards": "2",
+      "input_len": "",
+      "output_len": "",
+      "bs": "",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_lm_eval.py -o acc_llama_quant.json --model_name_or_path meta-llama/Llama-2-70b-hf --warmup 0 --use_hpu_graphs --use_kv_cache --trim_logits --batch_size 1 --bucket_size=128 --bucket_internal --trust_remote_code --tasks hellaswag lambada_openai piqa winogrande --bf16 --attn_softmax_bf16 --use_flash_attention --flash_attention_recompute --flash_attention_causal_mask",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_measure.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": ""
+    },
+    {
+      "model": "Llama2_70b",
+      "num_cards": "2",
+      "input_len": "128",
+      "output_len": "128",
+      "bs": "1750",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-2-70b-hf --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 128 --bf16 --batch_size 1750 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "2935"
+    },
+    {
+      "model": "Llama2_70b",
+      "num_cards": "2",
+      "input_len": "128",
+      "output_len": "2048",
+      "bs": "256",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-2-70b-hf --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 128 --bf16 --batch_size 256 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "3963"
+    },
+    {
+      "model": "Llama2_70b",
+      "num_cards": "2",
+      "input_len": "2048",
+      "output_len": "128",
+      "bs": "95",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-2-70b-hf --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 2048 --bf16 --batch_size 95 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "318"
+    },
+    {
+      "model": "Llama2_70b",
+      "num_cards": "2",
+      "input_len": "2048",
+      "output_len": "2048",
+      "bs": "159",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-2-70b-hf --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 2048 --bf16 --batch_size 159 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "1767"
+    },
+    {
+      "model": "Llama3.1_8b",
+      "num_cards": "1",
+      "input_len": "",
+      "output_len": "",
+      "bs": "",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 1 run_lm_eval.py -o acc_llama_quant.json --model_name_or_path meta-llama/Llama-3.1-8B-Instruct --warmup 0 --use_hpu_graphs --use_kv_cache --trim_logits --batch_size 1 --bucket_size=128 --bucket_internal --trust_remote_code --tasks hellaswag lambada_openai piqa winogrande --bf16 --attn_softmax_bf16 --use_flash_attention --flash_attention_recompute --flash_attention_causal_mask",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_measure.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": ""
+    },
+    {
+      "model": "Llama3.1_8b",
+      "num_cards": "1",
+      "input_len": "128",
+      "output_len": "128",
+      "bs": "2816",
+      "run_cmd": "python3 run_generation.py --model_name_or_path meta-llama/Llama-3.1-8B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 128 --bf16 --batch_size 2816 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 3",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "19907"
+    },
+    {
+      "model": "Llama3.1_8b",
+      "num_cards": "1",
+      "input_len": "128",
+      "output_len": "2048",
+      "bs": "512",
+      "run_cmd": "python3 run_generation.py --model_name_or_path meta-llama/Llama-3.1-8B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 128 --bf16 --batch_size 512 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "14866"
+    },
+    {
+      "model": "Llama3.1_8b",
+      "num_cards": "1",
+      "input_len": "2048",
+      "output_len": "128",
+      "bs": "179",
+      "run_cmd": "python3 run_generation.py --model_name_or_path meta-llama/Llama-3.1-8B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 2048 --bf16 --batch_size 179 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 3",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "2099"
+    },
+    {
+      "model": "Llama3.1_8b",
+      "num_cards": "1",
+      "input_len": "2048",
+      "output_len": "2048",
+      "bs": "256",
+      "run_cmd": "python3 run_generation.py --model_name_or_path meta-llama/Llama-3.1-8B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 2048 --bf16 --batch_size 256 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "6060"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "2",
+      "input_len": "",
+      "output_len": "",
+      "bs": "",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_lm_eval.py -o acc_llama_quant.json --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --warmup 0 --use_hpu_graphs --use_kv_cache --trim_logits --batch_size 1 --bucket_size=128 --bucket_internal --trust_remote_code --tasks hellaswag lambada_openai piqa winogrande --bf16 --attn_softmax_bf16 --use_flash_attention --flash_attention_recompute --flash_attention_causal_mask",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_measure.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": ""
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "2",
+      "input_len": "128",
+      "output_len": "128",
+      "bs": "1792",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 128 --bf16 --batch_size 1792 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "3421"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "2",
+      "input_len": "128",
+      "output_len": "2048",
+      "bs": "256",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 128 --bf16 --batch_size 256 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "3827"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "2",
+      "input_len": "2048",
+      "output_len": "128",
+      "bs": "142",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 2048 --bf16 --batch_size 142 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "462"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "2",
+      "input_len": "2048",
+      "output_len": "2048",
+      "bs": "139",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 2 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 2048 --bf16 --batch_size 139 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "1665"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "8",
+      "input_len": "",
+      "output_len": "",
+      "bs": "",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_lm_eval.py -o acc_llama_quant.json --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --warmup 0 --use_hpu_graphs --use_kv_cache --trim_logits --batch_size 1 --bucket_size=128 --bucket_internal --trust_remote_code --tasks hellaswag lambada_openai piqa winogrande --bf16 --attn_softmax_bf16 --use_flash_attention --flash_attention_recompute --flash_attention_causal_mask",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_measure.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": ""
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "8",
+      "input_len": "128",
+      "output_len": "128",
+      "bs": "4000",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 128 --bf16 --batch_size 4000 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "10404"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "8",
+      "input_len": "128",
+      "output_len": "2048",
+      "bs": "768",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 128 --bf16 --batch_size 768 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "13639"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "8",
+      "input_len": "2048",
+      "output_len": "128",
+      "bs": "383",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 128 --max_input_tokens 2048 --bf16 --batch_size 383 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute --attn_batch_split 2",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "1553"
+    },
+    {
+      "model": "Llama3.1_70b",
+      "num_cards": "8",
+      "input_len": "2048",
+      "output_len": "2048",
+      "bs": "476",
+      "run_cmd": "python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py --model_name_or_path meta-llama/Llama-3.1-70B-Instruct --attn_softmax_bf16 --trim_logits --warmup 2 --use_kv_cache --use_hpu_graphs --limit_hpu_graphs --bucket_size=128 --bucket_internal --max_new_tokens 2048 --max_input_tokens 2048 --bf16 --batch_size 476 --flash_attention_causal_mask --use_flash_attention --flash_attention_recompute",
+      "env_vars": {
+        "HF_DATASETS_TRUST_REMOTE_CODE": "true",
+        "QUANT_CONFIG": "./quantization_config/maxabs_quant.json",
+        "TQDM_DISABLE": "1"
+      },
+      "ref_perf": "6715"
+    }
+  ]
+}
diff --git a/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/HQT_1-21.zip b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/HQT_1-21.zip
new file mode 100644
index 00000000..e31ebce5
Binary files /dev/null and b/PyTorch/Hugging_Face_pipelines/Benchmarking_on_Optimum-habana_with_fp8/HQT_1-21.zip differ