diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py
index f09b1e6..05cd810 100644
--- a/examples/guidellm_example.py
+++ b/examples/guidellm_example.py
@@ -9,11 +9,11 @@
     GUIDELLM__MAX_CONCURRENCY=256,
     GUIDELLM__REQUEST_TIMEOUT=21600,
     target="http://localhost:8000/v1",
-    data_type="emulated",
     max_seconds=30,
-    data="prompt_tokens=512,generated_tokens=256",
+    #scenario = "benchmarking_32k",
+    data="prompt_tokens=128,output_tokens=128",
     vllm_kwargs={"enable-chunked-prefill": True}
 )
 
 task.execute_remotely("oneshot-a100x1")
-#task.execute_locally()
\ No newline at end of file
+#task.execute_locally()
diff --git a/examples/lmeval_example.py b/examples/lmeval_example.py
index 8910aa2..688c355 100644
--- a/examples/lmeval_example.py
+++ b/examples/lmeval_example.py
@@ -6,8 +6,8 @@
     model_id="meta-llama/Llama-3.2-1B-Instruct",
     tasks="gsm8k",
     model_args="dtype=auto,max_model_len=8192",
-    batch_size="auto", 
+    batch_size="auto",
 )
 
 task.execute_remotely("oneshot-a100x1")
-#task.execute_locally()
\ No newline at end of file
+#task.execute_locally()
diff --git a/src/automation/configs.py b/src/automation/configs.py
index 76dbe58..5972ab3 100644
--- a/src/automation/configs.py
+++ b/src/automation/configs.py
@@ -1,2 +1,3 @@
-DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_5:latest"
-DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml"
\ No newline at end of file
+DEFAULT_DOCKER_IMAGE = "quay.io/nmmlops/mlops/k8s-research-cuda12_8:latest"
+DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml"
+DEFAULT_RESEARCH_BRANCH = "main"
diff --git a/src/automation/standards/benchmarking/benchmarking_128k.json b/src/automation/standards/benchmarking/benchmarking_128k.json
new file mode 100644
index 0000000..13b8105
--- /dev/null
+++ b/src/automation/standards/benchmarking/benchmarking_128k.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 128000,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 128000,
+        "output_tokens": 2048,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 2048
+    }
+}
diff --git a/src/automation/standards/benchmarking/benchmarking_16k.json b/src/automation/standards/benchmarking/benchmarking_16k.json
new file mode 100644
index 0000000..f927a4a
--- /dev/null
+++ b/src/automation/standards/benchmarking/benchmarking_16k.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 16000,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 16000,
+        "output_tokens": 2048,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 2048
+    }
+}
diff --git a/src/automation/standards/benchmarking/benchmarking_32k.json b/src/automation/standards/benchmarking/benchmarking_32k.json
new file mode 100644
index 0000000..6543fd7
--- /dev/null
+++ b/src/automation/standards/benchmarking/benchmarking_32k.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 32000,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 32000,
+        "output_tokens": 2048,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 2048
+    }
+}
diff --git a/src/automation/standards/benchmarking/benchmarking_64k.json b/src/automation/standards/benchmarking/benchmarking_64k.json
new file mode 100644
index 0000000..871b210
--- /dev/null
+++ b/src/automation/standards/benchmarking/benchmarking_64k.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
"prompt_tokens": 64000, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 64000, + "output_tokens": 2048, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 2048 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_chat.json b/src/automation/standards/benchmarking/benchmarking_chat.json new file mode 100644 index 0000000..f4d0548 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_chat.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 512, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 512, + "output_tokens": 256, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 256 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_code_completion.json b/src/automation/standards/benchmarking/benchmarking_code_completion.json new file mode 100644 index 0000000..6be35df --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_code_completion.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 256, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 256, + "output_tokens": 1024, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_code_fixing.json b/src/automation/standards/benchmarking/benchmarking_code_fixing.json new file mode 100644 index 0000000..bceff14 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_code_fixing.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 1024, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 1024, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_docstring_generation.json b/src/automation/standards/benchmarking/benchmarking_docstring_generation.json new file mode 100644 index 0000000..0eda212 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_docstring_generation.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 768, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 768, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_instruction.json b/src/automation/standards/benchmarking/benchmarking_instruction.json new file mode 100644 index 0000000..0fac491 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_instruction.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 256, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 256, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_long_rag.json b/src/automation/standards/benchmarking/benchmarking_long_rag.json new file mode 100644 index 0000000..4fe719a --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_long_rag.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 10240, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 10240, + "output_tokens": 1536, + "output_tokens_stdev": 64, + 
"output_tokens_min": 1, + "output_tokens_max": 1536 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_rag.json b/src/automation/standards/benchmarking/benchmarking_rag.json new file mode 100644 index 0000000..9525b09 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_rag.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 1024, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_summarization.json b/src/automation/standards/benchmarking/benchmarking_summarization.json new file mode 100644 index 0000000..9525b09 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_summarization.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 1024, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} diff --git a/src/automation/standards/benchmarking/chat.json b/src/automation/standards/benchmarking/chat.json new file mode 100644 index 0000000..024438c --- /dev/null +++ b/src/automation/standards/benchmarking/chat.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 512, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 256, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/automation/standards/benchmarking/rag.json b/src/automation/standards/benchmarking/rag.json new file mode 100644 index 0000000..c7ee2f2 --- /dev/null +++ b/src/automation/standards/benchmarking/rag.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 4096, + "prompt_tokens_stdev": 512, + "prompt_tokens_min": 2048, + "prompt_tokens_max": 6144, + "output_tokens": 512, + "output_tokens_stdev": 128, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index d886599..c6961b1 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -1,27 +1,30 @@ from clearml import Task from typing import Sequence, Optional -from automation.configs import DEFAULT_OUTPUT_URI +from automation.configs import DEFAULT_OUTPUT_URI, DEFAULT_RESEARCH_BRANCH from automation.standards import STANDARD_CONFIGS import yaml import os class BaseTask(): - base_packages = ["git+https://github.com/neuralmagic/research.git"] - def __init__( self, project_name: str, task_name: str, docker_image: str, + branch: Optional[str] = DEFAULT_RESEARCH_BRANCH, packages: Optional[Sequence[str]]=None, task_type: str="training", ): + branch_name = branch or DEFAULT_RESEARCH_BRANCH + base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch_name}"] if packages is not None: - packages = list(set(packages + self.base_packages)) + packages = list(set(packages + base_packages)) else: - packages = self.base_packages + packages = base_packages + + print(packages) self.project_name = project_name self.task_name = task_name @@ -29,6 +32,7 @@ def __init__( self.packages = packages self.task_type = task_type self.task = None + self.branch = branch self.script_path = None self.callable_artifacts = None @@ -50,8 +54,8 @@ def 
             return yaml.safe_load(open(STANDARD_CONFIGS[config], "r"))
         elif os.path.exists(config):
             return yaml.safe_load(open(config, "r"))
-        elif os.path.exists(os.path.join("..", "standatrds", config)):
-            return yaml.safe_load(open(os.path.join("..", "standatrds", config)), "r")
+        elif os.path.exists(os.path.join("..", "standards", config)):
+            return yaml.safe_load(open(os.path.join("..", "standards", config), "r"))
         else:
             return yaml.safe_load(config)
 
@@ -91,7 +95,7 @@ def create_task(self):
             add_task_init_call=True,
             script=self.script_path,
             repo="https://github.com/neuralmagic/research.git",
-            branch="main",
+            branch=self.branch,
         )
         self.task.output_uri = DEFAULT_OUTPUT_URI
         self.set_arguments()
diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py
index 390012b..d4c6974 100644
--- a/src/automation/tasks/guidellm.py
+++ b/src/automation/tasks/guidellm.py
@@ -1,13 +1,20 @@
 from automation.tasks import BaseTask
-from automation.configs import DEFAULT_DOCKER_IMAGE
+from automation.configs import DEFAULT_DOCKER_IMAGE, DEFAULT_RESEARCH_BRANCH
 from typing import Optional, Sequence
 import os
 
 DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes
-GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@http_backend"
+GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git"
 
 
 class GuideLLMTask(BaseTask):
+    """
+    guidellm_packages = [
+        "huggingface-hub==0.34.3",
+        "triton==3.3.1",
+        "vllm==0.10.0",
+        "hf_xet",
+    """
     guidellm_packages = [
         "vllm",
         GUIDELLM_PACKAGE,
@@ -23,6 +30,7 @@ def __init__(
         docker_image: str=DEFAULT_DOCKER_IMAGE,
         packages: Optional[Sequence[str]]=None,
         clearml_model: bool=False,
+        branch: str=DEFAULT_RESEARCH_BRANCH,
         task_type: str="training",
         vllm_kwargs: dict={},
         target: str="http://localhost:8000/v1",
@@ -45,6 +53,12 @@ def __init__(
         if "packages" in config_kwargs:
             packages = list(set(packages + config_kwargs.pop("packages")))
 
+        # keep only the pinned version of a library
+        for pkg in list(packages):  # iterate over a copy so removing entries is safe
+            if "==" in pkg and pkg.split("==")[0] in packages:
+                lib_name = pkg.split("==")[0]
+                packages.remove(lib_name)
+
         # Initialize base parameters
         super().__init__(
             project_name=project_name,
@@ -52,6 +66,7 @@ def __init__(
             docker_image=docker_image,
             packages=packages,
             task_type=task_type,
+            branch=branch,
         )
 
         # Check for conflicts in configs and constructor arguments
diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py
index 617b502..f9b6feb 100644
--- a/src/automation/tasks/scripts/guidellm_script.py
+++ b/src/automation/tasks/scripts/guidellm_script.py
@@ -1,11 +1,10 @@
-
 import os
+import sys
 from clearml import Task
 from automation.utils import resolve_model_id, cast_args, kill_process_tree
 from automation.vllm import start_vllm_server
 from pyhocon import ConfigFactory
-
 
 def main(configurations=None):
 
     task = Task.current_task()
@@ -29,7 +28,6 @@ def main(configurations=None):
     guidellm_args = configurations.get("GuideLLM", {})
     environment_args = configurations.get("environment", {})
     vllm_args = configurations.get("vLLM", {})
-
     clearml_model = args["Args"]["clearml_model"]
 
     if isinstance(clearml_model, str):
@@ -39,7 +37,6 @@ def main(configurations=None):
     if isinstance(force_download, str):
         force_download = force_download.lower() == "true"
 
-
     # Resolve model_id
     model_id = resolve_model_id(args["Args"]["model"], clearml_model, force_download)
 
@@ -54,7 +51,7 @@ def main(configurations=None):
     if not server_initialized:
         kill_process_tree(server_process.pid)
task.upload_artifact(name="vLLM server log", artifact_object=server_log) - raise AssertionError("Server failed to intialize") + raise AssertionError("Server failed to initialize") # Parse through environment variables for k, v in environment_args.items(): @@ -62,13 +59,51 @@ def main(configurations=None): guidellm_args["model"] = model_id - from guidellm import generate_benchmark_report - guidellm_args = cast_args(guidellm_args, generate_benchmark_report) - report = generate_benchmark_report(**guidellm_args) - kill_process_tree(server_process.pid) + import json + import asyncio + from pathlib import Path + from guidellm.benchmark.entrypoints import benchmark_with_scenario + from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios + + # user defined scenarios are a temporary fix until the guidellm bugs get fixed otherwise we would use the upstream scenarios + user_scenario = guidellm_args.get("scenario", "") + if user_scenario: + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{user_scenario}.json")) + if os.path.exists(filepath): + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + else: + raise ValueError(f"Scenario path {filepath} does not exist") + #elif len(get_builtin_scenarios()) > 0: + # to be used when get_builtin_scenarios() bug is fixed + # current_scenario = GenerativeTextScenario.from_builtin(get_builtin_scenarios()[0], dict(guidellm_args)) + else: + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{user_scenario}.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + + # Ensure output_path is set and consistent + output_path = Path(guidellm_args.get("output_path", "guidellm-output.json")) + guidellm_args["output_path"] = str(output_path) + + print("[DEBUG] Calling benchmark_with_scenario with:") + print(json.dumps(guidellm_args, indent=2)) + + executable_path = os.path.dirname(sys.executable) + vllm_path = os.path.join(executable_path, "vllm") + print(f"The vllm path is: {vllm_path}") - task.upload_artifact(name="guidellm guidance report", artifact_object=report.to_json()) - task.upload_artifact(name="vLLM server log", artifact_object=server_log) + try: + asyncio.run( + benchmark_with_scenario( + current_scenario, + output_path= output_path, + output_extras= None + ) + ) + + finally: + task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) + task.upload_artifact(name="vLLM server log", artifact_object=server_log) + kill_process_tree(server_process.pid) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 6036d65..4711efc 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -40,12 +40,15 @@ def start_vllm_server( subprocess_env[k] = str(v) else: if v == True or v == "True": - v = "true" - server_command.extend([f"--{k}", str(v)]) + server_command.append(f"--{k}") + else: + server_command.extend([f"--{k}", str(v)]) + server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt" server_log_file = open(server_log_file_name, "w") - server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env) + server_process = subprocess.Popen(server_command, shell=False, env=subprocess_env) + #server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, 
 
     delay = 5
     server_initialized = False
@@ -64,4 +67,4 @@
     if server_initialized:
         return server_process, True, server_log_file_name
     else:
-        return server_process, False, server_log_file_name
\ No newline at end of file
+        return server_process, False, server_log_file_name
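
Illustrative usage of the new scenario support (a sketch, not part of the diff above): guidellm_script.py now reads guidellm_args.get("scenario", "") and resolves it to one of the JSON files added under src/automation/standards/benchmarking/. The snippet assumes GuideLLMTask forwards extra keyword arguments such as scenario into its GuideLLM configuration, as the commented-out line in examples/guidellm_example.py suggests; the project name, task name, and model are placeholders.

    # Hypothetical sketch: run a GuideLLM sweep using one of the new standard scenarios.
    from automation.tasks import GuideLLMTask

    task = GuideLLMTask(
        project_name="benchmarking",                  # placeholder project name
        task_name="llama-3.2-1b-chat-sweep",          # placeholder task name
        model="meta-llama/Llama-3.2-1B-Instruct",     # placeholder model
        branch="main",                                # new: research branch installed by BaseTask
        scenario="benchmarking_chat",                 # new: resolves to benchmarking_chat.json
        target="http://localhost:8000/v1",
        vllm_kwargs={"enable-chunked-prefill": True},
    )

    task.execute_remotely("oneshot-a100x1")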
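
The package de-duplication loop added to guidellm.py keeps the pinned entry when a library is listed both pinned and unpinned. A minimal standalone sketch of that behavior (the example package list is assumed):

    # Sketch: drop the unpinned entry when a pinned one exists.
    packages = ["vllm", "vllm==0.10.0", "hf_xet"]  # assumed example input
    for pkg in list(packages):  # iterate over a copy so removal is safe
        if "==" in pkg and pkg.split("==")[0] in packages:
            packages.remove(pkg.split("==")[0])
    print(packages)  # ['vllm==0.10.0', 'hf_xet']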
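
The server.py change switches boolean vLLM kwargs from the old "--flag true" form to bare "--flag" switches. A minimal standalone sketch of the new flag construction (the example kwargs and command prefix are assumed; the VLLM_* environment-variable branch is omitted):

    # Sketch: True-valued kwargs become bare switches, everything else "--key value".
    vllm_kwargs = {"enable-chunked-prefill": True, "max-model-len": 8192}  # assumed example input
    server_command = ["vllm", "serve", "some/model"]                       # assumed command prefix
    for k, v in vllm_kwargs.items():
        if v == True or v == "True":
            server_command.append(f"--{k}")
        else:
            server_command.extend([f"--{k}", str(v)])
    print(server_command)
    # ['vllm', 'serve', 'some/model', '--enable-chunked-prefill', '--max-model-len', '8192']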