back to base

chibu · chibu · commit 768d13589592 · 2025-07-02T14:48:45.000Z
diff --git a/setup.py b/setup.py
@@ -17,9 +17,6 @@
         #"google-cloud-storage>=1.13.2",
         "datasets",
         "pyhocon",
-        "build>=1.0.0",
-        "setuptools>=61.0",
-        "setuptools-git-versioning>=2.0,<3",
     ],
     python_requires=">=3.7",
 )
diff --git a/src/automation/standards/benchmarking/chat.json b/src/automation/standards/benchmarking/chat.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 512,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 1024,
+        "output_tokens": 256,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 1024
+    }
+}
diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py
@@ -4,7 +4,8 @@
 import os
 
 DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes
-GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm"
+GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git"
+#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm"
 #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm"
 #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]"
 
@@ -14,7 +15,7 @@ class GuideLLMTask(BaseTask):
         #"build>=1.0.0",
         #"setuptools>=61.0",
         #"setuptools-git-versioning>=2.0,<3",
-        #"vllm",
+        "vllm",
         GUIDELLM_PACKAGE,
         "hf_xet",
     ]
diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py
@@ -61,9 +61,9 @@ def clean_hocon_value(v):
     print(gpu_count)
     print(os.getcwd())
 
-    from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
-    print(get_builtin_scenarios())
-    default_scenario = get_builtin_scenarios()[0]
+    from pathlib import Path
+    filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json"))
+    current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args))
     # Start vLLM server
     server_process, server_initialized, server_log = start_vllm_server(
         vllm_args,
@@ -105,10 +105,12 @@ def clean_hocon_value(v):
     print(f"The vllm path is: {vllm_path}")
 
 
-    print(get_builtin_scenarios())
-    default_scenario = get_builtin_scenarios()[0]
+    #default_scenario = get_builtin_scenarios()[0]
+    #current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args))
 
-    current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args))
+    from pathlib import Path
+    filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json"))
+    current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args))
 
     #import time 
     #time.sleep(300)