
Commit 432031e

Author: chibu
Commit message: test rag
Parent: 16b83bc

3 files changed: +17, -15 lines


examples/guidellm_example.py

Lines changed: 0 additions & 11 deletions

@@ -11,22 +11,11 @@
     GUIDELLM__MAX_CONCURRENCY=256,
     GUIDELLM__REQUEST_TIMEOUT=21600,
     target="http://localhost:8000/v1",
-    #target="http://fed73cc1-us-east.lb.appdomain.cloud/v1",
     data_type="emulated",
     max_seconds=30,
-    #data="{'prompt_tokens': 512, 'generated_tokens': 256, 'output_tokens' : 256}",
     data="prompt_tokens=512,generated_tokens=256,output_tokens=256",
-    #data="prompt_tokens=512,generated_tokens=256",
     branch = "update_guidellm",
     #vllm_kwargs={"enable-chunked-prefill": True}
 )

-#from clearml import Task
-#task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task")
 task.execute_remotely("remote-upgrade-default")
-#task.execute_locally()
-import os
-import sys
-executable_path = os.path.dirname(sys.executable)
-vllm_path = os.path.join(executable_path, "vllm")
-print(f"The vllm path is: {vllm_path}")
src/automation/standards/benchmarking/rag.json

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 4096,
+        "prompt_tokens_stdev": 512,
+        "prompt_tokens_min": 2048,
+        "prompt_tokens_max": 6144,
+        "output_tokens": 512,
+        "output_tokens_stdev": 128,
+        "output_tokens_min": 1,
+        "output_tokens_max": 1024
+    }
+}
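The new scenario requests a rate sweep over synthetic RAG-style traffic: prompt lengths centered at 4096 tokens (stdev 512, bounded to 2048-6144) and output lengths centered at 512 tokens (stdev 128, bounded to 1-1024). A minimal sketch of loading it the way guidellm_script.py does below, assuming an empty override dict in place of dict(guidellm_args):

# Minimal sketch: load the new scenario, mirroring guidellm_script.py.
# The empty dict stands in for dict(guidellm_args), which the script
# builds from the task's configuration.
import os
from pathlib import Path

from guidellm.benchmark.scenario import GenerativeTextScenario

filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "rag.json"))
current_scenario = GenerativeTextScenario.from_file(filepath, {})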

src/automation/tasks/scripts/guidellm_script.py

Lines changed: 4 additions & 4 deletions

@@ -63,7 +63,7 @@ def clean_hocon_value(v):

 from pathlib import Path
 from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
-filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json"))
+filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "rag.json"))
 current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args))
 # Start vLLM server
 server_process, server_initialized, server_log = start_vllm_server(
@@ -109,9 +109,9 @@ def clean_hocon_value(v):
 #default_scenario = get_builtin_scenarios()[0]
 #current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args))

-from pathlib import Path
-filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json"))
-current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args))
+#from pathlib import Path
+#filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json"))
+#current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args))

 #import time
 #time.sleep(300)
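Net effect: the scenario file is loaded once, at line 66, and now points at rag.json instead of chat.json, while the redundant second load further down is commented out rather than deleted. Because the script passes dict(guidellm_args) as the second argument to from_file, task-level arguments appear to be merged over the file's values; a hedged illustration of that pattern, assuming max_seconds (seen in the example above) is an accepted scenario field:

# Hypothetical override on top of rag.json: cap each benchmark at 30 seconds
# without editing the file. Whether "max_seconds" is a valid scenario key
# depends on GenerativeTextScenario's schema; this only mirrors how the
# script forwards dict(guidellm_args) as overrides.
current_scenario = GenerativeTextScenario.from_file(filepath, {"max_seconds": 30})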
