Skip to content

Commit 69638ea

Browse files
author
chibu
committed
on-premise settings
1 parent ac9ef63 commit 69638ea

File tree

2 files changed

+4
-6
lines changed

2 files changed

+4
-6
lines changed

examples/guidellm_example.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,20 @@
1-
21
"""Example: launch a GuideLLM throughput benchmark as a remote ClearML task."""

from automation.tasks import GuideLLMTask

# Benchmark task definition. Keyword arguments prefixed GUIDELLM__ are
# forwarded to guidellm as environment-style settings; the rest configure
# the task wrapper itself.
task = GuideLLMTask(
    project_name="alexandre_debug",
    task_name="test_guidellm_task",
    model="meta-llama/Llama-3.2-1B-Instruct",
    rate_type="throughput",
    backend="aiohttp_server",
    GUIDELLM__MAX_CONCURRENCY=256,
    # Long timeout (6 h) so large sweeps are not cut off mid-run.
    GUIDELLM__REQUEST_TIMEOUT=21600,
    # Local vLLM server endpoint targeted by the benchmark.
    target="http://localhost:8000/v1",
    max_seconds=30,
    #scenario = "benchmarking_32k",
    # Fixed-size synthetic workload: 128 prompt tokens, 128 output tokens.
    data="prompt_tokens=128,output_tokens=128",
    branch = "update_guidellm",
    # Extra flags passed through to the vLLM server.
    vllm_kwargs={"enable-chunked-prefill": True}
)

# Queue the task on the single-A100 worker pool.
task.execute_remotely("oneshot-a100x1")
#task.execute_locally()

src/automation/configs.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# Default settings shared by automation tasks.

# Container image used for task execution (CUDA 12.8 research image in ECR).
DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest"
# GCS bucket where ClearML stores task artifacts and outputs.
DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml"
# Git branch of the research repository checked out by default.
DEFAULT_RESEARCH_BRANCH = "main"
# GuideLLM benchmarking scenario used when none is specified.
DEFAULT_GUIDELLM_SCENARIO = "chat"

0 commit comments

Comments
 (0)