Skip to content

Commit 69638ea

Browse files
author
chibu
committed
on-premise settings
1 parent ac9ef63 commit 69638ea

File tree

2 files changed

+4
-6
lines changed

2 files changed

+4
-6
lines changed

examples/guidellm_example.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,20 @@
1-
21
"""Example: launch a GuideLLM throughput benchmark as a remote ClearML task."""

from automation.tasks import GuideLLMTask

# Benchmark task definition. Keyword arguments prefixed GUIDELLM__ are
# forwarded to guidellm as environment-style settings; the rest configure
# the task wrapper itself.
task = GuideLLMTask(
    project_name="alexandre_debug",
    task_name="test_guidellm_task",
    model="meta-llama/Llama-3.2-1B-Instruct",
    rate_type="throughput",
    backend="aiohttp_server",
    GUIDELLM__MAX_CONCURRENCY=256,
    # Long timeout (6 h) so large sweeps are not cut off mid-run.
    GUIDELLM__REQUEST_TIMEOUT=21600,
    # Local vLLM server endpoint targeted by the benchmark.
    target="http://localhost:8000/v1",
    max_seconds=30,
    #scenario = "benchmarking_32k",
    # Fixed-size synthetic workload: 128 prompt tokens, 128 output tokens.
    data="prompt_tokens=128,output_tokens=128",
    branch = "update_guidellm",
    # Extra flags passed through to the vLLM server.
    vllm_kwargs={"enable-chunked-prefill": True}
)

# Queue the task on the single-A100 worker pool.
task.execute_remotely("oneshot-a100x1")
#task.execute_locally()

src/automation/configs.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# Default settings shared by automation tasks.

# Container image used for task execution (CUDA 12.8 research image in ECR).
DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest"
# GCS bucket where ClearML stores task artifacts and outputs.
DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml"
# Git branch of the research repository checked out by default.
DEFAULT_RESEARCH_BRANCH = "main"
# GuideLLM benchmarking scenario used when none is specified.
DEFAULT_GUIDELLM_SCENARIO = "chat"

0 commit comments

Comments
 (0)