Skip to content

Commit cc6e229

Browse files
committed
Initial Commit
1 parent 71d7a63 commit cc6e229

File tree

4 files changed

+53
-9
lines changed

4 files changed

+53
-9
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,21 @@ jobs:
1717
- name: Checkout repo
1818
uses: actions/checkout@v4
1919

20+
# Gate step: verify bench-spec.yaml exists and actively selects the
# LMCacheGKE location before spending time on the benchmark run.
- name: Check bench-spec.yaml configuration
  shell: bash
  run: |
    if [ ! -f "bench-spec.yaml" ]; then
      echo "::error:: bench-spec.yaml not found in project root."
      exit 1
    fi

    # Anchor the match at the start of an (optionally indented) line so a
    # commented-out '# Location: LMCacheGKE' does NOT satisfy the check —
    # plain 'grep -q "Location: LMCacheGKE"' also matches comments.
    if ! grep -qE '^[[:space:]]*Location:[[:space:]]*LMCacheGKE' bench-spec.yaml; then
      echo "::warning:: Location: LMCacheGKE not found in bench-spec.yaml. Skipping benchmark run."
      exit 0
    fi

    echo "✅ bench-spec.yaml correctly configured for LMCacheGKE."
34+
2035
- name: Show runner info
2136
run: |
2237
echo "✅ Running on self-hosted runner!"

1-infrastructure/lmcache-gke/run-gke.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,19 @@ if [ "$A100_VRAM" -eq 80 ]; then
3939
if [ "$NUM_GPUS" -eq 1 ]; then
4040
echo "Creating cluster with 1 A100 80GB, Stats should be 12 vCPUs, 170GB memory"
4141
MACHINE_TYPE="a2-ultragpu-1g" # 12 vCPUs, 170GB memory
42-
ACCELERATOR_TYPE="nvidia-tesla-a100-80gb"
42+
ACCELERATOR_TYPE="nvidia-a100-80gb"
4343
elif [ "$NUM_GPUS" -eq 2 ]; then
4444
echo "Creating cluster with 2 A100 80GB, Stats should be 24 vCPUs, 340GB memory"
4545
MACHINE_TYPE="a2-ultragpu-2g" # 24 vCPUs, 340GB memory
46-
ACCELERATOR_TYPE="nvidia-tesla-a100-80gb"
46+
ACCELERATOR_TYPE="nvidia-a100-80gb"
4747
elif [ "$NUM_GPUS" -eq 4 ]; then
4848
echo "Creating cluster with 4 A100 80GB, Stats should be 48 vCPUs, 680GB memory"
4949
MACHINE_TYPE="a2-ultragpu-4g" # 48 vCPUs, 680GB memory
50-
ACCELERATOR_TYPE="nvidia-tesla-a100-80gb"
50+
ACCELERATOR_TYPE="nvidia-a100-80gb"
5151
elif [ "$NUM_GPUS" -eq 8 ]; then
5252
echo "Creating cluster with 8 A100 80GB, Stats should be 96 vCPUs, 1360GB memory"
5353
MACHINE_TYPE="a2-ultragpu-8g" # 96 vCPUs, 1360GB memory
54-
ACCELERATOR_TYPE="nvidia-tesla-a100-80gb"
54+
ACCELERATOR_TYPE="nvidia-a100-80gb"
5555
else
5656
echo "Error: For A100 80GB, only 1, 2, 4, 8 GPUs are supported."
5757
exit 1

bench-spec.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
Infrastructure:
2-
# Location: NoBench # when pushing changes that should not run any workflows
2+
Location: NoBench # when pushing changes that should not run any workflows
33
# Location: LocalMinikube
4-
Location: LMCacheGKE
5-
numClusterGPUs: 2
6-
A100_VRAM: 80
4+
# Location: LMCacheGKE
5+
# numClusterGPUs: 2
6+
# A100_VRAM: 80
77

88
Serving:
99
Baseline: Helm-ProductionStack

run-bench.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,36 @@
1717
def read_bench_spec() -> Dict[str, Any]:
    """Read and parse bench-spec.yaml, validating the baseline's hf_token.

    For baselines that need a HuggingFace token, checks that the token in
    the baseline's own Serving sub-section is not the '<YOUR_HF_TOKEN>'
    placeholder.

    Returns:
        The parsed configuration dictionary.

    Raises:
        ValueError: If the baseline is unsupported or its hf_token is still
            the '<YOUR_HF_TOKEN>' placeholder.
        FileNotFoundError: If bench-spec.yaml does not exist.
        KeyError: If the spec has no 'Serving' section.
    """
    with open('bench-spec.yaml', 'r') as f:
        config = yaml.safe_load(f)

    # Baselines whose config section must carry a real hf_token. The
    # original SGLang branch read an undefined 'single_config' (NameError);
    # it now looks up its own Serving sub-section like the other baselines.
    token_checked_baselines = (
        'SGLang',
        'Helm-ProductionStack',
        'Latest-ProductionStack',
    )

    baseline = config['Serving'].get('Baseline')
    if baseline in token_checked_baselines:
        print(f"validating hf_token for {baseline} baseline")
        baseline_config = config['Serving'].get(baseline, {})
        if baseline_config.get('hf_token') == '<YOUR_HF_TOKEN>':
            raise ValueError(
                f"hf_token must be specified in bench-spec.yaml for {baseline} baseline"
            )
    elif baseline == 'Dynamo':
        # Dynamo needs no hf_token; nothing further to validate.
        print("validating hf_token for Dynamo baseline")
    else:
        raise ValueError(f"Unsupported baseline: {baseline}")

    print("Validated hf_token. run_bench.py now running")
    return config
49+
2150

2251
# 1. Infrastructure Setup
2352
def setup_infrastructure(config: Dict[str, Any]) -> None:

0 commit comments

Comments
 (0)