Skip to content

Commit cc6e229

Browse files
committed
Initial Commit
1 parent 71d7a63 commit cc6e229

File tree

4 files changed

+53
-9
lines changed

4 files changed

+53
-9
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,21 @@ jobs:
1717
- name: Checkout repo
1818
uses: actions/checkout@v4
1919

20+
# Gate step: verify bench-spec.yaml exists and actively selects the
# LMCacheGKE location before spending time on the benchmark run.
- name: Check bench-spec.yaml configuration
  shell: bash
  run: |
    if [ ! -f "bench-spec.yaml" ]; then
      echo "::error:: bench-spec.yaml not found in project root."
      exit 1
    fi

    # Anchor the match at the start of an (optionally indented) line so a
    # commented-out '# Location: LMCacheGKE' does NOT satisfy the check —
    # plain 'grep -q "Location: LMCacheGKE"' also matches comments.
    if ! grep -qE '^[[:space:]]*Location:[[:space:]]*LMCacheGKE' bench-spec.yaml; then
      echo "::warning:: Location: LMCacheGKE not found in bench-spec.yaml. Skipping benchmark run."
      exit 0
    fi

    echo "✅ bench-spec.yaml correctly configured for LMCacheGKE."
34+
2035
- name: Show runner info
2136
run: |
2237
echo "✅ Running on self-hosted runner!"

1-infrastructure/lmcache-gke/run-gke.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,19 @@ if [ "$A100_VRAM" -eq 80 ]; then
3939
if [ "$NUM_GPUS" -eq 1 ]; then
4040
echo "Creating cluster with 1 A100 80GB, Stats should be 12 vCPUs, 170GB memory"
4141
MACHINE_TYPE="a2-ultragpu-1g" # 12 vCPUs, 170GB memory
42-
ACCELERATOR_TYPE="nvidia-tesla-a100-80gb"
42+
ACCELERATOR_TYPE="nvidia-a100-80gb"
4343
elif [ "$NUM_GPUS" -eq 2 ]; then
4444
echo "Creating cluster with 2 A100 80GB, Stats should be 24 vCPUs, 340GB memory"
4545
MACHINE_TYPE="a2-ultragpu-2g" # 24 vCPUs, 340GB memory
46-
ACCELERATOR_TYPE="nvidia-tesla-a100-80gb"
46+
ACCELERATOR_TYPE="nvidia-a100-80gb"
4747
elif [ "$NUM_GPUS" -eq 4 ]; then
4848
echo "Creating cluster with 4 A100 80GB, Stats should be 48 vCPUs, 680GB memory"
4949
MACHINE_TYPE="a2-ultragpu-4g" # 48 vCPUs, 680GB memory
50-
ACCELERATOR_TYPE="nvidia-tesla-a100-80gb"
50+
ACCELERATOR_TYPE="nvidia-a100-80gb"
5151
elif [ "$NUM_GPUS" -eq 8 ]; then
5252
echo "Creating cluster with 8 A100 80GB, Stats should be 96 vCPUs, 1360GB memory"
5353
MACHINE_TYPE="a2-ultragpu-8g" # 96 vCPUs, 1360GB memory
54-
ACCELERATOR_TYPE="nvidia-tesla-a100-80gb"
54+
ACCELERATOR_TYPE="nvidia-a100-80gb"
5555
else
5656
echo "Error: For A100 80GB, only 1, 2, 4, 8 GPUs are supported."
5757
exit 1

bench-spec.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
Infrastructure:
2-
# Location: NoBench # when pushing changes that should not run any workflows
2+
Location: NoBench # when pushing changes that should not run any workflows
33
# Location: LocalMinikube
4-
Location: LMCacheGKE
5-
numClusterGPUs: 2
6-
A100_VRAM: 80
4+
# Location: LMCacheGKE
5+
# numClusterGPUs: 2
6+
# A100_VRAM: 80
77

88
Serving:
99
Baseline: Helm-ProductionStack

run-bench.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,36 @@
1717
def read_bench_spec() -> Dict[str, Any]:
    """Read and parse bench-spec.yaml, validating the baseline's hf_token.

    For baselines that need a HuggingFace token, checks that the token in
    the baseline's own Serving sub-section is not the '<YOUR_HF_TOKEN>'
    placeholder.

    Returns:
        The parsed configuration dictionary.

    Raises:
        ValueError: If the baseline is unsupported or its hf_token is still
            the '<YOUR_HF_TOKEN>' placeholder.
        FileNotFoundError: If bench-spec.yaml does not exist.
        KeyError: If the spec has no 'Serving' section.
    """
    with open('bench-spec.yaml', 'r') as f:
        config = yaml.safe_load(f)

    # Baselines whose config section must carry a real hf_token. The
    # original SGLang branch read an undefined 'single_config' (NameError);
    # it now looks up its own Serving sub-section like the other baselines.
    token_checked_baselines = (
        'SGLang',
        'Helm-ProductionStack',
        'Latest-ProductionStack',
    )

    baseline = config['Serving'].get('Baseline')
    if baseline in token_checked_baselines:
        print(f"validating hf_token for {baseline} baseline")
        baseline_config = config['Serving'].get(baseline, {})
        if baseline_config.get('hf_token') == '<YOUR_HF_TOKEN>':
            raise ValueError(
                f"hf_token must be specified in bench-spec.yaml for {baseline} baseline"
            )
    elif baseline == 'Dynamo':
        # Dynamo needs no hf_token; nothing further to validate.
        print("validating hf_token for Dynamo baseline")
    else:
        raise ValueError(f"Unsupported baseline: {baseline}")

    print("Validated hf_token. run_bench.py now running")
    return config
49+
2150

2251
# 1. Infrastructure Setup
2352
def setup_infrastructure(config: Dict[str, Any]) -> None:

0 commit comments

Comments
 (0)