Fix run scripts: pass 4-latest-results/-prefixed CSV paths to summarize.py

sammshen · sammshen · commit ef4cf04a8bef · 2025-05-15T21:09:45.000Z
diff --git a/3-workloads/agentic/run_agentic.sh b/3-workloads/agentic/run_agentic.sh
@@ -86,7 +86,7 @@ for interval in "${NEW_USER_INTERVALS[@]}"; do
     # Change to project root before running summarize.py
     cd "$PROJECT_ROOT"
     python3 "4-latest-results/post-processing/summarize.py" \
-        "${output_file#../../}" \
+        "4-latest-results/${output_file#../../}" \
         KEY="$KEY" \
         WORKLOAD="agentic" \
         NUM_USERS_WARMUP="$NUM_USERS_WARMUP" \
diff --git a/3-workloads/mooncake/run_mooncake.sh b/3-workloads/mooncake/run_mooncake.sh
@@ -56,7 +56,7 @@ for qps in "${QPS_VALUES[@]}"; do
     # Change to project root before running summarize.py
     cd "$PROJECT_ROOT"
     python3 "4-latest-results/post-processing/summarize.py" \
-        "${output_file#../../}" \
+        "4-latest-results/${output_file#../../}" \
         KEY="$KEY" \
         WORKLOAD="mooncake" \
         NUM_ROUNDS="$NUM_ROUNDS" \
diff --git a/3-workloads/sharegpt/workload_execution/run-sharegpt.sh b/3-workloads/sharegpt/workload_execution/run-sharegpt.sh
@@ -67,7 +67,7 @@ for qps in "${QPS_VALUES[@]}"; do
     # Change to project root before running summarize.py
     cd "$PROJECT_ROOT"
     python3 "4-latest-results/post-processing/summarize.py" \
-        "${output_file#../../../}" \
+        "4-latest-results/${output_file#../../../}" \
         KEY="$KEY" \
         WORKLOAD="sharegpt" \
         LIMIT="$LIMIT" \
diff --git a/3-workloads/synthetic/run_synthetic.sh b/3-workloads/synthetic/run_synthetic.sh
@@ -84,8 +84,9 @@ for qps in "${QPS_VALUES[@]}"; do
 
     # Change to project root before running summarize.py
     cd "$PROJECT_ROOT"
+
     python3 "4-latest-results/post-processing/summarize.py" \
-        "${KEY}_synthetic_output_${qps}.csv" \
+        "4-latest-results/${KEY}_synthetic_output_${qps}.csv" \
         KEY="$KEY" \
         WORKLOAD="synthetic" \
         NUM_USERS_WARMUP="$NUM_USERS_WARMUP" \
@@ -100,3 +101,17 @@ for qps in "${QPS_VALUES[@]}"; do
     # Change back to script directory
     cd "$SCRIPT_DIR"
 done
+
+# Debugging: run the summary step by hand from the project root, e.g.:
+# python3 "4-latest-results/post-processing/summarize.py" \
+#     "4-latest-results/stack_synthetic_output_0.7.csv" \
+#     KEY="stack" \
+#     WORKLOAD="synthetic" \
+#     NUM_USERS_WARMUP="750" \
+#     NUM_USERS="350" \
+#     NUM_ROUNDS="20" \
+#     SYSTEM_PROMPT="0" \
+#     CHAT_HISTORY="20000" \
+#     ANSWER_LEN="1000" \
+#     QPS="0.7" \
+#     USE_SHAREGPT="false"
diff --git a/4-latest-results/post-processing/summarize.py b/4-latest-results/post-processing/summarize.py
@@ -22,12 +22,6 @@ def ProcessSummary(
         else:
             launched_queries = len(df)
 
-        print(
-            f"Launched queries: {launched_queries}, "
-            f"pending queries: {pending_queries}, "
-            f"finished queries: {len(df)}"
-        )
-
         if qps is None:
             qps = 0.0
 
@@ -55,6 +49,12 @@ def ProcessSummary(
         average_ratio = df['ratio'].mean()
 
         print("\n==================== Performance summary ======================")
+        print(
+            f"   Launched queries: {launched_queries}, "
+            f"pending queries: {pending_queries}, "
+            f"finished queries: {len(df)}"
+        )
+
         print(f"  Processing speed: {finished_qps:.4f} reqs/s")
         print(f"  Input tokens per second: {average_prefill_speed:.4f} tokens/s")
         print(f"  Output tokens per second: {average_generation_speed:.4f} tokens/s")
@@ -103,13 +103,15 @@ def process_output(filename: str, **kwargs):
     print(f"Performance summary saved to {results_path}")
 
     # Save a copy of the results file to ~/srv/runner-db/
+    print(f"Saving results to ~/srv/runner-db/{filename_without_parent_or_ext}-{timestamp}.results")
     runner_db_path = os.path.expanduser("~/srv/runner-db/")
     os.makedirs(runner_db_path, exist_ok=True)
     runner_db_file = os.path.join(runner_db_path, f"{filename_without_parent_or_ext}-{timestamp}.results")
 
     # Copy the contents to the new location
     with open(results_path, "r") as src, open(runner_db_file, "w") as dst:
         dst.write(src.read())
+    print(f"Results saved to ~/srv/runner-db/{filename_without_parent_or_ext}-{timestamp}.results")
 
 if __name__ == "__main__":
     if len(sys.argv) < 2:
diff --git a/bench-spec-TEMPLATE.yaml b/bench-spec-TEMPLATE.yaml
@@ -31,7 +31,7 @@ Serving:
     numGPUs: 1 # PLEASE make sure that replicaCount x numGPUs <= numClusterGPUs
     numCPUs: 4 # PLEASE look at the vCPU limits in the comment above (try to keep 12 or below)
     tensorParallelSize: 1 # please make sure tensorParallelSize <= numGPUs (this is the number of GPUs per replica)
-    hf_token: <YOUR_HF_TOKEN> # leave as <YOUR_HF_TOKEN> if running on LMCacheGKE
+    hf_token: <YOUR_HF_TOKEN>
     maxModelLen: 16384
 
   # Option 2: Latest-ProductionStack (Renders the latest k8s deployment directly from production-stack source code)
@@ -45,7 +45,7 @@ Serving:
     numGPUs: 2
     numCPUs: 4
     tensorParallelSize: 2
-    hf_token: <YOUR_HF_TOKEN> # leave as <YOUR_HF_TOKEN> if running on LMCacheGKE
+    hf_token: <YOUR_HF_TOKEN>
     maxModelLen: 16384
 
   # Option 3: SGLang