Automate benchmarking - downloading data from GPU to local (#756)

Manan17 · Manan Shah · vaibhavjindal · web-flow · commit b6ed7357bd32 · 2025-06-12T11:16:44.000-07:00
## Summary
<!--- This is a required section; please describe the main purpose of
this proposed code change. --->

<!---
## Details
This is an optional section; is there anything specific that reviewers
should be aware of?
--->

## Testing Done
<!--- This is a required section; please describe how this change was
tested. --->

<!--
Replace BLANK with your device type. For example, A100-80G-PCIe

Complete the following tasks before sending your PR, and replace `[ ]`
with
`[x]` to indicate you have done them.
-->

- Hardware Type: <BLANK>
- [ ] run `make test` to ensure correctness
- [ ] run `make checkstyle` to ensure code style
- [ ] run `make test-convergence` to ensure convergence

---------

Co-authored-by: Manan Shah <manashah@linkedin.com>
Co-authored-by: Vaibhav Jindal <vaibhav.jndl@gmail.com>
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -29,6 +29,7 @@ jobs:
       OUTPUT_FILENAME: benchmark.csv
       GENERATED_CSV: benchmark/data/all_benchmark_data.csv
 
+
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
@@ -71,7 +72,6 @@ jobs:
         run: |
           mkdir -p gh-pages/${OUTPUT_DIR}/${{ steps.get_hash.outputs.hash }}
           cp ${GENERATED_CSV} gh-pages/${OUTPUT_DIR}/${{ steps.get_hash.outputs.hash }}/${OUTPUT_FILENAME}
-
       # Step 7: Append commit hash to commits.txt if not already present
       - name: Update commits.txt
         run: |
@@ -84,7 +84,6 @@ jobs:
           if ! grep -q "${{ steps.get_hash.outputs.hash }}" ${OUTPUT_DIR}/commits.txt; then
             echo "${{ steps.get_hash.outputs.hash }}" >> ${OUTPUT_DIR}/commits.txt
           fi
-
       # Step 7: Commit and push
       - name: Commit and push to gh-pages
         run: |
@@ -94,3 +93,4 @@ jobs:
           git add .
           git commit -m "Add benchmark for commit ${{ steps.get_hash.outputs.hash }}" || echo "No changes to commit"
           git push origin gh-pages
+
diff --git a/benchmark/scripts/utils.py b/benchmark/scripts/utils.py
@@ -235,7 +235,7 @@ def create_unique_key(row):
                     pass
             else:
                 existing_data_dict[row_key] = row_dict
-
+    os.makedirs(os.path.dirname(filename_abs_path), exist_ok=True)
     with open(filename_abs_path, mode="w", newline="") as file:
         writer = csv.DictWriter(file, fieldnames=fieldnames)
         writer.writeheader()
diff --git a/dev/modal/benchmarks.py b/dev/modal/benchmarks.py
@@ -17,6 +17,7 @@
 @app.function(gpu="H100", image=repo, timeout=60 * 45)
 def liger_benchmarks():
     import subprocess
+    import os
 
     subprocess.run(
         ["uv pip install -e '.[dev]' --system"],
@@ -26,3 +27,48 @@ def liger_benchmarks():
     )
     subprocess.run(["python benchmark/scripts/benchmark_kto_loss.py"], check=True, shell=True, cwd=REMOTE_ROOT_PATH)
     subprocess.run(["python benchmark/scripts/benchmark_cpo_loss.py"], check=True, shell=True, cwd=REMOTE_ROOT_PATH)
+
+    file_path = Path(REMOTE_ROOT_PATH) / "benchmark" / "data" / "all_benchmark_data.csv"
+    print(f"Checking if file exists at: {file_path}")
+    print(f"File exists: {os.path.exists(file_path)}")
+    
+    if not os.path.exists(file_path):
+        print("Listing directory contents:")
+        data_dir = file_path.parent
+        if os.path.exists(data_dir):
+            print(f"Contents of {data_dir}:")
+            print(os.listdir(data_dir))
+        else:
+            print(f"Data directory {data_dir} does not exist")
+        raise FileNotFoundError(f"Benchmark data file not found at {file_path}")
+
+    with open(file_path, "rb") as f:
+        data = f.read()
+        print(f"Successfully read {len(data)} bytes of data")
+        return data
+
+
+@app.local_entrypoint()
+def main():  # Calls liger_benchmarks.remote() and writes the returned CSV bytes under ROOT_PATH/benchmark/data.
+    try:
+        # Run the benchmarks and get the data
+        print("Starting benchmark run...")
+        benchmark_data = liger_benchmarks.remote()
+        # A falsy result (None or empty bytes) is treated as a failure instead of being written out.
+        if not benchmark_data:
+            raise ValueError("No data received from remote function")
+            
+        # Save the data locally
+        local_data_path = ROOT_PATH / "benchmark" / "data" / "all_benchmark_data.csv"
+        print(f"Attempting to save data to: {local_data_path}")
+        # Make sure the destination directory exists before opening the file for writing.
+        local_data_path.parent.mkdir(parents=True, exist_ok=True)
+        # Binary mode: liger_benchmarks reads the CSV with "rb", so the bytes are written back unchanged.
+        with open(local_data_path, "wb") as f:
+            f.write(benchmark_data)
+        
+        print(f"Successfully saved {len(benchmark_data)} bytes to: {local_data_path}")
+        
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        raise  # re-raise after printing so the failure is not swallowed