Skip to content

Commit b6ed735

Browse files
Manan17 (Manan Shah) and vaibhavjindal
authored
Automate benchmarking - downloading data from GPU to local (#756)
## Summary
<!--- This is a required section; please describe the main purpose of this proposed code change. --->

<!---
## Details
This is an optional section; is there anything specific that reviewers should be aware of?
--->

## Testing Done
<!--- This is a required section; please describe how this change was tested. --->

<!--
Replace BLANK with your device type. For example, A100-80G-PCIe

Complete the following tasks before sending your PR, and replace `[ ]` with `[x]` to indicate you have done them.
-->

- Hardware Type: <BLANK>
- [ ] run `make test` to ensure correctness
- [ ] run `make checkstyle` to ensure code style
- [ ] run `make test-convergence` to ensure convergence

---------

Co-authored-by: Manan Shah <manashah@linkedin.com>
Co-authored-by: Vaibhav Jindal <vaibhav.jndl@gmail.com>
1 parent 3e54d0b commit b6ed735

File tree

3 files changed

+49
-3
lines changed

3 files changed

+49
-3
lines changed

.github/workflows/benchmark.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ jobs:
2929
OUTPUT_FILENAME: benchmark.csv
3030
GENERATED_CSV: benchmark/data/all_benchmark_data.csv
3131

32+
3233
steps:
3334
- name: Checkout code
3435
uses: actions/checkout@v3
@@ -71,7 +72,6 @@ jobs:
7172
run: |
7273
mkdir -p gh-pages/${OUTPUT_DIR}/${{ steps.get_hash.outputs.hash }}
7374
cp ${GENERATED_CSV} gh-pages/${OUTPUT_DIR}/${{ steps.get_hash.outputs.hash }}/${OUTPUT_FILENAME}
74-
7575
# Step 7: Append commit hash to commits.txt if not already present
7676
- name: Update commits.txt
7777
run: |
@@ -84,7 +84,6 @@ jobs:
8484
if ! grep -q "${{ steps.get_hash.outputs.hash }}" ${OUTPUT_DIR}/commits.txt; then
8585
echo "${{ steps.get_hash.outputs.hash }}" >> ${OUTPUT_DIR}/commits.txt
8686
fi
87-
8887
# Step 8: Commit and push
8988
- name: Commit and push to gh-pages
9089
run: |
@@ -94,3 +93,4 @@ jobs:
9493
git add .
9594
git commit -m "Add benchmark for commit ${{ steps.get_hash.outputs.hash }}" || echo "No changes to commit"
9695
git push origin gh-pages
96+

benchmark/scripts/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def create_unique_key(row):
235235
pass
236236
else:
237237
existing_data_dict[row_key] = row_dict
238-
238+
os.makedirs(os.path.dirname(filename_abs_path), exist_ok=True)
239239
with open(filename_abs_path, mode="w", newline="") as file:
240240
writer = csv.DictWriter(file, fieldnames=fieldnames)
241241
writer.writeheader()

dev/modal/benchmarks.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
@app.function(gpu="H100", image=repo, timeout=60 * 45)
1818
def liger_benchmarks():
1919
import subprocess
20+
import os
2021

2122
subprocess.run(
2223
["uv pip install -e '.[dev]' --system"],
@@ -26,3 +27,48 @@ def liger_benchmarks():
2627
)
2728
subprocess.run(["python benchmark/scripts/benchmark_kto_loss.py"], check=True, shell=True, cwd=REMOTE_ROOT_PATH)
2829
subprocess.run(["python benchmark/scripts/benchmark_cpo_loss.py"], check=True, shell=True, cwd=REMOTE_ROOT_PATH)
30+
31+
file_path = Path(REMOTE_ROOT_PATH) / "benchmark" / "data" / "all_benchmark_data.csv"
32+
print(f"Checking if file exists at: {file_path}")
33+
print(f"File exists: {os.path.exists(file_path)}")
34+
35+
if not os.path.exists(file_path):
36+
print("Listing directory contents:")
37+
data_dir = file_path.parent
38+
if os.path.exists(data_dir):
39+
print(f"Contents of {data_dir}:")
40+
print(os.listdir(data_dir))
41+
else:
42+
print(f"Data directory {data_dir} does not exist")
43+
raise FileNotFoundError(f"Benchmark data file not found at {file_path}")
44+
45+
with open(file_path, "rb") as f:
46+
data = f.read()
47+
print(f"Successfully read {len(data)} bytes of data")
48+
return data
49+
50+
51+
@app.local_entrypoint()
52+
def main():
53+
try:
54+
# Run the benchmarks and get the data
55+
print("Starting benchmark run...")
56+
benchmark_data = liger_benchmarks.remote()
57+
58+
if not benchmark_data:
59+
raise ValueError("No data received from remote function")
60+
61+
# Save the data locally
62+
local_data_path = ROOT_PATH / "benchmark" / "data" / "all_benchmark_data.csv"
63+
print(f"Attempting to save data to: {local_data_path}")
64+
65+
local_data_path.parent.mkdir(parents=True, exist_ok=True)
66+
67+
with open(local_data_path, "wb") as f:
68+
f.write(benchmark_data)
69+
70+
print(f"Successfully saved {len(benchmark_data)} bytes to: {local_data_path}")
71+
72+
except Exception as e:
73+
print(f"Error occurred: {str(e)}")
74+
raise

0 commit comments

Comments
 (0)