
Commit 20212c3

support arbitrary branches
1 parent 7ea329c commit 20212c3

File tree

1 file changed (+86 −96 lines changed)

.github/workflows/run-bench.yml

Lines changed: 86 additions & 96 deletions
```diff
@@ -7,10 +7,10 @@ on:
         description: 'Benchmark dataset regex (leave empty for all)'
         required: false
         default: ''
-      tags:
-        description: 'Space-separated list of tags to benchmark'
+      branches:
+        description: 'Space-separated list of branches to benchmark'
         required: true
-        default: '4.0.0-rc.1'
+        default: 'github_actions main'
   push:
     branches:
       - github_actions
```
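The new `branches` input arrives as one space-separated string (the default compares `github_actions` against `main`), so every consumer has to tokenize it. Below is a minimal Python sketch of that contract, assuming plain space separation with no quoting; the workflow itself tokenizes in bash with `IFS=' ' read -r -a`, shown later in this diff, and `parse_branches` is illustrative only.

```python
# Hedged sketch of how the space-separated `branches` input is tokenized.
# The workflow does this in bash; this helper exists only for illustration.
def parse_branches(raw: str) -> list[str]:
    """Split a space-separated branch list, dropping empty tokens."""
    return [b for b in raw.split(" ") if b]

assert parse_branches("github_actions main") == ["github_actions", "main"]
assert parse_branches("") == []  # the input is required, so this case is defensive
```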
```diff
@@ -51,9 +51,6 @@ jobs:
           distribution: temurin
           cache: maven
 
-      - name: Checkout specific tag
-        run: git checkout ${{ matrix.tag }}
-
       - name: Get version from pom.xml
         id: get-version
         run: |
```
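With the per-tag checkout removed, the `Get version from pom.xml` step now reports the version of whatever ref the job started on. The step's shell body is largely outside this hunk; purely as an illustration of the idea, here is a hedged Python sketch of reading a Maven project version. The helper name and the parent-version fallback are assumptions, not the workflow's actual shell logic.

```python
# Illustrative sketch only: read <version> from a Maven pom.xml.
# Assumes the standard Maven POM 4.0.0 namespace; this is not the
# workflow's shell code.
import xml.etree.ElementTree as ET

def pom_version(path: str = "pom.xml") -> str:
    ns = {"m": "http://maven.apache.org/POM/4.0.0"}
    root = ET.parse(path).getroot()
    node = root.find("m:version", ns)
    if node is None:
        # Modules often inherit their version from the parent POM.
        node = root.find("m:parent/m:version", ns)
    return node.text.strip() if node is not None and node.text else "unknown"
```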
```diff
@@ -65,48 +62,59 @@ jobs:
             fi
           fi
           echo "version=$VERSION" >> $GITHUB_OUTPUT
-          echo "Tag ${{ matrix.tag }} has version $VERSION"
+          echo "Current branch has version $VERSION"
 
-      - name: Build with Maven (JDK 24)
-        if: matrix.jdk == '24'
+      # Build the current branch and save the fat jar
+      - name: Build with Maven (JDK ${{ matrix.jdk }})
         run: mvn -B -Punix-amd64-profile package --file pom.xml
 
-      - name: Run Bench
+      # Save the fat jar for later use
+      - name: Save fat jar
+        run: |
+          mkdir -p /tmp/jvector-jar
+          cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar /tmp/jvector-jar/jvector-examples-fat.jar
+
+      # Parse the branches input and run benchmarks for each branch
+      - name: Run benchmarks for each branch
         run: |
-          # Use the jar-with-dependencies which includes all required dependencies
-          java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
-            ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
-            -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output bench-results
-
-          # List files in current directory to help with debugging
-          echo "Files in current directory:"
-          ls -la
-
-      - name: Upload Benchmark Results
+          # Get the list of branches to benchmark
+          IFS=' ' read -r -a BRANCHES <<< "${{ github.event.inputs.branches }}"
+
+          # Create a directory to store all benchmark results
+          mkdir -p benchmark_results
+
+          # Loop through each branch
+          for branch in "${BRANCHES[@]}"; do
+            echo "Processing branch: $branch"
+
+            # Checkout the branch
+            git checkout $branch || { echo "Failed to checkout branch $branch"; continue; }
+
+            # Build the branch
+            mvn -B -Punix-amd64-profile package --file pom.xml
+
+            # Run benchmark using the saved fat jar
+            java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
+              -jar /tmp/jvector-jar/jvector-examples-fat.jar \
+              --config jvector-examples/yaml-configs/autoDefault.yml \
+              --output ${branch}-bench-results
+
+            # Move the results to the benchmark_results directory
+            mv ${branch}-bench-results.csv benchmark_results/
+            mv ${branch}-bench-results.json benchmark_results/ || true
+
+            echo "Completed benchmarks for branch: $branch"
+          done
+
+      - name: Upload Individual Benchmark Results
         uses: actions/upload-artifact@v4
         with:
           name: benchmark-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}
           path: |
-            bench-results.csv
+            benchmark_results/*.csv
+            benchmark_results/*.json
           if-no-files-found: warn
 
-      - name: Download Previous Benchmark Results
-        uses: dawidd6/action-download-artifact@v2
-        continue-on-error: true
-        with:
-          workflow: run-bench.yml
-          name: benchmark-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}
-          path: previous-results
-          skip_unpack: false
-          if_no_artifact_found: warn
-
-      - name: Download All Benchmark Results
-        uses: actions/download-artifact@v4
-        with:
-          path: all-benchmark-results
-          pattern: benchmark-results-*
-          merge-multiple: true
-
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
```
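The heart of the commit is the loop above: tokenize the `branches` input, then for each branch checkout → build → benchmark → collect results, skipping any branch that fails to check out. Note that, as written, the loop rebuilds each branch with Maven but then launches the jar saved before the loop. For readers who want to replicate the comparison outside Actions, here is a hedged Python mirror of that loop; the paths, Maven profile, and jar location are copied from the workflow, while `run_branch_benchmarks` itself is illustrative and assumes a JDK 20+ flag set.

```python
# Illustrative mirror of the workflow's per-branch loop; not part of the commit.
import shutil
import subprocess
from pathlib import Path

FAT_JAR = "/tmp/jvector-jar/jvector-examples-fat.jar"  # saved before the loop

def run_branch_benchmarks(branches: list[str]) -> None:
    out_dir = Path("benchmark_results")
    out_dir.mkdir(exist_ok=True)
    for branch in branches:
        # Mirror `git checkout $branch || continue`: skip branches that fail.
        if subprocess.run(["git", "checkout", branch]).returncode != 0:
            print(f"Failed to checkout branch {branch}")
            continue
        subprocess.run(["mvn", "-B", "-Punix-amd64-profile", "package",
                        "--file", "pom.xml"], check=True)
        subprocess.run(["java",
                        "--enable-native-access=ALL-UNNAMED",
                        "--add-modules=jdk.incubator.vector",
                        "-jar", FAT_JAR,
                        "--config", "jvector-examples/yaml-configs/autoDefault.yml",
                        "--output", f"{branch}-bench-results"], check=True)
        for ext in (".csv", ".json"):
            result = Path(f"{branch}-bench-results{ext}")
            if result.exists():  # the .json may be absent; the workflow uses `|| true`
                shutil.move(str(result), out_dir / result.name)
```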
```diff
@@ -122,123 +130,105 @@ jobs:
           cat > visualize.py << 'EOF'
           import os
           import glob
+          import re
           import pandas as pd
           import matplotlib.pyplot as plt
-
-          # Find all CSV files
-          csv_files = glob.glob('all-benchmark-results/**/bench-results.csv', recursive=True)
-
+
+          # Find all CSV files in the benchmark_results directory
+          csv_files = glob.glob('benchmark_results/*-bench-results.csv')
+
           if not csv_files:
               print("No benchmark results found! Checking other possible locations...")
-              csv_files = glob.glob('**/bench-results.csv', recursive=True)
-
+              csv_files = glob.glob('**/*-bench-results.csv', recursive=True)
+
           print(f"Found {len(csv_files)} CSV files:")
           for f in csv_files:
               print(f"  - {f}")
-
+
           # Read and combine all results
           dfs = []
           for file in csv_files:
               try:
-                  # Extract version from path
-                  parts = file.split('/')
-                  # Try to extract version from directory name
-                  version = "unknown"
-                  for part in parts:
-                      if part.startswith("v") or part.startswith("4."):
-                          version = part
-                          break
-
+                  # Extract branch name from filename
+                  filename = os.path.basename(file)
+                  branch_match = re.match(r'([^-]+)-bench-results\.csv', filename)
+                  branch = branch_match.group(1) if branch_match else "unknown"
+
                   df = pd.read_csv(file)
-                  # Add version column if not present
-                  if 'version' not in df.columns:
-                      df['version'] = version
-
+                  # Add branch column if not present
+                  if 'branch' not in df.columns:
+                      df['branch'] = branch
+
                   dfs.append(df)
-                  print(f"Processed {file} with version {version}")
+                  print(f"Processed {file} with branch {branch}")
               except Exception as e:
                   print(f"Error processing {file}: {e}")
-
+
           if not dfs:
               print("No valid benchmark results found!")
               exit(1)
-
+
           combined_df = pd.concat(dfs)
           combined_df.to_csv('all_benchmark_results.csv', index=False)
           print(f"Combined {len(dfs)} benchmark results")
-
-          # Sort by version for proper ordering in plots
-          # Handle version strings like 4.0.0-beta.6
-          def version_key(v):
-              if isinstance(v, str):
-                  v = v.replace('v', '')
-                  parts = []
-                  for part in v.replace('-', '.').split('.'):
-                      try:
-                          parts.append(int(part))
-                      except ValueError:
-                          parts.append(part)
-                  return parts
-              return v
-
-          combined_df['version_sort'] = combined_df['version'].apply(version_key)
-          combined_df = combined_df.sort_values('version_sort')
-
+
           # Create plots for each metric
           metrics = ['QPS', 'Mean Latency', 'Recall@10']
           for metric in metrics:
               if metric not in combined_df.columns:
                   print(f"Warning: Metric {metric} not found in results")
                   continue
-
+
               plt.figure(figsize=(10, 6))
-
+
               for dataset, group in combined_df.groupby('dataset'):
-                  plt.plot(group['version'], group[metric], marker='o', label=dataset)
-
-              plt.title(f"{metric} Across JVector Versions")
-              plt.xlabel("Version")
+                  plt.plot(group['branch'], group[metric], marker='o', label=dataset)
+
+              plt.title(f"{metric} Across JVector Branches")
+              plt.xlabel("Branch")
               plt.ylabel(metric)
               plt.xticks(rotation=45)
               plt.grid(True, linestyle='--', alpha=0.7)
               plt.legend()
               plt.tight_layout()
-
+
               safe_metric = metric.replace('@', '_at_').replace(' ', '_')
               plt.savefig(f"{safe_metric}.png")
               print(f"Created plot for {metric}")
-
+
           # Create a summary markdown report
           with open('benchmark_report.md', 'w') as f:
-              f.write("# JVector Historical Benchmark Results\n\n")
-              f.write(f"Comparing {len(combined_df['version'].unique())} versions of JVector\n\n")
-
+              f.write("# JVector Branch Benchmark Comparison\n\n")
+              f.write(f"Comparing {len(combined_df['branch'].unique())} branches of JVector\n\n")
+
               f.write("## Summary Table\n\n")
               # Use to_markdown if available, otherwise use to_string
               try:
-                  table = combined_df[['version', 'dataset'] + [m for m in metrics if m in combined_df.columns]].to_markdown(index=False)
+                  table = combined_df[['branch', 'dataset'] + [m for m in metrics if m in combined_df.columns]].to_markdown(index=False)
               except AttributeError:
-                  table = combined_df[['version', 'dataset'] + [m for m in metrics if m in combined_df.columns]].to_string(index=False)
+                  table = combined_df[['branch', 'dataset'] + [m for m in metrics if m in combined_df.columns]].to_string(index=False)
               f.write(table)
-
+
               f.write("\n\n## Visualizations\n\n")
               for metric in metrics:
                   if metric not in combined_df.columns:
                       continue
                   safe_metric = metric.replace('@', '_at_').replace(' ', '_')
                   f.write(f"### {metric}\n\n")
                   f.write(f"![{metric} Chart]({safe_metric}.png)\n\n")
-
+
           print("Created benchmark report")
           EOF
-
+
           python visualize.py
 
       - name: Upload combined results and visualizations
         uses: actions/upload-artifact@v4
         with:
-          name: benchmark-summary
+          name: benchmark-comparison-results
           path: |
+            benchmark_results/*.csv
+            benchmark_results/*.json
             all_benchmark_results.csv
             *.png
             benchmark_report.md
```
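One caveat in the new visualize.py worth knowing when naming branches: the branch is recovered with `re.match(r'([^-]+)-bench-results\.csv', filename)`, and `[^-]+` cannot cross a hyphen, so a branch named `feature-x` produces a filename the pattern rejects and the row falls back to `unknown`. A hedged sketch of a more permissive pattern follows; this variant is an assumption, not part of the commit.

```python
# Sketch only: a greedy group with an anchored suffix tolerates hyphenated
# branch names, unlike the commit's `[^-]+` pattern.
import re

def branch_from_filename(filename: str) -> str:
    m = re.match(r"(.+)-bench-results\.csv$", filename)
    return m.group(1) if m else "unknown"

assert branch_from_filename("main-bench-results.csv") == "main"
assert branch_from_filename("feature-x-bench-results.csv") == "feature-x"
```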
