         description: 'Benchmark dataset regex (leave empty for all)'
         required: false
         default: ''
-      tags:
-        description: 'Space-separated list of tags to benchmark'
+      branches:
+        description: 'Space-separated list of branches to benchmark'
         required: true
-        default: '4.0.0-rc.1'
+        default: 'github_actions main'
   push:
     branches:
       - github_actions
           distribution: temurin
           cache: maven

-      - name: Checkout specific tag
-        run: git checkout ${{ matrix.tag }}
-
       - name: Get version from pom.xml
         id: get-version
         run: |
@@ -65,48 +62,59 @@ jobs:
             fi
           fi
           echo "version=$VERSION" >> $GITHUB_OUTPUT
-          echo "Tag ${{ matrix.tag }} has version $VERSION"
+          echo "Current branch has version $VERSION"

-      - name: Build with Maven (JDK 24)
-        if: matrix.jdk == '24'
+      # Build the current branch and save the fat jar
+      - name: Build with Maven (JDK ${{ matrix.jdk }})
         run: mvn -B -Punix-amd64-profile package --file pom.xml

-      - name: Run Bench
+      # Save the fat jar for later use
+      - name: Save fat jar
+        run: |
+          mkdir -p /tmp/jvector-jar
+          cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar /tmp/jvector-jar/jvector-examples-fat.jar
+
+      # Parse the branches input and run benchmarks for each branch
+      - name: Run benchmarks for each branch
         run: |
-          # Use the jar-with-dependencies which includes all required dependencies
-          java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
-            ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
-            -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output bench-results
-
-          # List files in current directory to help with debugging
-          echo "Files in current directory:"
-          ls -la
-
-      - name: Upload Benchmark Results
+          # Get the list of branches to benchmark
+          IFS=' ' read -r -a BRANCHES <<< "${{ github.event.inputs.branches }}"
+
+          # Create a directory to store all benchmark results
+          mkdir -p benchmark_results
+
+          # Loop through each branch
+          for branch in "${BRANCHES[@]}"; do
+            echo "Processing branch: $branch"
+
+            # Checkout the branch
+            git checkout $branch || { echo "Failed to checkout branch $branch"; continue; }
+
+            # Build the branch
+            mvn -B -Punix-amd64-profile package --file pom.xml
+
+            # Run benchmark using the saved fat jar
+            java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
+              -jar /tmp/jvector-jar/jvector-examples-fat.jar \
+              --config jvector-examples/yaml-configs/autoDefault.yml \
+              --output ${branch}-bench-results
+
+            # Move the results to the benchmark_results directory
+            mv ${branch}-bench-results.csv benchmark_results/
+            mv ${branch}-bench-results.json benchmark_results/ || true
+
+            echo "Completed benchmarks for branch: $branch"
+          done
+
+      - name: Upload Individual Benchmark Results
         uses: actions/upload-artifact@v4
         with:
           name: benchmark-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}
           path: |
-            bench-results.csv
+            benchmark_results/*.csv
+            benchmark_results/*.json
           if-no-files-found: warn

-      - name: Download Previous Benchmark Results
-        uses: dawidd6/action-download-artifact@v2
-        continue-on-error: true
-        with:
-          workflow: run-bench.yml
-          name: benchmark-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}
-          path: previous-results
-          skip_unpack: false
-          if_no_artifact_found: warn
-
-      - name: Download All Benchmark Results
-        uses: actions/download-artifact@v4
-        with:
-          path: all-benchmark-results
-          pattern: benchmark-results-*
-          merge-multiple: true
-
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
@@ -122,123 +130,105 @@ jobs:
           cat > visualize.py << 'EOF'
           import os
           import glob
+          import re
           import pandas as pd
           import matplotlib.pyplot as plt
-
-          # Find all CSV files
-          csv_files = glob.glob('all-benchmark-results/**/bench-results.csv', recursive=True)
-
+
+          # Find all CSV files in the benchmark_results directory
+          csv_files = glob.glob('benchmark_results/*-bench-results.csv')
+
           if not csv_files:
               print("No benchmark results found! Checking other possible locations...")
-              csv_files = glob.glob('**/bench-results.csv', recursive=True)
-
+              csv_files = glob.glob('**/*-bench-results.csv', recursive=True)
+
           print(f"Found {len(csv_files)} CSV files:")
           for f in csv_files:
               print(f" - {f}")
-
+
           # Read and combine all results
           dfs = []
           for file in csv_files:
               try:
-                  # Extract version from path
-                  parts = file.split('/')
-                  # Try to extract version from directory name
-                  version = "unknown"
-                  for part in parts:
-                      if part.startswith("v") or part.startswith("4."):
-                          version = part
-                          break
-
+                  # Extract branch name from filename
+                  filename = os.path.basename(file)
+                  branch_match = re.match(r'([^-]+)-bench-results\.csv', filename)
+                  branch = branch_match.group(1) if branch_match else "unknown"
+
                   df = pd.read_csv(file)
-                  # Add version column if not present
-                  if 'version' not in df.columns:
-                      df['version'] = version
-
+                  # Add branch column if not present
+                  if 'branch' not in df.columns:
+                      df['branch'] = branch
+
                   dfs.append(df)
-                  print(f"Processed {file} with version {version}")
+                  print(f"Processed {file} with branch {branch}")
               except Exception as e:
                   print(f"Error processing {file}: {e}")
-
+
           if not dfs:
               print("No valid benchmark results found!")
               exit(1)
-
+
           combined_df = pd.concat(dfs)
           combined_df.to_csv('all_benchmark_results.csv', index=False)
           print(f"Combined {len(dfs)} benchmark results")
-
-          # Sort by version for proper ordering in plots
-          # Handle version strings like 4.0.0-beta.6
-          def version_key(v):
-              if isinstance(v, str):
-                  v = v.replace('v', '')
-                  parts = []
-                  for part in v.replace('-', '.').split('.'):
-                      try:
-                          parts.append(int(part))
-                      except ValueError:
-                          parts.append(part)
-                  return parts
-              return v
-
-          combined_df['version_sort'] = combined_df['version'].apply(version_key)
-          combined_df = combined_df.sort_values('version_sort')
-
+
           # Create plots for each metric
           metrics = ['QPS', 'Mean Latency', 'Recall@10']
           for metric in metrics:
               if metric not in combined_df.columns:
                   print(f"Warning: Metric {metric} not found in results")
                   continue
-
+
               plt.figure(figsize=(10, 6))
-
+
               for dataset, group in combined_df.groupby('dataset'):
-                  plt.plot(group['version'], group[metric], marker='o', label=dataset)
-
-              plt.title(f"{metric} Across JVector Versions")
-              plt.xlabel("Version")
+                  plt.plot(group['branch'], group[metric], marker='o', label=dataset)
+
+              plt.title(f"{metric} Across JVector Branches")
+              plt.xlabel("Branch")
               plt.ylabel(metric)
               plt.xticks(rotation=45)
               plt.grid(True, linestyle='--', alpha=0.7)
               plt.legend()
               plt.tight_layout()
-
+
               safe_metric = metric.replace('@', '_at_').replace(' ', '_')
               plt.savefig(f"{safe_metric}.png")
               print(f"Created plot for {metric}")
-
+
           # Create a summary markdown report
           with open('benchmark_report.md', 'w') as f:
-              f.write("# JVector Historical Benchmark Results\n\n")
-              f.write(f"Comparing {len(combined_df['version'].unique())} versions of JVector\n\n")
-
+              f.write("# JVector Branch Benchmark Comparison\n\n")
+              f.write(f"Comparing {len(combined_df['branch'].unique())} branches of JVector\n\n")
+
               f.write("## Summary Table\n\n")
               # Use to_markdown if available, otherwise use to_string
               try:
-                  table = combined_df[['version', 'dataset'] + [m for m in metrics if m in combined_df.columns]].to_markdown(index=False)
+                  table = combined_df[['branch', 'dataset'] + [m for m in metrics if m in combined_df.columns]].to_markdown(index=False)
               except AttributeError:
-                  table = combined_df[['version', 'dataset'] + [m for m in metrics if m in combined_df.columns]].to_string(index=False)
+                  table = combined_df[['branch', 'dataset'] + [m for m in metrics if m in combined_df.columns]].to_string(index=False)
               f.write(table)
-
+
               f.write("\n\n## Visualizations\n\n")
               for metric in metrics:
                   if metric not in combined_df.columns:
                       continue
                   safe_metric = metric.replace('@', '_at_').replace(' ', '_')
                   f.write(f"### {metric}\n\n")
                   f.write(f"![{metric}]({safe_metric}.png)\n\n")
-
+
           print("Created benchmark report")
           EOF
-
+
           python visualize.py

       - name: Upload combined results and visualizations
         uses: actions/upload-artifact@v4
         with:
-          name: benchmark-summary
+          name: benchmark-comparison-results
           path: |
+            benchmark_results/*.csv
+            benchmark_results/*.json
             all_benchmark_results.csv
             *.png
             benchmark_report.md
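
A minimal sketch of how the new branches input could be exercised once this change lands, assuming the workflow file keeps the run-bench.yml name referenced by the removed download-artifact step; the --ref branch and the branch list shown are illustrative values, not part of the diff:

    # Manually dispatch the benchmark workflow from the GitHub CLI,
    # passing the space-separated branch list (here, the workflow's default value).
    gh workflow run run-bench.yml --ref github_actions -f branches="github_actions main"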