Skip to content

Commit 817a25c

Browse files
authored
GitHub actions regression test (#499)
* Squash: consolidated changes to implement automated regression testing * diagnostic classes for benchmark testing * adding stdev and error bars to QPS * fixed to have correct datasets * pick up latest changes to viz script * do not fail workflow if viz files are not available * remove cleartext hash and use secret instead
1 parent 8364012 commit 817a25c

File tree

21 files changed

+3063
-47
lines changed

21 files changed

+3063
-47
lines changed

.github/workflows/run-bench.yml

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
name: Run Bench Main

# Triggers: manual dispatch (with optional inputs) and pull requests that
# target main.
on:
  workflow_dispatch:
    inputs:
      # NOTE(review): no step in this workflow reads `benchmark_config`;
      # confirm whether it should be forwarded to the benchmark command.
      benchmark_config:
        description: 'Benchmark dataset regex (leave empty for all)'
        required: false
        default: ''
      # Consumed by the generate-matrix job, which splits it on whitespace.
      branches:
        description: 'Space-separated list of branches to benchmark'
        required: false
        default: 'main'
  pull_request:
    branches:
      - main

jobs:
19+
# Job to generate the matrix configuration.
# Publishes one output, `matrix`: a JSON object with fixed `jdk` and `isa`
# axes plus a `branch` axis chosen from the triggering event:
#   - pull_request:       main and the PR head branch
#   - workflow_dispatch:  the whitespace-separated `branches` input
#   - anything else:      main only
generate-matrix:
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    - name: Generate matrix
      id: set-matrix
      # Branch names and dispatch inputs are untrusted text. They are handed
      # to the script through environment variables rather than expanded
      # into the script body, so they can never be parsed as shell code.
      env:
        EVENT_NAME: ${{ github.event_name }}
        HEAD_REF: ${{ github.head_ref }}
        BRANCHES_INPUT: ${{ github.event.inputs.branches }}
      run: |
        # Print event information for debugging
        echo "Event name: $EVENT_NAME"
        echo "Branches input: '$BRANCHES_INPUT'"

        # Default branches based on event type
        if [[ "$EVENT_NAME" == "pull_request" ]]; then
          echo "Pull request detected. Using main and PR branch: $HEAD_REF"
          # jq takes care of JSON string escaping for the branch name
          BRANCHES=$(jq -cn --arg head "$HEAD_REF" '["main", $head]')
        elif [[ "$EVENT_NAME" == "workflow_dispatch" && -n "$BRANCHES_INPUT" ]]; then
          # Parse space-separated branches input into JSON array
          echo "Workflow dispatch with branches input detected"
          # `read -a` splits on whitespace without glob-expanding the words
          read -r -a branch_list <<< "$BRANCHES_INPUT"
          for branch in "${branch_list[@]}"; do
            echo "Adding branch to matrix: $branch"
          done
          BRANCHES=$(jq -cn '$ARGS.positional' --args "${branch_list[@]}")
          # A whitespace-only input would otherwise yield an empty matrix
          if [[ "$BRANCHES" == "[]" ]]; then
            echo "No branch names found in input. Using main branch only"
            BRANCHES='["main"]'
          fi
        else
          echo "Default event type. Using main branch only"
          BRANCHES='["main"]'
        fi

        echo "Generated branches matrix: $BRANCHES"
        echo "matrix={\"jdk\":[24],\"isa\":[\"isa-avx512f\"],\"branch\":$BRANCHES}" >> "$GITHUB_OUTPUT"
56+
57+
# Builds and benchmarks every branch of the generated matrix on a runner
# selected by the matrix `isa` label, then uploads that branch's result files
# as its own artifact.
test-avx512:
  needs: generate-matrix
  concurrency:
    # One run at a time per runner label / JDK / branch; queued runs wait
    # instead of cancelling the one in progress.
    group: ${{ matrix.isa }}-${{ matrix.jdk }}-${{ matrix.branch }}
    cancel-in-progress: false
  strategy:
    matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}
  runs-on: ${{ matrix.isa }}
  steps:
    - name: verify-avx512
      run: |
        # avx2 is included just for illustration
        # NOTE(review): the list previously ended in `avx512v`, which is not
        # a CPU flag name and only passed by substring-matching flags such
        # as avx512vl. Assumed `avx512vl` was intended - confirm.
        required="avx2 avx512f avx512cd avx512bw avx512dq avx512vl"
        printf "required ISA feature flags: %s\n" "${required}"
        flags="$(lscpu|grep '^Flags'|cut -d: -f2)"
        output=""
        for flag in ${required} ; do
          # Both sides are space-padded so only whole flag names match
          if [[ " $flags " == *" ${flag} "* ]]
          then output="${output} $flag(OK)"
          else output="${output} $flag(FAIL)"
          fi ; done
        # Deliberately unquoted: word splitting prints one result per line
        printf "%s\n" ${output}
        if [[ " $output " == *"FAIL"* ]] ; then exit 2 ; fi

    - name: Set up GCC
      run: |
        sudo apt install -y gcc

    # Initial checkout of the workflow's default ref. The matrix branch is
    # checked out further down, after the JDK is installed.
    - uses: actions/checkout@v4

    - name: Set up JDK ${{ matrix.jdk }}
      uses: actions/setup-java@v3
      with:
        java-version: ${{ matrix.jdk }}
        distribution: temurin
        cache: maven

    # Reads the first <version> element of pom.xml and substitutes
    # ${revision} with the first <revision> element when present.
    # NOTE(review): this runs before the matrix branch is checked out, so it
    # reports the version of the initial checkout; no later step in this
    # workflow reads the `version` output.
    - name: Get version from pom.xml
      id: get-version
      run: |
        VERSION=$(grep -o '<version>[^<]*</version>' pom.xml | head -1 | sed 's/<version>\(.*\)<\/version>/\1/')
        if [[ "$VERSION" == *'${revision}'* ]]; then
          REVISION=$(grep -o '<revision>[^<]*</revision>' pom.xml | head -1 | sed 's/<revision>\(.*\)<\/revision>/\1/')
          if [ -n "$REVISION" ]; then
            VERSION=${VERSION//\$\{revision\}/$REVISION}
          fi
        fi
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
        echo "Current branch has version $VERSION"

    # Print debug information about the current job
    - name: Print job information
      env:
        # Passed via env so the branch name is never parsed as shell code
        BRANCH: ${{ matrix.branch }}
      run: |
        echo "Running benchmark for:"
        echo " - Branch: $BRANCH"
        echo " - JDK: ${{ matrix.jdk }}"
        echo " - ISA: ${{ matrix.isa }}"

    # Checkout the branch specified in the matrix
    # NOTE(review): for pull requests the branch is github.head_ref; confirm
    # this resolves for PRs opened from forks.
    - name: Checkout branch
      uses: actions/checkout@v4
      with:
        ref: ${{ matrix.branch }}
        fetch-depth: 0

    # Create a directory to store benchmark results
    - name: Create results directory
      run: mkdir -p benchmark_results

    # Build the branch
    - name: Build branch
      run: mvn -B -Punix-amd64-profile package --file pom.xml

    # Branch names may contain '/' and other characters that are unsafe in
    # file names and rejected in artifact names. Every character outside
    # [A-Za-z0-9._-] is replaced with '-'; plain names such as `main` are
    # unchanged.
    - name: Derive file-safe branch name
      id: safe-branch
      env:
        BRANCH: ${{ matrix.branch }}
      run: |
        echo "name=${BRANCH//[^A-Za-z0-9._-]/-}" >> "$GITHUB_OUTPUT"

    # Run the benchmark if jvector-examples exists and its jar was built
    - name: Run benchmark
      env:
        DATASET_HASH: ${{ secrets.DATASETS_KEYPATH }}
        EVENT_NAME: ${{ github.event_name }}
        BRANCH: ${{ matrix.branch }}
        RESULT_PREFIX: ${{ steps.safe-branch.outputs.name }}-bench-results
      run: |
        # Skip without failing when the branch has no jvector-examples directory
        if [ ! -d "jvector-examples" ]; then
          echo "Warning: jvector-examples directory not found in branch $BRANCH. Skipping benchmark."
          exit 0
        fi

        # Check if the jar with dependencies was built
        shopt -s nullglob
        jars=(jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar)
        shopt -u nullglob
        if [ "${#jars[@]}" -eq 0 ]; then
          echo "Warning: No jar with dependencies found in branch $BRANCH. Skipping benchmark."
          exit 0
        fi

        # Determine available memory and set heap size to half of it
        TOTAL_MEM_GB=$(free -g | awk '/^Mem:/ {print $2}')
        # Ensure we have a valid number, default to 16GB total (8GB heap) if detection fails
        if [[ -z "$TOTAL_MEM_GB" ]] || [[ "$TOTAL_MEM_GB" -le 0 ]]; then
          echo "Warning: Could not detect memory size, defaulting to 16GB total memory (8GB heap)"
          TOTAL_MEM_GB=16
        fi
        HALF_MEM_GB=$((TOTAL_MEM_GB / 2))
        # Ensure minimum heap size of 1GB
        if [[ "$HALF_MEM_GB" -lt 1 ]]; then
          HALF_MEM_GB=1
        fi
        echo "Total memory: ${TOTAL_MEM_GB}GB, using ${HALF_MEM_GB}GB for Java heap"

        # The JVM is pointed at this directory for heap dumps; make sure it exists
        mkdir -p /tmp/heap_dump

        # Pull requests pass the extra `dpr-1M` argument (presumably limiting
        # the run to that dataset - confirm against AutoBenchYAML); other
        # events pass only the output prefix.
        bench_args=(--output "$RESULT_PREFIX")
        if [[ "$EVENT_NAME" == "pull_request" ]]; then
          bench_args+=(dpr-1M)
        fi

        # Run the benchmark
        echo "Running benchmark for branch $BRANCH"
        # Only the first matching jar is put on the classpath; additional
        # matches would otherwise be parsed as the main class and arguments.
        java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
          ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
          -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
          -cp "${jars[0]}" io.github.jbellis.jvector.example.AutoBenchYAML "${bench_args[@]}"

        # Move the results to the benchmark_results directory (best effort)
        mv "${RESULT_PREFIX}.csv" benchmark_results/ || true
        mv "${RESULT_PREFIX}.json" benchmark_results/ || true

        echo "Completed benchmarks for branch: $BRANCH"

    - name: Upload Individual Benchmark Results
      uses: actions/upload-artifact@v4
      with:
        # Uses the sanitized branch name: artifact names may not contain '/'
        name: benchmark-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}-${{ steps.safe-branch.outputs.name }}
        path: |
          benchmark_results/*.csv
          benchmark_results/*.json
        if-no-files-found: warn
187+
188+
# Job to combine results and create visualizations.
# Collects every per-branch benchmark artifact, feeds the CSV files to
# visualize_benchmarks.py and uploads the generated report directory.
combine-results:
  needs: test-avx512
  runs-on: ubuntu-latest
  steps:
    # Checkout runs before the artifact download: actions/checkout clears
    # the workspace when it sets up the repository, which would remove
    # artifacts that had already been downloaded into it.
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Download all benchmark results
      uses: actions/download-artifact@v4
      with:
        pattern: benchmark-results-*
        path: all-benchmark-results
        merge-multiple: true

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.x'

    - name: Install Python Dependencies
      run: |
        python -m pip install --upgrade pip
        pip install matplotlib numpy psutil

    - name: Generate visualization using visualize_benchmarks.py
      run: |
        # Discover all downloaded CSV benchmark result files
        # (nullglob makes an unmatched pattern expand to an empty array)
        shopt -s globstar nullglob
        files=(all-benchmark-results/**/*-bench-results.csv)
        if [ ${#files[@]} -eq 0 ]; then
          echo "No benchmark results found in all-benchmark-results. Searching repo as fallback..."
          files=(**/*-bench-results.csv)
        fi
        echo "Found ${#files[@]} CSV files"
        for f in "${files[@]}"; do echo " - $f"; done

        # Check if any files were found
        if [ ${#files[@]} -eq 0 ]; then
          echo "No benchmark result files found. Skipping visualization generation."
          echo "This can happen when benchmarks are skipped due to missing dependencies or other issues."
          # Create a placeholder so the artifact upload below has content
          mkdir -p benchmark_reports
          echo "No benchmark results were available for visualization." > benchmark_reports/no_results.txt
          exit 0
        fi

        # Must match the directory uploaded by the next step
        OUTPUT_DIR="benchmark_reports"

        # Run the visualization script with all files; no threshold option
        # is passed, so the script's own default applies
        python visualize_benchmarks.py --output-dir "$OUTPUT_DIR" "${files[@]}"

    - name: Upload visualization artifacts
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-comparison-results
        path: |
          benchmark_reports/**
        retention-days: 90

0 commit comments

Comments
 (0)