# Performance regression test for the score director.
#
# - Runs entirely on a single machine.
# - The baseline is established first, then the branch under test is measured.
# - Each benchmark gives a 99.9 % confidence interval.
# - The confidence intervals are compared to determine if the branch under test is a regression or an improvement.
# - The benchmark error is expected to stay below +/- 2.5 %.
#   We have yet to see an error of over +/- 4 %.
#   With an error margin this wide, small regressions are not considered statistically significant.
name: Performance Regression Test - Score Director
29
# Triggered manually only; every input is required and has a default.
on:
  workflow_dispatch:
    inputs:
      # Version handed to actions/setup-java; quoted so it stays a string.
      jdk:
        description: 'JDK version'
        default: '21'
        required: true
      # Released solver version that the branch is compared against.
      baseline:
        description: 'Timefold Solver release'
        default: '1.14.0'
        required: true
      # Branch of timefold-solver to build and measure.
      branch:
        description: 'Branch to benchmark (needs to use 999-SNAPSHOT)'
        default: 'main'
        required: true
      # Owner of the timefold-solver repository that holds the branch.
      branch_owner:
        description: 'User owning the branch'
        default: 'TimefoldAI'
        required: true
      # Release of async-profiler to download; quoted so '3.0' is not read as a float.
      async_profiler_version:
        description: 'async-profiler version'
        default: '3.0'
        required: true
2233
jobs:

  # One job per example: measures the baseline release (phase 1), then the
  # branch under test (phase 2), and finally compares the two confidence
  # intervals and publishes a summary (phase 3).
  benchmark:
    runs-on: perf-linux-x64-2cores
    strategy:
      fail-fast: false  # Jobs fail if the benchmark error is over predefined thresholds; other benchmarks continue.
      matrix:
        example:
          - cloud_balancing
          - conference_scheduling
          - curriculum_course
          - examination
          - machine_reassignment
          - meeting_scheduling
          - nurse_rostering
          - patient_admission_scheduling
          - task_assigning
          - traveling_tournament
          - tsp
          - vehicle_routing
    # Repository credentials; the setup-java step below is given the *names* of these variables.
    env:
      MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}'
      MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}'
    steps:
      - name: Phase 0 - Checkout timefold-solver-benchmarks
        uses: actions/checkout@v4
        with:
          repository: TimefoldAI/timefold-solver-benchmarks
          path: ./timefold-solver-benchmarks

      - name: Phase 0 - Setup JDK and Maven
        uses: actions/setup-java@v4
        with:
          java-version: ${{ github.event.inputs.jdk }}
          distribution: 'temurin'
          cache: 'maven'
          server-id: 'timefold-solver-enterprise'
          server-username: 'MVN_USERNAME'
          server-password: 'MVN_PASSWORD'

      # The workflow input is passed through an environment variable instead of
      # being interpolated into the script text.
      - name: Phase 0 - Setup Async Profiler
        working-directory: ./timefold-solver-benchmarks
        env:
          PERF_ASYNC_PROFILER_VERSION: ${{ github.event.inputs.async_profiler_version }}
        run: |
          export FILENAME="async-profiler-${PERF_ASYNC_PROFILER_VERSION}-linux-x64.tar.gz"
          wget "https://github.com/async-profiler/async-profiler/releases/download/v${PERF_ASYNC_PROFILER_VERSION}/$FILENAME"
          tar -xzf "$FILENAME"
          ls -l

      # Fine-tuned for stability on GHA.
      - name: Phase 0 - Configure the benchmark
        working-directory: ./timefold-solver-benchmarks
        shell: bash
        run: |
          echo "forks=20" > scoredirector-benchmark.properties
          echo "warmup_iterations=10" >> scoredirector-benchmark.properties
          echo "measurement_iterations=5" >> scoredirector-benchmark.properties
          echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties
          echo "score_director_type=cs" >> scoredirector-benchmark.properties
          echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties
          cat scoredirector-benchmark.properties
          chmod +x run-scoredirector.sh

      # Builds the benchmarks against the released baseline version of the solver.
      - name: Phase 1 - Compile the benchmark
        working-directory: ./timefold-solver-benchmarks
        shell: bash
        env:
          PERF_BASELINE: ${{ github.event.inputs.baseline }}
          PERF_ASYNC_PROFILER_VERSION: ${{ github.event.inputs.async_profiler_version }}
        run: mvn clean install -B -Dquickly -Dversion.ai.timefold.solver="$PERF_BASELINE" -Dversion.tools.provider="$PERF_ASYNC_PROFILER_VERSION"

      # Publishes the rounded confidence interval and score as step outputs.
      # RUN_ID carries the same value the results directory is named after.
      - name: Phase 1 - Run the baseline configuration
        working-directory: ./timefold-solver-benchmarks
        id: benchmark_baseline
        env:
          RUN_ID: ${{ github.event.inputs.baseline }}
        shell: bash
        run: |
          ./run-scoredirector.sh
          RESULTS="results/scoredirector/$RUN_ID/results.json"
          echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' "$RESULTS")" >> "$GITHUB_OUTPUT"
          echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' "$RESULTS")" >> "$GITHUB_OUTPUT"
          echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' "$RESULTS")" >> "$GITHUB_OUTPUT"

      - name: Phase 2 - Checkout timefold-solver
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.branch_owner }}/timefold-solver
          ref: ${{ github.event.inputs.branch }}
          path: ./timefold-solver

      - name: Phase 2 - Quickly build timefold-solver
        working-directory: ./timefold-solver
        shell: bash
        run: mvn -B -Dquickly clean install

      # Clone timefold-solver-enterprise
      # First try the branch of the same name; continue-on-error lets the
      # fallback step below check out main when that attempt does not succeed.
      - name: Phase 2 - Checkout timefold-solver-enterprise (Specified)
        id: checkout-solver-enterprise
        uses: actions/checkout@v4
        continue-on-error: true
        with:
          repository: TimefoldAI/timefold-solver-enterprise
          ref: ${{ github.event.inputs.branch }}
          token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }}
          path: ./timefold-solver-enterprise
      - name: Phase 2 - Checkout timefold-solver-enterprise (Fallback)
        if: steps.checkout-solver-enterprise.outcome != 'success'
        uses: actions/checkout@v4
        with:
          repository: TimefoldAI/timefold-solver-enterprise
          ref: main
          token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }}
          path: ./timefold-solver-enterprise

      - name: Phase 2 - Quickly build timefold-solver-enterprise
        working-directory: ./timefold-solver-enterprise
        shell: bash
        run: mvn -B -Dquickly clean install

      # No solver version override here.
      # NOTE(review): presumably this resolves to the 999-SNAPSHOT artifacts installed by the two builds above - confirm.
      - name: Phase 2 - Compile the benchmarks
        working-directory: ./timefold-solver-benchmarks
        shell: bash
        env:
          PERF_ASYNC_PROFILER_VERSION: ${{ github.event.inputs.async_profiler_version }}
        run: mvn clean install -B -Dquickly -Dversion.tools.provider="$PERF_ASYNC_PROFILER_VERSION"

      # Same measurement as phase 1, this time for the branch under test.
      - name: Phase 2 - Run the benchmark on the new code
        id: benchmark_new
        working-directory: ./timefold-solver-benchmarks
        env:
          RUN_ID: ${{ github.event.inputs.branch }}
        shell: bash
        run: |
          ./run-scoredirector.sh
          RESULTS="results/scoredirector/$RUN_ID/results.json"
          echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' "$RESULTS")" >> "$GITHUB_OUTPUT"
          echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' "$RESULTS")" >> "$GITHUB_OUTPUT"
          echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' "$RESULTS")" >> "$GITHUB_OUTPUT"

      - name: Phase 3 - Archive benchmark data
        uses: actions/upload-artifact@v4
        with:
          name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }}
          path: |
            ./timefold-solver-benchmarks/results/scoredirector

      # Compares the two confidence intervals (operations per second, higher is
      # better), writes the verdict and a table to the step summary, and fails
      # the job only on a statistically significant regression.
      - name: Phase 3 - Report results
        working-directory: ./timefold-solver-benchmarks
        env:
          OLD_RANGE_START: ${{ steps.benchmark_baseline.outputs.RANGE_START }}
          OLD_RANGE_MID: ${{ steps.benchmark_baseline.outputs.RANGE_MID }}
          OLD_RANGE_END: ${{ steps.benchmark_baseline.outputs.RANGE_END }}
          NEW_RANGE_START: ${{ steps.benchmark_new.outputs.RANGE_START }}
          NEW_RANGE_MID: ${{ steps.benchmark_new.outputs.RANGE_MID }}
          NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }}
          PERF_BASELINE: ${{ github.event.inputs.baseline }}
          PERF_BRANCH: ${{ github.event.inputs.branch }}
          PERF_BRANCH_OWNER: ${{ github.event.inputs.branch_owner }}
        shell: bash
        run: |
          # Without this guard, missing or zero values make every comparison below
          # fail and the run is misreported as a statistically significant regression.
          for value in "$OLD_RANGE_START" "$OLD_RANGE_MID" "$OLD_RANGE_END" "$NEW_RANGE_START" "$NEW_RANGE_MID" "$NEW_RANGE_END"; do
            if ! [[ "$value" =~ ^[1-9][0-9]*$ ]]; then
              echo "### Benchmark results are missing or invalid" >> "$GITHUB_STEP_SUMMARY"
              exit 1
            fi
          done

          # Multiply before dividing: with scale=2, dividing first would truncate the
          # ratio to two decimals and leave the percentage with whole-percent resolution.
          export DIFF_START=$(echo "scale=2; $OLD_RANGE_START * 100 / $NEW_RANGE_START" | bc)
          export DIFF_MID=$(echo "scale=2; $OLD_RANGE_MID * 100 / $NEW_RANGE_MID" | bc)
          export DIFF_END=$(echo "scale=2; $OLD_RANGE_END * 100 / $NEW_RANGE_END" | bc)
          export FAIL=false

          if (( $(echo "$DIFF_MID >= 98.00" | bc -l) && $(echo "$DIFF_MID <= 102.00" | bc -l) )); then
            # Ignore differences of up to 2 %.
            echo "### Performance unchanged" >> "$GITHUB_STEP_SUMMARY"
            echo "(Decided to ignore a very small difference of under 2 %.)" >> "$GITHUB_STEP_SUMMARY"
          else
            if [ "$NEW_RANGE_START" -le "$OLD_RANGE_END" ] && [ "$NEW_RANGE_END" -ge "$OLD_RANGE_START" ]; then
              # The intervals overlap, so nothing is statistically significant.
              if [ "$NEW_RANGE_START" -ge "$OLD_RANGE_MID" ]; then
                # The whole new interval sits at or above the old mean.
                echo "### 🍀 Possible improvement 🍀" >> "$GITHUB_STEP_SUMMARY"
              elif [ "$NEW_RANGE_END" -le "$OLD_RANGE_MID" ]; then
                # The whole new interval sits at or below the old mean.
                echo "### ⚠️ Possible regression ⚠️" >> "$GITHUB_STEP_SUMMARY"
              else
                echo "### Performance unchanged " >> "$GITHUB_STEP_SUMMARY"
              fi
            elif [ "$NEW_RANGE_START" -gt "$OLD_RANGE_END" ]; then
              echo "### 🚀🚀🚀 Statistically significant improvement 🚀🚀🚀" >> "$GITHUB_STEP_SUMMARY"
            else
              echo "### ‼️‼️‼️ Statistically significant regression ‼️‼️‼️" >> "$GITHUB_STEP_SUMMARY"
              export FAIL=true
            fi
          fi

          echo "| | **Ref** | **Min** | **Mean** | **Max** |" >> "$GITHUB_STEP_SUMMARY"
          echo "|:------:|:-----------:|:-----------------:|:-----------------:|:-----------------:|" >> "$GITHUB_STEP_SUMMARY"
          echo "| _Old_ | [v${PERF_BASELINE}](https://github.com/TimefoldAI/timefold-solver/releases/tag/v${PERF_BASELINE}) | ${OLD_RANGE_START} | ${OLD_RANGE_MID} | ${OLD_RANGE_END} |" >> "$GITHUB_STEP_SUMMARY"
          echo "| _New_ | [${PERF_BRANCH_OWNER}'s ${PERF_BRANCH}](https://github.com/${PERF_BRANCH_OWNER}/timefold-solver/tree/${PERF_BRANCH}) | ${NEW_RANGE_START} | ${NEW_RANGE_MID} | ${NEW_RANGE_END} |" >> "$GITHUB_STEP_SUMMARY"
          echo "| _Diff_ | | ${DIFF_START} % | ${DIFF_MID} % | ${DIFF_END} % |" >> "$GITHUB_STEP_SUMMARY"

          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "Min and max define a 99.9 % confidence interval." >> "$GITHUB_STEP_SUMMARY"
          echo "Min and max are in operations per second. Higher is better." >> "$GITHUB_STEP_SUMMARY"
          echo "Diff under 100 % represents an improvement, over 100 % a regression." >> "$GITHUB_STEP_SUMMARY"

          if [ "$FAIL" = true ]; then
            exit 1
          fi
0 commit comments