1- # - Runs entirely on a single machine.
1+ # - Runs entirely on a single machine, a self-hosted runner on GitHub Actions.
22# - The baseline is established first, then the branch under test is measured.
33# - Each benchmark gives a 99.9 % confidence interval.
44# - The confidence intervals are compared to determine if the branch under test is a regression or an improvement.
5- # - The error threshold is expected to be below +/- 2.5 %.
6- # We have yet to see an error of over +/- 4 %.
7- # With the error so high, the impact is that small regressions are not considered statistically significant.
5+ # - The error threshold is expected to be below +/- 2.0 %.
86name : Performance Regression Test - Score Director
97
108on :
3836 strategy :
3937 fail-fast : false # Jobs fail if the benchmark error is over predefined thresholds; other benchmarks continue.
4038 matrix :
41- example : [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, patient_admission_scheduling, task_assigning, traveling_tournament, tsp, vehicle_routing]
39+ # Meeting Scheduling and Vehicle Routing run longer than the other benchmarks (due to setup costs).
40+ # In the interest of fair CPU use distribution across all the benchmarks, we let them run first.
41+ example : [meeting_scheduling, vehicle_routing, cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, nurse_rostering, patient_admission_scheduling, task_assigning, traveling_tournament, tsp]
4242 env :
4343 MVN_USERNAME : ' ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}'
4444 MVN_PASSWORD : ' ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}'
7272 working-directory : ./timefold-solver-benchmarks
7373 shell : bash
7474 run : |
75- echo "forks=20 " > scoredirector-benchmark.properties
76- echo "warmup_iterations=10 " >> scoredirector-benchmark.properties
77- echo "measurement_iterations=10 " >> scoredirector-benchmark.properties
75+ echo "forks=10 " > scoredirector-benchmark.properties
76+ echo "warmup_iterations=5 " >> scoredirector-benchmark.properties
77+ echo "measurement_iterations=5 " >> scoredirector-benchmark.properties
7878 echo "relative_score_error_threshold=0.02" >> scoredirector-benchmark.properties
7979 echo "score_director_type=cs" >> scoredirector-benchmark.properties
8080 echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties
@@ -169,6 +169,8 @@ jobs:
169169 NEW_RANGE_END : ${{ steps.benchmark_new.outputs.RANGE_END }}
170170 shell : bash
171171 run : |
# Derive the percentages shown in the step summary from the benchmark ranges.
# Inputs (OLD_/NEW_ RANGE_START, RANGE_MID, RANGE_END) come from the step's env.
#
# bc truncates every division to `scale` decimals. Dividing first and then
# multiplying by 100 therefore discards everything below a whole percent
# (e.g. 1015.7 / 1000 -> 1.01 -> 101.00). Multiplying by 100 *before* dividing
# keeps two meaningful decimals in the final percentage (-> 101.57).
#
# *_DEV: how far the mid value lies above the range start, in percent of the range start.
# Note: bc prints values below 1 without a leading zero (e.g. ".57").
export OLD_DEV=$(echo "scale=2; ($OLD_RANGE_MID * 100 / $OLD_RANGE_START) - 100" | bc)
export NEW_DEV=$(echo "scale=2; ($NEW_RANGE_MID * 100 / $NEW_RANGE_START) - 100" | bc)
# DIFF_*: old value expressed as a percentage of the new value.
export DIFF_START=$(echo "scale=2; $OLD_RANGE_START * 100 / $NEW_RANGE_START" | bc)
export DIFF_MID=$(echo "scale=2; $OLD_RANGE_MID * 100 / $NEW_RANGE_MID" | bc)
export DIFF_END=$(echo "scale=2; $OLD_RANGE_END * 100 / $NEW_RANGE_END" | bc)
@@ -195,15 +197,15 @@ jobs:
195197 fi
196198 fi
197199
198- echo "| | **Ref** | **Min** | ** Mean** | **Max** |" >> $GITHUB_STEP_SUMMARY
199- echo "|:------:|:-----------:|:-----------------:|:-----------------:|:-----------------:| " >> $GITHUB_STEP_SUMMARY
200- echo "| _Old_ | [v${{ github.event.inputs.baseline }}](https://github.com/TimefoldAI/timefold-solver/releases/tag/v${{ github.event.inputs.baseline }}) | ${OLD_RANGE_START} | ${ OLD_RANGE_MID} | ${OLD_RANGE_END} |" >> $GITHUB_STEP_SUMMARY
201- echo "| _New_ | [${{ github.event.inputs.branch_owner }}'s ${{ github.event.inputs.branch }}](https://github.com/${{ github.event.inputs.branch_owner }}/timefold-solver/tree/${{ github.event.inputs.branch }}) | ${NEW_RANGE_START} | ${ NEW_RANGE_MID} | ${NEW_RANGE_END} |" >> $GITHUB_STEP_SUMMARY
202- echo "| _Diff_ | | ${DIFF_START} % | ${DIFF_MID} % | ${DIFF_END } % |" >> $GITHUB_STEP_SUMMARY
200+ echo "| | **Ref** | **Mean** |" >> $GITHUB_STEP_SUMMARY
201+ echo "|:------:|:-----------:|:-----------------:|" >> $GITHUB_STEP_SUMMARY
202+ echo "| _Old_ | [v${{ github.event.inputs.baseline }}](https://github.com/TimefoldAI/timefold-solver/releases/tag/v${{ github.event.inputs.baseline }}) | ${OLD_RANGE_MID} ± ${OLD_DEV} % |" >> $GITHUB_STEP_SUMMARY
203+ echo "| _New_ | [${{ github.event.inputs.branch_owner }}'s ${{ github.event.inputs.branch }}](https://github.com/${{ github.event.inputs.branch_owner }}/timefold-solver/tree/${{ github.event.inputs.branch }}) | ${NEW_RANGE_MID} ± ${NEW_DEV} % |" >> $GITHUB_STEP_SUMMARY
204+ echo "| _Diff_ | | ${DIFF_MID} % |" >> $GITHUB_STEP_SUMMARY
203205
204206 echo "" >> $GITHUB_STEP_SUMMARY
205- echo "Min and max define a 99.9 % confidence interval ." >> $GITHUB_STEP_SUMMARY
206- echo "Min and max are in operations per second. Higher is better ." >> $GITHUB_STEP_SUMMARY
207+ echo "Mean is in operations per second. Higher is better ." >> $GITHUB_STEP_SUMMARY
208+ echo "Mean ± X % describes a 99.9 % confidence interval ." >> $GITHUB_STEP_SUMMARY
207209 echo "Diff under 100 % represents an improvement, over 100 % a regression." >> $GITHUB_STEP_SUMMARY
208210
209211 if [ "$FAIL" = true ]; then
0 commit comments