# Write scoredirector-benchmark.properties one key=value pair per line: the first echo
# uses ">" and so starts the file afresh, every later echo appends with ">>", and the
# closing cat prints the finished file.
# NOTE(review): the threshold value is written with a trailing space ("0.025 "); confirm
# that whatever reads this file trims property values.
7373 echo "forks=15" > scoredirector-benchmark.properties
7474 echo "warmup_iterations=5" >> scoredirector-benchmark.properties
7575 echo "measurement_iterations=15" >> scoredirector-benchmark.properties
76- echo "relative_score_error_threshold=0.025C " >> scoredirector-benchmark.properties
76+ echo "relative_score_error_threshold=0.025 " >> scoredirector-benchmark.properties
7777 echo "score_director_type=cs" >> scoredirector-benchmark.properties
7878 echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties
7979 cat scoredirector-benchmark.properties
9292 shell : bash
9393 run : |
# Run the benchmark script, then publish the two bounds of the first result's
# primaryMetric.scoreConfidence (read with jq from the baseline's results.json) as the
# RANGE_START and RANGE_END entries appended to "$GITHUB_OUTPUT".
# NOTE(review): the github.event.inputs.baseline expression is placed directly in the
# script text; presumably it should be passed through an environment variable and quoted
# instead, so that an unusual input value cannot alter the command - confirm.
9494 ./run-scoredirector.sh
95+ # The benchmark gives the 99.9 % confidence interval.
9596 echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT"
9697 echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT"
9798
@@ -144,6 +145,7 @@ jobs:
144145 shell : bash
145146 run : |
# Run the benchmark script, then publish the two bounds of the first result's
# primaryMetric.scoreConfidence (read with jq from the branch's results.json) as the
# RANGE_START and RANGE_END entries appended to "$GITHUB_OUTPUT".
# NOTE(review): the github.event.inputs.branch expression is placed directly in the
# script text; presumably it should be passed through an environment variable and quoted
# instead, so that an unusual input value cannot alter the command - confirm.
146147 ./run-scoredirector.sh
148+ # The benchmark gives the 99.9 % confidence interval.
147149 echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT"
148150 echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT"
149151
@@ -163,7 +165,16 @@ jobs:
163165 NEW_RANGE_END : ${{ steps.benchmark_new.outputs.RANGE_END }}
164166 shell : bash
165167 run : |
166- echo "OLD_RANGE_START=$OLD_RANGE_START"
167- echo "OLD_RANGE_END=$OLD_RANGE_END"
168- echo "NEW_RANGE_START=$NEW_RANGE_START"
169- echo "NEW_RANGE_END=$NEW_RANGE_END"
# Compare the baseline and the new benchmark result and report whether the
# difference between them is statistically significant.
#
# The bounds are produced by jq as JSON numbers, which normally carry a decimal
# point (and may use exponent notation). The `[ ... -le ... ]` family of tests
# only accepts integers: on such input every test fails with "integer
# expression expected", so control always reached the final `else` and every
# run was reported as a regression. The comparison is delegated to awk, which
# compares floating-point values.

#######################################
# Prints both confidence intervals followed by a verdict.
# Arguments:
#   $1 - lower bound of the baseline interval
#   $2 - upper bound of the baseline interval
#   $3 - lower bound of the new interval
#   $4 - upper bound of the new interval
# Outputs:
#   The report on stdout; diagnostics on stderr.
# Returns:
#   0 if the intervals overlap, or the new one lies entirely above the baseline
#   1 if the new one lies entirely below the baseline (regression)
#   2 if any bound is not a number
#######################################
compare_confidence_intervals() {
  local old_start=${1:-} old_end=${2:-} new_start=${3:-} new_end=${4:-}
  # Plain decimals and exponent notation, e.g. 42, 12345.678, 1.2E4.
  local -r number_re='^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?$'
  local bound verdict

  echo "Baseline result with 99.9 % confidence: "
  echo " [$old_start, $old_end]"
  echo " New result with 99.9 % confidence: "
  echo " [$new_start, $new_end]"
  echo ""

  # Refuse to judge on missing or malformed values (empty, null, "NaN", ...)
  # rather than guessing a verdict.
  for bound in "$old_start" "$old_end" "$new_start" "$new_end"; do
    if ! [[ $bound =~ $number_re ]]; then
      printf 'Cannot compare results: "%s" is not a number.\n' "$bound" >&2
      return 2
    fi
  done

  # Adding 0 forces numeric (not string) comparison in awk.
  verdict=$(
    awk -v os="$old_start" -v oe="$old_end" \
        -v ns="$new_start" -v ne="$new_end" '
      BEGIN {
        os += 0; oe += 0; ns += 0; ne += 0
        if (ns <= oe && ne >= os) print "overlap"
        else if (ns > oe)         print "improvement"
        else                      print "regression"
      }'
  ) || return 2

  case "$verdict" in
    overlap)
      echo "Result is not statistically significant."
      ;;
    improvement)
      echo "Statistically significant improvement."
      ;;
    *)
      echo "Statistically significant regression."
      return 1
      ;;
  esac
}

# Keep this call as the last command of the step: its status (non-zero on a
# regression or on unusable input) becomes the exit status of the script.
compare_confidence_intervals \
  "${OLD_RANGE_START:-}" "${OLD_RANGE_END:-}" \
  "${NEW_RANGE_START:-}" "${NEW_RANGE_END:-}"
0 commit comments