ci: enable perf testing (#25)

triceo · web-flow · commit b4b21608c6f7 · 2024-10-03T18:42:18.000+02:00
diff --git a/.github/workflows/nightly_performance_score_director.yml b/.github/workflows/nightly_performance_score_director.yml
@@ -0,0 +1,31 @@
+name: Schedule a nightly run of the Score Director performance benchmark
+
+on:
+  schedule:
+    - cron: '59 23 * * 1-5' # Every workday at the end of the day (cron schedules are evaluated in UTC).
+
+jobs:
+  trigger:
+    runs-on: ubuntu-latest
+    permissions:
+      actions: write # Needed by the workflow token to dispatch another workflow.
+      contents: read
+    steps:
+      # The solver is checked out only so that its recent history can be inspected below.
+      - name: Checkout timefold-solver
+        uses: actions/checkout@v4
+        with:
+          repository: TimefoldAI/timefold-solver
+      - name: Schedule the other workflow
+        shell: bash
+        env:
+          GH_TOKEN: ${{ github.token }} # The gh CLI requires a token when run inside a workflow.
+        run: |
+          # Capture the log instead of piping into "grep -q"; with pipefail, an early grep exit may fail the pipeline.
+          if [ -n "$(git log --since="24 hours ago" --oneline)" ]; then
+            # The working directory is a timefold-solver clone, so the target repository has to be named explicitly.
+            echo '{}' | gh workflow run performance_score_director.yml --repo "$GITHUB_REPOSITORY" --json
+            echo "Launched nightly perf tests." >> "$GITHUB_STEP_SUMMARY"
+          else
+            # Don't waste money.
+            echo "No commits in the past 24 hours." >> "$GITHUB_STEP_SUMMARY"
+          fi
diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml
@@ -1,51 +1,211 @@
-name: Performance - Score Director
+# - Runs entirely on a single machine.
+# - The baseline is established first, then the branch under test is measured.
+# - Each benchmark gives a 99.9 % confidence interval.
+# - The confidence intervals are compared to determine if the branch under test is a regression or an improvement.
+# - The benchmark error is expected to stay below the threshold of +/- 2.5 %.
+#   We have yet to see an error of over +/- 4 %.
+#   With an error this high, small regressions are not considered statistically significant.
+name: Performance Regression Test - Score Director
 
 on:
   workflow_dispatch:
     inputs:
       jdk:
-        description: 'JDK version (17, 21, 23, ...)'
-        default: '23'
+        description: 'JDK version'
+        default: '21'
         required: true
       baseline:
         description: 'Timefold Solver release'
         default: '1.14.0'
         required: true
       branch:
-        description: 'Development branch to test against'
+        description: 'Branch to benchmark (needs to use 999-SNAPSHOT)'
         default: 'main'
         required: true
       branch_owner:
         description: 'User owning the branch'
         default: 'TimefoldAI'
         required: true
+      async_profiler_version:
+        description: 'async-profiler version'
+        default: '3.0'
+        required: true
 
 jobs:
 
-  test:
-    concurrency:
-      group: perf-score-director-${{ matrix.example }}
-      cancel-in-progress: true
-    runs-on: ubuntu-latest
+  benchmark:
+    runs-on: perf-linux-x64-2cores
     strategy:
+      fail-fast: false # Jobs fail if the benchmark error is over predefined thresholds; other benchmarks continue.
       matrix:
-        example: [cloudbalancing, conferencescheduling, curriculumcourse, examination, machinereassignment, meetingscheduling, nurserostering, pas, taskassigning, travelingtournament, tsp, vehiclerouting]
+        example: [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, patient_admission_scheduling, task_assigning, traveling_tournament, tsp, vehicle_routing]
+    env:
+      MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}'
+      MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}'
     steps:
-      - uses: sdkman/sdkman-action@v1
+      - name: Phase 0 - Checkout timefold-solver-benchmarks
+        uses: actions/checkout@v4
         with:
-          candidate: java
-          version: ${{ github.event.inputs.jdk }}-tem
-      - uses: actions/setup-java@v4
+          repository: TimefoldAI/timefold-solver-benchmarks
+          path: ./timefold-solver-benchmarks
+
+      - name: Phase 0 - Setup JDK and Maven
+        uses: actions/setup-java@v4
         with:
-          distribution: 'jdkfile'
           java-version: ${{ github.event.inputs.jdk }}
-          jdkFile: ${{ steps.sdkman.outputs.file }}
-      - name: Checkout timefold-solver-benchmarks
+          distribution: 'temurin'
+          cache: 'maven'
+          server-id: 'timefold-solver-enterprise'
+          server-username: 'MVN_USERNAME'
+          server-password: 'MVN_PASSWORD'
+
+      - name: Phase 0 - Setup Async Profiler
+        working-directory: ./timefold-solver-benchmarks
+        run: |
+          ARCHIVE="async-profiler-${{ github.event.inputs.async_profiler_version }}-linux-x64.tar.gz"
+          wget "https://github.com/async-profiler/async-profiler/releases/download/v${{ github.event.inputs.async_profiler_version }}/$ARCHIVE"
+          tar -xzf "$ARCHIVE"
+          ls -l  # List the directory contents in the job log.
+
+      # Writes scoredirector-benchmark.properties (values fine-tuned for stability on GHA), prints it and makes run-scoredirector.sh executable.
+      - name: Phase 0 - Configure the benchmark
+        working-directory: ./timefold-solver-benchmarks
+        shell: bash
+        run: |
+          echo "forks=20" > scoredirector-benchmark.properties
+          echo "warmup_iterations=10" >> scoredirector-benchmark.properties
+          echo "measurement_iterations=5" >> scoredirector-benchmark.properties
+          echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties
+          echo "score_director_type=cs" >> scoredirector-benchmark.properties
+          echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties
+          cat scoredirector-benchmark.properties
+          chmod +x run-scoredirector.sh
+
+      - name: Phase 1 - Compile the benchmark
+        working-directory: ./timefold-solver-benchmarks
+        shell: bash
+        run: mvn clean install -B -Dquickly -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}"
+
+      - name: Phase 1 - Run the baseline configuration
+        working-directory: ./timefold-solver-benchmarks
+        id: benchmark_baseline
+        env:
+          RUN_ID: ${{ github.event.inputs.baseline }} # Same value that names the results directory below; read via $RUN_ID so the raw input never becomes script text.
+        shell: bash
+        run: |
+          ./run-scoredirector.sh
+          echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' "results/scoredirector/$RUN_ID/results.json")" >> "$GITHUB_OUTPUT"
+          echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' "results/scoredirector/$RUN_ID/results.json")" >> "$GITHUB_OUTPUT"
+          echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' "results/scoredirector/$RUN_ID/results.json")" >> "$GITHUB_OUTPUT"
+
+      - name: Phase 2 - Checkout timefold-solver
         uses: actions/checkout@v4
         with:
-          repository: TimefoldAI/timefold-solver-benchmarks
-          path: ./timefold-solver-benchmarks
-      - name: Compile the benchmarks
+          repository: ${{ github.event.inputs.branch_owner }}/timefold-solver
+          ref: ${{ github.event.inputs.branch }}
+          path: ./timefold-solver
+
+      - name: Phase 2 - Quickly build timefold-solver
+        working-directory: ./timefold-solver
+        shell: bash
+        run: mvn -B -Dquickly clean install
+
+      # Clone timefold-solver-enterprise
+      - name: Phase 2 - Checkout timefold-solver-enterprise (Specified)
+        id: checkout-solver-enterprise
+        uses: actions/checkout@v4
+        continue-on-error: true
+        with:
+          repository: TimefoldAI/timefold-solver-enterprise
+          ref: ${{ github.event.inputs.branch }}
+          token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }}
+          path: ./timefold-solver-enterprise
+      - name: Phase 2 - Checkout timefold-solver-enterprise (Fallback)
+        if: steps.checkout-solver-enterprise.outcome != 'success'
+        uses: actions/checkout@v4
+        with:
+          repository: TimefoldAI/timefold-solver-enterprise
+          ref: main
+          token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }}
+          path: ./timefold-solver-enterprise
+
+      - name: Phase 2 - Quickly build timefold-solver-enterprise
+        working-directory: ./timefold-solver-enterprise
+        shell: bash
+        run: mvn -B -Dquickly clean install
+
+      - name: Phase 2 - Compile the benchmarks
+        working-directory: ./timefold-solver-benchmarks
+        shell: bash
+        run: mvn clean install -B -Dquickly -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}"
+
+      - name: Phase 2 - Run the benchmark on the new code
+        id: benchmark_new
+        working-directory: ./timefold-solver-benchmarks
+        env:
+          RUN_ID: ${{ github.event.inputs.branch }} # Same value that names the results directory below; read via $RUN_ID so the raw branch name never becomes script text.
+        shell: bash
+        run: |
+          ./run-scoredirector.sh
+          echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' "results/scoredirector/$RUN_ID/results.json")" >> "$GITHUB_OUTPUT"
+          echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' "results/scoredirector/$RUN_ID/results.json")" >> "$GITHUB_OUTPUT"
+          echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' "results/scoredirector/$RUN_ID/results.json")" >> "$GITHUB_OUTPUT"
+
+      - name: Phase 3 - Archive benchmark data
+        uses: actions/upload-artifact@v4
+        with:
+          name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }}
+          path: |
+            ./timefold-solver-benchmarks/results/scoredirector
+
+      - name: Phase 3 - Report results
         working-directory: ./timefold-solver-benchmarks
+        env:
+          OLD_RANGE_START: ${{ steps.benchmark_baseline.outputs.RANGE_START }}
+          OLD_RANGE_MID:   ${{ steps.benchmark_baseline.outputs.RANGE_MID }}
+          OLD_RANGE_END:   ${{ steps.benchmark_baseline.outputs.RANGE_END }}
+          NEW_RANGE_START: ${{ steps.benchmark_new.outputs.RANGE_START }}
+          NEW_RANGE_MID:   ${{ steps.benchmark_new.outputs.RANGE_MID }}
+          NEW_RANGE_END:   ${{ steps.benchmark_new.outputs.RANGE_END }}
         shell: bash
-        run: mvn clean install -Dai.timefold.solver.version=${{ github.event.inputs.baseline }}
+        run: |
+          # Old/new ratios in percent. Multiply before dividing: with scale=2, dividing first truncates the result to whole percents.
+          export DIFF_START=$(echo "scale=2; $OLD_RANGE_START * 100 / $NEW_RANGE_START" | bc)
+          export DIFF_MID=$(echo "scale=2; $OLD_RANGE_MID * 100 / $NEW_RANGE_MID" | bc)
+          export DIFF_END=$(echo "scale=2; $OLD_RANGE_END * 100 / $NEW_RANGE_END" | bc)
+          export FAIL=false
+          if (( $(echo "$DIFF_MID >= 98.00" | bc -l) && $(echo "$DIFF_MID <= 102.00"|bc -l) )); then
+            # Ignore differences of up to 2 %.
+            echo "### Performance unchanged" >> $GITHUB_STEP_SUMMARY
+            echo "(Decided to ignore a very small difference of under 2 %.)" >> $GITHUB_STEP_SUMMARY
+          else
+            if [ "$NEW_RANGE_START" -le "$OLD_RANGE_END" ] && [ "$NEW_RANGE_END" -ge "$OLD_RANGE_START" ]; then  # Intervals overlap: at most a "possible" change.
+              if [ "$NEW_RANGE_START" -ge "$OLD_RANGE_MID" ]; then  # New interval lies entirely in the upper half of the old one.
+                echo "### 🍀 Possible improvement 🍀" >> $GITHUB_STEP_SUMMARY
+              elif [ "$NEW_RANGE_END" -le "$OLD_RANGE_MID" ]; then  # New interval lies entirely in the lower half of the old one.
+                echo "### ⚠️ Possible regression ⚠️" >> $GITHUB_STEP_SUMMARY
+              else
+                echo "### Performance unchanged " >> $GITHUB_STEP_SUMMARY
+              fi
+            elif [ "$NEW_RANGE_START" -gt "$OLD_RANGE_END" ]; then
+              echo "### 🚀🚀🚀 Statistically significant improvement 🚀🚀🚀" >> $GITHUB_STEP_SUMMARY
+            else
+              echo "### ‼️‼️‼️ Statistically significant regression ‼️‼️‼️" >> $GITHUB_STEP_SUMMARY
+              export FAIL=true
+            fi
+          fi
+
+          echo "|        |   **Ref**   |      **Min**      |      **Mean**     |      **Max**      |" >> $GITHUB_STEP_SUMMARY
+          echo "|:------:|:-----------:|:-----------------:|:-----------------:|:-----------------:|" >> $GITHUB_STEP_SUMMARY
+          echo "|  _Old_ | [v${{ github.event.inputs.baseline }}](https://github.com/TimefoldAI/timefold-solver/releases/tag/v${{ github.event.inputs.baseline }}) | ${OLD_RANGE_START} | ${OLD_RANGE_MID} | ${OLD_RANGE_END} |" >> $GITHUB_STEP_SUMMARY
+          echo "|  _New_ | [${{ github.event.inputs.branch_owner }}'s ${{ github.event.inputs.branch }}](https://github.com/${{ github.event.inputs.branch_owner }}/timefold-solver/tree/${{ github.event.inputs.branch }}) | ${NEW_RANGE_START} | ${NEW_RANGE_MID} | ${NEW_RANGE_END} |" >> $GITHUB_STEP_SUMMARY
+          echo "| _Diff_ |             |  ${DIFF_START} %  |   ${DIFF_MID} %   |   ${DIFF_END} %   |" >> $GITHUB_STEP_SUMMARY
+
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "Min and max define a 99.9 % confidence interval." >> $GITHUB_STEP_SUMMARY
+          echo "Min and max are in operations per second. Higher is better." >> $GITHUB_STEP_SUMMARY
+          echo "Diff under 100 % represents an improvement, over 100 % a regression." >> $GITHUB_STEP_SUMMARY
+          # Only a statistically significant regression fails the job; the summary above is written either way.
+          if [ "$FAIL" = true ]; then
+              exit 1
+          fi
diff --git a/.github/workflows/turtle.yml b/.github/workflows/turtle.yml
@@ -2,7 +2,7 @@ name: Turtle Tests
 
 on:
   schedule:
-    - cron: '0 2 * * *' # Every day at 2am UTC
+    - cron: '0 3 * * *' # Every day at 3am UTC
 
 jobs:
   test:
diff --git a/pom.xml b/pom.xml
@@ -34,8 +34,8 @@
     <dependencyManagement>
         <dependencies>
             <dependency>
-                <groupId>ai.timefold.solver</groupId>
-                <artifactId>timefold-solver-build-parent</artifactId>
+                <groupId>ai.timefold.solver.enterprise</groupId>
+                <artifactId>timefold-solver-enterprise-build-parent</artifactId>
                 <version>${version.ai.timefold.solver}</version>
                 <type>pom</type>
                 <scope>import</scope>
@@ -58,7 +58,6 @@
         <dependency>
             <groupId>ai.timefold.solver.enterprise</groupId>
             <artifactId>timefold-solver-enterprise-core</artifactId>
-            <version>${version.ai.timefold.solver}</version>
         </dependency>
         <dependency>
             <groupId>ai.timefold.solver</groupId>
@@ -124,7 +123,6 @@
         <dependency>
             <groupId>ai.timefold.solver</groupId>
             <artifactId>timefold-solver-core</artifactId>
-            <version>${version.ai.timefold.solver}</version>
             <type>test-jar</type>
             <scope>test</scope>
         </dependency>
@@ -167,6 +165,13 @@
                 <version>3.13.0</version>
                 <configuration>
                     <release>${java.release}</release>
+                    <annotationProcessorPaths>
+                        <path>
+                            <groupId>org.openjdk.jmh</groupId>
+                            <artifactId>jmh-generator-annprocess</artifactId>
+                            <version>${jmh.version}</version>
+                        </path>
+                    </annotationProcessorPaths>
                 </configuration>
             </plugin>
             <plugin>
@@ -207,7 +212,7 @@
                     <dependency>
                         <groupId>ai.timefold.solver</groupId>
                         <artifactId>timefold-solver-ide-config</artifactId>
-                        <version>${project.version}</version>
+                        <version>${version.ai.timefold.solver}</version>
                     </dependency>
                 </dependencies>
                 <executions>
@@ -242,6 +247,18 @@
     </build>
 
     <profiles>
+        <profile>
+            <id>quickly</id>
+            <activation>
+                <property>
+                    <name>quickly</name>
+                </property>
+            </activation>
+            <properties>
+                <spotless.skip>true</spotless.skip>
+                <skipTests>true</skipTests>
+            </properties>
+        </profile>
         <profile>
             <id>jmh</id>
             <activation>
diff --git a/run-coldstart.sh b/run-coldstart.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 sudo -i sysctl kernel.perf_event_paranoid=1
 sudo -i sysctl kernel.kptr_restrict=0
-nohup taskset -c 0 java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.coldstart.Main > target/nohup.out 2>&1 &
+java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.coldstart.Main
diff --git a/run-scoredirector.sh b/run-scoredirector.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 sudo -i sysctl kernel.perf_event_paranoid=1
 sudo -i sysctl kernel.kptr_restrict=0
-nohup taskset -c 0 java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.scoredirector.Main > target/nohup.out 2>&1 &
+java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.scoredirector.Main
diff --git a/src/main/java/ai/timefold/solver/benchmarks/examples/pas/persistence/PatientAdmissionScheduleImporter.java b/src/main/java/ai/timefold/solver/benchmarks/examples/pas/persistence/PatientAdmissionScheduleImporter.java
@@ -147,9 +147,7 @@ private void readDepartmentListAndDepartmentSpecialismList() throws IOException
             List<Department> departmentList =
                     new ArrayList<>(departmentListSize);
             idToDepartmentMap = new HashMap<>(departmentListSize);
-            List<DepartmentSpecialism> departmentSpecialismList =
-                    new ArrayList<>(
-                            departmentListSize * 5);
+            List<DepartmentSpecialism> departmentSpecialismList = new ArrayList<>(departmentListSize * 5);
             long departmentSpecialismId = 0L;
             for (int i = 0; i < departmentListSize; i++) {
                 String line = bufferedReader.readLine();
@@ -229,8 +227,7 @@ private void readRoomListAndRoomSpecialismListAndRoomEquipmentList() throws IOEx
                 String line = bufferedReader.readLine();
                 String[] lineTokens = splitByPipelineAndTrim(line, 6);
                 String[] roomTokens = splitBySpace(lineTokens[0], 2);
-                Department department = idToDepartmentMap.get(
-                        Long.parseLong(lineTokens[2]));
+                Department department = idToDepartmentMap.get(Long.parseLong(lineTokens[2]));
                 Room room =
                         new Room(Long.parseLong(roomTokens[0]), roomTokens[1],
                                 department, Integer.parseInt(lineTokens[1]),
diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java b/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java
@@ -38,7 +38,6 @@
 import ai.timefold.solver.benchmarks.micro.coldstart.jmh.TimeToSolverFactoryBenchmark;
 import ai.timefold.solver.benchmarks.micro.common.AbstractMain;
 
-import org.openjdk.jmh.results.Result;
 import org.openjdk.jmh.runner.Runner;
 import org.openjdk.jmh.runner.RunnerException;
 import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
@@ -76,7 +75,7 @@ public static void main(String[] args) throws RunnerException, IOException {
         var relativeScoreErrorThreshold = configuration.getRelativeScoreErrorThreshold();
         var thresholdForPrint = ((int) Math.round(relativeScoreErrorThreshold * 10_000)) / 100.0D;
         runResults.forEach(result -> {
-            Result<?> primaryResult = result.getPrimaryResult();
+            var primaryResult = result.getPrimaryResult();
             var score = primaryResult.getScore();
             var scoreError = primaryResult.getScoreError();
             var relativeScoreError = scoreError / score;
diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java
diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java b/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java
diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml