Commit c19c823

Test benchmarks with PkgJogger (#20)

* Test benchmarks with PkgJogger
* Update benchmark workflow
* Fix benchmark baseline

1 parent e82c566 commit c19c823

File tree

8 files changed: 112 additions & 100 deletions

.github/workflows/Benchmark.yml

Lines changed: 9 additions & 13 deletions

@@ -6,25 +6,21 @@ on:
 jobs:
   Benchmark:
     runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      actions: write # needed to allow julia-actions/cache to proactively delete old caches that it has created
+      contents: read
     if: contains(github.event.pull_request.labels.*.name, 'run benchmark')
     steps:
       - uses: actions/checkout@v4
-      - uses: julia-actions/setup-julia@latest
-      - name: Cache artifacts
-        uses: actions/cache@v3
-        env:
-          cache-name: cache-artifacts
+      - uses: julia-actions/setup-julia@v2
         with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-test-${{ env.cache-name }}-
-            ${{ runner.os }}-test-
-            ${{ runner.os }}-
+          version: '1'
+      - uses: julia-actions/cache@v2
       - name: Install dependencies
-        run: julia -e 'using Pkg; pkg"add JSON PkgBenchmark [email protected]"'
+        run: julia --color=yes -e 'using Pkg; pkg"add JSON PkgBenchmark [email protected]"'
       - name: Run benchmarks
-        run: julia benchmark/run_benchmarks.jl
+        run: julia --color=yes benchmark/run_benchmarks.jl
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
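
For reference, the two workflow steps above can be reproduced outside CI from the repository root. A minimal sketch in Julia, assuming a recent 1.x release and that the pinned [email protected] is still resolvable:

    # Mirror the "Install dependencies" and "Run benchmarks" workflow steps locally.
    using Pkg
    pkg"add JSON PkgBenchmark [email protected]"   # same pins as the workflow
    include("benchmark/run_benchmarks.jl")        # prints the judgement locally when GITHUB_ACTIONS is unset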

benchmark/Project.toml

Lines changed: 2 additions & 1 deletion

@@ -1,8 +1,9 @@
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-RelevancePropagation = "0be6dd02-ae9e-43eb-b318-c6e81d6890d8"
 PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d"
+PkgJogger = "10150987-6cc1-4b76-abee-b1c1cbd91c01"
+RelevancePropagation = "0be6dd02-ae9e-43eb-b318-c6e81d6890d8"
 
 [compat]
 BenchmarkTools = "1"

benchmark/bench_jogger.jl

Lines changed: 86 additions & 0 deletions

@@ -0,0 +1,86 @@
+using BenchmarkTools
+using Flux
+using RelevancePropagation
+using RelevancePropagation: lrp!, modify_layer
+
+on_CI = haskey(ENV, "GITHUB_ACTIONS")
+
+T = Float32
+input_size = (32, 32, 3, 1)
+input = rand(T, input_size)
+
+model = Chain(
+    Chain(
+        Conv((3, 3), 3 => 8, relu; pad=1),
+        Conv((3, 3), 8 => 8, relu; pad=1),
+        MaxPool((2, 2)),
+        Conv((3, 3), 8 => 16, relu; pad=1),
+        Conv((3, 3), 16 => 16, relu; pad=1),
+        MaxPool((2, 2)),
+    ),
+    Chain(
+        Flux.flatten,
+        Dense(1024 => 512, relu), # 102_764_544 parameters
+        Dropout(0.5),
+        Dense(512 => 100, relu),
+    ),
+)
+Flux.testmode!(model, true)
+
+# Use one representative algorithm of each type
+algs = Dict("LRP" => LRP, "LREpsilonPlusFlat" => model -> LRP(model, EpsilonPlusFlat()))
+
+# Define benchmark
+_alg(alg, model) = alg(model) # for use with @benchmarkable macro
+
+suite = BenchmarkGroup()
+suite["CNN"] = BenchmarkGroup([k for k in keys(algs)])
+for (name, alg) in algs
+    analyzer = alg(model)
+    suite["CNN"][name] = BenchmarkGroup(["construct analyzer", "analyze"])
+    suite["CNN"][name]["construct analyzer"] = @benchmarkable _alg($(alg), $(model))
+    suite["CNN"][name]["analyze"] = @benchmarkable analyze($(input), $(analyzer))
+end
+
+# generate input for conv layers
+insize = (32, 32, 3, 1)
+in_dense = 64
+out_dense = 10
+aᵏ = rand(T, insize)
+
+layers = Dict(
+    "Conv" => (Conv((3, 3), 3 => 2), aᵏ),
+    "Dense" => (Dense(in_dense, out_dense, relu), randn(T, in_dense, 1)),
+)
+rules = Dict(
+    "ZeroRule" => ZeroRule(),
+    "EpsilonRule" => EpsilonRule(),
+    "GammaRule" => GammaRule(),
+    "WSquareRule" => WSquareRule(),
+    "FlatRule" => FlatRule(),
+    "AlphaBetaRule" => AlphaBetaRule(),
+    "ZPlusRule" => ZPlusRule(),
+    "ZBoxRule" => ZBoxRule(zero(T), oneunit(T)),
+)
+
+layernames = String.(keys(layers))
+rulenames = String.(keys(rules))
+
+suite["modify layer"] = BenchmarkGroup(rulenames)
+suite["apply rule"] = BenchmarkGroup(rulenames)
+for rname in rulenames
+    suite["modify layer"][rname] = BenchmarkGroup(layernames)
+    suite["apply rule"][rname] = BenchmarkGroup(layernames)
+end
+
+for (lname, (layer, aᵏ)) in layers
+    Rᵏ = similar(aᵏ)
+    Rᵏ⁺¹ = layer(aᵏ)
+    for (rname, rule) in rules
+        modified_layer = modify_layer(rule, layer)
+        suite["modify layer"][rname][lname] = @benchmarkable modify_layer($(rule), $(layer))
+        suite["apply rule"][rname][lname] = @benchmarkable lrp!(
+            $(Rᵏ), $(rule), $(layer), $(modified_layer), $(aᵏ), $(Rᵏ⁺¹)
+        )
+    end
+end
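
PkgJogger's convention (per its documentation) is to pick up every benchmark/bench_*.jl file and to expect each one to define a BenchmarkTools suite, which is why this file binds a lowercase suite instead of the PkgBenchmark SUITE constant. To iterate on just this file without going through CI, a rough sketch using plain BenchmarkTools, assuming the benchmark/ environment is instantiated:

    # Run only the suite defined in this file; `tune!` and `run` come from BenchmarkTools.
    using BenchmarkTools
    include("benchmark/bench_jogger.jl")   # defines `suite`
    tune!(suite)
    results = run(suite; verbose=true)
    show(results)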

benchmark/benchmarks.jl

Lines changed: 4 additions & 85 deletions

@@ -1,86 +1,5 @@
-using BenchmarkTools
-using Flux
+using PkgJogger
 using RelevancePropagation
-using RelevancePropagation: lrp!, modify_layer
-
-on_CI = haskey(ENV, "GITHUB_ACTIONS")
-
-T = Float32
-input_size = (32, 32, 3, 1)
-input = rand(T, input_size)
-
-model = Chain(
-    Chain(
-        Conv((3, 3), 3 => 8, relu; pad=1),
-        Conv((3, 3), 8 => 8, relu; pad=1),
-        MaxPool((2, 2)),
-        Conv((3, 3), 8 => 16, relu; pad=1),
-        Conv((3, 3), 16 => 16, relu; pad=1),
-        MaxPool((2, 2)),
-    ),
-    Chain(
-        Flux.flatten,
-        Dense(1024 => 512, relu), # 102_764_544 parameters
-        Dropout(0.5),
-        Dense(512 => 100, relu),
-    ),
-)
-Flux.testmode!(model, true)
-
-# Use one representative algorithm of each type
-algs = Dict("LRP" => LRP, "LREpsilonPlusFlat" => model -> LRP(model, EpsilonPlusFlat()))
-
-# Define benchmark
-_alg(alg, model) = alg(model) # for use with @benchmarkable macro
-
-SUITE = BenchmarkGroup()
-SUITE["CNN"] = BenchmarkGroup([k for k in keys(algs)])
-for (name, alg) in algs
-    analyzer = alg(model)
-    SUITE["CNN"][name] = BenchmarkGroup(["construct analyzer", "analyze"])
-    SUITE["CNN"][name]["construct analyzer"] = @benchmarkable _alg($(alg), $(model))
-    SUITE["CNN"][name]["analyze"] = @benchmarkable analyze($(input), $(analyzer))
-end
-
-# generate input for conv layers
-insize = (32, 32, 3, 1)
-in_dense = 64
-out_dense = 10
-aᵏ = rand(T, insize)
-
-layers = Dict(
-    "Conv" => (Conv((3, 3), 3 => 2), aᵏ),
-    "Dense" => (Dense(in_dense, out_dense, relu), randn(T, in_dense, 1)),
-)
-rules = Dict(
-    "ZeroRule" => ZeroRule(),
-    "EpsilonRule" => EpsilonRule(),
-    "GammaRule" => GammaRule(),
-    "WSquareRule" => WSquareRule(),
-    "FlatRule" => FlatRule(),
-    "AlphaBetaRule" => AlphaBetaRule(),
-    "ZPlusRule" => ZPlusRule(),
-    "ZBoxRule" => ZBoxRule(zero(T), oneunit(T)),
-)
-
-layernames = String.(keys(layers))
-rulenames = String.(keys(rules))
-
-SUITE["modify layer"] = BenchmarkGroup(rulenames)
-SUITE["apply rule"] = BenchmarkGroup(rulenames)
-for rname in rulenames
-    SUITE["modify layer"][rname] = BenchmarkGroup(layernames)
-    SUITE["apply rule"][rname] = BenchmarkGroup(layernames)
-end
-
-for (lname, (layer, aᵏ)) in layers
-    Rᵏ = similar(aᵏ)
-    Rᵏ⁺¹ = layer(aᵏ)
-    for (rname, rule) in rules
-        modified_layer = modify_layer(rule, layer)
-        SUITE["modify layer"][rname][lname] = @benchmarkable modify_layer($(rule), $(layer))
-        SUITE["apply rule"][rname][lname] = @benchmarkable lrp!(
-            $(Rᵏ), $(rule), $(layer), $(modified_layer), $(aᵏ), $(Rᵏ⁺¹)
-        )
-    end
-end
+# Use PkgJogger.@jog to create the JogRelevancePropagation module
+@jog RelevancePropagation
+SUITE = JogRelevancePropagation.suite()
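
The @jog macro generates a JogRelevancePropagation module whose suite() gathers the suites from all benchmark/bench_*.jl files, so the SUITE constant that PkgBenchmark and BenchmarkCI expect keeps working unchanged. The generated module can also be driven directly; a hedged sketch, with function names taken from the PkgJogger documentation rather than from this commit:

    using PkgJogger
    using RelevancePropagation
    @jog RelevancePropagation                       # defines JogRelevancePropagation
    results = JogRelevancePropagation.benchmark()   # tune and run the full suite
    # See the PkgJogger docs for saving and comparing results between revisions.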

benchmark/run_benchmarks.jl

Lines changed: 1 addition & 1 deletion

@@ -6,5 +6,5 @@
 using BenchmarkCI
 on_CI = haskey(ENV, "GITHUB_ACTIONS")
 
-BenchmarkCI.judge()
+BenchmarkCI.judge(; baseline="origin/main")
 on_CI ? BenchmarkCI.postjudge() : BenchmarkCI.displayjudgement()
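
Passing baseline="origin/main" pins the comparison target to the main branch instead of whatever the default remote ref resolves to on the CI runner; this is the "Fix benchmark baseline" part of the PR. The same judgement can be previewed locally; a small sketch, assuming origin/main exists in the local clone:

    using BenchmarkCI
    BenchmarkCI.judge(; baseline="origin/main")   # benchmark the working tree against origin/main
    BenchmarkCI.displayjudgement()                # print the comparison instead of posting a PR comment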

test/Project.toml

Lines changed: 2 additions & 0 deletions

@@ -1,11 +1,13 @@
 [deps]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
+PkgJogger = "10150987-6cc1-4b76-abee-b1c1cbd91c01"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 ReferenceTests = "324d217c-45ce-50fc-942e-d289b448e8cf"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"

test/runtests.jl

Lines changed: 4 additions & 0 deletions

@@ -54,4 +54,8 @@ using Aqua
         @info "Testing analyzers on batches..."
         include("test_batches.jl")
     end
+    @testset "Benchmark correctness" begin
+        @info "Testing whether benchmarks are up-to-date..."
+        include("test_benchmarks.jl")
+    end
 end

test/test_benchmarks.jl

Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
+using PkgJogger
+using RelevancePropagation
+
+PkgJogger.@test_benchmarks RelevancePropagation
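
According to the PkgJogger documentation, @test_benchmarks wraps each benchmark in the suite in a @testset and runs it once, so a benchmark that references renamed or removed package internals now fails the test suite instead of silently breaking CI benchmarking. The check runs as part of the normal test invocation:

    # Runs test/runtests.jl, including the new "Benchmark correctness" testset.
    using Pkg
    Pkg.test("RelevancePropagation")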
