diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 00000000..08e98915 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,37 @@ +name: Benchmark +on: + pull_request: + branches: + - main +permissions: + pull-requests: write # action needs to post a comment + +jobs: + benchmark: + name: Julia ${{ matrix.version }} - ${{ matrix.platform.os }} - ${{ matrix.platform.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.platform.os }} + strategy: + fail-fast: false + matrix: + platform: + - os: ubuntu-latest + arch: x64 + - os: ubuntu-24.04-arm + arch: aarch64 + version: + - '1' + - 'lts' + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.platform.arch }} + - name: Install dependencies + run: julia -e 'using Pkg; pkg"add PkgBenchmark BenchmarkCI@0.1"' + - name: Run benchmarks + run: julia -e 'using BenchmarkCI; BenchmarkCI.judge(baseline = "origin/main")' + - name: Post results + run: julia -e 'using BenchmarkCI; BenchmarkCI.postjudge()' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index d1458d2d..da2f1ce8 100644 --- a/.gitignore +++ b/.gitignore @@ -28,7 +28,6 @@ Manifest.toml # Test data files and benchmarking jetsavetest*.dat -benchmark/* # Misc files .DS_Store diff --git a/Project.toml b/Project.toml index 51e5e2a4..7e84c26b 100644 --- a/Project.toml +++ b/Project.toml @@ -9,9 +9,9 @@ CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2" EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" -LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" LorentzVectorHEP = "f612022c-142a-473f-8cfd-a09cf3793c6c" MuladdMacro = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221" +SIMD = "fdea26ae-647d-5447-a871-4b548cad5224" StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" [weakdeps] @@ -31,10 +31,10 @@ EDM4hep = "0.4.0" EnumX = "1.0.4" JSON = "0.21" Logging = "1.9" -LoopVectorization = "0.12.170" LorentzVectorHEP = "0.1.6" Makie = "0.20, 0.21, 0.22" MuladdMacro = "0.2.4" +SIMD = "3.7.1" StructArrays = "0.6.18, 0.7" Test = "1.9" julia = "1.9" diff --git a/benchmark/Project.toml b/benchmark/Project.toml new file mode 100644 index 00000000..e8a3ff0c --- /dev/null +++ b/benchmark/Project.toml @@ -0,0 +1,6 @@ +[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +JetReconstruction = "44e8cb2c-dfab-4825-9c70-d4808a591196" + +[sources] +JetReconstruction = {path = ".."} diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 00000000..d02d73b2 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,7 @@ +# Mini-benchmarks for JetReconstruction + +The benchmarks can be follow the standard [`PkgBenchmark.jl`](https://juliaci.github.io/PkgBenchmark.jl/stable/) structure. Can be run locally with you favourite `PkgBenchmark.jl`-compatible tool. + +```sh +julia --project=benchmark benchmark/benchmarks.jl +``` \ No newline at end of file diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl new file mode 100644 index 00000000..020b2bae --- /dev/null +++ b/benchmark/benchmarks.jl @@ -0,0 +1,65 @@ +using BenchmarkTools +using JetReconstruction + +const events_file_pp = joinpath(@__DIR__, "..", "test", "data", "events.pp13TeV.hepmc3.gz") +const events_file_ee = joinpath(@__DIR__, "..", "test", "data", "events.eeH.hepmc3.gz") + +const pp_events = JetReconstruction.read_final_state_particles(events_file_pp, + T = PseudoJet) +const ee_events = JetReconstruction.read_final_state_particles(events_file_ee, T = EEJet) + +function jet_reconstruct_harness(events; algorithm, strategy, power, distance, + recombine = RecombinationMethods[RecombinationScheme.EScheme], + ptmin::Real = 5.0, dcut = nothing, njets = nothing,) + for event in events + cs = jet_reconstruct(event; R = distance, p = power, algorithm = algorithm, + strategy = strategy, recombine...) + if !isnothing(njets) + finaljets = exclusive_jets(cs; njets = njets) + elseif !isnothing(dcut) + finaljets = exclusive_jets(cs; dcut = dcut) + else + finaljets = inclusive_jets(cs; ptmin = ptmin) + end + end +end + +const SUITE = BenchmarkGroup() +SUITE["jet_reconstruction"] = BenchmarkGroup(["reconstruction"]) + +## pp events +for stg in [RecoStrategy.N2Plain, RecoStrategy.N2Tiled] + strategy_name = "$(stg)" + SUITE["jet_reconstruction"][strategy_name] = BenchmarkGroup(["pp", strategy_name]) + for alg in [JetAlgorithm.AntiKt, JetAlgorithm.CA, JetAlgorithm.Kt] + for distance in [0.4] + power = JetReconstruction.algorithm2power[alg] + SUITE["jet_reconstruction"][strategy_name]["Alg=$alg, R=$distance"] = @benchmarkable jet_reconstruct_harness($pp_events; + algorithm = $alg, + strategy = $stg, + power = $power, + distance = $distance, + ptmin = 5.0) evals=1 samples=32 + end + end +end + +## ee events +SUITE["jet_reconstruction"]["ee"] = BenchmarkGroup(["ee"]) +for alg in [JetAlgorithm.Durham] + for distance in [0.4] + power = -1 + SUITE["jet_reconstruction"]["ee"]["Alg=$alg, R=$distance"] = @benchmarkable jet_reconstruct_harness($ee_events; + algorithm = $alg, + strategy = $RecoStrategy.Best, + power = $power, + distance = $distance, + ptmin = 5.0) evals=1 samples=32 + end +end + +if abspath(PROGRAM_FILE) == @__FILE__ + @info "Running benchmark suite" + results = run(SUITE, verbose = true) + @info results +end diff --git a/src/JetReconstruction.jl b/src/JetReconstruction.jl index 05ce60ea..d8dedbb7 100644 --- a/src/JetReconstruction.jl +++ b/src/JetReconstruction.jl @@ -18,6 +18,7 @@ module JetReconstruction using LorentzVectorHEP using MuladdMacro using StructArrays +using SIMD # Import from LorentzVectorHEP methods for those 4-vector types pt2(p::LorentzVector) = LorentzVectorHEP.pt2(p) diff --git a/src/PlainAlgo.jl b/src/PlainAlgo.jl index 2e2db535..cf1611c1 100644 --- a/src/PlainAlgo.jl +++ b/src/PlainAlgo.jl @@ -1,4 +1,3 @@ -using LoopVectorization """ dist(i, j, rapidity_array, phi_array) diff --git a/src/TiledAlgoLL.jl b/src/TiledAlgoLL.jl index 25a16481..34b6e867 100644 --- a/src/TiledAlgoLL.jl +++ b/src/TiledAlgoLL.jl @@ -5,7 +5,6 @@ using Logging using Accessors -using LoopVectorization # Include struct definitions and basic operations include("TiledAlgoLLStructs.jl") diff --git a/src/Utils.jl b/src/Utils.jl index aec6d82b..bb39bbb6 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -149,14 +149,62 @@ array. The use of `@turbo` macro gives a significant performance boost. - `dij_min`: The minimum value in the first `n` elements of the `dij` array. - `best`: The index of the minimum value in the `dij` array. """ -fast_findmin(dij, n) = begin - # findmin(@inbounds @view dij[1:n]) - best = 1 - @inbounds dij_min = dij[1] - @turbo for here in 2:n - newmin = dij[here] < dij_min - best = newmin ? here : best - dij_min = newmin ? dij[here] : dij_min +function fast_findmin end + +if Sys.ARCH == :aarch64 + fast_findmin(dij, n) = _naive_fast_findmin(@view(dij[begin:n])) +else + function fast_findmin(dij, n) + if n <= 8 + return _naive_fast_findmin(@view(dij[begin:n])) + else + return _simd_fast_findmin(dij, n) + end end - dij_min, best +end + +function _naive_fast_findmin(dij) + x = @fastmath foldl(min, dij) + i = findfirst(==(x), dij)::Int + x, i +end + +function _simd_fast_findmin(dij::DenseVector{T}, n) where {T} + laneIndices = SIMD.Vec{8, Int}((1, 2, 3, 4, 5, 6, 7, 8)) + minvals = SIMD.Vec{8, T}(Inf) + min_indices = SIMD.Vec{8, Int}(0) + + n_batches, remainder = divrem(n, 8) + lane = VecRange{8}(0) + i = 1 + @inbounds @fastmath for _ in 1:n_batches + dijs = dij[lane + i] + predicate = dijs < minvals + minvals = vifelse(predicate, dijs, minvals) + min_indices = vifelse(predicate, laneIndices, min_indices) + + i += 8 + laneIndices += 8 + end + + # last batch + back_track = 8 - remainder + i -= back_track + laneIndices -= back_track + + dijs = dij[lane + i] + predicate = dijs < minvals + minvals = vifelse(predicate, dijs, minvals) + min_indices = vifelse(predicate, laneIndices, min_indices) + + min_value = SIMD.minimum(minvals) + min_index = @inbounds min_value == minvals[1] ? min_indices[1] : + min_value == minvals[2] ? min_indices[2] : + min_value == minvals[3] ? min_indices[3] : + min_value == minvals[4] ? min_indices[4] : + min_value == minvals[5] ? min_indices[5] : + min_value == minvals[6] ? min_indices[6] : + min_value == minvals[7] ? min_indices[7] : min_indices[8] + + return min_value, min_index end