From 7c907d4a3310d3598753b70c16740d9202a7f620 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 27 Oct 2024 22:53:56 +0100 Subject: [PATCH 1/5] use SIMD.jl directly instead of LV.jl for `fast_findmin()` --- Project.toml | 4 ++-- src/JetReconstruction.jl | 1 + src/PlainAlgo.jl | 2 -- src/TiledAlgoLL.jl | 1 - src/Utils.jl | 41 ++++++++++++++++++++++++++++++---------- 5 files changed, 34 insertions(+), 15 deletions(-) diff --git a/Project.toml b/Project.toml index fb46ccb1..1fde8fbc 100644 --- a/Project.toml +++ b/Project.toml @@ -8,10 +8,10 @@ Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" -LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" LorentzVectorHEP = "f612022c-142a-473f-8cfd-a09cf3793c6c" LorentzVectors = "3f54b04b-17fc-5cd4-9758-90c048d965e3" MuladdMacro = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221" +SIMD = "fdea26ae-647d-5447-a871-4b548cad5224" StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" [weakdeps] @@ -30,13 +30,13 @@ EDM4hep = "0.4.0" EnumX = "1.0.4" JSON = "0.21" Logging = "1.9" -LoopVectorization = "0.12.170" LorentzVectorHEP = "0.1.6" LorentzVectors = "0.4.3" Makie = "0.20, 0.21, 0.22" MuladdMacro = "0.2.4" StructArrays = "0.6.18, 0.7" Test = "1.9" +SIMD = "3.6" julia = "1.9" [extras] diff --git a/src/JetReconstruction.jl b/src/JetReconstruction.jl index e20e3612..57d71127 100644 --- a/src/JetReconstruction.jl +++ b/src/JetReconstruction.jl @@ -18,6 +18,7 @@ module JetReconstruction using LorentzVectorHEP using MuladdMacro using StructArrays +using SIMD # Import from LorentzVectorHEP methods for those 4-vector types pt2(p::LorentzVector) = LorentzVectorHEP.pt2(p) diff --git a/src/PlainAlgo.jl b/src/PlainAlgo.jl index fabcb912..2695da6d 100644 --- a/src/PlainAlgo.jl +++ b/src/PlainAlgo.jl @@ -1,5 +1,3 @@ -using LoopVectorization - """ dist(i, j, rapidity_array, phi_array) diff --git a/src/TiledAlgoLL.jl b/src/TiledAlgoLL.jl index ff3b4701..3c2ed195 100644 --- a/src/TiledAlgoLL.jl +++ b/src/TiledAlgoLL.jl @@ -5,7 +5,6 @@ using Logging using Accessors -using LoopVectorization # Include struct definitions and basic operations include("TiledAlgoLLStructs.jl") diff --git a/src/Utils.jl b/src/Utils.jl index 24448bfd..4e5ed596 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -123,7 +123,7 @@ end fast_findmin(dij, n) Find the minimum value and its index in the first `n` elements of the `dij` -array. The use of `@turbo` macro gives a significant performance boost. +array. # Arguments - `dij`: An array of values. @@ -133,14 +133,35 @@ array. The use of `@turbo` macro gives a significant performance boost. - `dij_min`: The minimum value in the first `n` elements of the `dij` array. - `best`: The index of the minimum value in the `dij` array. """ -fast_findmin(dij, n) = begin - # findmin(@inbounds @view dij[1:n]) - best = 1 - @inbounds dij_min = dij[1] - @turbo for here in 2:n - newmin = dij[here] < dij_min - best = newmin ? here : best - dij_min = newmin ? dij[here] : dij_min +function fast_findmin(x, n) + laneIndices = SIMD.Vec{8, Int64}((1, 2, 3, 4, 5, 6, 7, 8)) + minvals = SIMD.Vec{8, Float64}(Inf) + min_indices = SIMD.Vec{8, Int64}(0) + + n_batches, remainder = divrem(n, 8) + lane = VecRange{8}(0) + i = 1 + @inbounds @fastmath for _ in 1:n_batches + predicate = x[lane + i] < minvals + minvals = vifelse(predicate, x[lane + i], minvals) + min_indices = vifelse(predicate, laneIndices, min_indices) + + i += 8 + laneIndices += 8 end - dij_min, best + + min_value = SIMD.minimum(minvals) + min_index = min_value == minvals[1] ? min_indices[1] : min_value == minvals[2] ? min_indices[2] : + min_value == minvals[3] ? min_indices[3] : min_value == minvals[4] ? min_indices[4] : + min_value == minvals[5] ? min_indices[5] : min_value == minvals[6] ? min_indices[6] : + min_value == minvals[7] ? min_indices[7] : min_indices[8] + + @inbounds @fastmath for _ in 1:remainder + xi = x[i] + pred = x[i] < min_value + min_value = ifelse(pred, xi, min_value) + min_index = ifelse(pred, i, min_index) + i += 1 + end + return min_value, min_index end From cc3efacc45d4afc7588a181beb40ef72276d23eb Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 27 Oct 2024 22:57:42 +0100 Subject: [PATCH 2/5] clean up --- src/Utils.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Utils.jl b/src/Utils.jl index 4e5ed596..af75657e 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -133,17 +133,17 @@ array. - `dij_min`: The minimum value in the first `n` elements of the `dij` array. - `best`: The index of the minimum value in the `dij` array. """ -function fast_findmin(x, n) - laneIndices = SIMD.Vec{8, Int64}((1, 2, 3, 4, 5, 6, 7, 8)) - minvals = SIMD.Vec{8, Float64}(Inf) - min_indices = SIMD.Vec{8, Int64}(0) +function fast_findmin(dij::DenseVector{T}, n) where T + laneIndices = SIMD.Vec{8, Int}((1, 2, 3, 4, 5, 6, 7, 8)) + minvals = SIMD.Vec{8, T}(Inf) + min_indices = SIMD.Vec{8, Int}(0) n_batches, remainder = divrem(n, 8) lane = VecRange{8}(0) i = 1 @inbounds @fastmath for _ in 1:n_batches - predicate = x[lane + i] < minvals - minvals = vifelse(predicate, x[lane + i], minvals) + predicate = dij[lane + i] < minvals + minvals = vifelse(predicate, dij[lane + i], minvals) min_indices = vifelse(predicate, laneIndices, min_indices) i += 8 @@ -157,9 +157,9 @@ function fast_findmin(x, n) min_value == minvals[7] ? min_indices[7] : min_indices[8] @inbounds @fastmath for _ in 1:remainder - xi = x[i] - pred = x[i] < min_value - min_value = ifelse(pred, xi, min_value) + xi = dij[i] + pred = dij[i] < min_value + min_value= ifelse(pred, xi, min_value) min_index = ifelse(pred, i, min_index) i += 1 end From dddec19d5bda352cd5f6504dfcd3787c421a22b0 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 27 Oct 2024 23:02:39 +0100 Subject: [PATCH 3/5] more inbounds just for safety --- src/Utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Utils.jl b/src/Utils.jl index af75657e..f592bbc7 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -151,7 +151,7 @@ function fast_findmin(dij::DenseVector{T}, n) where T end min_value = SIMD.minimum(minvals) - min_index = min_value == minvals[1] ? min_indices[1] : min_value == minvals[2] ? min_indices[2] : + min_index = @inbounds min_value == minvals[1] ? min_indices[1] : min_value == minvals[2] ? min_indices[2] : min_value == minvals[3] ? min_indices[3] : min_value == minvals[4] ? min_indices[4] : min_value == minvals[5] ? min_indices[5] : min_value == minvals[6] ? min_indices[6] : min_value == minvals[7] ? min_indices[7] : min_indices[8] From ea7ed5c9c19cd1d61c46862db2bce2cdaf494e13 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 27 Oct 2024 23:57:41 +0100 Subject: [PATCH 4/5] clean up --- src/Utils.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Utils.jl b/src/Utils.jl index f592bbc7..ab96bc97 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -142,8 +142,9 @@ function fast_findmin(dij::DenseVector{T}, n) where T lane = VecRange{8}(0) i = 1 @inbounds @fastmath for _ in 1:n_batches - predicate = dij[lane + i] < minvals - minvals = vifelse(predicate, dij[lane + i], minvals) + dijs = dij[lane + i] + predicate = dijs < minvals + minvals = vifelse(predicate, dijs, minvals) min_indices = vifelse(predicate, laneIndices, min_indices) i += 8 From 1e20f7bcfaa4c962d86e62f8e30f55d23aebe112 Mon Sep 17 00:00:00 2001 From: Moelf Date: Mon, 28 Oct 2024 00:20:39 +0100 Subject: [PATCH 5/5] format --- src/Utils.jl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Utils.jl b/src/Utils.jl index ab96bc97..8ee38ecf 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -133,7 +133,7 @@ array. - `dij_min`: The minimum value in the first `n` elements of the `dij` array. - `best`: The index of the minimum value in the `dij` array. """ -function fast_findmin(dij::DenseVector{T}, n) where T +function fast_findmin(dij::DenseVector{T}, n) where {T} laneIndices = SIMD.Vec{8, Int}((1, 2, 3, 4, 5, 6, 7, 8)) minvals = SIMD.Vec{8, T}(Inf) min_indices = SIMD.Vec{8, Int}(0) @@ -152,15 +152,18 @@ function fast_findmin(dij::DenseVector{T}, n) where T end min_value = SIMD.minimum(minvals) - min_index = @inbounds min_value == minvals[1] ? min_indices[1] : min_value == minvals[2] ? min_indices[2] : - min_value == minvals[3] ? min_indices[3] : min_value == minvals[4] ? min_indices[4] : - min_value == minvals[5] ? min_indices[5] : min_value == minvals[6] ? min_indices[6] : - min_value == minvals[7] ? min_indices[7] : min_indices[8] + min_index = @inbounds min_value == minvals[1] ? min_indices[1] : + min_value == minvals[2] ? min_indices[2] : + min_value == minvals[3] ? min_indices[3] : + min_value == minvals[4] ? min_indices[4] : + min_value == minvals[5] ? min_indices[5] : + min_value == minvals[6] ? min_indices[6] : + min_value == minvals[7] ? min_indices[7] : min_indices[8] @inbounds @fastmath for _ in 1:remainder xi = dij[i] pred = dij[i] < min_value - min_value= ifelse(pred, xi, min_value) + min_value = ifelse(pred, xi, min_value) min_index = ifelse(pred, i, min_index) i += 1 end