Skip to content
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
LorentzVectorHEP = "f612022c-142a-473f-8cfd-a09cf3793c6c"
MuladdMacro = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221"
SIMD = "fdea26ae-647d-5447-a871-4b548cad5224"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"

Expand All @@ -33,10 +33,10 @@ EDM4hep = "0.4.0"
EnumX = "1.0.4"
JSON = "0.21"
Logging = "1.9"
LoopVectorization = "0.12.170"
LorentzVectorHEP = "0.1.6"
Makie = "0.20, 0.21, 0.22, 0.23, 0.24"
MuladdMacro = "0.2.4"
SIMD = "3.7.1"
Random = "1.9"
Statistics = "1.9"
StructArrays = "0.6.18, 0.7"
Expand Down
1 change: 1 addition & 0 deletions src/JetReconstruction.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ module JetReconstruction
using LorentzVectorHEP
using MuladdMacro
using StructArrays
using SIMD

# Import from LorentzVectorHEP methods for those 4-vector types
pt2(p::LorentzVector) = LorentzVectorHEP.pt2(p)
Expand Down
2 changes: 0 additions & 2 deletions src/PlainAlgo.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
using LoopVectorization

"""
dist(i, j, rapidity_array, phi_array)

Expand Down
1 change: 0 additions & 1 deletion src/TiledAlgoLL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

using Logging
using Accessors
using LoopVectorization

# Include struct definitions and basic operations
include("TiledAlgoLLStructs.jl")
Expand Down
66 changes: 57 additions & 9 deletions src/Utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,62 @@ array. The use of `@turbo` macro gives a significant performance boost.
- `dij_min`: The minimum value in the first `n` elements of the `dij` array.
- `best`: The index of the minimum value in the `dij` array.
"""
fast_findmin(dij, n) = begin
# findmin(@inbounds @view dij[1:n])
best = 1
@inbounds dij_min = dij[1]
@turbo for here in 2:n
newmin = dij[here] < dij_min
best = newmin ? here : best
dij_min = newmin ? dij[here] : dij_min
function fast_findmin end

if Sys.ARCH == :aarch64
fast_findmin(dij, n) = _naive_fast_findmin(@view(dij[begin:n]))
else
function fast_findmin(dij, n)
if n <= 8
return _naive_fast_findmin(@view(dij[begin:n]))
else
return _simd_fast_findmin(dij, n)
end
end
dij_min, best
end

"""
    _naive_fast_findmin(dij)

Return the smallest element of `dij` together with the index of its first occurrence.

The minimum is computed with a left fold of `min` under `@fastmath`; a second pass
then locates the first element that compares equal to it.

# Arguments
- `dij`: the collection to search (callers in this file pass a view of the leading
  elements of the distance array).

# Returns
- `(value, index)`: the minimum value and the position at which it first appears.
"""
function _naive_fast_findmin(dij)
    smallest = @fastmath foldl(min, dij)
    # Second pass: position of the first element equal to the minimum.
    where_found = findfirst(v -> v == smallest, dij)
    return smallest, where_found::Int
end

"""
    _simd_fast_findmin(dij::DenseVector{T}, n) where {T}

Find the minimum of the first `n` elements of `dij` and the index of its first
occurrence, comparing eight elements at a time with explicit SIMD vectors.

Each of the eight lanes keeps a running minimum and the index at which that minimum
was first seen in the lane. After all full batches, one extra batch covering the last
eight elements (`n - 7` to `n`) is processed; it may overlap elements that were already
seen, which is harmless because lanes are only updated on a strict `<`.

# Arguments
- `dij`: dense vector of values, indexed from 1.
- `n`: number of leading elements to search. Must be at least 8, since both the first
  load and the final load read eight consecutive elements.

# Returns
- `(min_value, min_index)`: the minimum value and the smallest index holding it, which
  matches the first-occurrence result of `_naive_fast_findmin`.

The main loop runs under `@fastmath`, so inputs are assumed to be free of `NaN`.
"""
function _simd_fast_findmin(dij::DenseVector{T}, n) where {T}
    lane = VecRange{8}(0)
    first_indices = SIMD.Vec{8, Int}((1, 2, 3, 4, 5, 6, 7, 8))

    # Seed the running minima from the first batch instead of (Inf, 0). With an
    # Inf/0 seed a lane that never sees a value strictly below Inf keeps index 0,
    # so an all-Inf input would return an invalid index.
    minvals = dij[lane + 1]
    min_indices = first_indices

    n_batches = div(n, 8)
    i = 9
    laneIndices = first_indices + 8
    @inbounds @fastmath for _ in 2:n_batches
        dijs = dij[lane + i]
        predicate = dijs < minvals
        minvals = vifelse(predicate, dijs, minvals)
        min_indices = vifelse(predicate, laneIndices, min_indices)

        i += 8
        laneIndices += 8
    end

    # Last batch: the final eight elements, n-7:n. Done unconditionally to keep the
    # code branch-free; when n is a multiple of 8 it simply revisits the last batch.
    # This load is not marked @inbounds, and for n < 8 its start index is below 1.
    tail_start = n - 7
    dijs = dij[lane + tail_start]
    laneIndices = first_indices + (n - 8)
    predicate = dijs < minvals
    minvals = vifelse(predicate, dijs, minvals)
    min_indices = vifelse(predicate, laneIndices, min_indices)

    min_value = SIMD.minimum(minvals)

    # Several lanes can hold the same minimum; take the smallest index among them so
    # that ties resolve to the first occurrence rather than to the lowest lane number.
    min_index = typemax(Int)
    @inbounds for k in 1:8
        if minvals[k] == min_value
            min_index = min(min_index, min_indices[k])
        end
    end

    return min_value, min_index
end
Loading