diff --git a/Project.toml b/Project.toml
index 262d1dd2..4bb57f3b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,9 +9,9 @@ CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
 CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
 EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
 LorentzVectorHEP = "f612022c-142a-473f-8cfd-a09cf3793c6c"
 MuladdMacro = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221"
+SIMD = "fdea26ae-647d-5447-a871-4b548cad5224"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
 
@@ -33,10 +33,10 @@ EDM4hep = "0.4.0"
 EnumX = "1.0.4"
 JSON = "0.21"
 Logging = "1.9"
-LoopVectorization = "0.12.170"
 LorentzVectorHEP = "0.1.6"
 Makie = "0.20, 0.21, 0.22, 0.23, 0.24"
 MuladdMacro = "0.2.4"
+SIMD = "3.7.1"
 Random = "1.9"
 Statistics = "1.9"
 StructArrays = "0.6.18, 0.7"
diff --git a/src/JetReconstruction.jl b/src/JetReconstruction.jl
index dd171900..4e987b5c 100644
--- a/src/JetReconstruction.jl
+++ b/src/JetReconstruction.jl
@@ -18,6 +18,7 @@ module JetReconstruction
 using LorentzVectorHEP
 using MuladdMacro
 using StructArrays
+using SIMD
 
 # Import from LorentzVectorHEP methods for those 4-vector types
 pt2(p::LorentzVector) = LorentzVectorHEP.pt2(p)
diff --git a/src/PlainAlgo.jl b/src/PlainAlgo.jl
index ab5ec256..beabc4fa 100644
--- a/src/PlainAlgo.jl
+++ b/src/PlainAlgo.jl
@@ -1,5 +1,3 @@
-using LoopVectorization
-
 """
     dist(i, j, rapidity_array, phi_array)
 
diff --git a/src/TiledAlgoLL.jl b/src/TiledAlgoLL.jl
index cbe82367..8d9bfbcf 100644
--- a/src/TiledAlgoLL.jl
+++ b/src/TiledAlgoLL.jl
@@ -5,7 +5,6 @@
 
 using Logging
 using Accessors
-using LoopVectorization
 
 # Include struct definitions and basic operations
 include("TiledAlgoLLStructs.jl")
diff --git a/src/Utils.jl b/src/Utils.jl
index caa9fe2e..abbf2c05 100644
--- a/src/Utils.jl
+++ b/src/Utils.jl
@@ -114,14 +114,62 @@ array. The use of `@turbo` macro gives a significant performance boost.
 - `dij_min`: The minimum value in the first `n` elements of the `dij` array.
 - `best`: The index of the minimum value in the `dij` array.
 """
-fast_findmin(dij, n) = begin
-    # findmin(@inbounds @view dij[1:n])
-    best = 1
-    @inbounds dij_min = dij[1]
-    @turbo for here in 2:n
-        newmin = dij[here] < dij_min
-        best = newmin ? here : best
-        dij_min = newmin ? dij[here] : dij_min
+function fast_findmin end
+
+if Sys.ARCH == :aarch64
+    fast_findmin(dij, n) = _naive_fast_findmin(@view(dij[begin:n]))
+else
+    function fast_findmin(dij, n)
+        if n <= 8
+            return _naive_fast_findmin(@view(dij[begin:n]))
+        else
+            return _simd_fast_findmin(dij, n)
+        end
     end
-    dij_min, best
+end
+
+function _naive_fast_findmin(dij)
+    x = @fastmath foldl(min, dij)
+    i = findfirst(==(x), dij)::Int
+    x, i
+end
+
+function _simd_fast_findmin(dij::DenseVector{T}, n) where {T}
+    laneIndices = SIMD.Vec{8, Int}((1, 2, 3, 4, 5, 6, 7, 8))
+    minvals = SIMD.Vec{8, T}(Inf)
+    min_indices = SIMD.Vec{8, Int}(0)
+
+    n_batches, remainder = divrem(n, 8)
+    lane = VecRange{8}(0)
+    i = 1
+    @inbounds @fastmath for _ in 1:n_batches
+        dijs = dij[lane + i]
+        predicate = dijs < minvals
+        minvals = vifelse(predicate, dijs, minvals)
+        min_indices = vifelse(predicate, laneIndices, min_indices)
+
+        i += 8
+        laneIndices += 8
+    end
+
+    # last batch
+    back_track = 8 - remainder
+    i -= back_track
+    laneIndices -= back_track
+
+    dijs = dij[lane + i]
+    predicate = dijs < minvals
+    minvals = vifelse(predicate, dijs, minvals)
+    min_indices = vifelse(predicate, laneIndices, min_indices)
+
+    min_value = SIMD.minimum(minvals)
+    min_index = @inbounds min_value == minvals[1] ? min_indices[1] :
+                          min_value == minvals[2] ? min_indices[2] :
+                          min_value == minvals[3] ? min_indices[3] :
+                          min_value == minvals[4] ? min_indices[4] :
+                          min_value == minvals[5] ? min_indices[5] :
+                          min_value == minvals[6] ? min_indices[6] :
+                          min_value == minvals[7] ? min_indices[7] : min_indices[8]
+
+    return min_value, min_index
 end