Skip to content

Commit d96705c

Browse files
committed
Improve M1 native support
1 parent db0ae88 commit d96705c

File tree

3 files changed

+6
-4
lines changed

3 files changed

+6
-4
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.11"
4+
version = "0.12.12"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/codegen/lower_load.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
function prefetchisagoodidea(ls::LoopSet, op::Operation, td::UnrollArgs)
2+
# ((Sys.ARCH === :x86_64) || (Sys.ARCH === :i686)) || return false
23
# return false
34
@unpack u₁, u₁loopsym, u₂loopsym, vloopsym, vstep, u₂max, suffix = td
45
length(loopdependencies(op)) ≤ 1 && return 0
@@ -40,6 +41,8 @@ function prefetchisagoodidea(ls::LoopSet, op::Operation, td::UnrollArgs)
4041
0
4142
end
4243
function add_prefetches!(q::Expr, ls::LoopSet, op::Operation, td::UnrollArgs, prefetchind::Int)
44+
# TODO: maybe prefetch for non-x86_64?
45+
((Sys.ARCH === :x86_64) || (Sys.ARCH === :i686)) || return nothing
4346
@unpack u₁, u₁loopsym, u₂loopsym, vloopsym, u₂max = td
4447
# we should only be here if `unitstride(vloop)`
4548
dontskip = (cache_lnsze(ls) ÷ reg_size(ls)) - 1

src/modeling/determinestrategy.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -543,17 +543,16 @@ function solve_unroll(
543543
W::Int, vloopsym::Symbol, rounduᵢ::Int
544544
)
545545
(u₁step, u₂step) = if rounduᵢ == 1 # the lower clamp bound of 1 is to safeguard against some weird arch I've never heard of; the upper bound of 4 caps the step.
546-
(max(1, cache_lnsze(ls) ÷ reg_size(ls)), 1)
546+
(clamp(cache_lnsze(ls) ÷ reg_size(ls), 1, 4), 1)
547547
elseif rounduᵢ == 2
548-
(1, max(1,cache_lnsze(ls) ÷ reg_size(ls)))
548+
(1, clamp(cache_lnsze(ls) ÷ reg_size(ls), 1, 4))
549549
elseif rounduᵢ == -1
550550
(8 ÷ ls.vector_width[], 1)
551551
elseif rounduᵢ == -2
552552
(1, 8 ÷ ls.vector_width[])
553553
else
554554
(1, 1)
555555
end
556-
# @show u₁step, u₂step
557556
u₁loop = getloop(ls, u₁loopsym)
558557
u₂loop = getloop(ls, u₂loopsym)
559558
solve_unroll(

0 commit comments

Comments
 (0)