Skip to content

Commit def5ad1

Browse files
committed
rip out cache_size
1 parent f33d08b commit def5ad1

File tree

8 files changed

+24
-48
lines changed

8 files changed

+24
-48
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.124"
4+
version = "0.12.125"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/broadcast.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -548,8 +548,8 @@ end
548548
# we have an N dimensional loop.
549549
# need to construct the LoopSet
550550
ls = LoopSet(Mod)
551-
inline, u₁, u₂, v, isbroadcast, _, rs, rc, cls, l1, l2, l3, threads, warncheckarg = UNROLL
552-
set_hw!(ls, rs, rc, cls, l1, l2, l3)
551+
inline, u₁, u₂, v, isbroadcast, _, rs, rc, cls, threads, warncheckarg = UNROLL
552+
set_hw!(ls, rs, rc, cls)
553553
ls.isbroadcast = isbroadcast # maybe set `false` in a DiffEq-like `@..` macro
554554
loopsyms = [gensym!(ls, "n") for _ ∈ 1:N]
555555
add_broadcast_loops!(ls, loopsyms, :dest)
@@ -584,8 +584,8 @@ end
584584
# we have an N dimensional loop.
585585
# need to construct the LoopSet
586586
ls = LoopSet(Mod)
587-
inline, u₁, u₂, v, isbroadcast, _, rs, rc, cls, l1, l2, l3, threads, warncheckarg = UNROLL
588-
set_hw!(ls, rs, rc, cls, l1, l2, l3)
587+
inline, u₁, u₂, v, isbroadcast, _, rs, rc, cls, threads, warncheckarg = UNROLL
588+
set_hw!(ls, rs, rc, cls)
589589
ls.isbroadcast = isbroadcast # maybe set `false` in a DiffEq-like `@..` macro
590590
loopsyms = [gensym!(ls, "n") for _ ∈ 1:N]
591591
pushprepreamble!(ls, Expr(:(=), :dest, Expr(:call, :parent, :dest′)))
@@ -626,7 +626,7 @@ end
626626
::Val{UNROLL},
627627
::Val{dontbc}
628628
) where {T<:NativeTypes,N,T2<:Number,Mod,UNROLL,dontbc}
629-
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, l1, l2, l3, threads = UNROLL
629+
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, threads = UNROLL
630630
quote
631631
$(Expr(:meta, :inline))
632632
arg = T(first(bc.args))
@@ -646,7 +646,7 @@ end
646646
::Val{UNROLL},
647647
::Val{dontbc}
648648
) where {T<:NativeTypes,N,A<:AbstractArray{T,N},T2<:Number,Mod,UNROLL,dontbc}
649-
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, l1, l2, l3, threads = UNROLL
649+
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, threads = UNROLL
650650
quote
651651
$(Expr(:meta, :inline))
652652
arg = T(first(bc.args))

src/codegen/lower_threads.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ function thread_one_loops_expr(
420420
valid_thread_loop::Vector{Bool},
421421
ntmax::UInt,
422422
c::Float64,
423-
UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,Int,Int,Int,UInt},
423+
UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt},
424424
OPS::Expr,
425425
ARF::Expr,
426426
AM::Expr,
@@ -615,7 +615,7 @@ function thread_two_loops_expr(
615615
valid_thread_loop::Vector{Bool},
616616
ntmax::UInt,
617617
c::Float64,
618-
UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,Int,Int,Int,UInt},
618+
UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt},
619619
OPS::Expr,
620620
ARF::Expr,
621621
AM::Expr,
@@ -877,7 +877,7 @@ function valid_thread_loops(ls::LoopSet)
877877
end
878878
function avx_threads_expr(
879879
ls::LoopSet,
880-
UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,Int,Int,Int,UInt},
880+
UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt},
881881
nt::UInt,
882882
OPS::Expr,
883883
ARF::Expr,

src/codegen/split_loops.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,7 @@ function split_loopset(ls::LoopSet, ids::Vector{Int}, issecond::Bool)
107107
# it shouldn't.
108108
# Current behavior is incorrect when VECWIDTH chosen does actually differ between
109109
# split loops and the loops are statically sized, because code gen will then assume it is correct...
110-
l1, l2, l3 = cache_sze(ls)
111-
set_hw!(ls_new, reg_size(ls), reg_count(ls), cache_lnsze(ls), l1, l2, l3)
110+
set_hw!(ls_new, reg_size(ls), reg_count(ls), cache_lnsze(ls))
112111
ls_new.vector_width = ls.vector_width
113112
fill_offset_memop_collection!(ls)
114113
# println("ls_new operations:")

src/condense_loopset.jl

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -550,28 +550,17 @@ function add_grouped_strided_pointer!(extra_args::Expr, ls::LoopSet)
550550
preserve, shouldindbyind, roots
551551
end
552552

553-
# first_cache() = ifelse(gt(num_cache_levels(), StaticInt{2}()), StaticInt{2}(), StaticInt{1}())
554-
# function _first_cache_size(::StaticInt{FCS}) where {FCS}
555-
# L1inclusive = StaticInt{FCS}() - VectorizationBase.cache_size(One())
556-
# ifelse(eq(first_cache(), StaticInt(2)) & VectorizationBase.cache_inclusive(StaticInt(2)), L1inclusive, StaticInt{FCS}())
557-
# end
558-
# _first_cache_size(::Nothing) = StaticInt(262144)
559-
# first_cache_size() = _first_cache_size(cache_size(first_cache()))
560-
561553
@generated function _turbo_config_val(
562554
::Val{CNFARG},
563555
::StaticInt{W},
564556
::StaticInt{RS},
565557
::StaticInt{AR},
566558
::StaticInt{NT},
567559
::StaticInt{CLS},
568-
::StaticInt{L1},
569-
::StaticInt{L2},
570-
::StaticInt{L3},
571-
) where {CNFARG,W,RS,AR,CLS,L1,L2,L3,NT}
560+
) where {CNFARG,W,RS,AR,CLS,NT}
572561
inline, u₁, u₂, v, BROADCAST, thread = CNFARG
573562
nt = min(thread % UInt, NT % UInt)
574-
t = Expr(:tuple, inline, u₁, u₂, v, BROADCAST, W, RS, AR, CLS, L1, L2, L3, nt)
563+
t = Expr(:tuple, inline, u₁, u₂, v, BROADCAST, W, RS, AR, CLS, nt)
575564
length(CNFARG) == 7 && push!(t.args, CNFARG[7])
576565
Expr(:call, Expr(:curly, :Val, t))
577566
end
@@ -582,10 +571,7 @@ end
582571
register_size(),
583572
available_registers(),
584573
lv_max_num_threads(),
585-
cache_linesize(),
586-
cache_size(StaticInt(1)),
587-
cache_size(StaticInt(2)),
588-
cache_size(StaticInt(3)),
574+
cache_linesize()
589575
)
590576
end
591577
function find_samename_constparent(op::Operation, opname::Symbol)

src/modeling/graphs.jl

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,6 @@ mutable struct LoopSet
451451
register_size::Int
452452
register_count::Int
453453
cache_linesize::Int
454-
cache_size::Tuple{Int,Int,Int}
455454
ureduct::Int
456455
equalarraydims::Vector{Tuple{Vector{Symbol},Vector{Int}}}
457456
omop::OffsetLoadCollection
@@ -499,11 +498,11 @@ function save_tilecost!(ls::LoopSet)
499498
end
500499
# ls.reg_pres[5,1] = ls.reg_pres[5,2]
501500
end
502-
function set_hw!(ls::LoopSet, rs::Int, rc::Int, cls::Int, l1::Int, l2::Int, l3::Int)
501+
function set_hw!(ls::LoopSet, rs::Int, rc::Int, cls::Int)
503502
ls.register_size = rs
504503
ls.register_count = rc
505504
ls.cache_linesize = cls
506-
ls.cache_size = (l1, l2, l3)
505+
# ls.cache_size = (l1, l2, l3)
507506
# ls.opmask_register[] = omr
508507
nothing
509508
end
@@ -514,16 +513,12 @@ function set_hw!(ls::LoopSet)
514513
ls,
515514
Int(register_size()),
516515
Int(available_registers()),
517-
Int(cache_linesize()),
518-
Int(cache_size(StaticInt(1))),
519-
Int(cache_size(StaticInt(2))),
520-
Int(cache_size(StaticInt(3))),
516+
Int(cache_linesize())
521517
)
522518
end
523519
reg_size(ls::LoopSet) = ls.register_size
524520
reg_count(ls::LoopSet) = ls.register_count
525521
cache_lnsze(ls::LoopSet) = ls.cache_linesize
526-
cache_sze(ls::LoopSet) = ls.cache_size
527522

528523
pushprepreamble!(ls::LoopSet, ex) = push!(ls.prepreamble.args, ex)
529524
function pushpreamble!(ls::LoopSet, op::Operation, v::Symbol)
@@ -608,7 +603,6 @@ function LoopSet(mod::Symbol)
608603
ls.register_size = 0
609604
ls.register_count = 0
610605
ls.cache_linesize = 0
611-
ls.cache_size = (0, 0, 0)
612606
ls.ureduct = -1
613607
ls.equalarraydims = Tuple{Vector{Symbol},Vector{Int}}[]
614608
ls.omop = OffsetLoadCollection()

src/reconstruct_loopset.jl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -874,9 +874,9 @@ function avx_loopset!(
874874
end
875875
function avx_body(
876876
ls::LoopSet,
877-
UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,Int,Int,Int,UInt},
877+
UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt},
878878
)
879-
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, l1, l2, l3, nt = UNROLL
879+
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, nt = UNROLL
880880
q =
881881
(iszero(u₁) & iszero(v)) ? lower_and_split_loops(ls, inline % Int) :
882882
lower(ls, u₁ % Int, u₂ % Int, v % Int, inline % Int)
@@ -916,14 +916,14 @@ function _turbo_loopset(
916916
@nospecialize(LPSYMsv),
917917
LBsv::Core.SimpleVector,
918918
vargs::Core.SimpleVector,
919-
UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,Int,Int,Int,UInt},
919+
UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt},
920920
)
921921
nops = length(OPSsv) ÷ 3
922922
instr = Instruction[Instruction(OPSsv[3i+1], OPSsv[3i+2]) for i 0:nops-1]
923923
ops = OperationStruct[OPSsv[3i] for i 1:nops]
924924
ls = LoopSet(:LoopVectorization)
925-
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, l1, l2, l3, nt = UNROLL
926-
set_hw!(ls, rs, rc, cls, l1, l2, l3)
925+
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, nt = UNROLL
926+
set_hw!(ls, rs, rc, cls)
927927
ls.vector_width = W
928928
ls.isbroadcast = isbroadcast
929929
arsv = Vector{ArrayRefStruct}(undef, length(ARFsv))
@@ -990,11 +990,11 @@ Execute an `@turbo` block. The block's code is represented via the arguments:
990990
post = hoist_constant_memory_accesses!(ls)
991991
# q = @show(avx_body(ls, var"#UNROLL#")); post === ls.preamble ? q : Expr(:block, q, post)
992992
q = if (last(var"#UNROLL#") > 1) && length(var"#LPSYM#") == length(ls.loops)
993-
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, l1, l2, l3, nt = var"#UNROLL#"
993+
inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, nt = var"#UNROLL#"
994994
# wrap in `var"#OPS#", var"#ARF#", var"#AM#", var"#LPSYM#"` in `Expr` to homogenize types
995995
avx_threads_expr(
996996
ls,
997-
(inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, l1, l2, l3, one(UInt)),
997+
(inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, one(UInt)),
998998
nt,
999999
:(Val{$(var"#OPS#")}()),
10001000
:(Val{$(var"#ARF#")}()),

src/user_api_conveniences.jl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ function matmul_params(
3131
rs,
3232
rc,
3333
cls,
34-
Int(cache_size(StaticInt(1))),
35-
Int(cache_size(StaticInt(2))),
36-
Int(cache_size(StaticInt(3))),
3734
)
3835
if N ≡ nothing
3936
nloop = GEMMLOOPSET.loops[1]

0 commit comments

Comments
 (0)