Skip to content

Commit 4fecc57

Browse files
committed
lsp
1 parent 5674f89 commit 4fecc57

File tree

8 files changed

+40
-59
lines changed

8 files changed

+40
-59
lines changed

src/codegen/lower_load.jl

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ function lower_load_no_optranslation!(
140140
q::Expr, ls::LoopSet, op::Operation, td::UnrollArgs, mask::Bool, inds_calc_by_ptr_offset::Vector{Bool}
141141
)
142142
@unpack u₁, u₁loopsym, u₂loopsym, vloopsym, suffix = td
143-
loopdeps = loopdependencies(op)
144143
# @assert isvectorized(op)
145144
opu₁, opu₂ = isunrolled_sym(op, u₁loopsym, u₂loopsym, vloopsym, ls)
146145
u = ifelse(opu₁, u₁, 1)
@@ -215,8 +214,6 @@ function lower_load_for_optranslation!(
215214
for i ∈ eachindex(gespinds.args)
216215
if i == translationind
217216
gespinds.args[i] = Expr(:call, lv(Core.ifelse(equal_steps, :firstunroll, :lastunroll)), gespinds.args[i])
218-
# else
219-
# gespinds.args[i] = Expr(:call, lv(:unmm), gespinds.args[i])
220217
end
221218
end
222219
ip = GlobalRef(VectorizationBase, :increment_ptr)
@@ -262,7 +259,7 @@ function lower_load_for_optranslation!(
262259
broadcasted_data = broadcastedname(variable_name_data)
263260
push!(q.args, :($broadcasted_data = getfield($(broadcastedname(variable_name_u)), 1)))
264261
end
265-
gf = GlobalRef(Core,:getfield)
262+
gf = GlobalRef(Core, :getfield)
266263
for u₂ ∈ 0:u₂max-1
267264
variable_name_u₂ = Symbol(variable_name(op, u₂), '_', u₁)
268265
t = Expr(:tuple)
@@ -285,7 +282,7 @@ function lower_load_for_optranslation!(
285282
push!(q.args, Expr(:(=), broadcastedname(variable_name_u₂), Expr(:call, lv(:VecUnroll), tb)))
286283
end
287284
end
288-
nothing
285+
nothing
289286
end
290287

291288
# TODO: this code should be rewritten to be more "orthogonal", so that we're just combining separate pieces.
@@ -306,7 +303,6 @@ function lower_load!(
306303
if -suffix < mno < 0 # already checked that `suffix != -1` above
307304
varnew = variable_name(op, suffix)
308305
varold = variable_name(operations(ls)[id], suffix + mno)
309-
opold = operations(ls)[id]
310306
u = isu₁unrolled(op) ? u₁ : 1
311307
push!(q.args, Expr(:(=), Symbol(varnew, '_', u), Symbol(varold, '_', u)))
312308
return

src/codegen/lowering.jl

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,6 @@ function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, in
224224
isone(UF) && return lower_no_unroll(ls, us, n, inclmask)
225225
@unpack u₁loopnum, vloopnum, u₁, u₂ = us
226226
order = names(ls)
227-
loopsym = order[n]
228227
loop = getloop(ls, n)
229228
vectorized = order[vloopnum]
230229
nisunrolled = isunrolled1(us, n)
@@ -274,7 +273,7 @@ function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, in
274273
end
275274
remblock = Expr(:block)
276275
(nisvectorized && (UFt > 0) && isone(num_loops(ls))) && push!(remblock.args, definemask(loop))
277-
unroll_cleanup = true
276+
# unroll_cleanup = true
278277
else
279278
remblock = init_remblock(loop, ls.lssm, n)#loopsym)
280279
# unroll_cleanup = Ureduct > 0 || (nisunrolled ? (u₂ > 1) : (u₁ > 1))
@@ -463,7 +462,7 @@ end
463462
@generated function of_same_size(::Type{T}, ::Type{S}) where {T,S}
464463
sizeof_S = sizeof(S)
465464
sizeof(T) == sizeof_S && return T
466-
Tfloat = T <: Union{Float32,Float64}
465+
# Tfloat = T <: Union{Float32,Float64}
467466
if T <: Union{Float32,Float64}
468467
sizeof_S ≥ 8 ? Float64 : Float32
469468
elseif T <: Signed
@@ -504,7 +503,7 @@ function outer_reduction_zero(op::Operation, u₁u::Bool, Umax::Int, reduct_clas
504503
end
505504
end
506505
elseif isifelse
507-
Expr(:call, identify, reduct_zero) # type stability within LV
506+
Expr(:call, identity, reduct_zero) # type stability within LV
508507
else
509508
Expr(:call, reduct_zero, Tsym)
510509
end
@@ -670,7 +669,7 @@ function reinit_and_update_tiled_outer_reduct!(letblock::Expr, block::Expr, ls::
670669
instr = instruction(op).instr
671670
instr === :ifelse && continue # FIXME - skipping this will result in bad performance
672671
u₁u, u₂u = isunrolled_sym(op, u₁loopsym, u₂loopsym, vloopsym, ls)
673-
reduct_class = reduction_instruction_class(instr)
672+
reduct_class::Float64 = reduction_instruction_class(instr)
674673
z = outer_reduction_zero(op, u₁u, Umax, reduct_class, rs)
675674
reduct = reduce_to_onevecunroll(reduct_class)
676675
mvar = variable_name(op, -1)
@@ -820,8 +819,6 @@ end
820819
function setup_preamble!(ls::LoopSet, us::UnrollSpecification, Ureduct::Int)
821820
@unpack u₁loopnum, u₂loopnum, vloopnum, u₁, u₂ = us
822821
order = names(ls)
823-
u₁loopsym = order[u₁loopnum]
824-
u₂loopsym = order[u₂loopnum]
825822
vectorized = order[vloopnum]
826823
set_vector_width!(ls, vectorized)
827824
iszero(length(ls.includedactualarrays) + length(ls.outer_reductions)) || define_eltype_vec_width!(ls.preamble, ls, vectorized, false)
@@ -844,15 +841,15 @@ function setup_preamble!(ls::LoopSet, us::UnrollSpecification, Ureduct::Int)
844841
end
845842
end
846843
function lsexpr(ls::LoopSet, q)
847-
Expr(:block, ls.preamble, q)
844+
Expr(:block, ls.preamble, q)
848845
end
849846

850847
function isanouterreduction(ls::LoopSet, op::Operation)
851-
opname = name(op)
852-
for or ∈ ls.outer_reductions
853-
name(ls.operations[or]) === opname && return true
854-
end
855-
false
848+
opname = name(op)
849+
for or ∈ ls.outer_reductions
850+
name(ls.operations[or]) === opname && return true
851+
end
852+
false
856853
end
857854

858855
# tiled_outerreduct_unroll(ls::LoopSet) = tiled_outerreduct_unroll(ls.unrollspecification)
@@ -912,7 +909,6 @@ function lower_unrollspec(ls::LoopSet)
912909
@unpack vloopnum, u₁, u₂ = us
913910
order = names(ls)
914911
init_loop_map!(ls)
915-
vectorized = order[vloopnum]
916912
Ureduct = calc_Ureduct!(ls, us)
917913
setup_preamble!(ls, us, Ureduct)
918914
initgesps = add_loop_start_stop_manager!(ls)

src/modeling/graphs.jl

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,12 +1138,12 @@ function add_operation!(
11381138
array, rawindices = ref_from_expr!(ls, RHS)
11391139
RHS_ref = array_reference_meta!(ls, array, rawindices, elementbytes, gensym!(ls, LHS_sym))
11401140
op = add_load!(ls, RHS_ref, elementbytes)
1141-
iop = add_compute!(ls, LHS_sym, :identity, [op], elementbytes)
1141+
add_compute!(ls, LHS_sym, :identity, [op], elementbytes)
11421142
# pushfirst!(LHS_ref.parents, iop)
11431143
elseif RHS.head === :call
11441144
f = first(RHS.args)
11451145
if f === :getindex
1146-
add_load!(ls, LHS_sym, LHS_ref, elementbytes)
1146+
add_load_getindex!(ls, LHS_sym, RHS, elementbytes)
11471147
elseif f isa Symbol && Base.sym_in(f, (:zero, :one, :typemin, :typemax))
11481148
c = gensym!(ls, f)
11491149
op = add_constant!(ls, c, ls.loopsymbols[1:position], LHS_sym, elementbytes, :numericconstant)
@@ -1160,7 +1160,7 @@ function add_operation!(
11601160
elseif RHS.head === :if
11611161
add_if!(ls, LHS_sym, RHS, elementbytes, position, LHS_ref)
11621162
elseif RHS.head === :block
1163-
add_operation!(ls, LHS, strip_op_linenumber_nodes(RHS), elementbytes, position)
1163+
add_operation!(ls, LHS_sym, strip_op_linenumber_nodes(RHS), elementbytes, position)
11641164
elseif RHS.head === :(.)
11651165
c = gensym!(ls, "getproperty")
11661166
pushpreamble!(ls, Expr(:(=), c, RHS))
@@ -1169,7 +1169,7 @@ function add_operation!(
11691169
# pushpreamble!(ls, op, c)
11701170
# op
11711171
elseif Meta.isexpr(RHS, :comparison, 5)
1172-
add_comparison!(ls, LHS, RHS, elementbytes, position)
1172+
add_comparison!(ls, LHS_sym, RHS, elementbytes, position)
11731173
else
11741174
throw(LoopError("Expression not recognized.", RHS))
11751175
end
@@ -1179,7 +1179,6 @@ function prepare_rhs_for_storage!(ls::LoopSet, RHS::Union{Symbol,Expr}, array, r
11791179
RHS isa Symbol && return add_store!(ls, RHS, array, rawindices, elementbytes)
11801180
mpref = array_reference_meta!(ls, array, rawindices, elementbytes)
11811181
cachedparents = copy(mpref.parents)
1182-
ref = mpref.mref.ref
11831182
lrhs = gensym!(ls, "RHS")
11841183
mpref.varname = lrhs
11851184
add_operation!(ls, lrhs, RHS, mpref, elementbytes, position)
@@ -1361,7 +1360,6 @@ function check_valid_reorder_dims!(ls::LoopSet)
13611360
fill!(validreorder, 0x03)
13621361
omop = offsetloadcollection(ls)
13631362
@unpack opids = omop
1364-
num_collections = length(opids)
13651363
ops = operations(ls)
13661364
for i ∈ eachindex(opids)
13671365
opidsᵢ = opids[i]
@@ -1450,7 +1448,7 @@ function fill_offset_memop_collection!(ls::LoopSet)
14501448
collectionsize += 1
14511449
end
14521450
end
1453-
for (collectionid,opidc) ∈ enumerate(opids)
1451+
for opidc ∈ opids
14541452
length(opidc) > 1 || continue
14551453
# we check if we can turn the offsets into an unroll
14561454
# we have up to `length(opidc)` loads to do, so we allocate that many "base" vectors

src/modeling/operations.jl

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -471,11 +471,22 @@ function ifelse_reduction(f::F, rsym::Symbol, op::Operation) where {F}
471471
end
472472

473473
# for f ∈ [:reduction_scalar_combine, :reduction_to_scalar, :reduce_number_of_vectors, (:reduce_to_onevecunroll,:IfElseOp),(:reduction_to_single_vector,:IfElseCollapser)]
474-
for f ∈ [:reduction_scalar_combine, :reduction_to_scalar, :reduce_number_of_vectors,:reduce_to_onevecunroll,:reduction_to_single_vector]
475-
@eval begin
476-
$f(x) = $f(reduction_instruction_class(x))
477-
$f(op::Operation)::GlobalRef = lv($f(instruction(op)))
478-
end
479-
end
474+
# for f ∈ [:reduction_scalar_combine, :reduction_to_scalar, :reduce_number_of_vectors,:reduce_to_onevecunroll,:reduction_to_single_vector]
475+
# @eval begin
476+
# $f(x) = $f(reduction_instruction_class(x))
477+
# $f(op::Operation)::GlobalRef = lv($f(instruction(op)))
478+
# end
479+
# end
480+
# No `@eval` to make the language server happy
481+
reduction_scalar_combine(x) = reduction_scalar_combine(reduction_instruction_class(x))
482+
reduction_scalar_combine(op::Operation)::GlobalRef = lv(reduction_scalar_combine(instruction(op)))
483+
reduction_to_scalar(x) = reduction_to_scalar(reduction_instruction_class(x))
484+
reduction_to_scalar(op::Operation)::GlobalRef = lv(reduction_to_scalar(instruction(op)))
485+
reduce_number_of_vectors(x) = reduce_number_of_vectors(reduction_instruction_class(x))
486+
reduce_number_of_vectors(op::Operation)::GlobalRef = lv(reduce_number_of_vectors(instruction(op)))
487+
reduce_to_onevecunroll(x) = reduce_to_onevecunroll(reduction_instruction_class(x))
488+
reduce_to_onevecunroll(op::Operation)::GlobalRef = lv(reduce_to_onevecunroll(instruction(op)))
489+
reduction_to_single_vector(x) = reduction_to_single_vector(reduction_instruction_class(x))
490+
reduction_to_single_vector(op::Operation)::GlobalRef = lv(reduction_to_single_vector(instruction(op)))
480491

481492

src/parse/add_loads.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ function add_load!(
4848
mpref = array_reference_meta!(ls, array, rawindices, elementbytes, var)
4949
add_load!(ls, mpref, elementbytes)
5050
end
51-
function load_is_constant(ls::LoopSet, mpref::ArrayReferenceMetaPosition)
51+
function load_is_constant(mpref::ArrayReferenceMetaPosition)
5252
li = mpref.mref.loopedindex
5353
inds = getindicesonly(mpref)
5454
for i ∈ eachindex(li)
@@ -62,7 +62,7 @@ end
6262
function add_load!(
6363
ls::LoopSet, mpref::ArrayReferenceMetaPosition, elementbytes::Int
6464
)
65-
if length(mpref.loopdependencies) == 0 || load_is_constant(ls, mpref)
65+
if length(mpref.loopdependencies) == 0 || load_is_constant(mpref)
6666
return add_constant!(ls, mpref, elementbytes)
6767
end
6868
op = Operation( ls, varname(mpref), elementbytes, :getindex, memload, mpref )

src/simdfunctionals/map.jl

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ function _vmap_singlethread!(
6666
i = convert(Int, start)
6767
V = VectorizationBase.pick_vector_width(promote_type(T, reduce(promote_type, map(eltype, ptrargs))))
6868
W = unwrap(V)
69-
st = VectorizationBase.static_sizeof(T)
7069
UNROLL = 4
7170
LOG2UNROLL = 2
7271
while i < vsub_nsw(N, ((W << LOG2UNROLL) - 1))
@@ -129,6 +128,7 @@ function (m::VmapClosure{NonTemporal,F,D,N,A})(p::Ptr{UInt}) where {NonTemporal,
129128
(offset, stop ) = ThreadingUtilities.load(p, Int, offset)
130129

131130
_vmap_singlethread!(m.f, dest, start, stop, Val{NonTemporal}(), args)
131+
ThreadingUtilities._atomic_store!(p, ThreadingUtilities.SPIN)
132132
NonTemporal && Threads.atomic_fence()
133133
nothing
134134
end
@@ -165,25 +165,6 @@ end
165165
vmc = VmapClosure{NonTemporal}(f, ptry, ptrargs)
166166
@cfunction($vmc, Cvoid, (Ptr{UInt},))
167167
end
168-
# @inline function _cfunc_closure(f, ptry, ptrargs, ::Val{NonTemporal}) where {NonTemporal}
169-
# vmc = VmapClosure{NonTemporal}(f, ptry, ptrargs)
170-
# @cfunction($vmc, Cvoid, (Ptr{UInt},))
171-
# end
172-
# @generated function vmap_closure(f::F, ptry::D, ptrargs::A, ::Val{NonTemporal}) where {F,D<:StridedPointer,N,A<:Tuple{Vararg{StridedPointer,N}},NonTemporal}
173-
# # fsym = get(FUNCTIONSYMBOLS, F, Symbol("##NOTFOUND##"))
174-
# # fsym === Symbol("##NOTFOUND##")
175-
# if false# iszero(sizeof(F))
176-
# quote
177-
# $(Expr(:meta,:inline))
178-
# VmapKnownClosure{$NonTemporal,$F,$D,$N,$A}()
179-
# end
180-
# else
181-
# quote
182-
# $(Expr(:meta,:inline))
183-
# _cfunc_closure(f, ptry, ptrargs, Val{$NonTemporal}())
184-
# end
185-
# end
186-
# end
187168

188169
function vmap_multithread!(
189170
f::F,
@@ -203,7 +184,6 @@ function vmap_multithread!(
203184
end
204185

205186
cfunc = vmap_closure(f, ptry, ptrargs, Val{NonTemporal}())
206-
vmc = VmapClosure{NonTemporal}(f, ptry, ptrargs)
207187
Nveciter = (N + (W-1)) >> Wshift
208188
Nd, Nr = divrem(Nveciter, nt)
209189
NdW = Nd << Wshift

src/transforms.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ function hoist_constant_memory_accesses!(ls::LoopSet)
44
hoist_stores = false
55
for op ∈ operations(ls)
66
if isload(op)
7-
length(getindicesonly(op)) == 0 && host_constant_load!(ls, op)
7+
length(getindicesonly(op)) == 0 && hoist_constant_vload!(ls, op)
88
elseif isstore(op) && iszero(length(getindicesonly(op)))
99
hoist_stores = true
1010
end
@@ -29,7 +29,7 @@ function hoist_constant_vload!(ls::LoopSet, op::Operation)
2929
end
3030

3131
function return_empty_reductinit(op::Operation, var::Symbol)
32-
for (i,opp) ∈ enumerate(parents(op))
32+
for opp ∈ parents(op)
3333
if (name(opp) === var) && (length(reduceddependencies(opp)) == 0) && (length(loopdependencies(opp)) == 0) && (length(children(opp)) == 1)
3434
return opp
3535
end

src/vectorizationbase_compat/contract_pass.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ function capture_a_muladd(ex::Expr, mod)
188188
if mod === nothing
189189
call.args[1] = f
190190
else
191-
call.args[1] = Expr(:(.), mod, QuoteNote(f))#_fast))
191+
call.args[1] = Expr(:(.), mod, QuoteNode(f))
192192
end
193193
true, call
194194
end

0 commit comments

Comments
 (0)