Skip to content

Commit 609237f

Browse files
committed
Track the tuple returned by `grouped_strided_pointer` so that its preserved objects can be passed to `GC.@preserve`, and use the comparison operators that are now available for `AbstractStridedPointer`s instead of `pointerforcomparison`.
1 parent de9de65 commit 609237f

File tree

7 files changed

+32
-16
lines changed

7 files changed

+32
-16
lines changed

src/LoopVectorization.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ using VectorizationBase: register_size, register_count, cache_linesize, cache_si
66
mask, pick_vector_width, MM, AbstractMask, data, grouped_strided_pointer,
77
maybestaticlength, maybestaticsize, staticm1, staticp1, staticmul, vzero,
88
maybestaticrange, offsetprecalc, lazymul,
9-
maybestaticfirst, maybestaticlast, scalar_less, scalar_greaterequal, gep, gesp, llvmptr, NativeTypes,
9+
maybestaticfirst, maybestaticlast, scalar_less, scalar_greaterequal, gep, gesp, NativeTypes, #llvmptr,
1010
vfmadd, vfmsub, vfnmadd, vfnmsub, vfmadd_fast, vfmsub_fast, vfnmadd_fast, vfnmsub_fast, vfmadd231, vfmsub231, vfnmadd231, vfnmsub231,
1111
vfma_fast, vmuladd_fast, vdiv_fast, vadd_fast, vsub_fast, vmul_fast,
1212
relu, stridedpointer, StridedPointer, StridedBitPointer, AbstractStridedPointer, _vload, _vstore!,

src/broadcast.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,8 @@ end
207207
Nnew += 1
208208
end
209209
typ = Expr(:curly, :StridedPointer, T, Nnew, Cnew, Bnew, Rtup)
210-
ptr = Expr(:call, typ, Expr(:call, lv(:llvmptr), :p), strd, offsets)
210+
# ptr = Expr(:call, typ, Expr(:call, lv(:llvmptr), :p), strd, offsets)
211+
ptr = Expr(:call, typ, Expr(:call, lv(:pointer), :p), strd, offsets)
211212
Expr(:block, Expr(:meta,:inline), :(strd = p.strd), :(offs = p.offsets), ptr)
212213
end
213214
# @generated function VectorizationBase.stridedpointer(A::LowDimArray{D,T,N}) where {D,T,N}

src/codegen/loopstartstopmanager.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ end
6565
newB = C > 0 ? (C == minrank ? B : 0) : B #TODO: confirm correctness
6666
quote
6767
$(Expr(:meta,:inline))
68-
VectorizationBase.StridedPointer{$T,1,$newC,$newB,$(R[minrank],)}($(lv(llvmptr))(sptr), (sptr.strd[$minrank],), (Zero(),))
68+
# VectorizationBase.StridedPointer{$T,1,$newC,$newB,$(R[minrank],)}($(lv(llvmptr))(sptr), (sptr.strd[$minrank],), (Zero(),))
69+
VectorizationBase.StridedPointer{$T,1,$newC,$newB,$(R[minrank],)}(pointer(sptr), (sptr.strd[$minrank],), (Zero(),))
6970
end
7071
end
7172
set_first_stride(x) = x # cross fingers that this works

src/codegen/lowering.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -655,7 +655,7 @@ function init_remblock(unrolledloop::Loop, lssm::LoopStartStopManager, n::Int)#u
655655
else
656656
termar = lssm.incrementedptrs[n][termind]
657657
ptr = vptr(termar)
658-
condition = Expr(:call, :<, vptr(ptr), maxsym(ptr, 0))
658+
condition = Expr(:call, :<, ptr, maxsym(ptr, 0))
659659
end
660660
Expr(:if, condition)
661661
end

src/condense_loopset.jl

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ function add_grouped_strided_pointer!(extra_args::Expr, ls::LoopSet)
230230
gsp = Expr(:call, lv(:grouped_strided_pointer))
231231
tgarrays = Expr(:tuple)
232232
i = 0
233+
preserve_assignment = Expr(:tuple); preserve = Symbol[];
233234
for ref ∈ ls.refs_aliasing_syms
234235
i += 1
235236
found = false
@@ -242,6 +243,9 @@ function add_grouped_strided_pointer!(extra_args::Expr, ls::LoopSet)
242243
break
243244
end
244245
found || push!(tgarrays.args, vptr(ref))
246+
pres = gensym!(ls, "#preserve#")
247+
push!(preserve_assignment.args, pres)
248+
push!(preserve, pres)
245249
end
246250
push!(gsp.args, tgarrays)
247251
matcheddims = Expr(:tuple)
@@ -255,8 +259,10 @@ function add_grouped_strided_pointer!(extra_args::Expr, ls::LoopSet)
255259
length(t.args) > 1 && push!(matcheddims.args, t)
256260
end
257261
push!(gsp.args, val(matcheddims))
258-
push!(extra_args.args, gsp)
259-
nothing
262+
gsps = gensym!(ls, "#grouped#strided#pointer#")
263+
push!(extra_args.args, gsps)
264+
pushpreamble!(ls, Expr(:(=), Expr(:tuple, gsps, preserve_assignment), gsp))
265+
preserve
260266
end
261267

262268
# first_cache() = ifelse(gt(num_cache_levels(), StaticInt{2}()), StaticInt{2}(), StaticInt{1}())
@@ -309,7 +315,7 @@ function generate_call(ls::LoopSet, (inline,u₁,u₂)::Tuple{Bool,Int8,Int8}, t
309315
vargs_as_tuple = true#!debug
310316
vargs_as_tuple || push!(q.args, lbarg)
311317
extra_args = vargs_as_tuple ? Expr(:tuple) : q
312-
add_grouped_strided_pointer!(extra_args, ls)
318+
preserve = add_grouped_strided_pointer!(extra_args, ls)
313319
for is ∈ ls.preamble_symsym
314320
push!(extra_args.args, last(is))
315321
end
@@ -320,7 +326,7 @@ function generate_call(ls::LoopSet, (inline,u₁,u₂)::Tuple{Bool,Int8,Int8}, t
320326
vargs_as_tuple && push!(q.args, Expr(:tuple, lbarg, extra_args))
321327
vecwidthdefq = Expr(:block)
322328
define_eltype_vec_width!(vecwidthdefq, ls, nothing)
323-
Expr(:block, vecwidthdefq, q)
329+
Expr(:block, vecwidthdefq, q), preserve
324330
end
325331

326332

@@ -383,16 +389,22 @@ make_crashy(q) = Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__,
383389
@inline vecmemaybe(x::VectorizationBase._Vec) = Vec(x)
384390
@inline vecmemaybe(x::Tuple) = VectorizationBase.VecUnroll(x)
385391

392+
function gc_preserve(call::Expr, preserve::Vector{Symbol})
393+
q = Expr(:gc_preserve, call)
394+
append!(q.args, preserve)
395+
q
396+
end
397+
386398
function setup_call_inline(ls::LoopSet, inline::Bool, u₁::Int8, u₂::Int8, thread::Int)
387-
call = generate_call(ls, (inline,u₁,u₂), thread % UInt, false)
399+
call, preserve = generate_call(ls, (inline,u₁,u₂), thread % UInt, false)
388400
if iszero(length(ls.outer_reductions))
389-
q = Expr(:block,gc_preserve(ls, call))
390-
append!(ls.preamble.args, q.args)
401+
pushpreamble!(ls, gc_preserve(call, preserve))
402+
push!(ls.preamble.args, nothing)
391403
return ls.preamble
392404
end
393405
retv = loopset_return_value(ls, Val(false))
394406
outer_reducts = Expr(:local)
395-
q = Expr(:block,gc_preserve(ls, Expr(:(=), retv, call)))
407+
q = Expr(:block,gc_preserve(Expr(:(=), retv, call), preserve))
396408
for or ∈ ls.outer_reductions
397409
op = ls.operations[or]
398410
var = name(op)
@@ -409,7 +421,7 @@ function setup_call_inline(ls::LoopSet, inline::Bool, u₁::Int8, u₂::Int8, th
409421
end
410422
function setup_call_debug(ls::LoopSet)
411423
# avx_loopset(instr, ops, arf, AM, LB, vargs)
412-
pushpreamble!(ls, generate_call(ls, (false,zero(Int8),zero(Int8)), zero(UInt), true))
424+
pushpreamble!(ls, first(generate_call(ls, (false,zero(Int8),zero(Int8)), zero(UInt), true)))
413425
Expr(:block, ls.prepreamble, ls.preamble)
414426
end
415427
function setup_call(

src/reconstruct_loopset.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ function rank_to_sortperm(R::NTuple{N,Int}) where {N}
171171
end
172172
sp
173173
end
174+
# sptrs::Expr, ls::LoopSet, ar::ArrayReferenceMeta, @nospecialize(_::Type{Core.LLVMPtr{T,0}}),
174175
function add_mref!(
175176
sptrs::Expr, ls::LoopSet, ar::ArrayReferenceMeta, @nospecialize(_::Type{Ptr{T}}),
176177
C::Int, B::Int, R::NTuple{N,Int}, name::Symbol
@@ -425,6 +426,7 @@ end
425426
# elbytes(::VectorizationBase.AbstractPointer{T}) where {T} = sizeof(T)::Int
426427
# typeeltype(::Type{P}) where {T,P<:VectorizationBase.AbstractStridedPointer{T}} = T
427428
typeeltype(::Type{Ptr{T}}) where {T} = T
429+
# typeeltype(::Type{Core.LLVMPtr{T,0}}) where {T} = T
428430
typeeltype(::Type{VectorizationBase.FastRange{T,F,S}}) where {T,F,S} = T
429431
typeeltype(::Type{T}) where {T<:Real} = T
430432
# typeeltype(::Any) = Int8
@@ -568,7 +570,7 @@ Execute an `@avx` block. The block's code is represented via the arguments:
568570
@generated function _avx_!(
569571
::Val{UNROLL}, ::Val{OPS}, ::Val{ARF}, ::Val{AM}, ::Val{LPSYM}, var"#lv#tuple#args#"::Tuple{LB,V}
570572
) where {UNROLL, OPS, ARF, AM, LPSYM, LB, V}
571-
# 1 + 1 # Irrelevant line you can comment out/in to force recompilation...
573+
1 + 1 # Irrelevant line you can comment out/in to force recompilation...
572574
ls = _avx_loopset(OPS, ARF, AM, LPSYM, LB.parameters, V.parameters, UNROLL)
573575
# return @show avx_body(ls, UNROLL)
574576
if last(UNROLL) > 1

src/simdfunctionals/filter.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ function vfilter!(f::F, x::Vector{T}, y::AbstractArray{T}) where {F,T <: NativeT
88
st = VectorizationBase.static_sizeof(T)
99
zero_index = MM(W, Static(0), st)
1010
GC.@preserve x y begin
11-
ptr_x = llvmptr(x)
12-
ptr_y = llvmptr(y)
11+
# ptr_x = llvmptr(x); ptr_y = llvmptr(y)
12+
ptr_x = pointer(x); ptr_y = pointer(y)
1313
for _ ∈ 1:Nrep
1414
vy = VectorizationBase.__vload(ptr_y, zero_index, False(), register_size())
1515
mask = f(vy)

0 commit comments

Comments
 (0)