Skip to content

Commit f0f1309

Browse files
authored
Merge pull request #95 from timholy/teh/u1u2
A big renaming
2 parents ca1cf8a + 327fef2 commit f0f1309

File tree

8 files changed

+278
-277
lines changed

8 files changed

+278
-277
lines changed

src/condense_loopset.jl

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -208,13 +208,13 @@ end
208208
# If you change the number of arguments here, make commensurate changes
209209
# to the `insert!` locations in `setup_call_noinline`.
210210
@generated function __avx__!(
211-
::Val{UT}, ::Type{OPS}, ::Type{ARF}, ::Type{AM}, ::Type{LPSYM}, lb::LB,
211+
::Val{UNROLL}, ::Type{OPS}, ::Type{ARF}, ::Type{AM}, ::Type{LPSYM}, lb::LB,
212212
::Val{AR}, ::Val{D}, ::Val{IND}, subsetvals, arraydescript, vargs::Vararg{<:Any,N}
213-
) where {UT, OPS, ARF, AM, LPSYM, LB, N, AR, D, IND}
213+
) where {UNROLL, OPS, ARF, AM, LPSYM, LB, N, AR, D, IND}
214214
1 + 1
215215
num_vptrs = length(ARF.parameters)::Int
216216
vptrs = [gensym(:vptr) for _ ∈ 1:num_vptrs]
217-
call = Expr(:call, lv(:_avx_!), Val{UT}(), OPS, ARF, AM, LPSYM, :lb)
217+
call = Expr(:call, lv(:_avx_!), Val{UNROLL}(), OPS, ARF, AM, LPSYM, :lb)
218218
for n ∈ 1:num_vptrs
219219
push!(call.args, vptrs[n])
220220
end
@@ -245,7 +245,7 @@ end
245245
end
246246

247247
# Try to condense in type stable manner
248-
function generate_call(ls::LoopSet, IUT, debug::Bool = false)
248+
function generate_call(ls::LoopSet, inline_unroll, debug::Bool = false)
249249
operation_descriptions = Expr(:curly, :Tuple)
250250
varnames = Symbol[]
251251
for op ∈ operations(ls)
@@ -260,20 +260,20 @@ function generate_call(ls::LoopSet, IUT, debug::Bool = false)
260260
argmeta = argmeta_and_consts_description(ls, arraysymbolinds)
261261
loop_bounds = loop_boundaries(ls)
262262
loop_syms = Expr(:curly, :Tuple, map(QuoteNode, ls.loopsymbols)...)
263-
inline, U, T = IUT
263+
inline, u₁, u₂ = inline_unroll
264264
if inline | debug
265265
func = debug ? lv(:_avx_loopset_debug) : lv(:_avx_!)
266266
lbarg = debug ? Expr(:call, :typeof, loop_bounds) : loop_bounds
267267
q = Expr(
268-
:call, func, Expr(:call, Expr(:curly, :Val, (U,T))),
268+
:call, func, Expr(:call, Expr(:curly, :Val, (u₁, u₂))),
269269
operation_descriptions, arrayref_descriptions, argmeta, loop_syms, lbarg
270270
)
271271
debug && deleteat!(q.args, 2)
272272
foreach(ref -> push!(q.args, vptr(ref)), ls.refs_aliasing_syms)
273273
else# not forcing inline; calling __avx__! which calls an inlined _avx_!
274274
arraydescript = Expr(:tuple)
275275
q = Expr(
276-
:call, lv(:__avx__!), Expr(:call, Expr(:curly, :Val, (U,T))),
276+
:call, lv(:__avx__!), Expr(:call, Expr(:curly, :Val, (u₁, u₂))),
277277
operation_descriptions, arrayref_descriptions, argmeta, loop_syms, loop_bounds, arraydescript
278278
)
279279
for array ∈ ls.includedactualarrays
@@ -405,15 +405,15 @@ function setup_call_debug(ls::LoopSet)
405405
pushpreamble!(ls, generate_call(ls, (true,zero(Int8),zero(Int8)), true))
406406
ls.preamble
407407
end
408-
function setup_call(ls::LoopSet, inline::Bool = true, U = zero(Int8), T = zero(Int8))
408+
function setup_call(ls::LoopSet, inline::Bool = true, u₁ = zero(Int8), u₂ = zero(Int8))
409409
# We outline/inline at the macro level by creating/not creating an anonymous function.
410410
# The old API instead was based on inlining or not inline the generated function, but
411411
# the generated function must be inlined into the initial loop preamble for performance reasons.
412412
# Creating an anonymous function and calling it also achieves the outlining, while still
413413
# inlining the generated function into the loop preamble.
414414
if inline
415-
setup_call_inline(ls, U, T)
415+
setup_call_inline(ls, u₁, u₂)
416416
else
417-
setup_call_noinline(ls, U, T)
417+
setup_call_noinline(ls, u₁, u₂)
418418
end
419419
end

src/constructors.jl

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,12 @@ One can find some circumstances where `inline=true` is faster, and other circums
111111
where `inline=false` is faster, so the best setting may require experimentation.
112112
113113
`unroll` is an integer that specifies the loop unrolling factor, or a
114-
tuple `(4, 2)` signaling that the generated code should unroll more than
115-
one loop.
114+
tuple `(u₁, u₂) = (4, 2)` signaling that the generated code should unroll more than
115+
one loop. `u₁` is the unrolling factor for the first unrolled loop and `u₂` for the next (if present),
116+
but these factors apply to the loop ordering and unrolling that will be chosen by LoopVectorization,
117+
*not* the order in `body`.
118+
`uᵢ=0` (the default) indicates that LoopVectorization should pick its own value,
119+
and `uᵢ=-1` disables unrolling for the corresponding loop.
116120
"""
117121
macro avx(q)
118122
q = macroexpand(__module__, q)
@@ -133,50 +137,50 @@ function check_unroll(arg)
133137
a1 = (arg.args[1])::Symbol
134138
a1 === :unroll || return nothing
135139
tup = arg.args[2]
136-
T = Int8(-1)
140+
u₂ = Int8(-1)
137141
if tup isa Integer
138-
U = convert(Int8, tup)
142+
u₁ = convert(Int8, tup)
139143
elseif isa(tup, Expr)
140144
if length(tup.args) == 1
141-
U = convert(Int8, tup.args[1])
145+
u₁ = convert(Int8, tup.args[1])
142146
elseif length(tup.args) == 2
143-
U = convert(Int8, tup.args[1])
144-
T = convert(Int8, tup.args[2])
147+
u₁ = convert(Int8, tup.args[1])
148+
u₂ = convert(Int8, tup.args[2])
145149
else
146150
return nothing
147151
end
148152
else
149153
return nothing
150154
end
151-
U, T
155+
u₁, u₂
152156
end
153-
function check_macro_kwarg(arg, inline::Bool = true, U::Int8 = zero(Int8), T::Int8 = zero(Int8))
157+
function check_macro_kwarg(arg, inline::Bool = true, u₁::Int8 = zero(Int8), u₂::Int8 = zero(Int8))
154158
@assert arg.head === :(=)
155159
i = check_inline(arg)
156160
if i !== nothing
157161
inline = i
158162
else
159163
u = check_unroll(arg)
160164
if u !== nothing
161-
U, T = u
165+
u₁, u₂ = u
162166
end
163167
end
164-
inline, U, T
168+
inline, u₁, u₂
165169
end
166170
macro avx(arg, q)
167171
@assert q.head === :for
168172
@assert arg.head === :(=)
169173
q = macroexpand(__module__, q)
170-
inline, U, T = check_macro_kwarg(arg)
174+
inline, u₁, u₂ = check_macro_kwarg(arg)
171175
ls = LoopSet(q, __module__)
172-
esc(setup_call(ls, inline, U, T))
176+
esc(setup_call(ls, inline, u₁, u₂))
173177
end
174178
macro avx(arg1, arg2, q)
175179
@assert q.head === :for
176180
q = macroexpand(__module__, q)
177-
inline, U, T = check_macro_kwarg(arg1)
178-
inline, U, T = check_macro_kwarg(arg2, inline, U, T)
179-
esc(setup_call(LoopSet(q, __module__), inline, U, T))
181+
inline, u₁, u₂ = check_macro_kwarg(arg1)
182+
inline, u₁, u₂ = check_macro_kwarg(arg2, inline, u₁, u₂)
183+
esc(setup_call(LoopSet(q, __module__), inline, u₁, u₂))
180184
end
181185

182186

@@ -194,8 +198,8 @@ end
194198
macro _avx(arg, q)
195199
@assert q.head === :for
196200
q = macroexpand(__module__, q)
197-
inline, U, T = check_macro_kwarg(arg)
198-
esc(lower(LoopSet(q, __module__), U, T))
201+
inline, u₁, u₂ = check_macro_kwarg(arg)
202+
esc(lower(LoopSet(q, __module__), u₁, u₂))
199203
end
200204

201205
macro avx_debug(q)

0 commit comments

Comments
 (0)