Skip to content

Commit a232255

Browse files
committed
Fix cost 0 modeling resulting in bad splitting.
1 parent 7852f60 commit a232255

File tree

8 files changed

+8
-59
lines changed

8 files changed

+8
-59
lines changed

src/codegen/loopstartstopmanager.jl

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ function indices_calculated_by_pointer_offsets(ls::LoopSet, ar::ArrayReferenceMe
8585
gespinds = Expr(:tuple)
8686
out = Vector{Bool}(undef, length(indices))
8787
li = ar.loopedindex
88-
# @show ls.vector_width
8988
for i ∈ eachindex(li)
9089
ii = i + offset
9190
ind = indices[ii]
@@ -249,7 +248,6 @@ function cse_constant_offsets!(
249248
ls::LoopSet, allarrayrefs::Vector{ArrayReferenceMeta}, allarrayrefsind::Int, name_to_array_map::Vector{Vector{Int}}, arrayref_to_name_op_collection::Vector{Vector{Tuple{Int,Int,Int}}}
250249
)
251250
ar = allarrayrefs[allarrayrefsind]
252-
# @show ar
253251
# vptrar = vptr(ar)
254252
arrayref_to_name_op = arrayref_to_name_op_collection[allarrayrefsind]
255253
array_refs_with_same_name = name_to_array_map[first(first(arrayref_to_name_op))]
@@ -542,7 +540,6 @@ function use_loop_induct_var!(
542540
offsetprecalc_descript = Expr(:tuple)
543541
use_offsetprecalc = false
544542
vptrar = vptr(ar)
545-
# @show ar
546543
Wisz = false#ls.vector_width == 0
547544
for (i,isli) ∈ enumerate(li)
548545
ii = i + offset
@@ -605,7 +602,6 @@ function add_loop_start_stop_manager!(ls::LoopSet)
605602
use_livs[i] = use_loop_induct_var!(ls, q, arrayrefs[i], arrayrefs, i, includeinlet[i])
606603
#name_to_array_map[first(first(unique_to_name_and_op_map[i]))], unique_to_name_and_op_map)
607604
end
608-
# @show use_livs,
609605
# loops, sorted from outer-most to inner-most
610606
looporder = reversenames(ls)
611607
# For each loop, we need to choose an induction variable
@@ -633,7 +629,6 @@ function add_loop_start_stop_manager!(ls::LoopSet)
633629
terminators[nloops+1-i] = if (loopsym ∈ loopinductvars) || (any(r -> any(isequal(-i), r), use_livs)) || iszero(length(loopstartᵢ))
634630
0
635631
else
636-
# @show i, loopsym loopdependencies.(operations(ls)) operations(ls)
637632
# @assert !iszero(length(loopstartᵢ))
638633
last(ric[argmin(first.(ric))]) # index corresponds to array ref's position in loopstart
639634
end
@@ -703,9 +698,7 @@ function pointermax_index(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int,
703698
loopsym = names(ls)[n]
704699
index = Expr(:tuple);
705700
ind = 0
706-
# @show ar loopsym names(ls) n
707701
for (j,i) ∈ enumerate(getindicesonly(ar))
708-
# @show j,i
709702
if i === loopsym
710703
ind = j
711704
if iszero(sub)
@@ -773,7 +766,6 @@ function append_pointer_maxes!(
773766
push!(loopstart.args, Expr(:(=), maxsym(vptr_ar, sub), pointermax(ls, ar, n, sub, isvectorized, stopindicator, incr)))
774767
end
775768
else
776-
# @show n, getloop(ls, n) ar
777769
index, ind = pointermax_index(ls, ar, n, submax, isvectorized, stopindicator, incr)
778770
pointercompbase = maxsym(vptr_ar, submax)
779771
push!(loopstart.args, Expr(:(=), pointercompbase, Expr(:call, lv(:gesp), vptr_ar, index)))
@@ -839,7 +831,6 @@ function startloop(ls::LoopSet, us::UnrollSpecification, n::Int, submax = maxunr
839831
push!(loopstart.args, startloop(getloop(ls, loopsym), loopsym))
840832
else
841833
isvectorized = n == vloopnum
842-
# @show ptrdefs
843834
append_pointer_maxes!(loopstart, ls, ptrdefs[termind], n, submax, isvectorized)
844835
end
845836
loopstart
@@ -891,7 +882,6 @@ function terminatecondition(ls::LoopSet, us::UnrollSpecification, n::Int, inclma
891882

892883
termar = lssm.incrementedptrs[n][termind]
893884
ptr = vptr(termar)
894-
# @show UF, isvectorized(us, n)
895885
if inclmask && isvectorized(us, n)
896886
Expr(:call, :<, ptr, maxsym(ptr, 0))
897887
else

src/codegen/lower_constant.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ function getparentsreductzero(ls::LoopSet, op::Operation)::Float64
9292
return reduction_instruction_class(instruction(opp))
9393
end
9494
end
95-
@show identifier(op)
9695
throw("Reduct zero not found for operation $(name(op)).")
9796
end
9897
vecbasefunc(f) = Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:VectorizationBase)), QuoteNode(f))

src/codegen/split_loops.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ function lower_and_split_loops(ls::LoopSet, inline::Int)
102102
ls_2 = split_loopset(ls, remaining_ops)
103103
order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, cost_2, shouldinline_2 = choose_order_cost(ls_2)
104104
# U_1 = T_1 = U_2 = T_2 = 2
105-
# @show cost_1 + cost_2 ≤ cost_fused, cost_1, cost_2, cost_fused
105+
#@show cost_1 + cost_2 ≤ cost_fused, cost_1, cost_2, cost_fused
106106
if cost_1 + cost_2 ≤ cost_fused
107107
ls_2_lowered = if length(remaining_ops) > 1
108108
inline = iszero(inline) ? (shouldinline_1 % Int) : inline

src/condense_loopset.jl

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,6 @@ function shifted_loopset(ls::LoopSet, loopsyms::Vector{Symbol})
105105
end
106106
ld
107107
end
108-
# loopdeps_uint(ls::LoopSet, op::Operation) = (@show op; shifted_loopset(ls, loopdependencies(op)))
109108
loopdeps_uint(ls::LoopSet, op::Operation) = shifted_loopset(ls, loopdependencies(op))
110109
reduceddeps_uint(ls::LoopSet, op::Operation) = shifted_loopset(ls, reduceddependencies(op))
111110
childdeps_uint(ls::LoopSet, op::Operation) = shifted_loopset(ls, reducedchildren(op))
@@ -356,7 +355,6 @@ end
356355
# 2) decide whether to gesp that loopstart inside `add_grouped_strided_pointer`
357356
function add_grouped_strided_pointer!(extra_args::Expr, ls::LoopSet)
358357
allarrayrefs, name_to_array_map, unique_to_name_and_op_map = uniquearrayrefs_csesummary(ls)
359-
# @show allarrayrefs
360358
gsp = Expr(:call, lv(:grouped_strided_pointer))
361359
tgarrays = Expr(:tuple)
362360
# refs_to_gesp = ArrayReferenceMeta[]
@@ -371,7 +369,6 @@ function add_grouped_strided_pointer!(extra_args::Expr, ls::LoopSet)
371369
# ar = allarrayrefs[j]
372370
# gespinds = cse_constant_offsets!(ls, allarrayrefs, j, array_refs_with_same_name, arrayref_to_name_op_collection)
373371
# end
374-
# @show refs_aliasing_syms
375372
for (j,ref) ∈ enumerate(refs_aliasing_syms)
376373
vpref = vptr(ref)
377374
duplicate = false
@@ -381,7 +378,6 @@ function add_grouped_strided_pointer!(extra_args::Expr, ls::LoopSet)
381378
break
382379
end
383380
end
384-
# @show duplicate
385381
duplicate && continue
386382
duplicate_map[j] = (i += 1)
387383
found = false

src/modeling/determinestrategy.jl

Lines changed: 5 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ function unitstride(ls::LoopSet, op::Operation, s::Symbol)
4444
li = op.ref.loopedindex
4545
# The first index is allowed to be indexed by `s`
4646
fi = first(inds)
47-
# @show (fi === DISCONTIGUOUS), (fi === CONSTANTZEROINDEX), (first(getstrides(op)) ≠ 1), unitstep(getloop(ls,s))
4847
if ((fi === DISCONTIGUOUS) | (fi === CONSTANTZEROINDEX)) || (first(getstrides(op)) ≠ 1) || !unitstep(getloop(ls,s))
4948
return false
5049
# elseif !first(li)
@@ -77,27 +76,25 @@ function cost(ls::LoopSet, op::Operation, (u₁,u₂)::Tuple{Symbol,Symbol}, vlo
7776
end
7877
elseif iscompute(op) &&
7978
Base.sym_in(instruction(op).instr, (:(+), :(-), :add_fast, :sub_fast)) &&
80-
all(opp -> (isloopvalue(opp) | isconstant(opp)), parents(op))
79+
all(opp -> (isloopvalue(opp)), parents(op))
80+
# all(opp -> (isloopvalue(opp) | isconstant(opp)), parents(op))
8181
return 0.0, 0, 0.0
8282
end
8383
opisvectorized = isvectorized(op)
8484
srt, sl, srp = opisvectorized ? vector_cost(instr, Wshift, size_T) : scalar_cost(instr)
8585
if accesses_memory(op)
8686
# either vbroadcast/reductionstore, vmov(a/u)pd, or gather/scatter
8787
if opisvectorized
88-
# @show unitstride(ls,op,vloopsym), srt,sl,srp
8988
if !unitstride(ls, op, vloopsym)# || !isdense(op) # need gather/scatter
9089
indices = getindices(op)
9190
contigind = first(indices)
92-
# @show rejectinterleave(op) op
9391
shifter = max(2,Wshift)
9492
if rejectinterleave(op)
9593
offset = 0.0 # gather/scatter, alignment doesn't matter
9694
else
9795
shifter -= 1
9896
offset = 0.5reg_size(ls) / cache_lnsze(ls)
9997
end
100-
# @show shifter,offset, Wshift
10198
if shifter > 1 &&
10299
(!rejectcurly(op) && (((contigind === CONSTANTZEROINDEX) && ((length(indices) > 1) && (indices[2] === u₁) || (indices[2] === u₂))) ||
103100
((u₁ === contigind) | (u₂ === contigind))))
@@ -118,7 +115,6 @@ function cost(ls::LoopSet, op::Operation, (u₁,u₂)::Tuple{Symbol,Symbol}, vlo
118115
# this feature is common to all of them.
119116
srt += 0.5reg_size(ls) / cache_lnsze(ls)
120117
end
121-
# @show srt,sl,srp
122118
elseif isstore(op) # broadcast or reductionstore; if store we want to penalize reduction
123119
srt *= 3
124120
sl *= 3
@@ -184,7 +180,7 @@ function evaluate_cost_unroll(
184180
included_vars[id] && continue
185181
# it must also be a subset of defined symbols
186182
loopdependencies(op) ⊆ nested_loop_syms || continue
187-
# hasintersection(reduceddependencies(op), nested_loop_syms) && return Inf
183+
# hasintersection(reduceddependencies(op), nested_loop_syms) && return Inf
188184
rd = reduceddependencies(op)
189185
hasintersection(rd, @view(nested_loop_syms[1:end-length(rd)])) && return Inf
190186
if isstore(op) #TODO: DRY (this is repeated in evaluate_cost_tile)
@@ -194,7 +190,6 @@ function evaluate_cost_unroll(
194190
end
195191
end
196192
included_vars[id] = true
197-
# @show op, cost(ls, op, vloopsym, Wshift, size_T)
198193
# TODO: use actual unrolls here?
199194
c = first(cost(ls, op, (Symbol(""),Symbol("")), vloopsym, Wshift, size_T))
200195
total_cost += iter * c
@@ -213,16 +208,13 @@ function depchain_cost!(
213208
for opp ∈ parents(op)
214209
skip[identifier(opp)] && continue
215210
rt, sl = depchain_cost!(ls, skip, opp, unrolled, vloopsym, Wshift, size_T, rt, sl)
216-
# @show rt,sl, opp
217211
end
218212
# Basically assuming memory and compute don't conflict, but everything else does
219213
# Ie, ignoring the fact that integer and floating point operations likely don't either
220214
if iscompute(op)
221215
rtᵢ, slᵢ = cost(ls, op, (unrolled,Symbol("")), vloopsym, Wshift, size_T)
222-
# @show rtᵢ, slᵢ, op
223216
rt += rtᵢ; sl += slᵢ
224217
end
225-
# @show rt, sl
226218
rt, sl
227219
end
228220
function parentsnotreduction(op::Operation)
@@ -280,7 +272,6 @@ function unroll_no_reductions(ls, order, vloopsym)
280272
max(1, min(4, round(Int, 2compute_rt / load_rt)))
281273
end
282274
# u = min(u, max(1, (reg_count(ls) ÷ max(1,round(Int,rp)))))
283-
# @show u
284275
# commented out here is to decide to align loops
285276
# if memory_rt > compute_rt && isone(u) && (length(order) > 1) && (last(order) === vloopsym) && length(getloop(ls, last(order))) > 8W
286277
# ls.align_loops[] = findfirst(operations(ls)) do op
@@ -333,7 +324,6 @@ function determine_unroll_factor(
333324
load_recip_throughput,
334325
store_recip_throughput
335326
)
336-
# @show recip_throughput, latency
337327
recip_throughput, latency
338328
end
339329
function count_reductions(ls::LoopSet)
@@ -393,9 +383,9 @@ function determine_unroll_factor(ls::LoopSet, order::Vector{Symbol}, vloopsym::S
393383
end
394384
end
395385
# min(8, roundpow2(max(1, round(Int, latency / (rt * num_reductions) ) ))), best_unrolled
396-
UF = min(8, VectorizationBase.nextpow2(max(1, round(Int, latency / (rt * num_reductions) ) )))
386+
UF = VectorizationBase.nextpow2(round(Int, clamp(latency / (rt * num_reductions), 1.0, 8.0)))
397387
if UF == 1 && num_reductions > 1
398-
UF = min(8, VectorizationBase.nextpow2(max(1, round(Int, latency / (rt * cld(num_reductions, 2)) ) )))
388+
UF = VectorizationBase.nextpow2(round(Int, clamp(latency / (rt * cld(num_reductions, 2)), 1.0, 8.0)))
399389
end
400390
if best_unrolled === vloopsym
401391
UF = demote_unroll_factor(ls, UF, vloopsym)
@@ -406,8 +396,6 @@ end
406396
function unroll_cost(X, u₁, u₂, u₁L, u₂L)
407397
u₂factor = (num_iterations(u₂L, u₂)/u₂L)
408398
u₁factor = (num_iterations(u₁L, u₁)/u₁L)
409-
# @show num_iterations(u₂L, u₂)/u₂L, u₂, u₂L
410-
# @show num_iterations(u₁L, u₁)/u₁L, u₁, u₁L
411399
# X[1]*u₂factor*u₁factor + X[4] + X[2] * u₂factor + X[3] * u₁factor
412400
X[1] + X[2] * u₂factor + X[3] * u₁factor + X[4] * u₁factor * u₂factor
413401
end
@@ -433,8 +421,6 @@ function solve_unroll_iter(X, R, u₁L, u₂L, u₁range, u₂range)
433421
for u₂temp ∈ u₂range
434422
RR ≥ u₁temp*u₂temp*R₁ + u₁temp*R₂ + u₂temp*R₃ || continue
435423
tempcost = unroll_cost(X, u₁temp, u₂temp, u₁L, u₂L)
436-
# @show u₁temp, u₂temp, tempcost
437-
# @show u₁temp*u₂temp*R₁ + u₁temp*R₂ + u₂temp*R₃
438424
if tempcost ≤ bestcost
439425
bestcost = tempcost
440426
u₁best, u₂best = u₁temp, u₂temp
@@ -455,10 +441,8 @@ function solve_unroll_lagrange(X, R, u₁L, u₂L, u₁step::Int, u₂step::Int,
455441
c = X₃*RR^2
456442
discriminant = b^2 - 4a*c
457443
discriminant < 0 && return -1,-1,Inf
458-
# @show R₁, R₂, R₃, R₄
459444
u₁float = max((sqrt(discriminant) + b) / (-2a), float(u₁step)) # must be at least 1
460445
u₂float = (RR - u₁float*R₂)/(u₁float*R₁)
461-
# @show u₁float, u₂float
462446
if !(isfinite(u₂float) & isfinite(u₁float)) # brute force
463447
u₁low = u₂low = 1
464448
u₁high = iszero(X₂) ? 2 : (atleast32registers ? 8 : 6)
@@ -611,7 +595,6 @@ function solve_unroll(
611595
else
612596
u₂Lf = Float64(u₂L)
613597
end
614-
# @show u₁Lf, u₂Lf, u₁L, length(u₁loop)
615598
u₁, u₂, cost = solve_unroll(cost_vec, reg_pressure, maxu₁, maxu₂, u₁Lf, u₂Lf, u₁step, u₂step, atleast32registers)
616599
# heuristic to more evenly divide small numbers of iterations
617600
if isstaticloop(u₂loop)
@@ -635,7 +618,6 @@ function loopdepindices(ls::LoopSet, op::Operation)
635618
isdiscontig = first(loopdeps) === DISCONTIGUOUS
636619
# isdiscontig = isdiscontiguous(op.ref)
637620
loopedindex = op.ref.loopedindex
638-
# @show loopdeps
639621
if !isdiscontig && all(loopedindex) && !(any(==(CONSTANTZEROINDEX), loopdeps))
640622
return loopdeps
641623
end
@@ -654,7 +636,6 @@ function loopdepindices(ls::LoopSet, op::Operation)
654636
end
655637
function stride_penalty(ls::LoopSet, op::Operation, order::Vector{Symbol}, loopfreqs)
656638
loopdeps = loopdepindices(ls, op)
657-
# @show op loopdeps
658639
opstrides = Vector{Int}(undef, length(loopdeps))
659640
# very minor stride assumption here, because we don't really want to base optimization decisions on it...
660641
opstrides[1] = 1.0 + (first(loopdependencies(op.ref)) === DISCONTIGUOUS) + (first(loopdependencies(op.ref)) === CONSTANTZEROINDEX)
@@ -815,10 +796,8 @@ function load_elimination_cost_factor!(
815796
cost_vec, reg_pressure, choose_to_inline, ls::LoopSet, op::Operation, iters, unrollsyms::UnrollSymbols, Wshift, size_T
816797
)
817798
@unpack u₁loopsym, u₂loopsym, vloopsym = unrollsyms
818-
# @show isoptranslation(ls, op, unrollsyms)
819799
if !iszero(first(isoptranslation(ls, op, unrollsyms)))
820800
rt, lat, rp = cost(ls, op, (u₁loopsym, u₂loopsym), vloopsym, Wshift, size_T)
821-
# @show rt
822801
rto = rt
823802
rt *= iters
824803
# rt *= factor1; rp *= factor2;
@@ -1086,7 +1065,6 @@ function evaluate_cost_tile!(
10861065
if isstore(op) & (!u₁reducesrt) & (!u₂reducesrt)
10871066
irreducible_storecosts += rt
10881067
end
1089-
# iiter = convert(Int, iters[id]); @show u₁reducesrt, u₂reducesrt, op, rt, rto, rp, iiter
10901068
update_cost_vec!(cost_vec, rt, u₁reducesrt, u₂reducesrt)
10911069
update_reg_pres!(reg_pressure, rp, u₁reducesrp, u₂reducesrp)
10921070
# update_costs!(reg_pressure, rp, u₁reducesrp, u₂reducesrp)
@@ -1104,7 +1082,6 @@ function evaluate_cost_tile!(
11041082
else
11051083
0
11061084
end
1107-
# @show (irreducible_storecosts / sum(cost_vec))
11081085
if (irreducible_storecosts / sum(cost_vec) ≥ 0.5) && !any(op -> loadintostore(ls, op), operations(ls))
11091086
u₁, u₂ = if visbit
11101087
vecsforbyte = 8 ÷ ls.vector_width

src/modeling/graphs.jl

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,6 @@ staticmulincr(ptr, incr) = Expr(:call, lv(:staticmul), Expr(:call, :eltype, ptr)
204204
@inline cmpend(i::Int, r::CloseOpen) = i < getfield(r,:upper)
205205
@inline cmpend(i::Int, r::AbstractUnitRange) = i ≤ last(r)
206206
@inline cmpend(i::Int, r::AbstractRange) = i ≤ last(r)
207-
# @inline cmpend(i::Int, r::AbstractRange) = @show i last(r) i ≤ last(r)
208207
# @inline cmpend(i::Int, r::AbstractRange) = i ≤ vsub_fast(last(r), step(r))
209208

210209
@inline vcmpend(i::Int, r::CloseOpen, ::StaticInt{W}) where {W} = i ≤ vsub_fast(getfield(r,:upper), W)
@@ -213,7 +212,6 @@ staticmulincr(ptr, incr) = Expr(:call, lv(:staticmul), Expr(:call, :eltype, ptr)
213212
# i += 4*3 # i = 12
214213
@inline vcmpend(i::Int, r::AbstractRange, ::StaticInt{W}) where {W} = i ≤ vsub_fast(last(r), vsub_fast(W*step(r), 1))
215214
# @inline vcmpend(i::Int, r::AbstractRange, ::StaticInt{W}) where {W} = i ≤ vsub_fast(last(r), W*step(r))
216-
# @inline vcmpend(i::Int, r::AbstractRange, ::StaticInt{W}) where {W} = @show i m = vsub_fast(last(r), W*step(r)) i ≤ m
217215
# @inline vcmpend(i::Int, r::AbstractRange, ::StaticInt{W}) where {W} = i ≤ vsub_fast(last(r), W)
218216

219217
function staticloopexpr(loop::Loop)
@@ -664,20 +662,12 @@ end
664662
names(ls::LoopSet) = ls.loop_order.loopnames
665663
reversenames(ls::LoopSet) = ls.loop_order.bestorder
666664
function getloopid_or_nothing(ls::LoopSet, s::Symbol)
667-
# @show ls.loopsymbols, s
668665
for (loopnum,sym) ∈ enumerate(ls.loopsymbols)
669666
s === sym && return loopnum
670667
end
671668
end
672669

673670
getloopid(ls::LoopSet, s::Symbol) = getloopid_or_nothing(ls, s)::Int
674-
# function getloopid(ls::LoopSet, s::Symbol)::Int
675-
# @show ls.loops
676-
# id = getloopid_or_nothing(ls, s)
677-
# @show id
678-
# id
679-
# end
680-
# getloop(ls::LoopSet, i::Integer) = getloop(ls, names(ls)[i])
681671
getloop(ls::LoopSet, i::Integer) = ls.loops[ls.loopordermap[i]] # takes nest level after reordering
682672
getloop_from_id(ls::LoopSet, i::Integer) = ls.loops[i] # takes w/ respect to original loop order.
683673
getloop(ls::LoopSet, s::Symbol) = getloop_from_id(ls, getloopid(ls, s))
@@ -1270,7 +1260,6 @@ function fill_offset_memop_collection!(ls::LoopSet)
12701260
else
12711261
isstore(opp) || continue
12721262
end
1273-
# @show op opp
12741263
oppref = opp.ref.ref
12751264
sameref(opref, oppref) || continue
12761265
if collectionsize == 0
@@ -1306,13 +1295,11 @@ function fill_offset_memop_collection!(ls::LoopSet)
13061295
for j ∈ 1:num_unroll_collections
13071296
collectionⱼ = unroll_collections[j]
13081297
# giet id (`first`) of first item in collection to get base offsets for comparison
1309-
# @show op, opid ops[opidc[first(first(collectionⱼ))], first(first(collectionⱼ))
13101298
if view(getoffsets(ops[opidc[first(first(collectionⱼ))]]), r) == v
13111299
found_match = true
13121300
push!(collectionⱼ, (i, o))
13131301
end
13141302
end
1315-
# @show opid, found_match
13161303
if !found_match
13171304
num_unroll_collections += 1 # the `i` points to position within `opidc`
13181305
unroll_collections[num_unroll_collections] = [(i,o)]

src/reconstruct_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ Execute an `@avx` block. The block's code is represented via the arguments:
672672
@aggressive_constprop @generated function _avx_!(
673673
::Val{var"#UNROLL#"}, ::Val{var"#OPS#"}, ::Val{var"#ARF#"}, ::Val{var"#AM#"}, ::Val{var"#LPSYM#"}, var"#lv#tuple#args#"::Tuple{var"#LB#",var"#V#"}
674674
) where {var"#UNROLL#", var"#OPS#", var"#ARF#", var"#AM#", var"#LPSYM#", var"#LB#", var"#V#"}
675-
# 1 + 1 # Irrelevant line you can comment out/in to force recompilation...
675+
1 + 1 # Irrelevant line you can comment out/in to force recompilation...
676676
ls = _avx_loopset(var"#OPS#", var"#ARF#", var"#AM#", var"#LPSYM#", var"#LB#".parameters, var"#V#".parameters, var"#UNROLL#")
677677
# return @show avx_body(ls, var"#UNROLL#")
678678
if last(var"#UNROLL#") > 1

test/copy.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ using LoopVectorization, OffsetArrays, Test
155155
@test x == q2
156156
fill!(q2, -999999); @avx q2 .= x;
157157
@test x == q2
158-
@test all(iszero, issue_256!(x))
158+
@test all(iszero, issue_256!(reshape(x,(length(x),1))))
159159

160160
B = rand(R, 79, 83);
161161
A1 = zeros(T, 79, 85);

0 commit comments

Comments
 (0)