Skip to content

Commit 382b190

Browse files
committed
Made the masking function more general than perhaps it ought to be.
1 parent d2113b3 commit 382b190

File tree

1 file changed

+8
-9
lines changed

1 file changed

+8
-9
lines changed

src/LoopVectorization.jl

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -313,7 +313,7 @@ end
313313
push!(q.args, :($sym = Base.FastMath.div_fast($sym, $mod.SIMDPirates.vprod($gsym))))
314314
end
315315
end
316-
316+
push!(q.args, nothing)
317317
# display(q)
318318
# We are using pointers, so better add a GC.@preserve.
319319
gcpreserve = true
@@ -349,15 +349,15 @@ end
349349
elseif @capture(x, reductionA_ = M_.vmul(reductionA_, B_ ) ) || @capture(x, reductionA_ = M_.vmul(B_, reductionA_ ) ) || @capture(x, reductionA_ = vmul(reductionA_, B_ ) ) || @capture(x, reductionA_ = vmul(B_, reductionA_ ) )
350350
M === nothing && (M = default_module)
351351
return :( $reductionA = $M.vifelse($masksym, $M.vmul($reductionA, $B), $reductionA) )
352-
elseif @capture(x, reductionA_ = M_.vmuladd(B_, C_, reductionA_) ) || @capture(x, reductionA_ = vmuladd(B_, C_, reductionA_) )
353-
M === nothing && (M = default_module)
354-
return :( $reductionA = $M.vifelse($masksym, $M.vmuladd($B, $C, $reductionA), $reductionA) )
355-
elseif @capture(x, reductionA_ = M_.vfnmadd(B_, C_, reductionA_ ) ) || @capture(x, reductionA_ = vfnmadd(B_, C_, reductionA_ ) )
352+
elseif @capture(x, reductionA_ = M_.f_(B_, C_, reductionA_) ) || @capture(x, reductionA_ = f_(B_, C_, reductionA_) )
356353
M === nothing && (M = default_module)
357-
return :( $reductionA = $M.vifelse($masksym, $M.vfnmadd($B, $C, $reductionA), $reductionA) )
358-
elseif @capture(x, reductionA_ = M_.vsub(reductionA_, B_ ) ) || @capture(x, reductionA_ = vsub(reductionA_, B_ ) )
354+
return :( $reductionA = $M.vifelse($masksym, $M.$f($B, $C, $reductionA), $reductionA) )
355+
# elseif @capture(x, reductionA_ = M_.vfnmadd(B_, C_, reductionA_ ) ) || @capture(x, reductionA_ = vfnmadd(B_, C_, reductionA_ ) )
356+
# M === nothing && (M = default_module)
357+
# return :( $reductionA = $M.vifelse($masksym, $M.vfnmadd($B, $C, $reductionA), $reductionA) )
358+
elseif @capture(x, reductionA_ = M_.f_(reductionA_, B_ ) ) || @capture(x, reductionA_ = f_(reductionA_, B_ ) )
359359
M === nothing && (M = default_module)
360-
return :( $reductionA = $M.vifelse($masksym, $M.vsub($reductionA, $B), $reductionA) )
360+
return :( $reductionA = $M.vifelse($masksym, $M.$f($reductionA, $B), $reductionA) )
361361
# elseif @capture(x, reductionA_ = M_.vmul(reductionA_, B_ ) )
362362
# M === nothing && (M = :(LoopVectorization.SIMDPirates))
363363
# return :( $reductionA = $M.vifelse($masksym, $M.vmul($reductionA, $B), $reductionA) )
@@ -443,7 +443,6 @@ end
443443
## check to see if we are to do a vector load or a broadcast
444444
if i == declared_iter_sym
445445
load_expr = :($mod.vload($V, $pA + $itersym ))
446-
# load_expr = :($mod.vload($V, $pA, $itersym))
447446
elseif isa(i, Expr)
448447
contains_itersym, i2 = subsymbol(i, declared_iter_sym, itersym)
449448
if contains_itersym

0 commit comments

Comments
 (0)