|
313 | 313 | push!(q.args, :($sym = Base.FastMath.div_fast($sym, $mod.SIMDPirates.vprod($gsym))))
|
314 | 314 | end
|
315 | 315 | end
|
316 |
| - |
| 316 | + push!(q.args, nothing) |
317 | 317 | # display(q)
|
318 | 318 | # We are using pointers, so better add a GC.@preserve.
|
319 | 319 | gcpreserve = true
|
@@ -349,15 +349,15 @@ end
|
349 | 349 | elseif @capture(x, reductionA_ = M_.vmul(reductionA_, B_ ) ) || @capture(x, reductionA_ = M_.vmul(B_, reductionA_ ) ) || @capture(x, reductionA_ = vmul(reductionA_, B_ ) ) || @capture(x, reductionA_ = vmul(B_, reductionA_ ) )
|
350 | 350 | M === nothing && (M = default_module)
|
351 | 351 | return :( $reductionA = $M.vifelse($masksym, $M.vmul($reductionA, $B), $reductionA) )
|
352 |
| - elseif @capture(x, reductionA_ = M_.vmuladd(B_, C_, reductionA_) ) || @capture(x, reductionA_ = vmuladd(B_, C_, reductionA_) ) |
353 |
| - M === nothing && (M = default_module) |
354 |
| - return :( $reductionA = $M.vifelse($masksym, $M.vmuladd($B, $C, $reductionA), $reductionA) ) |
355 |
| - elseif @capture(x, reductionA_ = M_.vfnmadd(B_, C_, reductionA_ ) ) || @capture(x, reductionA_ = vfnmadd(B_, C_, reductionA_ ) ) |
| 352 | + elseif @capture(x, reductionA_ = M_.f_(B_, C_, reductionA_) ) || @capture(x, reductionA_ = f_(B_, C_, reductionA_) ) |
356 | 353 | M === nothing && (M = default_module)
|
357 |
| - return :( $reductionA = $M.vifelse($masksym, $M.vfnmadd($B, $C, $reductionA), $reductionA) ) |
358 |
| - elseif @capture(x, reductionA_ = M_.vsub(reductionA_, B_ ) ) || @capture(x, reductionA_ = vsub(reductionA_, B_ ) ) |
| 354 | + return :( $reductionA = $M.vifelse($masksym, $M.$f($B, $C, $reductionA), $reductionA) ) |
| 355 | + # elseif @capture(x, reductionA_ = M_.vfnmadd(B_, C_, reductionA_ ) ) || @capture(x, reductionA_ = vfnmadd(B_, C_, reductionA_ ) ) |
| 356 | + # M === nothing && (M = default_module) |
| 357 | + # return :( $reductionA = $M.vifelse($masksym, $M.vfnmadd($B, $C, $reductionA), $reductionA) ) |
| 358 | + elseif @capture(x, reductionA_ = M_.f_(reductionA_, B_ ) ) || @capture(x, reductionA_ = f_(reductionA_, B_ ) ) |
359 | 359 | M === nothing && (M = default_module)
|
360 |
| - return :( $reductionA = $M.vifelse($masksym, $M.vsub($reductionA, $B), $reductionA) ) |
| 360 | + return :( $reductionA = $M.vifelse($masksym, $M.$f($reductionA, $B), $reductionA) ) |
361 | 361 | # elseif @capture(x, reductionA_ = M_.vmul(reductionA_, B_ ) )
|
362 | 362 | # M === nothing && (M = :(LoopVectorization.SIMDPirates))
|
363 | 363 | # return :( $reductionA = $M.vifelse($masksym, $M.vmul($reductionA, $B), $reductionA) )
|
|
443 | 443 | ## check to see if we are to do a vector load or a broadcast
|
444 | 444 | if i == declared_iter_sym
|
445 | 445 | load_expr = :($mod.vload($V, $pA + $itersym ))
|
446 |
| - # load_expr = :($mod.vload($V, $pA, $itersym)) |
447 | 446 | elseif isa(i, Expr)
|
448 | 447 | contains_itersym, i2 = subsymbol(i, declared_iter_sym, itersym)
|
449 | 448 | if contains_itersym
|
|
0 commit comments