@@ -23,7 +23,8 @@ const SLEEFPiratesDict = Dict{Symbol,Tuple{Symbol,Symbol}}(
23
23
:exp2 => (:SLEEFPirates , :exp2 ),
24
24
:exp10 => (:SLEEFPirates , :exp10 ),
25
25
:expm1 => (:SLEEFPirates , :expm1 ),
26
- :sqrt => (:SLEEFPirates , :sqrt ), # faster than sqrt_fast
26
+ # :sqrt => (:SLEEFPirates, :sqrt), # faster than sqrt_fast
27
+ :sqrt => (:SIMDPirates , :sqrt ), # faster than sqrt_fast
27
28
:rsqrt => (:SIMDPirates , :rsqrt ),
28
29
:cbrt => (:SLEEFPirates , :cbrt_fast ),
29
30
:asin => (:SLEEFPirates , :asin_fast ),
216
217
remr = gensym (:remreps )
217
218
q = quote
218
219
$ Nsym = $ N
219
- ($ Qsym, $ remsym) = $ (num_vector_load_expr (:($ mod. LoopVectorization), N , W<< log2unroll))
220
+ ($ Qsym, $ remsym) = $ (num_vector_load_expr (:($ mod. LoopVectorization), Nsym , W<< log2unroll))
220
221
end
221
222
if unroll_factor > 1
222
223
push! (q. args, :($ remr = $ remsym >>> $ Wshift))
373
374
_spirate (prewalk (expr) do x
374
375
# @show x
375
376
# @show main_body
377
+ if @capture (x, A_[i__] += B_)
378
+ x = :($ A[$ (i... )] = $ B + $ A[$ (i... )])
379
+ elseif @capture (x, A_[i__] -= B_)
380
+ x = :($ A[$ (i... )] = $ A[$ (i... )] - $ B)
381
+ elseif @capture (x, A_[i__] *= B_)
382
+ x = :($ A[$ (i... )] = $ B * $ A[$ (i... )])
383
+ elseif @capture (x, A_[i__] /= B_)
384
+ x = :($ A[$ (i... )] = $ A[$ (i... )] / $ B)
385
+ end
376
386
if @capture (x, A_[i_] = B_) || @capture (x, setindex! (A_, B_, i_))
387
+ # println("Made it.")
377
388
if A ∉ keys (indexed_expressions)
378
389
# pA = esc(gensym(A))
379
390
# pA = esc(Symbol(:p,A))
439
450
else
440
451
pA = indexed_expressions[A]
441
452
end
442
-
443
453
# # check to see if we are to do a vector load or a broadcast
444
454
if i == declared_iter_sym
445
455
load_expr = :($ mod. vload ($ V, $ pA + $ itersym ))
0 commit comments