356
356
# function vmaterialize!(
357
357
@generated function vmaterialize! (
358
358
dest:: AbstractArray{T,N} , bc:: BC ,
359
- :: Val{Mod} , :: StaticInt{RS} , :: StaticInt{RC} , :: StaticInt{CLS}
360
- ) where {T <: NativeTypes , N, BC <: Union{Broadcasted,Product} , Mod, RS, RC, CLS}
359
+ :: Val{Mod} , :: Val{UNROLL} , :: StaticInt{RS} , :: StaticInt{RC} , :: StaticInt{CLS}
360
+ ) where {T <: NativeTypes , N, BC <: Union{Broadcasted,Product} , Mod, UNROLL, RS, RC, CLS}
361
361
# 2+1
362
362
# we have an N dimensional loop.
363
363
# need to construct the LoopSet
372
372
add_simple_store! (ls, :dest , ArrayReference (:dest , loopsyms), elementbytes)
373
373
resize! (ls. loop_order, num_loops (ls)) # num_loops may be greater than N, eg Product
374
374
# return ls
375
- q = lower (ls, 0 )
375
+ inline, u₁, u₂, threads = UNROLL
376
+ q = lower (ls, u₁ % Int, u₂ % Int, inline % Int)
376
377
push! (q. args, :dest )
377
378
# @show q
378
379
# q
388
389
end
389
390
@generated function vmaterialize! (
390
391
dest′:: Union{Adjoint{T,A},Transpose{T,A}} , bc:: BC ,
391
- :: Val{Mod} , :: StaticInt{RS} , :: StaticInt{RC} , :: StaticInt{CLS}
392
- ) where {T <: NativeTypes , N, A <: AbstractArray{T,N} , BC <: Union{Broadcasted,Product} , Mod, RS, RC, CLS}
392
+ :: Val{Mod} , :: Val{UNROLL} , :: StaticInt{RS} , :: StaticInt{RC} , :: StaticInt{CLS}
393
+ ) where {T <: NativeTypes , N, A <: AbstractArray{T,N} , BC <: Union{Broadcasted,Product} , Mod, UNROLL, RS, RC, CLS}
393
394
# we have an N dimensional loop.
394
395
# need to construct the LoopSet
395
396
ls = LoopSet (Mod)
402
403
add_broadcast! (ls, :dest , :bc , loopsyms, BC, elementbytes)
403
404
add_simple_store! (ls, :dest , ArrayReference (:dest , reverse (loopsyms)), elementbytes)
404
405
resize! (ls. loop_order, num_loops (ls)) # num_loops may be greater than N, eg Product
405
- q = lower (ls, 0 )
406
+ inline, u₁, u₂, threads = UNROLL
407
+ q = lower (ls, u₁ % Int, u₂ % Int, inline % Int)
406
408
push! (q. args, :dest′ )
407
409
q = Expr (
408
410
:block ,
@@ -414,32 +416,42 @@ end
414
416
# ls
415
417
end
416
418
# these are marked `@inline` so the `@avx` itself can choose whether or not to inline.
417
# Scalar fill into a plain array: `bc` is a zero-dimensional broadcast of a single `Number`
# through `identity`, so every element of `dest` receives the same converted value.
# This is a generated function so that the `(inline, u₁, u₂, threads)` settings carried by
# `UNROLL` can be spliced into the `@avx` invocation as literals. The emitted body carries an
# `:inline` meta. `Mod`, `RS`, `RC` and `CLS` are accepted only for dispatch and are unused here.
@generated function vmaterialize!(
    dest::AbstractArray{T,N},
    bc::Broadcasted{Base.Broadcast.DefaultArrayStyle{0},Nothing,typeof(identity),Tuple{T2}},
    ::Val{Mod}, ::Val{UNROLL}, ::StaticInt{RS}, ::StaticInt{RC}, ::StaticInt{CLS}
) where {T <: NativeTypes, N, T2 <: Number, Mod, UNROLL, RS, RC, CLS}
    # Unpack the four tuning settings packed into the `UNROLL` type parameter.
    inline_opt, unroll₁, unroll₂, thread_opt = UNROLL
    return quote
        $(Expr(:meta, :inline))
        # Convert the broadcast scalar to the destination element type once, up front.
        fillval = T(first(bc.args))
        @avx inline=$inline_opt unroll=($unroll₁,$unroll₂) thread=$thread_opt for idx ∈ eachindex(dest)
            dest[idx] = fillval
        end
        dest
    end
end
427
# Scalar fill into an `Adjoint`/`Transpose` wrapper: `bc` is a zero-dimensional broadcast of a
# single `Number` through `identity`. The fill is written straight into the wrapper's parent
# array, and the wrapper itself (`dest′`) is returned.
# This is a generated function so that the `(inline, u₁, u₂, threads)` settings carried by
# `UNROLL` can be spliced into the `@avx` invocation as literals. The emitted body carries an
# `:inline` meta. `Mod`, `RS`, `RC` and `CLS` are accepted only for dispatch and are unused here.
@generated function vmaterialize!(
    dest′::Union{Adjoint{T,A},Transpose{T,A}},
    bc::Broadcasted{Base.Broadcast.DefaultArrayStyle{0},Nothing,typeof(identity),Tuple{T2}},
    ::Val{Mod}, ::Val{UNROLL}, ::StaticInt{RS}, ::StaticInt{RC}, ::StaticInt{CLS}
) where {T <: NativeTypes, N, A <: AbstractArray{T,N}, T2 <: Number, Mod, UNROLL, RS, RC, CLS}
    # Unpack the four tuning settings packed into the `UNROLL` type parameter.
    inline_opt, unroll₁, unroll₂, thread_opt = UNROLL
    return quote
        $(Expr(:meta, :inline))
        # Convert the broadcast scalar to the destination element type once, up front.
        fillval = T(first(bc.args))
        # Every element gets the same value, so the parent can be filled directly.
        buf = parent(dest′)
        @avx inline=$inline_opt unroll=($unroll₁,$unroll₂) thread=$thread_opt for idx ∈ eachindex(buf)
            buf[idx] = fillval
        end
        dest′
    end
end
438
448
439
# Out-of-place entry point: allocate a destination whose element type is the combined element
# type of the broadcast, then fill it with `vmaterialize!`, forwarding all tuning arguments.
#
# Arguments after `bc` are type-level settings:
#   ::Val{Mod}       - passed through unchanged.
#   ::Val{UNROLL}    - the settings that the `vmaterialize!` methods destructure as
#                      `inline, u₁, u₂, threads = UNROLL`.
#   ::StaticInt{RS}, ::StaticInt{RC}, ::StaticInt{CLS} - passed through unchanged.
#
# Returns the newly allocated, filled destination (whatever `vmaterialize!` returns).
@inline function vmaterialize(
    bc::Broadcasted, ::Val{Mod}, ::Val{UNROLL}, ::StaticInt{RS}, ::StaticInt{RC}, ::StaticInt{CLS}
) where {Mod,UNROLL,RS,RC,CLS}
    ElType = Base.Broadcast.combine_eltypes(bc.f, bc.args)
    # `UNROLL` arrives wrapped in a `Val` and every `vmaterialize!` method takes `::Val{UNROLL}`
    # in this position, so it must be re-wrapped as `Val{UNROLL}()`. Wrapping it in `StaticInt`
    # would not match any `vmaterialize!` method.
    return vmaterialize!(
        similar(bc, ElType), bc,
        Val{Mod}(), Val{UNROLL}(), StaticInt{RS}(), StaticInt{RC}(), StaticInt{CLS}()
    )
end
443
455
444
# Catch-all fallback: for destination/broadcast combinations without a more specific
# `vmaterialize!` method, ignore the tuning arguments and defer to Base's `materialize!`.
function vmaterialize!(dest, bc, ::Val, ::Val, ::StaticInt, ::StaticInt, ::StaticInt)
    return Base.Broadcast.materialize!(dest, bc)
end
445
457
0 commit comments