Skip to content

Commit e620a8c

Browse files
committed
Allow nesting within Polyester.@Batch
1 parent 31840a8 commit e620a8c

File tree

1 file changed

+68
-60
lines changed

1 file changed

+68
-60
lines changed

src/codegen/lower_threads.jl

Lines changed: 68 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -390,32 +390,36 @@ function thread_one_loops_expr(
390390
var"##do#thread##" = var"#nrequest#" ≠ 0x00000000
391391
if var"##do#thread##"
392392
var"#threads#", var"#torelease#" = Polyester.request_threads(Threads.threadid()%UInt32, var"#nrequest#")
393-
var"#thread#factor#0#" = var"#nthreads#"
394-
$iterdef
395-
var"#thread#launch#count#" = 0x00000000
396-
var"#thread#id#" = 0x00000000
397-
var"#thread#mask#" = Polyester.mask(var"#threads#")
398-
var"#threads#remain#" = true
399-
while var"#threads#remain#"
400-
VectorizationBase.assume(var"#thread#mask#" ≠ zero(var"#thread#mask#"))
401-
var"#trailzing#zeros#" = Base.trailing_zeros(var"#thread#mask#") % UInt32
402-
var"#nblock#size#thread#0#" = Core.ifelse(
403-
var"#thread#launch#count#" < (var"#nrem#thread#0#" % UInt32),
404-
vadd_nw(var"#base#block#size#thread#0#", var"#block#rem#step#0#"),
405-
var"#base#block#size#thread#0#"
406-
)
407-
var"#trailzing#zeros#" = vadd_nw(var"#trailzing#zeros#", 0x00000001)
408-
$iterstop
409-
var"#thread#id#" = vadd_nw(var"#thread#id#", var"#trailzing#zeros#")
393+
var"#nrequest#" = var"#threads#".i
394+
var"##do#thread##" = var"#nrequest#" ≠ zero(var"#nrequest#")
395+
if var"##do#thread##"
396+
var"#thread#factor#0#" = var"#nthreads#"
397+
$iterdef
398+
var"#thread#launch#count#" = 0x00000000
399+
var"#thread#id#" = 0x00000000
400+
var"#thread#mask#" = Polyester.mask(var"#threads#")
401+
var"#threads#remain#" = true
402+
while var"#threads#remain#"
403+
VectorizationBase.assume(var"#thread#mask#" ≠ zero(var"#thread#mask#"))
404+
var"#trailzing#zeros#" = Base.trailing_zeros(var"#thread#mask#") % UInt32
405+
var"#nblock#size#thread#0#" = Core.ifelse(
406+
var"#thread#launch#count#" < (var"#nrem#thread#0#" % UInt32),
407+
vadd_nw(var"#base#block#size#thread#0#", var"#block#rem#step#0#"),
408+
var"#base#block#size#thread#0#"
409+
)
410+
var"#trailzing#zeros#" = vadd_nw(var"#trailzing#zeros#", 0x00000001)
411+
$iterstop
412+
var"#thread#id#" = vadd_nw(var"#thread#id#", var"#trailzing#zeros#")
410413

411-
var"##lbvargs#to_launch##" = ($loopboundexpr, var"#vargs#")
412-
avx_launch(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, StaticType{typeof(var"##lbvargs#to_launch##")}(), flatten_to_tuple(var"##lbvargs#to_launch##"), var"#thread#id#")
414+
var"##lbvargs#to_launch##" = ($loopboundexpr, var"#vargs#")
415+
avx_launch(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, StaticType{typeof(var"##lbvargs#to_launch##")}(), flatten_to_tuple(var"##lbvargs#to_launch##"), var"#thread#id#")
413416

414-
var"#thread#mask#" >>>= var"#trailzing#zeros#"
417+
var"#thread#mask#" >>>= var"#trailzing#zeros#"
415418

416-
var"#iter#start#0#" = var"#iter#stop#0#"
417-
var"#thread#launch#count#" = vadd_nw(var"#thread#launch#count#", 0x00000001)
418-
var"#threads#remain#" = var"#thread#launch#count#" ≠ var"#nrequest#"
419+
var"#iter#start#0#" = var"#iter#stop#0#"
420+
var"#thread#launch#count#" = vadd_nw(var"#thread#launch#count#", 0x00000001)
421+
var"#threads#remain#" = var"#thread#launch#count#" ≠ var"#nrequest#"
422+
end
419423
end
420424
else# eliminate undef var errors that the compiler should be able to figure out are unreachable, but doesn't
421425
var"#torelease#" = zero(Polyester.worker_type())
@@ -573,46 +577,50 @@ function thread_two_loops_expr(
573577
var"##do#thread##" = var"#nrequest#" ≠ 0x00000000
574578
if var"##do#thread##"
575579
var"#threads#", var"#torelease#" = Polyester.request_threads(Threads.threadid(), var"#nrequest#")
576-
$iterdef1
577-
$iterdef2
578-
# @show var"#base#block#size#thread#0#", var"#block#rem#step#0#" var"#base#block#size#thread#1#", var"#block#rem#step#1#"
579-
var"#thread#launch#count#" = 0x00000000
580-
var"#thread#launch#count#0#" = 0x00000000
581-
var"#thread#launch#count#1#" = 0x00000000
582-
var"#thread#id#" = 0x00000000
583-
var"#thread#mask#" = Polyester.mask(var"#threads#")
584-
var"#threads#remain#" = true
585-
while var"#threads#remain#"
586-
VectorizationBase.assume(var"#thread#mask#" ≠ zero(var"#thread#mask#"))
587-
var"#trailzing#zeros#" = Base.trailing_zeros(var"#thread#mask#") % UInt32
588-
var"#nblock#size#thread#0#" = Core.ifelse(
589-
var"#thread#launch#count#0#" < (var"#nrem#thread#0#" % UInt32),
590-
vadd_nw(var"#base#block#size#thread#0#", var"#block#rem#step#0#"),
591-
var"#base#block#size#thread#0#"
592-
)
593-
var"#nblock#size#thread#1#" = Core.ifelse(
594-
var"#thread#launch#count#1#" < (var"#nrem#thread#1#" % UInt32),
595-
vadd_nw(var"#base#block#size#thread#1#", var"#block#rem#step#1#"),
596-
var"#base#block#size#thread#1#"
597-
)
598-
var"#trailzing#zeros#" = vadd_nw(var"#trailzing#zeros#", 0x00000001)
599-
$iterstop1
600-
$iterstop2
601-
var"#thread#id#" = vadd_nw(var"#thread#id#", var"#trailzing#zeros#")
602-
# @show var"#thread#id#" $loopboundexpr
603-
var"##lbvargs#to_launch##" = ($loopboundexpr, var"#vargs#")
604-
avx_launch(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, StaticType{typeof(var"##lbvargs#to_launch##")}(), flatten_to_tuple(var"##lbvargs#to_launch##"), var"#thread#id#")
605-
var"#thread#mask#" >>>= var"#trailzing#zeros#"
580+
var"#nrequest#" = var"#threads#".i
581+
var"##do#thread##" = var"#nrequest#" ≠ zero(var"#nrequest#")
582+
if var"##do#thread##"
583+
$iterdef1
584+
$iterdef2
585+
# @show var"#base#block#size#thread#0#", var"#block#rem#step#0#" var"#base#block#size#thread#1#", var"#block#rem#step#1#"
586+
var"#thread#launch#count#" = 0x00000000
587+
var"#thread#launch#count#0#" = 0x00000000
588+
var"#thread#launch#count#1#" = 0x00000000
589+
var"#thread#id#" = 0x00000000
590+
var"#thread#mask#" = Polyester.mask(var"#threads#")
591+
var"#threads#remain#" = true
592+
while var"#threads#remain#"
593+
VectorizationBase.assume(var"#thread#mask#" ≠ zero(var"#thread#mask#"))
594+
var"#trailzing#zeros#" = Base.trailing_zeros(var"#thread#mask#") % UInt32
595+
var"#nblock#size#thread#0#" = Core.ifelse(
596+
var"#thread#launch#count#0#" < (var"#nrem#thread#0#" % UInt32),
597+
vadd_nw(var"#base#block#size#thread#0#", var"#block#rem#step#0#"),
598+
var"#base#block#size#thread#0#"
599+
)
600+
var"#nblock#size#thread#1#" = Core.ifelse(
601+
var"#thread#launch#count#1#" < (var"#nrem#thread#1#" % UInt32),
602+
vadd_nw(var"#base#block#size#thread#1#", var"#block#rem#step#1#"),
603+
var"#base#block#size#thread#1#"
604+
)
605+
var"#trailzing#zeros#" = vadd_nw(var"#trailzing#zeros#", 0x00000001)
606+
$iterstop1
607+
$iterstop2
608+
var"#thread#id#" = vadd_nw(var"#thread#id#", var"#trailzing#zeros#")
609+
# @show var"#thread#id#" $loopboundexpr
610+
var"##lbvargs#to_launch##" = ($loopboundexpr, var"#vargs#")
611+
avx_launch(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, StaticType{typeof(var"##lbvargs#to_launch##")}(), flatten_to_tuple(var"##lbvargs#to_launch##"), var"#thread#id#")
612+
var"#thread#mask#" >>>= var"#trailzing#zeros#"
606613

607-
var"##end#inner##" = var"#thread#launch#count#0#" == vsub_nw(var"#thread#factor#0#", 0x00000001)
608-
var"#thread#launch#count#0#" = Core.ifelse(var"##end#inner##", 0x00000000, vadd_nw(var"#thread#launch#count#0#", 0x00000001))
609-
var"#thread#launch#count#1#" = Core.ifelse(var"##end#inner##", var"#thread#launch#count#1#" + 0x00000001, var"#thread#launch#count#1#")
614+
var"##end#inner##" = var"#thread#launch#count#0#" == vsub_nw(var"#thread#factor#0#", 0x00000001)
615+
var"#thread#launch#count#0#" = Core.ifelse(var"##end#inner##", 0x00000000, vadd_nw(var"#thread#launch#count#0#", 0x00000001))
616+
var"#thread#launch#count#1#" = Core.ifelse(var"##end#inner##", var"#thread#launch#count#1#" + 0x00000001, var"#thread#launch#count#1#")
610617

611-
var"#iter#start#0#" = Core.ifelse(var"##end#inner##", var"#loop#1#start#init#", var"#iter#stop#0#")
612-
var"#iter#start#1#" = Core.ifelse(var"##end#inner##", var"#iter#stop#1#", var"#iter#start#1#")
618+
var"#iter#start#0#" = Core.ifelse(var"##end#inner##", var"#loop#1#start#init#", var"#iter#stop#0#")
619+
var"#iter#start#1#" = Core.ifelse(var"##end#inner##", var"#iter#stop#1#", var"#iter#start#1#")
613620

614-
var"#thread#launch#count#" = vadd_nw(var"#thread#launch#count#", 0x00000001)
615-
var"#threads#remain#" = var"#thread#launch#count#" ≠ var"#nrequest#"
621+
var"#thread#launch#count#" = vadd_nw(var"#thread#launch#count#", 0x00000001)
622+
var"#threads#remain#" = var"#thread#launch#count#" ≠ var"#nrequest#"
623+
end
616624
end
617625
else# eliminate undef var errors that the compiler should be able to figure out are unreachable, but doesn't
618626
var"#torelease#" = zero(Polyester.worker_type())

0 commit comments

Comments
 (0)