@@ -390,32 +390,36 @@ function thread_one_loops_expr(
390
390
var"##do#thread##" = var"#nrequest#" ≠ 0x00000000
391
391
if var"##do#thread##"
392
392
var"#threads#" , var"#torelease#" = Polyester. request_threads (Threads. threadid ()% UInt32, var"#nrequest#" )
393
- var"#thread#factor#0#" = var"#nthreads#"
394
- $ iterdef
395
- var"#thread#launch#count#" = 0x00000000
396
- var"#thread#id#" = 0x00000000
397
- var"#thread#mask#" = Polyester. mask (var"#threads#" )
398
- var"#threads#remain#" = true
399
- while var"#threads#remain#"
400
- VectorizationBase. assume (var"#thread#mask#" ≠ zero (var"#thread#mask#" ))
401
- var"#trailzing#zeros#" = Base. trailing_zeros (var"#thread#mask#" ) % UInt32
402
- var"#nblock#size#thread#0#" = Core. ifelse (
403
- var"#thread#launch#count#" < (var"#nrem#thread#0#" % UInt32),
404
- vadd_nw (var"#base#block#size#thread#0#" , var"#block#rem#step#0#" ),
405
- var"#base#block#size#thread#0#"
406
- )
407
- var"#trailzing#zeros#" = vadd_nw (var"#trailzing#zeros#" , 0x00000001 )
408
- $ iterstop
409
- var"#thread#id#" = vadd_nw (var"#thread#id#" , var"#trailzing#zeros#" )
393
+ var"#nrequest#" = var"#threads#" . i
394
+ var"##do#thread##" = var"#nrequest#" ≠ zero (var"#nrequest#" )
395
+ if var"##do#thread##"
396
+ var"#thread#factor#0#" = var"#nthreads#"
397
+ $ iterdef
398
+ var"#thread#launch#count#" = 0x00000000
399
+ var"#thread#id#" = 0x00000000
400
+ var"#thread#mask#" = Polyester. mask (var"#threads#" )
401
+ var"#threads#remain#" = true
402
+ while var"#threads#remain#"
403
+ VectorizationBase. assume (var"#thread#mask#" ≠ zero (var"#thread#mask#" ))
404
+ var"#trailzing#zeros#" = Base. trailing_zeros (var"#thread#mask#" ) % UInt32
405
+ var"#nblock#size#thread#0#" = Core. ifelse (
406
+ var"#thread#launch#count#" < (var"#nrem#thread#0#" % UInt32),
407
+ vadd_nw (var"#base#block#size#thread#0#" , var"#block#rem#step#0#" ),
408
+ var"#base#block#size#thread#0#"
409
+ )
410
+ var"#trailzing#zeros#" = vadd_nw (var"#trailzing#zeros#" , 0x00000001 )
411
+ $ iterstop
412
+ var"#thread#id#" = vadd_nw (var"#thread#id#" , var"#trailzing#zeros#" )
410
413
411
- var"##lbvargs#to_launch##" = ($ loopboundexpr, var"#vargs#" )
412
- avx_launch (Val {$UNROLL} (), $ OPS, $ ARF, $ AM, $ LPSYM, StaticType {typeof(var"##lbvargs#to_launch##")} (), flatten_to_tuple (var"##lbvargs#to_launch##" ), var"#thread#id#" )
414
+ var"##lbvargs#to_launch##" = ($ loopboundexpr, var"#vargs#" )
415
+ avx_launch (Val {$UNROLL} (), $ OPS, $ ARF, $ AM, $ LPSYM, StaticType {typeof(var"##lbvargs#to_launch##")} (), flatten_to_tuple (var"##lbvargs#to_launch##" ), var"#thread#id#" )
413
416
414
- var"#thread#mask#" >>>= var"#trailzing#zeros#"
417
+ var"#thread#mask#" >>>= var"#trailzing#zeros#"
415
418
416
- var"#iter#start#0#" = var"#iter#stop#0#"
417
- var"#thread#launch#count#" = vadd_nw (var"#thread#launch#count#" , 0x00000001 )
418
- var"#threads#remain#" = var"#thread#launch#count#" ≠ var"#nrequest#"
419
+ var"#iter#start#0#" = var"#iter#stop#0#"
420
+ var"#thread#launch#count#" = vadd_nw (var"#thread#launch#count#" , 0x00000001 )
421
+ var"#threads#remain#" = var"#thread#launch#count#" ≠ var"#nrequest#"
422
+ end
419
423
end
420
424
else # eliminate undef var errors that the compiler should be able to figure out are unreachable, but doesn't
421
425
var"#torelease#" = zero (Polyester. worker_type ())
@@ -573,46 +577,50 @@ function thread_two_loops_expr(
573
577
var"##do#thread##" = var"#nrequest#" ≠ 0x00000000
574
578
if var"##do#thread##"
575
579
var"#threads#" , var"#torelease#" = Polyester. request_threads (Threads. threadid (), var"#nrequest#" )
576
- $ iterdef1
577
- $ iterdef2
578
- # @show var"#base#block#size#thread#0#", var"#block#rem#step#0#" var"#base#block#size#thread#1#", var"#block#rem#step#1#"
579
- var"#thread#launch#count#" = 0x00000000
580
- var"#thread#launch#count#0#" = 0x00000000
581
- var"#thread#launch#count#1#" = 0x00000000
582
- var"#thread#id#" = 0x00000000
583
- var"#thread#mask#" = Polyester. mask (var"#threads#" )
584
- var"#threads#remain#" = true
585
- while var"#threads#remain#"
586
- VectorizationBase. assume (var"#thread#mask#" ≠ zero (var"#thread#mask#" ))
587
- var"#trailzing#zeros#" = Base. trailing_zeros (var"#thread#mask#" ) % UInt32
588
- var"#nblock#size#thread#0#" = Core. ifelse (
589
- var"#thread#launch#count#0#" < (var"#nrem#thread#0#" % UInt32),
590
- vadd_nw (var"#base#block#size#thread#0#" , var"#block#rem#step#0#" ),
591
- var"#base#block#size#thread#0#"
592
- )
593
- var"#nblock#size#thread#1#" = Core. ifelse (
594
- var"#thread#launch#count#1#" < (var"#nrem#thread#1#" % UInt32),
595
- vadd_nw (var"#base#block#size#thread#1#" , var"#block#rem#step#1#" ),
596
- var"#base#block#size#thread#1#"
597
- )
598
- var"#trailzing#zeros#" = vadd_nw (var"#trailzing#zeros#" , 0x00000001 )
599
- $ iterstop1
600
- $ iterstop2
601
- var"#thread#id#" = vadd_nw (var"#thread#id#" , var"#trailzing#zeros#" )
602
- # @show var"#thread#id#" $loopboundexpr
603
- var"##lbvargs#to_launch##" = ($ loopboundexpr, var"#vargs#" )
604
- avx_launch (Val {$UNROLL} (), $ OPS, $ ARF, $ AM, $ LPSYM, StaticType {typeof(var"##lbvargs#to_launch##")} (), flatten_to_tuple (var"##lbvargs#to_launch##" ), var"#thread#id#" )
605
- var"#thread#mask#" >>>= var"#trailzing#zeros#"
580
+ var"#nrequest#" = var"#threads#" . i
581
+ var"##do#thread##" = var"#nrequest#" ≠ zero (var"#nrequest#" )
582
+ if var"##do#thread##"
583
+ $ iterdef1
584
+ $ iterdef2
585
+ # @show var"#base#block#size#thread#0#", var"#block#rem#step#0#" var"#base#block#size#thread#1#", var"#block#rem#step#1#"
586
+ var"#thread#launch#count#" = 0x00000000
587
+ var"#thread#launch#count#0#" = 0x00000000
588
+ var"#thread#launch#count#1#" = 0x00000000
589
+ var"#thread#id#" = 0x00000000
590
+ var"#thread#mask#" = Polyester. mask (var"#threads#" )
591
+ var"#threads#remain#" = true
592
+ while var"#threads#remain#"
593
+ VectorizationBase. assume (var"#thread#mask#" ≠ zero (var"#thread#mask#" ))
594
+ var"#trailzing#zeros#" = Base. trailing_zeros (var"#thread#mask#" ) % UInt32
595
+ var"#nblock#size#thread#0#" = Core. ifelse (
596
+ var"#thread#launch#count#0#" < (var"#nrem#thread#0#" % UInt32),
597
+ vadd_nw (var"#base#block#size#thread#0#" , var"#block#rem#step#0#" ),
598
+ var"#base#block#size#thread#0#"
599
+ )
600
+ var"#nblock#size#thread#1#" = Core. ifelse (
601
+ var"#thread#launch#count#1#" < (var"#nrem#thread#1#" % UInt32),
602
+ vadd_nw (var"#base#block#size#thread#1#" , var"#block#rem#step#1#" ),
603
+ var"#base#block#size#thread#1#"
604
+ )
605
+ var"#trailzing#zeros#" = vadd_nw (var"#trailzing#zeros#" , 0x00000001 )
606
+ $ iterstop1
607
+ $ iterstop2
608
+ var"#thread#id#" = vadd_nw (var"#thread#id#" , var"#trailzing#zeros#" )
609
+ # @show var"#thread#id#" $loopboundexpr
610
+ var"##lbvargs#to_launch##" = ($ loopboundexpr, var"#vargs#" )
611
+ avx_launch (Val {$UNROLL} (), $ OPS, $ ARF, $ AM, $ LPSYM, StaticType {typeof(var"##lbvargs#to_launch##")} (), flatten_to_tuple (var"##lbvargs#to_launch##" ), var"#thread#id#" )
612
+ var"#thread#mask#" >>>= var"#trailzing#zeros#"
606
613
607
- var"##end#inner##" = var"#thread#launch#count#0#" == vsub_nw (var"#thread#factor#0#" , 0x00000001 )
608
- var"#thread#launch#count#0#" = Core. ifelse (var"##end#inner##" , 0x00000000 , vadd_nw (var"#thread#launch#count#0#" , 0x00000001 ))
609
- var"#thread#launch#count#1#" = Core. ifelse (var"##end#inner##" , var"#thread#launch#count#1#" + 0x00000001 , var"#thread#launch#count#1#" )
614
+ var"##end#inner##" = var"#thread#launch#count#0#" == vsub_nw (var"#thread#factor#0#" , 0x00000001 )
615
+ var"#thread#launch#count#0#" = Core. ifelse (var"##end#inner##" , 0x00000000 , vadd_nw (var"#thread#launch#count#0#" , 0x00000001 ))
616
+ var"#thread#launch#count#1#" = Core. ifelse (var"##end#inner##" , var"#thread#launch#count#1#" + 0x00000001 , var"#thread#launch#count#1#" )
610
617
611
- var"#iter#start#0#" = Core. ifelse (var"##end#inner##" , var"#loop#1#start#init#" , var"#iter#stop#0#" )
612
- var"#iter#start#1#" = Core. ifelse (var"##end#inner##" , var"#iter#stop#1#" , var"#iter#start#1#" )
618
+ var"#iter#start#0#" = Core. ifelse (var"##end#inner##" , var"#loop#1#start#init#" , var"#iter#stop#0#" )
619
+ var"#iter#start#1#" = Core. ifelse (var"##end#inner##" , var"#iter#stop#1#" , var"#iter#start#1#" )
613
620
614
- var"#thread#launch#count#" = vadd_nw (var"#thread#launch#count#" , 0x00000001 )
615
- var"#threads#remain#" = var"#thread#launch#count#" ≠ var"#nrequest#"
621
+ var"#thread#launch#count#" = vadd_nw (var"#thread#launch#count#" , 0x00000001 )
622
+ var"#threads#remain#" = var"#thread#launch#count#" ≠ var"#nrequest#"
623
+ end
616
624
end
617
625
else # eliminate undef var errors that the compiler should be able to figure out are unreachable, but doesn't
618
626
var"#torelease#" = zero (Polyester. worker_type ())
0 commit comments