@@ -463,49 +463,49 @@ julia> for n ∈ 2:16
463
463
end
464
464
n = 2
465
465
0.863 ns (0 allocations: 0 bytes)
466
- 0.864 ns (0 allocations: 0 bytes)
467
- n = 3
468
466
0.863 ns (0 allocations: 0 bytes)
467
+ n = 3
468
+ 0.862 ns (0 allocations: 0 bytes)
469
469
0.863 ns (0 allocations: 0 bytes)
470
470
n = 4
471
- 0.864 ns (0 allocations: 0 bytes)
472
- 1.075 ns (0 allocations: 0 bytes)
471
+ 0.862 ns (0 allocations: 0 bytes)
472
+ 0.862 ns (0 allocations: 0 bytes)
473
473
n = 5
474
+ 1.074 ns (0 allocations: 0 bytes)
474
475
0.864 ns (0 allocations: 0 bytes)
475
- 1.076 ns (0 allocations: 0 bytes)
476
476
n = 6
477
- 0.865 ns (0 allocations: 0 bytes)
478
- 1.077 ns (0 allocations: 0 bytes)
477
+ 0.864 ns (0 allocations: 0 bytes)
478
+ 0.862 ns (0 allocations: 0 bytes)
479
479
n = 7
480
480
1.075 ns (0 allocations: 0 bytes)
481
- 0.866 ns (0 allocations: 0 bytes)
481
+ 0.864 ns (0 allocations: 0 bytes)
482
482
n = 8
483
- 0.974 ns (0 allocations: 0 bytes)
484
- 1.076 ns (0 allocations: 0 bytes)
483
+ 1.077 ns (0 allocations: 0 bytes)
484
+ 0.865 ns (0 allocations: 0 bytes)
485
485
n = 9
486
486
1.081 ns (0 allocations: 0 bytes)
487
- 1.077 ns (0 allocations: 0 bytes)
487
+ 0.865 ns (0 allocations: 0 bytes)
488
488
n = 10
489
- 1.203 ns (0 allocations: 0 bytes)
490
- 1.077 ns (0 allocations: 0 bytes)
489
+ 1.195 ns (0 allocations: 0 bytes)
490
+ 0.867 ns (0 allocations: 0 bytes)
491
491
n = 11
492
- 1.355 ns (0 allocations: 0 bytes)
493
- 1.292 ns (0 allocations: 0 bytes)
492
+ 1.357 ns (0 allocations: 0 bytes)
493
+ 1.400 ns (0 allocations: 0 bytes)
494
494
n = 12
495
- 1.539 ns (0 allocations: 0 bytes)
496
- 1.079 ns (0 allocations: 0 bytes)
495
+ 1.543 ns (0 allocations: 0 bytes)
496
+ 1.074 ns (0 allocations: 0 bytes)
497
497
n = 13
498
- 1.704 ns (0 allocations: 0 bytes)
499
- 1.290 ns (0 allocations: 0 bytes)
498
+ 1.702 ns (0 allocations: 0 bytes)
499
+ 1.077 ns (0 allocations: 0 bytes)
500
500
n = 14
501
- 1.916 ns (0 allocations: 0 bytes)
502
- 1.185 ns (0 allocations: 0 bytes)
501
+ 1.913 ns (0 allocations: 0 bytes)
502
+ 0.867 ns (0 allocations: 0 bytes)
503
503
n = 15
504
- 2.072 ns (0 allocations: 0 bytes)
505
- 1.292 ns (0 allocations: 0 bytes)
504
+ 2.076 ns (0 allocations: 0 bytes)
505
+ 1.077 ns (0 allocations: 0 bytes)
506
506
n = 16
507
507
2.273 ns (0 allocations: 0 bytes)
508
- 1.076 ns (0 allocations: 0 bytes)
508
+ 1.078 ns (0 allocations: 0 bytes)
509
509
```
510
510
511
511
More importantly, `reduce_tup(_pick_range, inds)` often performs better than `reduce(_pick_range, inds)`.
@@ -516,33 +516,33 @@ julia> inds = (Base.OneTo(100), 1:100, 1:ArrayInterface.StaticInt(100))
516
516
(Base.OneTo(100), 1:100, 1:Static(100))
517
517
518
518
julia> @btime reduce(ArrayInterface._pick_range, \$ (Ref(inds))[])
519
- 6.000 ns (0 allocations: 0 bytes)
519
+ 6.405 ns (0 allocations: 0 bytes)
520
520
Base.Slice(Static(1):Static(100))
521
521
522
522
julia> @btime ArrayInterface.reduce_tup(ArrayInterface._pick_range, \$ (Ref(inds))[])
523
- 2.578 ns (0 allocations: 0 bytes)
523
+ 2.570 ns (0 allocations: 0 bytes)
524
524
Base.Slice(Static(1):Static(100))
525
525
526
526
julia> inds = (Base.OneTo(100), 1:100, 1:UInt(100))
527
527
(Base.OneTo(100), 1:100, 0x0000000000000001:0x0000000000000064)
528
528
529
529
julia> @btime reduce(ArrayInterface._pick_range, \$ (Ref(inds))[])
530
- 6.191 ns (0 allocations: 0 bytes)
530
+ 6.411 ns (0 allocations: 0 bytes)
531
531
Base.Slice(Static(1):100)
532
532
533
533
julia> @btime ArrayInterface.reduce_tup(ArrayInterface._pick_range, \$ (Ref(inds))[])
534
- 2.591 ns (0 allocations: 0 bytes)
534
+ 2.592 ns (0 allocations: 0 bytes)
535
535
Base.Slice(Static(1):100)
536
536
537
537
julia> inds = (Base.OneTo(100), 1:100, 1:UInt(100), Int32(1):Int32(100))
538
538
(Base.OneTo(100), 1:100, 0x0000000000000001:0x0000000000000064, 1:100)
539
539
540
540
julia> @btime reduce(ArrayInterface._pick_range, \$ (Ref(inds))[])
541
- 9.268 ns (0 allocations: 0 bytes)
541
+ 9.048 ns (0 allocations: 0 bytes)
542
542
Base.Slice(Static(1):100)
543
543
544
544
julia> @btime ArrayInterface.reduce_tup(ArrayInterface._pick_range, \$ (Ref(inds))[])
545
- 2.570 ns (0 allocations: 0 bytes)
545
+ 2.569 ns (0 allocations: 0 bytes)
546
546
Base.Slice(Static(1):100)
547
547
```
548
548
"""
@@ -552,32 +552,25 @@ Base.Slice(Static(1):100)
552
552
push! (q. args, :(inds[1 ]))
553
553
return q
554
554
end
555
- splits = 0
556
- _N = N
557
- while _N > 1
558
- _Nhalf = _N >> 1
559
- for n ∈ 1 : _Nhalf
560
- assign = Symbol (:r_ ,n,:_ ,splits)
561
- call = if splits == 0
562
- Expr (:call , :f , Expr (:ref , :inds , n), Expr (:ref , :inds , n + _Nhalf))
563
- else
564
- Expr (:call , :f , Symbol (:r_ ,n,:_ ,splits- 1 ), Symbol (:r_ ,n + _Nhalf,:_ ,splits- 1 ))
565
- end
566
- push! (q. args, Expr (:(= ), assign, call))
567
- end
568
- for (i,n) ∈ enumerate ((_Nhalf<< 1 )+ 1 : _N)
569
- assign = Symbol (:r_ ,i,:_ ,splits)
570
- call = if _N == N
571
- Expr (:call , :f , assign, Expr (:ref , :inds , n))
572
- else
573
- Expr (:call , :f , assign, Symbol (:r_ , n, :_ , splits- 1 ))
555
+ syms = Vector {Symbol} (undef, N)
556
+ i = 0
557
+ for n ∈ 1 : N
558
+ syms[n] = iₙ = Symbol (:i_ , (i += 1 ))
559
+ push! (q. args, Expr (:(= ), iₙ, Expr (:ref , :inds , n)))
560
+ end
561
+ W = 1 << (8 sizeof (N) - 2 - leading_zeros (N))
562
+ while W > 0
563
+ _N = length (syms)
564
+ for _ ∈ 2 W: W: _N
565
+ for w ∈ 1 : W
566
+ new_sym = Symbol (:i_ , (i += 1 ))
567
+ push! (q. args, Expr (:(= ), new_sym, Expr (:call , :f , syms[w], syms[w+ W])))
568
+ syms[w] = new_sym
574
569
end
575
- push! (q . args, Expr (:( = ), assign, call) )
570
+ deleteat! (syms, 1 + W : 2 W )
576
571
end
577
- splits += 1
578
- _N = _Nhalf
572
+ W >>>= 1
579
573
end
580
- push! (q. args, Symbol (:r_ ,1 ,:_ ,splits - 1 ))
581
574
q
582
575
end
583
576
0 commit comments