Skip to content

Commit 514e3e8

Browse files
committed
Optimize reduce_tup a little more.
1 parent a7980c9 commit 514e3e8

File tree

1 file changed

+46
-53
lines changed

1 file changed

+46
-53
lines changed

src/ranges.jl

Lines changed: 46 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -463,49 +463,49 @@ julia> for n ∈ 2:16
463463
end
464464
n = 2
465465
0.863 ns (0 allocations: 0 bytes)
466-
0.864 ns (0 allocations: 0 bytes)
467-
n = 3
468466
0.863 ns (0 allocations: 0 bytes)
467+
n = 3
468+
0.862 ns (0 allocations: 0 bytes)
469469
0.863 ns (0 allocations: 0 bytes)
470470
n = 4
471-
0.864 ns (0 allocations: 0 bytes)
472-
1.075 ns (0 allocations: 0 bytes)
471+
0.862 ns (0 allocations: 0 bytes)
472+
0.862 ns (0 allocations: 0 bytes)
473473
n = 5
474+
1.074 ns (0 allocations: 0 bytes)
474475
0.864 ns (0 allocations: 0 bytes)
475-
1.076 ns (0 allocations: 0 bytes)
476476
n = 6
477-
0.865 ns (0 allocations: 0 bytes)
478-
1.077 ns (0 allocations: 0 bytes)
477+
0.864 ns (0 allocations: 0 bytes)
478+
0.862 ns (0 allocations: 0 bytes)
479479
n = 7
480480
1.075 ns (0 allocations: 0 bytes)
481-
0.866 ns (0 allocations: 0 bytes)
481+
0.864 ns (0 allocations: 0 bytes)
482482
n = 8
483-
0.974 ns (0 allocations: 0 bytes)
484-
1.076 ns (0 allocations: 0 bytes)
483+
1.077 ns (0 allocations: 0 bytes)
484+
0.865 ns (0 allocations: 0 bytes)
485485
n = 9
486486
1.081 ns (0 allocations: 0 bytes)
487-
1.077 ns (0 allocations: 0 bytes)
487+
0.865 ns (0 allocations: 0 bytes)
488488
n = 10
489-
1.203 ns (0 allocations: 0 bytes)
490-
1.077 ns (0 allocations: 0 bytes)
489+
1.195 ns (0 allocations: 0 bytes)
490+
0.867 ns (0 allocations: 0 bytes)
491491
n = 11
492-
1.355 ns (0 allocations: 0 bytes)
493-
1.292 ns (0 allocations: 0 bytes)
492+
1.357 ns (0 allocations: 0 bytes)
493+
1.400 ns (0 allocations: 0 bytes)
494494
n = 12
495-
1.539 ns (0 allocations: 0 bytes)
496-
1.079 ns (0 allocations: 0 bytes)
495+
1.543 ns (0 allocations: 0 bytes)
496+
1.074 ns (0 allocations: 0 bytes)
497497
n = 13
498-
1.704 ns (0 allocations: 0 bytes)
499-
1.290 ns (0 allocations: 0 bytes)
498+
1.702 ns (0 allocations: 0 bytes)
499+
1.077 ns (0 allocations: 0 bytes)
500500
n = 14
501-
1.916 ns (0 allocations: 0 bytes)
502-
1.185 ns (0 allocations: 0 bytes)
501+
1.913 ns (0 allocations: 0 bytes)
502+
0.867 ns (0 allocations: 0 bytes)
503503
n = 15
504-
2.072 ns (0 allocations: 0 bytes)
505-
1.292 ns (0 allocations: 0 bytes)
504+
2.076 ns (0 allocations: 0 bytes)
505+
1.077 ns (0 allocations: 0 bytes)
506506
n = 16
507507
2.273 ns (0 allocations: 0 bytes)
508-
1.076 ns (0 allocations: 0 bytes)
508+
1.078 ns (0 allocations: 0 bytes)
509509
```
510510
511511
More importantly, `reduce_tup(_pick_range, inds)` often performs better than `reduce(_pick_range, inds)`.
@@ -516,33 +516,33 @@ julia> inds = (Base.OneTo(100), 1:100, 1:ArrayInterface.StaticInt(100))
516516
(Base.OneTo(100), 1:100, 1:Static(100))
517517
518518
julia> @btime reduce(ArrayInterface._pick_range, \$(Ref(inds))[])
519-
6.000 ns (0 allocations: 0 bytes)
519+
6.405 ns (0 allocations: 0 bytes)
520520
Base.Slice(Static(1):Static(100))
521521
522522
julia> @btime ArrayInterface.reduce_tup(ArrayInterface._pick_range, \$(Ref(inds))[])
523-
2.578 ns (0 allocations: 0 bytes)
523+
2.570 ns (0 allocations: 0 bytes)
524524
Base.Slice(Static(1):Static(100))
525525
526526
julia> inds = (Base.OneTo(100), 1:100, 1:UInt(100))
527527
(Base.OneTo(100), 1:100, 0x0000000000000001:0x0000000000000064)
528528
529529
julia> @btime reduce(ArrayInterface._pick_range, \$(Ref(inds))[])
530-
6.191 ns (0 allocations: 0 bytes)
530+
6.411 ns (0 allocations: 0 bytes)
531531
Base.Slice(Static(1):100)
532532
533533
julia> @btime ArrayInterface.reduce_tup(ArrayInterface._pick_range, \$(Ref(inds))[])
534-
2.591 ns (0 allocations: 0 bytes)
534+
2.592 ns (0 allocations: 0 bytes)
535535
Base.Slice(Static(1):100)
536536
537537
julia> inds = (Base.OneTo(100), 1:100, 1:UInt(100), Int32(1):Int32(100))
538538
(Base.OneTo(100), 1:100, 0x0000000000000001:0x0000000000000064, 1:100)
539539
540540
julia> @btime reduce(ArrayInterface._pick_range, \$(Ref(inds))[])
541-
9.268 ns (0 allocations: 0 bytes)
541+
9.048 ns (0 allocations: 0 bytes)
542542
Base.Slice(Static(1):100)
543543
544544
julia> @btime ArrayInterface.reduce_tup(ArrayInterface._pick_range, \$(Ref(inds))[])
545-
2.570 ns (0 allocations: 0 bytes)
545+
2.569 ns (0 allocations: 0 bytes)
546546
Base.Slice(Static(1):100)
547547
```
548548
"""
@@ -552,32 +552,25 @@ Base.Slice(Static(1):100)
552552
push!(q.args, :(inds[1]))
553553
return q
554554
end
555-
splits = 0
556-
_N = N
557-
while _N > 1
558-
_Nhalf = _N >> 1
559-
for n ∈ 1:_Nhalf
560-
assign = Symbol(:r_,n,:_,splits)
561-
call = if splits == 0
562-
Expr(:call, :f, Expr(:ref, :inds, n), Expr(:ref, :inds, n + _Nhalf))
563-
else
564-
Expr(:call, :f, Symbol(:r_,n,:_,splits-1), Symbol(:r_,n + _Nhalf,:_,splits-1))
565-
end
566-
push!(q.args, Expr(:(=), assign, call))
567-
end
568-
for (i,n) ∈ enumerate((_Nhalf<<1)+1:_N)
569-
assign = Symbol(:r_,i,:_,splits)
570-
call = if _N == N
571-
Expr(:call, :f, assign, Expr(:ref, :inds, n))
572-
else
573-
Expr(:call, :f, assign, Symbol(:r_, n, :_, splits-1))
555+
syms = Vector{Symbol}(undef, N)
556+
i = 0
557+
for n ∈ 1:N
558+
syms[n] = iₙ = Symbol(:i_, (i += 1))
559+
push!(q.args, Expr(:(=), iₙ, Expr(:ref, :inds, n)))
560+
end
561+
W = 1 << (8sizeof(N) - 2 - leading_zeros(N))
562+
while W > 0
563+
_N = length(syms)
564+
for _ ∈ 2W:W:_N
565+
for w ∈ 1:W
566+
new_sym = Symbol(:i_, (i += 1))
567+
push!(q.args, Expr(:(=), new_sym, Expr(:call, :f, syms[w], syms[w+W])))
568+
syms[w] = new_sym
574569
end
575-
push!(q.args, Expr(:(=), assign, call))
570+
deleteat!(syms, 1+W:2W)
576571
end
577-
splits += 1
578-
_N = _Nhalf
572+
W >>>= 1
579573
end
580-
push!(q.args, Symbol(:r_,1,:_,splits - 1))
581574
q
582575
end
583576

0 commit comments

Comments
 (0)