@@ -5,16 +5,8 @@ module DistributedArrays
5
5
using Compat
6
6
import Compat. view
7
7
8
- if VERSION >= v " 0.5.0-dev+4340"
9
- using Primes
10
- using Primes: factor
11
- end
12
-
13
- if VERSION < v " 0.5.0-"
14
- typealias Future RemoteRef
15
- typealias RemoteChannel RemoteRef
16
- typealias AbstractSerializer SerializationState # On 0.4 fallback to the only concrete implementation
17
- end
8
+ using Primes
9
+ using Primes: factor
18
10
19
11
importall Base
20
12
import Base. Callable
@@ -195,40 +187,24 @@ function DArray(refs)
195
187
196
188
DArray (identity, refs, ndims, reshape (npids, dimdist), nindexes, ncuts)
197
189
end
198
- if VERSION < v " 0.5.0-"
199
- macro DArray (ex:: Expr )
200
- if ex. head != = :comprehension
201
- throw (ArgumentError (" invalid @DArray syntax" ))
202
- end
203
- ex. args[1 ] = esc (ex. args[1 ])
204
- ndim = length (ex. args) - 1
205
- ranges = map (r-> esc (r. args[2 ]), ex. args[2 : end ])
206
- for d = 1 : ndim
207
- var = ex. args[d+ 1 ]. args[1 ]
208
- ex. args[d+ 1 ] = :( $ (esc (var)) = ($ (ranges[d]))[I[$ d]] )
209
- end
210
- return :( DArray ((I:: Tuple{Vararg{UnitRange{Int}}} )-> ($ ex),
211
- tuple ($ (map (r-> :(length ($ r)), ranges)... ))) )
190
+
191
+ macro DArray (ex0:: Expr )
192
+ if ex0. head != = :comprehension
193
+ throw (ArgumentError (" invalid @DArray syntax" ))
212
194
end
213
- else
214
- macro DArray (ex0:: Expr )
215
- if ex0. head != = :comprehension
216
- throw (ArgumentError (" invalid @DArray syntax" ))
217
- end
218
- ex = ex0. args[1 ]
219
- if ex. head != = :generator
220
- throw (ArgumentError (" invalid @DArray syntax" ))
221
- end
222
- ex. args[1 ] = esc (ex. args[1 ])
223
- ndim = length (ex. args) - 1
224
- ranges = map (r-> esc (r. args[2 ]), ex. args[2 : end ])
225
- for d = 1 : ndim
226
- var = ex. args[d+ 1 ]. args[1 ]
227
- ex. args[d+ 1 ] = :( $ (esc (var)) = ($ (ranges[d]))[I[$ d]] )
228
- end
229
- return :( DArray ((I:: Tuple{Vararg{UnitRange{Int}}} )-> ($ ex0),
230
- tuple ($ (map (r-> :(length ($ r)), ranges)... ))) )
195
+ ex = ex0. args[1 ]
196
+ if ex. head != = :generator
197
+ throw (ArgumentError (" invalid @DArray syntax" ))
231
198
end
199
+ ex. args[1 ] = esc (ex. args[1 ])
200
+ ndim = length (ex. args) - 1
201
+ ranges = map (r-> esc (r. args[2 ]), ex. args[2 : end ])
202
+ for d = 1 : ndim
203
+ var = ex. args[d+ 1 ]. args[1 ]
204
+ ex. args[d+ 1 ] = :( $ (esc (var)) = ($ (ranges[d]))[I[$ d]] )
205
+ end
206
+ return :( DArray ((I:: Tuple{Vararg{UnitRange{Int}}} )-> ($ ex0),
207
+ tuple ($ (map (r-> :(length ($ r)), ranges)... ))) )
232
208
end
233
209
234
210
# new DArray similar to an existing one
@@ -656,29 +632,127 @@ function Base.setindex!(a::Array, d::DArray,
656
632
return a
657
633
end
658
634
635
+ # We also want to optimize setindex! with a SubDArray source, but this is hard
636
+ # and only works on 0.5.
637
+
638
+ # Similar to Base.indexin, but just create a logical mask. Note that this
639
+ # must return a logical mask in order to support merging multiple masks
640
+ # together into one linear index since we need to know how many elements to
641
+ # skip at the end. In many cases range intersection would be much faster
642
+ # than generating a logical mask, but that loses the endpoint information.
643
+ indexin_mask (a, b:: Number ) = a .== b
644
+ indexin_mask (a, r:: Range{Int} ) = [i in r for i in a]
645
+ indexin_mask (a, b:: AbstractArray{Int} ) = indexin_mask (a, IntSet (b))
646
+ indexin_mask (a, b:: AbstractArray ) = indexin_mask (a, Set (b))
647
+ indexin_mask (a, b) = [i in b for i in a]
648
+
649
+ import Base: tail
650
+ # Given a tuple of indices and a tuple of masks, restrict the indices to the
651
+ # valid regions. This is, effectively, reversing Base.setindex_shape_check.
652
+ # We can't just use indexing into MergedIndices here because getindex is much
653
+ # pickier about singleton dimensions than setindex! is.
654
+ restrict_indices (:: Tuple{} , :: Tuple{} ) = ()
655
+ function restrict_indices (a:: Tuple{Any, Vararg{Any}} , b:: Tuple{Any, Vararg{Any}} )
656
+ if (length (a[1 ]) == length (b[1 ]) == 1 ) || (length (a[1 ]) > 1 && length (b[1 ]) > 1 )
657
+ (vec (a[1 ])[vec (b[1 ])], restrict_indices (tail (a), tail (b))... )
658
+ elseif length (a[1 ]) == 1
659
+ (a[1 ], restrict_indices (tail (a), b))
660
+ elseif length (b[1 ]) == 1 && b[1 ][1 ]
661
+ restrict_indices (a, tail (b))
662
+ else
663
+ throw (DimensionMismatch (" this should be caught by setindex_shape_check; please submit an issue" ))
664
+ end
665
+ end
666
+ # The final indices are funky - they're allowed to accumulate together.
667
+ # An easy (albeit very inefficient) fix for too many masks is to use the
668
+ # outer product to merge them. But we can do that lazily with a custom type:
669
+ function restrict_indices (a:: Tuple{Any} , b:: Tuple{Any, Any, Vararg{Any}} )
670
+ (vec (a[1 ])[vec (ProductIndices (b, map (length, b)))],)
671
+ end
672
+ # But too many indices is much harder; this requires merging the indices
673
+ # in `a` before applying the final mask in `b`.
674
+ function restrict_indices (a:: Tuple{Any, Any, Vararg{Any}} , b:: Tuple{Any} )
675
+ if length (a[1 ]) == 1
676
+ (a[1 ], restrict_indices (tail (a), b))
677
+ else
678
+ # When one mask spans multiple indices, we need to merge the indices
679
+ # together. At this point, we can just use indexing to merge them since
680
+ # there's no longer special handling of singleton dimensions
681
+ (view (MergedIndices (a, map (length, a)), b[1 ]),)
682
+ end
683
+ end
684
+
685
+ immutable ProductIndices{I,N} <: AbstractArray{Bool, N}
686
+ indices:: I
687
+ sz:: NTuple{N,Int}
688
+ end
689
+ Base. size (P:: ProductIndices ) = P. sz
690
+ # This gets passed to map to avoid breaking propagation of inbounds
691
+ Base. @propagate_inbounds propagate_getindex (A, I... ) = A[I... ]
692
+ Base. @propagate_inbounds Base. getindex {_,N} (P:: ProductIndices{_,N} , I:: Vararg{Int, N} ) =
693
+ Bool ((& )(map (propagate_getindex, P. indices, I)... ))
694
+
695
+ immutable MergedIndices{I,N} <: AbstractArray{CartesianIndex{N}, N}
696
+ indices:: I
697
+ sz:: NTuple{N,Int}
698
+ end
699
+ Base. size (M:: MergedIndices ) = M. sz
700
+ Base. @propagate_inbounds Base. getindex {_,N} (M:: MergedIndices{_,N} , I:: Vararg{Int, N} ) =
701
+ CartesianIndex (map (propagate_getindex, M. indices, I))
702
+ # Additionally, we optimize bounds checking when using MergedIndices as an
703
+ # array index since checking, e.g., A[1:500, 1:500] is *way* faster than
704
+ # checking an array of 500^2 elements of CartesianIndex{2}. This optimization
705
+ # also applies to reshapes of MergedIndices since the outer shape of the
706
+ # container doesn't affect the index elements themselves. We can go even
707
+ # farther and say that even restricted views of MergedIndices must be valid
708
+ # over the entire array. This is overly strict in general, but in this
709
+ # use-case all the merged indices must be valid at some point, so it's ok.
710
+ typealias ReshapedMergedIndices{T,N,M<: MergedIndices } Base. ReshapedArray{T,N,M}
711
+ typealias SubMergedIndices{T,N,M<: Union{MergedIndices, ReshapedMergedIndices} } SubArray{T,N,M}
712
+ typealias MergedIndicesOrSub Union{MergedIndices, ReshapedMergedIndices, SubMergedIndices}
713
+ import Base: checkbounds_indices
714
+ @inline checkbounds_indices (:: Type{Bool} , inds:: Tuple{} , I:: Tuple{MergedIndicesOrSub,Vararg{Any}} ) =
715
+ checkbounds_indices (Bool, inds, (parent (parent (I[1 ])). indices... , tail (I)... ))
716
+ @inline checkbounds_indices (:: Type{Bool} , inds:: Tuple{Any} , I:: Tuple{MergedIndicesOrSub,Vararg{Any}} ) =
717
+ checkbounds_indices (Bool, inds, (parent (parent (I[1 ])). indices... , tail (I)... ))
718
+ @inline checkbounds_indices (:: Type{Bool} , inds:: Tuple , I:: Tuple{MergedIndicesOrSub,Vararg{Any}} ) =
719
+ checkbounds_indices (Bool, inds, (parent (parent (I[1 ])). indices... , tail (I)... ))
720
+
721
+ # The tricky thing here is that we want to optimize the accesses into the
722
+ # distributed array, but in doing so, we lose track of which indices in I we
723
+ # should be using.
724
+ #
725
+ # I’ve come to the conclusion that the function is utterly insane.
726
+ # There are *6* flavors of indices with four different reference points:
727
+ # 1. Find the indices of each portion of the DArray.
728
+ # 2. Find the valid subset of indices for the SubArray into that portion.
729
+ # 3. Find the portion of the `I` indices that should be used when you access the
730
+ # `K` indices in the subarray. This guy is nasty. It’s totally backwards
731
+ # from all other arrays, wherein we simply iterate over the source array’s
732
+ # elements. You need to *both* know which elements in `J` were skipped
733
+ # (`indexin_mask`) and which dimensions should match up (`restrict_indices`)
734
+ # 4. If `K` doesn’t correspond to an entire chunk, reinterpret `K` in terms of
735
+ # the local portion of the source array
659
736
function Base. setindex! (a:: Array , s:: SubDArray ,
660
737
I:: Union{UnitRange{Int},Colon,Vector{Int},StepRange{Int,Int}} ...)
738
+ Base. setindex_shape_check (s, Base. index_lengths (a, I... )... )
661
739
n = length (I)
662
740
d = s. parent
663
- J = s. indexes
664
- if length (J) < n
665
- a[I... ] = convert (Array,s)
666
- return a
667
- end
668
- offs = [isa (J[i],Int) ? J[i]- 1 : first (J[i])- 1 for i= 1 : n]
741
+ J = Base. decolon (d, s. indexes... )
669
742
@sync for i = 1 : length (d. pids)
670
- K_c = Any[ d. indexes[i] . .. ]
671
- K = [ intersect (J[j], K_c[j]) for j = 1 : n ]
743
+ K_c = d. indexes[i]
744
+ K = map (intersect, J, K_c)
672
745
if ! any (isempty, K)
673
- idxs = [ I[j][K[j]- offs[j]] for j= 1 : n ]
746
+ K_mask = map (indexin_mask, J, K_c)
747
+ idxs = restrict_indices (Base. decolon (a, I... ), K_mask)
674
748
if isequal (K, K_c)
675
749
# whole chunk
676
750
@async a[idxs... ] = chunk (d, i)
677
751
else
678
752
# partial chunk
679
753
@async a[idxs... ] =
680
754
remotecall_fetch (d. pids[i]) do
681
- view (localpart (d), [K[j]- first (K_c[j])+ 1 for j= 1 : n ]. .. )
755
+ view (localpart (d), [K[j]- first (K_c[j])+ 1 for j= 1 : length (J) ]. .. )
682
756
end
683
757
end
684
758
end
@@ -1395,16 +1469,7 @@ function compute_boundaries{T}(d::DVector{T}; kwargs...)
1395
1469
np = length (pids)
1396
1470
sample_sz_on_wrkr = 512
1397
1471
1398
- if VERSION < v " 0.5.0-"
1399
- results = Array (Any,np)
1400
- @sync begin
1401
- for (i,p) in enumerate (pids)
1402
- @async results[i] = remotecall_fetch (sample_n_setup_ref, p, d, sample_sz_on_wrkr; kwargs... )
1403
- end
1404
- end
1405
- else
1406
- results = asyncmap (p -> remotecall_fetch (sample_n_setup_ref, p, d, sample_sz_on_wrkr; kwargs... ), pids)
1407
- end
1472
+ results = asyncmap (p -> remotecall_fetch (sample_n_setup_ref, p, d, sample_sz_on_wrkr; kwargs... ), pids)
1408
1473
1409
1474
samples = Array (T,0 )
1410
1475
for x in results
@@ -1455,14 +1520,7 @@ function Base.sort{T}(d::DVector{T}; sample=true, kwargs...)
1455
1520
1456
1521
elseif sample== false
1457
1522
# Assume an uniform distribution between min and max values
1458
- if VERSION < v " 0.5.0-"
1459
- minmax= Array (Tuple, np)
1460
- @sync for (i,p) in enumerate (pids)
1461
- @async minmax[i] = remotecall_fetch (d-> (minimum (localpart (d)), maximum (localpart (d))), p, d)
1462
- end
1463
- else
1464
- minmax= asyncmap (p-> remotecall_fetch (d-> (minimum (localpart (d)), maximum (localpart (d))), p, d), pids)
1465
- end
1523
+ minmax= asyncmap (p-> remotecall_fetch (d-> (minimum (localpart (d)), maximum (localpart (d))), p, d), pids)
1466
1524
min_d = minimum (T[x[1 ] for x in minmax])
1467
1525
max_d = maximum (T[x[2 ] for x in minmax])
1468
1526
@@ -1503,19 +1561,10 @@ function Base.sort{T}(d::DVector{T}; sample=true, kwargs...)
1503
1561
end
1504
1562
1505
1563
local_sort_results = Array (Tuple, np)
1506
- if VERSION < v " 0.5.0-"
1507
- @sync begin
1508
- for (i,p) in enumerate (pids)
1509
- @async local_sort_results[i] =
1510
- remotecall_fetch (
1511
- scatter_n_sort_localparts, p, presorted ? nothing : d, i, refs, boundaries; kwargs... )
1512
- end
1513
- end
1514
- else
1515
- Base. asyncmap! ((i,p) -> remotecall_fetch (
1564
+
1565
+ Base. asyncmap! ((i,p) -> remotecall_fetch (
1516
1566
scatter_n_sort_localparts, p, presorted ? nothing : d, i, refs, boundaries; kwargs... ),
1517
1567
local_sort_results, 1 : np, pids)
1518
- end
1519
1568
1520
1569
# Construct a new DArray from the sorted refs. Remove parts with 0-length since
1521
1570
# the DArray constructor_from_refs does not yet support it. This implies that
0 commit comments