@@ -42,6 +42,49 @@ function direct_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray)
42
42
end
43
43
# Method without an explicit RNG: forwards to the `rng`-taking method,
# passing `Random.GLOBAL_RNG` as the generator.
function direct_sample!(a::AbstractArray, x::AbstractArray)
    return direct_sample!(Random.GLOBAL_RNG, a, x)
end
44
44
45
# Decide whether the indices `1:n` can be stored exactly in elements of type
# `T`, combined with a heuristic on the number of samples `k` telling whether
# doing so is expected to be beneficial.
# Only a subset of the hardware-supported numeric types ever qualifies.

# integers: every index up to `typemax` of the type can be stored
function _storeindices(n, k, ::Type{I}) where {I<:Integer}
    return n ≤ typemax(I)
end

# Float32/Float64: bounded by `maxintfloat`, and only for small `k`
function _storeindices(n, k, ::Type{F}) where {F<:Union{Float32,Float64}}
    return k < 22 && n ≤ maxintfloat(F)
end

# complex numbers: defer to the component type
function _storeindices(n, k, ::Type{Complex{R}}) where {R}
    return _storeindices(n, k, R)
end

# rationals: defer to the component type, and only for small `k`
function _storeindices(n, k, ::Type{Rational{R}}) where {R}
    return k < 16 && _storeindices(n, k, R)
end

# any other type: never
function _storeindices(n, k, T)
    return false
end

# entry point: only subtypes of `Base.HWNumber` are considered at all
function storeindices(n, k, ::Type{T}) where {T<:Base.HWNumber}
    return _storeindices(n, k, T)
end

function storeindices(n, k, T)
    return false
end
55
+
56
# Order the results of a sampler that does not order automatically.
#
# `sampler!` is called as `sampler!(rng, inds, dest)` to draw positions from
# `Base.OneTo(length(a))`; the positions are sorted and then replaced by the
# corresponding items of `a`, so that items of `x` appear in the same order as
# in `a`. `x` is indexed with `1:length(x)`. Returns `x`.
function sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractArray, x::AbstractArray)
    npop = length(a)
    ndraws = length(x)
    positions = Base.OneTo(npop)
    # todo: if eltype(x) <: Real && eltype(a) <: Real,
    #       in some cases it might be faster to check
    #       issorted(a) to see if we can just sort x
    if !storeindices(npop, ndraws, eltype(x))
        # positions are not kept in `x` itself: draw them into a scratch vector
        scratch = Vector{Int}(undef, ndraws)
        sort!(sampler!(rng, positions, scratch))
        @inbounds for j in 1:ndraws
            x[j] = a[scratch[j]]
        end
        return x
    end
    # `x` doubles as the buffer holding the sampled positions
    sort!(sampler!(rng, positions, x); by=real, lt=<)
    @inbounds for j in 1:ndraws
        pos = Int(x[j])
        x[j] = a[pos]
    end
    return x
end
76
+
77
# Special case of a range, which can be done more efficiently: the values drawn
# by `sampler!(rng, a, x)` are sorted directly, in reverse when the step of `a`
# is negative.
function sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractRange, x::AbstractArray)
    drawn = sampler!(rng, a, x)
    descending = step(a) < 0
    return sort!(drawn; rev=descending)
end
80
+
81
# Weighted case: wrap the four-argument sampler `sampler!(rng, a, wv, x)` in a
# three-argument closure that carries `wv`, then delegate to the unweighted
# `sample_ordered!` methods.
function sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractArray,
                         wv::AbstractWeights, x::AbstractArray)
    unweighted! = (r, src, dest) -> sampler!(r, src, wv, dest)
    return sample_ordered!(unweighted!, rng, a, x)
end
87
+
45
88
# ## draw a pair of distinct integers in [1:n]
46
89
47
90
"""
@@ -396,21 +439,24 @@ Draw a random sample of `length(x)` elements from an array `a`
396
439
and store the result in `x`. A polyalgorithm is used for sampling.
397
440
Sampling probabilities are proportional to the weights given in `wv`,
398
441
if provided. `replace` dictates whether sampling is performed with
399
- replacement and `order` dictates whether an ordered sample, also called
400
- a sequential sample, should be taken.
442
+ replacement. `ordered` dictates whether
443
+ an ordered sample (also called a sequential sample, i.e. a sample where
444
+ items appear in the same order as in `a`) should be taken.
401
445
402
446
Optionally specify a random number generator `rng` as the first argument
403
447
(defaults to `Random.GLOBAL_RNG`).
404
448
"""
405
449
function sample! (rng:: AbstractRNG , a:: AbstractArray , x:: AbstractArray ;
406
450
replace:: Bool = true , ordered:: Bool = false )
451
+ 1 == firstindex (a) == firstindex (x) ||
452
+ throw (ArgumentError (" non 1-based arrays are not supported" ))
407
453
n = length (a)
408
454
k = length (x)
409
455
k == 0 && return x
410
456
411
457
if replace # with replacement
412
458
if ordered
413
- sort ! (direct_sample! ( rng, a, x) )
459
+ sample_ordered ! (direct_sample!, rng, a, x)
414
460
else
415
461
direct_sample! (rng, a, x)
416
462
end
@@ -448,8 +494,9 @@ sample!(a::AbstractArray, x::AbstractArray; replace::Bool=true, ordered::Bool=fa
448
494
Select a random, optionally weighted sample of size `n` from an array `a`
449
495
using a polyalgorithm. Sampling probabilities are proportional to the weights
450
496
given in `wv`, if provided. `replace` dictates whether sampling is performed
451
- with replacement and `order` dictates whether an ordered sample, also called
452
- a sequential sample, should be taken.
497
+ with replacement. `ordered` dictates whether
498
+ an ordered sample (also called a sequential sample, i.e. a sample where
499
+ items appear in the same order as in `a`) should be taken.
453
500
454
501
Optionally specify a random number generator `rng` as the first argument
455
502
(defaults to `Random.GLOBAL_RNG`).
@@ -468,8 +515,9 @@ sample(a::AbstractArray, n::Integer; replace::Bool=true, ordered::Bool=false) =
468
515
Select a random, optionally weighted sample from an array `a` specifying
469
516
the dimensions `dims` of the output array. Sampling probabilities are
470
517
proportional to the weights given in `wv`, if provided. `replace` dictates
471
- whether sampling is performed with replacement and `order` dictates whether
472
- an ordered sample, also called a sequential sample, should be taken.
518
+ whether sampling is performed with replacement. `ordered` dictates whether
519
+ an ordered sample (also called a sequential sample, i.e. a sample where
520
+ items appear in the same order as in `a`) should be taken.
473
521
474
522
Optionally specify a random number generator `rng` as the first argument
475
523
(defaults to `Random.GLOBAL_RNG`).
@@ -781,7 +829,8 @@ Noting `k=length(x)` and `n=length(a)`, this algorithm takes ``O(k \\log(k) \\lo
781
829
processing time to draw ``k`` elements. It consumes ``O(k \\log(n / k))`` random numbers.
782
830
"""
783
831
function efraimidis_aexpj_wsample_norep! (rng:: AbstractRNG , a:: AbstractArray ,
784
- wv:: AbstractWeights , x:: AbstractArray )
832
+ wv:: AbstractWeights , x:: AbstractArray ;
833
+ ordered:: Bool = false )
785
834
n = length (a)
786
835
length (wv) == n || throw (DimensionMismatch (" a and wv must be of same length (got $n and $(length (wv)) )." ))
787
836
k = length (x)
@@ -824,24 +873,36 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray,
824
873
threshold = pq[1 ]. first
825
874
X = threshold * randexp (rng)
826
875
end
827
-
828
- # fill output array with items in descending order
829
- @inbounds for i in k: - 1 : 1
830
- x[i] = a[heappop! (pq). second]
876
+ if ordered
877
+ # fill output array with items sorted as in a
878
+ sort! (pq, by= last)
879
+ @inbounds for i in 1 : k
880
+ x[i] = a[pq[i]. second]
881
+ end
882
+ else
883
+ # fill output array with items in descending order
884
+ @inbounds for i in k: - 1 : 1
885
+ x[i] = a[heappop! (pq). second]
886
+ end
831
887
end
832
888
return x
833
889
end
834
- efraimidis_aexpj_wsample_norep! (a:: AbstractArray , wv:: AbstractWeights , x:: AbstractArray ) =
835
- efraimidis_aexpj_wsample_norep! (Random. GLOBAL_RNG, a, wv, x)
890
# Method without an explicit RNG: forwards to the `rng`-taking method with
# `Random.GLOBAL_RNG`, passing the `ordered` keyword through unchanged.
function efraimidis_aexpj_wsample_norep!(a::AbstractArray, wv::AbstractWeights,
                                         x::AbstractArray; ordered::Bool=false)
    return efraimidis_aexpj_wsample_norep!(Random.GLOBAL_RNG, a, wv, x; ordered=ordered)
end
836
893
837
894
function sample! (rng:: AbstractRNG , a:: AbstractArray , wv:: AbstractWeights , x:: AbstractArray ;
838
895
replace:: Bool = true , ordered:: Bool = false )
896
+ 1 == firstindex (a) == firstindex (wv) == firstindex (x) ||
897
+ throw (ArgumentError (" non 1-based arrays are not supported" ))
839
898
n = length (a)
840
899
k = length (x)
841
900
842
901
if replace
843
902
if ordered
844
- sort! (direct_sample! (rng, a, wv, x))
903
+ sample_ordered! (rng, a, wv, x) do rng, a, wv, x
904
+ sample! (rng, a, wv, x; replace= true , ordered= false )
905
+ end
845
906
else
846
907
if n < 40
847
908
direct_sample! (rng, a, wv, x)
@@ -856,11 +917,7 @@ function sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, x::Abs
856
917
end
857
918
else
858
919
k <= n || error (" Cannot draw $n samples from $k samples without replacement." )
859
-
860
- efraimidis_aexpj_wsample_norep! (rng, a, wv, x)
861
- if ordered
862
- sort! (x)
863
- end
920
+ efraimidis_aexpj_wsample_norep! (rng, a, wv, x; ordered= ordered)
864
921
end
865
922
return x
866
923
end
@@ -889,8 +946,9 @@ sample(a::AbstractArray, wv::AbstractWeights, dims::Dims;
889
946
890
947
Select a weighted sample from an array `a` and store the result in `x`. Sampling
891
948
probabilities are proportional to the weights given in `w`. `replace` dictates
892
- whether sampling is performed with replacement and `order` dictates whether an
893
- ordered sample, also called a sequential sample, should be taken.
949
+ whether sampling is performed with replacement. `ordered` dictates whether
950
+ an ordered sample (also called a sequential sample, i.e. a sample where
951
+ items appear in the same order as in `a`) should be taken.
894
952
895
953
Optionally specify a random number generator `rng` as the first argument
896
954
(defaults to `Random.GLOBAL_RNG`).
@@ -923,8 +981,9 @@ wsample(a::AbstractArray, w::RealVector) = wsample(Random.GLOBAL_RNG, a, w)
923
981
Select a weighted random sample of size `n` from `a` with probabilities proportional
924
982
to the weights given in `w` if `a` is present, otherwise select a random sample of size
925
983
`n` of the weights given in `w`. `replace` dictates whether sampling is performed with
926
- replacement and `order` dictates whether an ordered sample, also called a sequential
927
- sample, should be taken.
984
+ replacement. `ordered` dictates whether
985
+ an ordered sample (also called a sequential sample, i.e. a sample where
986
+ items appear in the same order as in `a`) should be taken.
928
987
929
988
Optionally specify a random number generator `rng` as the first argument
930
989
(defaults to `Random.GLOBAL_RNG`).
0 commit comments