16
16
17
17
# ### functions for counting a single list of integers (1D)
18
18
"""
19
- addcounts!(r, x, levels::UnitRange{<:Int }, [wv::AbstractWeights])
19
+ addcounts!(r, x, levels::UnitRange{<:Integer }, [wv::AbstractWeights])
20
20
21
21
Add the number of occurrences in `x` of each value in `levels` to an existing
22
- array `r`. If a weighting vector `wv` is specified, the sum of weights is used
23
- rather than the raw counts.
22
+ array `r`. For each `xi ∈ x`, if `xi == levels[j]`, then we increment `r[j]`.
23
+
24
+ If a weighting vector `wv` is specified, the sum of weights is used rather than the
25
+ raw counts.
24
26
"""
25
27
function addcounts! (r:: AbstractArray , x:: IntegerArray , levels:: IntUnitRange )
26
- # add counts of integers from x to r
28
+ # add counts of integers from x that fall within levels to r
27
29
28
- k = length (levels)
29
- length (r) == k || throw (DimensionMismatch ())
30
+ checkbounds (r, axes (levels)... )
30
31
31
- m0 = levels[ 1 ]
32
- m1 = levels[ end ]
33
- b = m0 - 1
32
+ m0 = first ( levels)
33
+ m1 = last ( levels)
34
+ b = m0 - firstindex (levels) # firstindex(levels) == 1 because levels::IntUnitRange
34
35
35
- @inbounds for i in 1 : length (x)
36
- xi = x[i]
36
+ @inbounds for xi in x
37
37
if m0 <= xi <= m1
38
38
r[xi - b] += 1
39
39
end
@@ -42,15 +42,21 @@ function addcounts!(r::AbstractArray, x::IntegerArray, levels::IntUnitRange)
42
42
end
43
43
44
44
function addcounts! (r:: AbstractArray , x:: IntegerArray , levels:: IntUnitRange , wv:: AbstractWeights )
45
- k = length (levels)
46
- length (r) == k || throw (DimensionMismatch ())
45
+ # add wv weighted counts of integers from x that fall within levels to r
46
+
47
+ length (x) == length (wv) ||
48
+ throw (DimensionMismatch (" x and wv must have the same length, got $(length (x)) and $(length (wv)) " ))
49
+
50
+ xv = vec (x) # discard shape because weights() discards shape
51
+
52
+ checkbounds (r, axes (levels)... )
47
53
48
- m0 = levels[ 1 ]
49
- m1 = levels[ end ]
54
+ m0 = first ( levels)
55
+ m1 = last ( levels)
50
56
b = m0 - 1
51
57
52
- @inbounds for i in 1 : length (x )
53
- xi = x [i]
58
+ @inbounds for i in eachindex (xv, wv )
59
+ xi = xv [i]
54
60
if m0 <= xi <= m1
55
61
r[xi - b] += wv[i]
56
62
end
@@ -69,8 +75,8 @@ falling in that range will be considered (the others will be ignored without
69
75
raising an error or a warning). If an integer `k` is provided, only values in the
70
76
range `1:k` will be considered.
71
77
72
- If a weighting vector `wv` is specified , the sum of the weights is used rather than the
73
- raw counts.
78
+ If a vector of weights `wv` is provided, the sum of the weights is used rather than the
79
+ raw counts.
74
80
75
81
The output is a vector of length `length(levels)`.
76
82
"""
@@ -90,8 +96,10 @@ counts(x::IntegerArray, wv::AbstractWeights) = counts(x, span(x), wv)
90
96
proportions(x, levels=span(x), [wv::AbstractWeights])
91
97
92
98
Return the proportion of values in the range `levels` that occur in `x`.
93
- Equivalent to `counts(x, levels) / length(x)`. If a weighting vector `wv`
94
- is specified, the sum of the weights is used rather than the raw counts.
99
+ Equivalent to `counts(x, levels) / length(x)`.
100
+
101
+ If a vector of weights `wv` is provided, the proportion of weights is computed rather
102
+ than the proportion of raw counts.
95
103
"""
96
104
proportions (x:: IntegerArray , levels:: IntUnitRange ) = counts (x, levels) .* inv (length (x))
97
105
proportions (x:: IntegerArray , levels:: IntUnitRange , wv:: AbstractWeights ) =
@@ -101,6 +109,9 @@ proportions(x::IntegerArray, levels::IntUnitRange, wv::AbstractWeights) =
101
109
proportions(x, k::Integer, [wv::AbstractWeights])
102
110
103
111
Return the proportion of integers in 1 to `k` that occur in `x`.
112
+
113
+ If a vector of weights `wv` is provided, the proportion of weights is computed rather
114
+ than the proportion of raw counts.
104
115
"""
105
116
proportions (x:: IntegerArray , k:: Integer ) = proportions (x, 1 : k)
106
117
proportions (x:: IntegerArray , k:: Integer , wv:: AbstractWeights ) = proportions (x, 1 : k, wv)
@@ -110,26 +121,22 @@ proportions(x::IntegerArray, wv::AbstractWeights) = proportions(x, span(x), wv)
110
121
# ### functions for counting a single list of integers (2D)
111
122
112
123
function addcounts! (r:: AbstractArray , x:: IntegerArray , y:: IntegerArray , levels:: NTuple{2,IntUnitRange} )
113
- # add counts of integers from x to r
114
-
115
- n = length (x)
116
- length (y) == n || throw (DimensionMismatch ())
124
+ # add counts of pairs from zip(x,y) to r
117
125
118
126
xlevels, ylevels = levels
119
127
120
- kx = length (xlevels)
121
- ky = length (ylevels)
122
- size (r) == (kx, ky) || throw (DimensionMismatch ())
123
128
124
- mx0 = xlevels[1 ]
125
- mx1 = xlevels[end ]
126
- my0 = ylevels[1 ]
127
- my1 = ylevels[end ]
129
+ checkbounds (r, axes (xlevels, 1 ), axes (ylevels, 1 ))
130
+
131
+ mx0 = first (xlevels)
132
+ mx1 = last (xlevels)
133
+ my0 = first (ylevels)
134
+ my1 = last (ylevels)
128
135
129
136
bx = mx0 - 1
130
137
by = my0 - 1
131
138
132
- for i = 1 : n
139
+ for i in eachindex ( vec (x), vec (y))
133
140
xi = x[i]
134
141
yi = y[i]
135
142
if (mx0 <= xi <= mx1) && (my0 <= yi <= my1)
@@ -141,28 +148,31 @@ end
141
148
142
149
function addcounts! (r:: AbstractArray , x:: IntegerArray , y:: IntegerArray ,
143
150
levels:: NTuple{2,IntUnitRange} , wv:: AbstractWeights )
144
- # add counts of integers from x to r
151
+ # add wv weighted counts of pairs from zip(x,y) to r
152
+
153
+ length (x) == length (y) == length (wv) ||
154
+ throw (DimensionMismatch (" x, y, and wv must have the same length, but got $(length (x)) , $(length (y)) , and $(length (wv)) " ))
145
155
146
- n = length (x)
147
- length (y) == length (wv) == n || throw (DimensionMismatch ())
156
+ axes (x) == axes (y) ||
157
+ throw (DimensionMismatch (" x and y must have the same axes, but got $(axes (x)) and $(axes (y)) " ))
158
+
159
+ xv, yv = vec (x), vec (y) # discard shape because weights() discards shape
148
160
149
161
xlevels, ylevels = levels
150
162
151
- kx = length (xlevels)
152
- ky = length (ylevels)
153
- size (r) == (kx, ky) || throw (DimensionMismatch ())
163
+ checkbounds (r, axes (xlevels, 1 ), axes (ylevels, 1 ))
154
164
155
- mx0 = xlevels[ 1 ]
156
- mx1 = xlevels[ end ]
157
- my0 = ylevels[ 1 ]
158
- my1 = ylevels[ end ]
165
+ mx0 = first ( xlevels)
166
+ mx1 = last ( xlevels)
167
+ my0 = first ( ylevels)
168
+ my1 = last ( ylevels)
159
169
160
170
bx = mx0 - 1
161
171
by = my0 - 1
162
172
163
- for i = 1 : n
164
- xi = x [i]
165
- yi = y [i]
173
+ for i in eachindex (xv, yv, wv)
174
+ xi = xv [i]
175
+ yi = yv [i]
166
176
if (mx0 <= xi <= mx1) && (my0 <= yi <= my1)
167
177
r[xi - bx, yi - by] += wv[i]
168
178
end
@@ -235,13 +245,15 @@ end
235
245
236
246
237
247
"""
238
- addcounts!(dict, x[, wv]; alg = :auto)
248
+ addcounts!(dict, x; alg = :auto)
249
+ addcounts!(dict, x, wv)
239
250
240
251
Add counts based on `x` to a count map. New entries will be added if new values come up.
252
+
241
253
If a weighting vector `wv` is specified, the sum of the weights is used rather than the
242
254
raw counts.
243
255
244
- `alg` can be one of:
256
+ `alg` is only allowed for unweighted counting and can be one of:
245
257
- `:auto` (default): if `StatsBase.radixsort_safe(eltype(x)) == true` then use
246
258
`:radixsort`, otherwise use `:dict`.
247
259
@@ -284,9 +296,9 @@ function addcounts_dict!(cm::Dict{T}, x) where T
284
296
end
285
297
286
298
# If the bits type is of small size i.e. it can have up to 65536 distinct values
287
- # then it is always better to apply a counting-sort like reduce algorithm for
299
+ # then it is always better to apply a counting-sort like reduce algorithm for
288
300
# faster results and less memory usage. However we still wish to enable others
289
- # to write generic algorithms, therefore the methods below still accept the
301
+ # to write generic algorithms, therefore the methods below still accept the
290
302
# `alg` argument but it is ignored.
291
303
function _addcounts! (:: Type{Bool} , cm:: Dict{Bool} , x:: AbstractArray{Bool} ; alg = :ignored )
292
304
sumx = sum (x)
@@ -335,32 +347,42 @@ const BaseRadixSortSafeTypes = Union{Int8, Int16, Int32, Int64, Int128,
335
347
" Can the type be safely sorted by radixsort"
336
348
radixsort_safe (:: Type{T} ) where T = T<: BaseRadixSortSafeTypes
337
349
338
- function _addcounts_radix_sort_loop! (cm:: Dict{T} , sx:: AbstractArray {T} ) where T
350
+ function _addcounts_radix_sort_loop! (cm:: Dict{T} , sx:: AbstractVector {T} ) where T
339
351
isempty (sx) && return cm
340
- last_sx = sx[ 1 ]
341
- tmpcount = get (cm, last_sx, 0 ) + 1
352
+ last_sx = first (sx)
353
+ start_i = firstindex (sx)
342
354
343
355
# now the data is sorted: can just run through and accumulate values before
344
356
# adding into the Dict
345
- @inbounds for i in 2 : length (sx)
357
+ @inbounds for i in start_i + 1 : lastindex (sx)
346
358
sxi = sx[i]
347
- if last_sx == sxi
348
- tmpcount += 1
349
- else
350
- cm[last_sx] = tmpcount
359
+ if last_sx != sxi
360
+ cm[last_sx] = get (cm, last_sx, 0 ) + i - start_i
351
361
last_sx = sxi
352
- tmpcount = get (cm, last_sx, 0 ) + 1
362
+ start_i = i
353
363
end
354
364
end
355
365
356
- cm[sx[end ]] = tmpcount
366
+ last_sx = last (sx)
367
+ cm[last_sx] = get (cm, last_sx, 0 ) + lastindex (sx) + 1 - start_i
357
368
358
369
return cm
359
370
end
360
371
372
+ function _alg (x:: AbstractArray )
373
+ @static if VERSION >= v " 1.9.0-DEV"
374
+ return Base. DEFAULT_UNSTABLE
375
+ else
376
+ firstindex (x) == 1 ||
377
+ throw (ArgumentError (" alg = :radixsort requires either one based indexing or Julia >= 1.9. " *
378
+ " Use `alg = :dict` as an alternative." ))
379
+ return SortingAlgorithms. RadixSort
380
+ end
381
+ end
382
+
361
383
function addcounts_radixsort! (cm:: Dict{T} , x:: AbstractArray{T} ) where T
362
384
# sort the x using radixsort
363
- sx = sort (x , alg = RadixSort )
385
+ sx = sort (vec (x) , alg= _alg (x) )
364
386
365
387
# Delegate the loop to a separate function since sort might not
366
388
# be inferred in Julia 0.6 after SortingAlgorithms is loaded.
@@ -369,18 +391,24 @@ function addcounts_radixsort!(cm::Dict{T}, x::AbstractArray{T}) where T
369
391
end
370
392
371
393
# fall-back for `x` an iterator
372
- function addcounts_radixsort! (cm:: Dict{T} , x) where T
373
- sx = sort! (collect (x), alg = RadixSort)
394
+ function addcounts_radixsort! (cm:: Dict{T} , x) where T
395
+ cx = vec (collect (x))
396
+ sx = sort! (cx, alg = _alg (cx))
374
397
return _addcounts_radix_sort_loop! (cm, sx)
375
398
end
376
399
377
400
function addcounts! (cm:: Dict{T} , x:: AbstractArray{T} , wv:: AbstractVector{W} ) where {T,W<: Real }
378
- n = length (x)
379
- length (wv) == n || throw (DimensionMismatch ())
401
+ # add wv weighted counts of values from x to cm
402
+
403
+ length (x) == length (wv) ||
404
+ throw (DimensionMismatch (" x and wv must have the same length, got $(length (x)) and $(length (wv)) " ))
405
+
406
+ xv = vec (x) # discard shape because weights() discards shape
407
+
380
408
z = zero (W)
381
409
382
- for i = 1 : n
383
- @inbounds xi = x [i]
410
+ for i in eachindex (xv, wv)
411
+ @inbounds xi = xv [i]
384
412
@inbounds wi = wv[i]
385
413
cm[xi] = get (cm, xi, z) + wi
386
414
end
@@ -390,11 +418,14 @@ end
390
418
391
419
"""
392
420
countmap(x; alg = :auto)
393
- countmap(x::AbstractVector, w ::AbstractVector{<:Real}; alg = :auto )
421
+ countmap(x::AbstractVector, wv ::AbstractVector{<:Real})
394
422
395
- Return a dictionary mapping each unique value in `x` to its number
396
- of occurrences. A vector of weights `w` can be provided when `x` is a vector.
423
+ Return a dictionary mapping each unique value in `x` to its number of occurrences.
397
424
425
+ If a weighting vector `wv` is specified, the sum of weights is used rather than the
426
+ raw counts.
427
+
428
+ `alg` is only allowed for unweighted counting and can be one of:
398
429
- `:auto` (default): if `StatsBase.radixsort_safe(eltype(x)) == true` then use
399
430
`:radixsort`, otherwise use `:dict`.
400
431
@@ -414,9 +445,12 @@ countmap(x::AbstractArray{T}, wv::AbstractVector{W}) where {T,W<:Real} = addcoun
414
445
415
446
"""
416
447
proportionmap(x)
448
+ proportionmap(x::AbstractArray, wv::AbstractWeights)
449
+
450
+ Return a dictionary mapping each unique value in `x` to its proportion in `x`.
417
451
418
- Return a dictionary mapping each unique value in `x` to its
419
- proportion in `x` .
452
+ If a vector of weights `wv` is provided, the proportion of weights is computed rather
453
+ than the proportion of raw counts .
420
454
"""
421
455
proportionmap (x:: AbstractArray ) = _normalize_countmap (countmap (x), length (x))
422
456
proportionmap (x:: AbstractArray , wv:: AbstractWeights ) = _normalize_countmap (countmap (x, wv), sum (wv))
0 commit comments