@@ -52,27 +52,34 @@ A user defined type could override this method to define an appropriate test fun
52
52
# Fallback method: a pair whose key is not an `AbstractArray` is handed back unchanged.
function optimize_pair(pair::Pair)
    return pair
end

# Array keys are replaced by a `Set` holding the same elements; the target value is
# kept as is. Presumably this makes later membership checks against the key cheaper
# (TODO confirm against `recode_in`).
function optimize_pair(pair::Pair{<:AbstractArray})
    from = Set(first(pair))
    return Pair(from, last(pair))
end
54
54
55
# Generic entry point for in-place recoding.
#
# Checks that `dest` and `src` have the same number of elements (throwing a
# `DimensionMismatch` otherwise), passes every pair through `optimize_pair`, and then
# hands the actual work to the `_recode!` method selected by the argument types.
# The value returned by `_recode!` is returned to the caller.
function recode!(dest::AbstractArray, src::AbstractArray, default::Any, pairs::Pair...)
    ndest, nsrc = length(dest), length(src)
    ndest == nsrc ||
        throw(DimensionMismatch("dest and src must be of the same length " *
                                "(got $ndest and $nsrc)"))

    # Broadcasting over the varargs tuple yields a tuple again, which is what the
    # `_recode!` methods accept.
    normalized = optimize_pair.(pairs)

    return _recode!(dest, src, default, normalized)
end
64
+
65
+ function _recode! (dest:: AbstractArray{T} , src:: AbstractArray , default,
66
+ pairs:: NTuple{<:Any, Pair} ) where {T}
67
+ recode_to = last .(pairs)
68
+ recode_from = first .(pairs)
69
+
62
70
@inbounds for i in eachindex (dest, src)
63
71
x = src[i]
64
72
65
- for j in 1 : length (opt_pairs)
66
- p = opt_pairs[j]
67
- # we use isequal and recode_in because we cannot really distinguish scalars from collections
68
- if x ≅ p. first || recode_in (x, p. first)
69
- dest[i] = p. second
70
- @goto nextitem
71
- end
72
- end
73
-
73
+ # @inline is needed for type stability and Compat for compatibility before julia v1.8
74
+ # we use isequal and recode_in because we cannot really
75
+ # distinguish scalars from collections
76
+ j = Compat. @inline findfirst (y -> isequal (x, y) || recode_in (x,y), recode_from)
77
+
78
+ # Value in one of the pairs
79
+ if j != = nothing
80
+ dest[i] = recode_to[j]
74
81
# Value not in any of the pairs
75
- if ismissing (x)
82
+ elseif ismissing (x)
76
83
eltype (dest) >: Missing ||
77
84
throw (MissingException (" missing value found, but dest does not support them: " *
78
85
" recode them to a supported value" ))
@@ -89,21 +96,16 @@ function recode!(dest::AbstractArray{T}, src::AbstractArray, default::Any, pairs
89
96
else
90
97
dest[i] = default
91
98
end
92
-
93
- @label nextitem
94
99
end
95
100
96
101
dest
97
102
end
98
103
99
- function recode! (dest:: CategoricalArray{T} , src:: AbstractArray , default:: Any , pairs:: Pair... ) where {T}
100
- if length (dest) != length (src)
101
- throw (DimensionMismatch (" dest and src must be of the same length (got $(length (dest)) and $(length (src)) )" ))
102
- end
103
-
104
- opt_pairs = map (optimize_pair, pairs)
104
+ function _recode! (dest:: CategoricalArray{T, <:Any, R} , src:: AbstractArray , default:: Any ,
105
+ pairs:: NTuple{<:Any, Pair} ) where {T, R}
106
+ recode_from = first .(pairs)
107
+ vals = T[p. second for p in pairs]
105
108
106
- vals = T[p. second for p in opt_pairs]
107
109
default != = nothing && push! (vals, default)
108
110
109
111
levels! (dest. pool, filter! (! ismissing, unique (vals)))
@@ -112,22 +114,22 @@ function recode!(dest::CategoricalArray{T}, src::AbstractArray, default::Any, pa
112
114
dupvals = length (vals) != length (levels (dest. pool))
113
115
114
116
drefs = dest. refs
115
- pairmap = [ismissing (v) ? 0 : get (dest. pool, v) for v in vals]
116
- defaultref = default === nothing || ismissing (default) ? 0 : get (dest. pool, default)
117
+ pairmap = [ismissing (v) ? zero (R) : get (dest. pool, v) for v in vals]
118
+ defaultref = default === nothing || ismissing (default) ? zero (R) : get (dest. pool, default)
119
+
117
120
@inbounds for i in eachindex (drefs, src)
118
121
x = src[i]
119
122
120
- for j in 1 : length (opt_pairs)
121
- p = opt_pairs[j]
122
- # we use isequal and recode_in because we cannot really distinguish scalars from collections
123
- if x ≅ p. first || recode_in (x, p. first)
124
- drefs[i] = dupvals ? pairmap[j] : j
125
- @goto nextitem
126
- end
127
- end
123
+ # @inline is needed for type stability and Compat for compatibility before julia v1.8
124
+ # we use isequal and recode_in because we cannot really
125
+ # distinguish scalars from collections
126
+ j = Compat. @inline findfirst (y -> isequal (x, y) || recode_in (x, y), recode_from)
128
127
128
+ # Value in one of the pairs
129
+ if j != = nothing
130
+ drefs[i] = dupvals ? pairmap[j] : j
129
131
# Value not in any of the pairs
130
- if ismissing (x)
132
+ elseif ismissing (x)
131
133
eltype (dest) >: Missing ||
132
134
throw (MissingException (" missing value found, but dest does not support them: " *
133
135
" recode them to a supported value" ))
@@ -144,8 +146,6 @@ function recode!(dest::CategoricalArray{T}, src::AbstractArray, default::Any, pa
144
146
else
145
147
drefs[i] = defaultref
146
148
end
147
-
148
- @label nextitem
149
149
end
150
150
151
151
# Put existing levels first, and sort them if possible
@@ -168,25 +168,21 @@ function recode!(dest::CategoricalArray{T}, src::AbstractArray, default::Any, pa
168
168
dest
169
169
end
170
170
171
- function recode! (dest:: CategoricalArray{T, N, R} , src:: CategoricalArray ,
172
- default:: Any , pairs:: Pair... ) where {T, N, R<: Integer }
173
- if length (dest) != length (src)
174
- throw (DimensionMismatch (" dest and src must be of the same length " *
175
- " (got $(length (dest)) and $(length (src)) )" ))
176
- end
177
-
171
+ function _recode! (dest:: CategoricalArray{T, N, R} , src:: CategoricalArray ,
172
+ default:: Any , pairs:: NTuple{<:Any, Pair} ) where {T, N, R<: Integer }
173
+ recode_from = first .(pairs)
178
174
vals = T[p. second for p in pairs]
175
+
179
176
if default === nothing
180
177
srclevels = levels (src)
181
178
182
179
# Remove recoded levels as they won't appear in result
183
- firsts = (p. first for p in pairs)
184
180
keptlevels = Vector {T} (undef, 0 )
185
181
sizehint! (keptlevels, length (srclevels))
186
182
187
183
for l in srclevels
188
- if ! (any (x -> x ≅ l, firsts ) ||
189
- any (f -> recode_in (l, f), firsts ))
184
+ if ! (any (x -> x ≅ l, recode_from ) ||
185
+ any (f -> recode_in (l, f), recode_from ))
190
186
try
191
187
push! (keptlevels, l)
192
188
catch err
0 commit comments