Commit f2f6134

Merge pull request #14 from JuliaAI/dev
For a 0.2.0 release
2 parents: 766aa90 + 73db982

10 files changed: +144 / -136 lines

Project.toml

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 name = "LearnDataFrontEnds"
 uuid = "5cca22a3-9356-470e-ba1b-8268d0135a4b"
 authors = ["Anthony D. Blaom <[email protected]>"]
-version = "0.1.2"
+version = "0.2.0"
 
 [deps]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
@@ -11,7 +11,7 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 
 [compat]
-CategoricalArrays = "0.10"
+CategoricalArrays = "1"
 LearnAPI = "0.2, 1, 2"
 MLCore = "1.0.0"
 StatsModels = "0.7.4"

docs/src/quick_start.md

Lines changed: 36 additions & 36 deletions
@@ -4,7 +4,7 @@
 - [Supervised classifiers](@ref)
 - [Transformers](@ref)
 
-Refer to the front end [docstrings](@ref front_ends) for options ignored below.
+Refer to the front end [docstrings](@ref front_ends) for options ignored below.
 
 ## Supervised regressors
 
@@ -31,35 +31,35 @@ Your [`LearnAPI.fit`](@ref) implementation will then look like this:
 
 ```julia
 function LearnAPI.fit(
-    learner::MyLearner,
-    observations::Obs;
-    verbosity=1,
-    )
-    X = observations.features # p x n matrix
-    y = observations.target # n-vector (use `Saffron(multitarget=true)` for matrix)
-    feature_names = observations.names
+    learner::MyLearner,
+    observations::Obs;
+    verbosity=1,
+    )
+    X = observations.features # p x n matrix
+    y = observations.target # n-vector (use `Saffron(multitarget=true)` for matrix)
+    feature_names = observations.names
 
-    # do stuff with `X`, `y` and `feature_names`:
-    ...
+    # do stuff with `X`, `y` and `feature_names`:
+    ...
 
 end
 LearnAPI.fit(learner::MyLearner, data; kwargs...) =
-    LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)
+    LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)
 ```
 
 For each [`KindOfProxy`](@ref) subtype `K` to be supported (e.g., `Point`), your
 [`LearnAPI.predict`](@ref) implementation(s) will look like this:
 
 ```julia
 function LearnAPI.predict(model::MyModel, ::K, observations::Obs)
-    X = observations.features # p x n matrix
-    names = observations.names # if really needed
+    X = observations.features # p x n matrix
+    names = observations.names # if really needed
 
-    # do stuff with `X`:
-    ...
+    # do stuff with `X`:
+    ...
 end
 LearnAPI.predict(model::MyModel, kind_of_proxy, X) =
-    LearnAPI.predict(model, kind_of_proxy, obs(model, X))
+    LearnAPI.predict(model, kind_of_proxy, obs(model, X))
 ```
 
 ## Supervised classifiers
@@ -94,13 +94,13 @@ function LearnAPI.fit(
     X = observations.features # p x n matrix
     y = observations.target # n-vector
     decoder = observations.decoder
-    classes_seen = observatioins.classes_seen
+    levels_seen = observations.levels_seen
     feature_names = observations.names
 
     # do stuff with `X`, `y` and `feature_names`:
-    # return a `model` object which also stores the `decoder` and/or `classes_seen`
-    # to make them available to `predict`.
-    ...
+    # return a `model` object which also stores the `decoder` and/or `levels_seen`
+    # to make them available to `predict`.
+    ...
 end
 LearnAPI.fit(learner::MyLearner, data; kwargs...) =
     LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)
@@ -116,10 +116,10 @@ function LearnAPI.predict(model::MyModel, ::K, observations::Obs)
 
     # Do stuff with `X` and `model` to obtain raw `predictions` (a vector of integer
     # codes for `K = Point`, or an `n x c` matrix of probabilities for `K = Distribution`).
-    # Extract `decoder` or `classes_seen` from `model`.
+    # Extract `decoder` or `levels_seen` from `model`.
     # For `K = Point`, return `decoder.(predictions)`.
     # For `K = Distribution`, return, say,
-    # `CategoricalDistributions.Univariate(classes_seen, predictions)`.
+    # `CategoricalDistributions.Univariate(levels_seen, predictions)`.
     ...
 end
 LearnAPI.predict(model::MyModel, kind_of_proxy, X) = LearnAPI.predict(model,
@@ -152,29 +152,29 @@ Your [`LearnAPI.fit`](@ref) implementation will then look like this:
 
 ```julia
 function LearnAPI.fit(
-    learner::MyLearner,
-    observations::Obs;
-    verbosity=1,
-    )
-    x = observations.features # p x n matrix
-    feature_names = observations.names
-
-    # do stuff with `x` and `feature_names`:
-    ...
+    learner::MyLearner,
+    observations::Obs;
+    verbosity=1,
+    )
+    x = observations.features # p x n matrix
+    feature_names = observations.names
+
+    # do stuff with `x` and `feature_names`:
+    ...
 end
 LearnAPI.fit(learner::MyLearner, data; kwargs...) =
-    LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)
+    LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)
 ```
 
 Your [`LearnAPI.transform`](@ref) implementation will look like this:
 
 ```julia
 function LearnAPI.transform(model::MyModel, observations::Obs)
-    x = observations.features # p x n matrix
-    feature_names = observations.names # if really needed
+    x = observations.features # p x n matrix
+    feature_names = observations.names # if really needed
 
-    # do stuff with `x`:
-    ...
+    # do stuff with `x`:
+    ...
 end
 LearnAPI.transform(model::MyModel, X) = LearnAPI.transform(model, obs(model, X))
 ```
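
An editorial aside, not part of the diff: the classifier skeletons above leave the method bodies to the implementer. Below is a minimal sketch of how `Point` predictions might be completed for a constant (majority-class) classifier; the `model` fields `mode_code` and `decoder` are hypothetical names for whatever your `fit` chooses to store.

```julia
# Hedged sketch only; `model.mode_code` and `model.decoder` are illustrative
# field names, not part of LearnDataFrontEnds.
function LearnAPI.predict(model::MyModel, ::LearnAPI.Point, observations::Obs)
    X = observations.features               # p x n matrix
    n = size(X, 2)
    predictions = fill(model.mode_code, n)  # one integer code per observation
    return model.decoder.(predictions)      # back to `CategoricalValue`s
end
```

For `Distribution` predictions, the stored `levels_seen` would instead be passed to a probability-distribution constructor, as in the docstring comments above.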

docs/src/reference.md

Lines changed: 0 additions & 1 deletion
@@ -15,6 +15,5 @@ LearnDataFrontEnds.feature_names
 LearnDataFrontEnds.swapdims
 LearnDataFrontEnds.decoder
 LearnDataFrontEnds.decompose
-LearnDataFrontEnds.classes
 LearnDataFrontEnds.canonify
 ```

src/backends.jl

Lines changed: 77 additions & 8 deletions
@@ -31,17 +31,86 @@ If [`Sage`](@ref)`(multitarget=..., code_type=...)` has been implemented, then
 `observations.target` has an integer element type controlled by `code_type`, and we
 additionally have:
 
-- `observations.classes`: A categorical vector of the ordered target classes, as actually
-  seen in the user-supplied target, with the full pool of classes available by applying
-  `Categorical.levels` to the result. The corresponding integer codes will be
-  `sort(unique(observations.target))`.
+- `observations.levels_seen`: A categorical vector of the ordered target levels, as actually
+  seen in the user-supplied target. The corresponding integer codes will be
+  `sort(unique(observations.target))`. To get the full pool of levels, apply
+  `CategoricalArrays.levels` to `observations.levels_seen`; see the example below.
 
 - `observations.decoder`: A callable function that converts an integer code back to the
   original `CategoricalValue` it represents.
 
 Pass the first onto `predict` for making probabilistic predictions, and the second for
 point predictions; see [`Sage`](@ref) for details.
 
+# Extended help
+
+In the example below, `observations` implements the full `Obs` interface described above,
+for a learner implementing the `Sage` front end:
+
+```julia-repl
+using LearnAPI, LearnDataFrontEnds, LearnTestAPI
+using CategoricalDistributions, CategoricalArrays, DataFrames
+X = DataFrame(rand(10, 3), :auto)
+y = categorical(collect("ababababac"))
+learner = LearnTestAPI.ConstantClassifier()
+observations = obs(learner, (X[1:9,:], y[1:9]))
+
+julia> observations.features
+3×9 Matrix{Float64}:
+ 0.234043  0.526468  0.227417  0.956471    …  0.00587146  0.169291  0.353518  0.402631
+ 0.631083  0.151317  0.781049  0.00320728     0.756519    0.15317   0.452169  0.127005
+ 0.285315  0.347433  0.69174   0.516915       0.900343    0.404006  0.448986  0.962649
+
+julia> yint = observations.target
+9-element Vector{UInt32}:
+ 0x00000001
+ 0x00000002
+ 0x00000001
+ 0x00000002
+ 0x00000001
+ 0x00000002
+ 0x00000001
+ 0x00000002
+ 0x00000001
+
+julia> observations.levels_seen
+2-element CategoricalArray{Char,1,UInt32}:
+ 'a'
+ 'b'
+
+julia> sort(unique(observations.target))
+2-element Vector{UInt32}:
+ 0x00000001
+ 0x00000002
+
+julia> observations.levels_seen |> levels
+3-element CategoricalArray{Char,1,UInt32}:
+ 'a'
+ 'b'
+ 'c'
+
+julia> observations.decoder.(yint)
+9-element CategoricalArray{Char,1,UInt32}:
+ 'a'
+ 'b'
+ 'a'
+ 'b'
+ 'a'
+ 'b'
+ 'a'
+ 'b'
+ 'a'
+
+julia> d = UnivariateFinite(observations.levels_seen, [0.4, 0.6])
+UnivariateFinite{Multiclass{3}}(a=>0.4, b=>0.6)
+
+julia> levels(d)
+3-element CategoricalArray{Char,1,UInt32}:
+ 'a'
+ 'b'
+ 'c'
+```
+
 """
 abstract type Obs end
 
@@ -111,7 +180,7 @@ struct SageObs{F,T,E,D} <: Obs
     features::F # p x n
     names::Vector{Symbol}
    target::T
-    classes_seen::CategoricalArrays.CategoricalVector{E}
+    levels_seen::CategoricalArrays.CategoricalVector{E}
    decoder::D
 end
 
@@ -122,8 +191,8 @@ function Base.show(io::IO, ::MIME"text/plain", observations::SageObs)
     println(io, " features :: $(typeof(A))($(size(A)))")
     println(io, " names: $(observations.names)")
     println(io, " target :: $(typeof(y))($(size(y)))")
-    println(io, " classes_seen: "*
-        "$(CategoricalArrays.unwrap.(observations.classes_seen)) "*
+    println(io, " levels_seen: "*
+        "$(CategoricalArrays.unwrap.(observations.levels_seen)) "*
         "(categorical vector with complete pool)")
     print(io, " decoder: <callable>")
 end
@@ -133,7 +202,7 @@ Base.getindex(observations::SageObs, idx) =
     MLCore.getobs(observations.features, idx),
     observations.names,
     MLCore.getobs(observations.target, idx),
-    observations.classes_seen,
+    observations.levels_seen,
     observations.decoder,
 )
 
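
A usage note, not part of the commit: the `Base.getindex` overload shown above is what makes a `SageObs` subsamplable by LearnAPI tooling. Continuing from the extended-help example, one would expect behaviour along these lines (a sketch, not verified output):

```julia
# Take the first five observations; features and target are subsampled,
# while the decoder and the seen levels are carried over unchanged.
train = observations[1:5]
size(train.features)   # expected (3, 5)
length(train.target)   # expected 5
train.levels_seen      # expected: same 2-element categorical vector as before
```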

src/saffron.jl

Lines changed: 3 additions & 3 deletions
@@ -150,13 +150,13 @@ function finalize(x, names, y, int) # here `int` is `levelcode` or `refcode` fu
         CategoricalArrays.CategoricalArray,
         SubArray{<:Any, <:Any, <:CategoricalArrays.CategoricalArray},
     } || throw(ERR_EXPECTED_CATEGORICAL)
-    l = LearnDataFrontEnds.classes(y)
+    l = CategoricalArrays.levels(y)
     u = unique(y)
     mask = map(in(u), l)
-    _classes_seen = l[mask]
+    _levels_seen = l[mask]
     _decoder = LearnDataFrontEnds.decoder(l)
 
-    return SageObs(x, names, int.(y), _classes_seen, _decoder)
+    return SageObs(x, names, int.(y), _levels_seen, _decoder)
 end
 
 # for input `(x::AbstractMatrix, y::MatrixOrVector)`:
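
To make the `finalize` change concrete, here is a small illustration, not from the commit, of the `_levels_seen` computation. It assumes CategoricalArrays ≥ 1, where `levels` returns a categorical vector sharing the input's pool, which appears to be the behaviour the new code relies on (hence the `[compat]` bump above):

```julia
using CategoricalArrays

y = categorical(['b', 'a', 'b'], levels=['a', 'b', 'c'])  # 'c' is in the pool but never observed
l = levels(y)          # full pool, in pool order: 'a', 'b', 'c'
u = unique(y)          # values actually present: 'b', 'a'
mask = map(in(u), l)   # [true, true, false]
levels_seen = l[mask]  # pool-ordered levels actually seen: 'a', 'b'
```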

src/sage.jl

Lines changed: 4 additions & 4 deletions
@@ -104,12 +104,12 @@ function LearnAPI.fit(
     X = observations.features # p x n matrix
     y = observations.target # n-vector or q x n matrix
     decoder = observations.decoder
-    classes_seen = observations.classes_seen
+    levels_seen = observations.levels_seen
     feature_names = observations.names
 
     # do stuff with `X`, `y` and `feature_names`:
     # return a `model` object which also stores the `decoder` and/or
-    # `classes_seen` to make them available to `predict`.
+    # `levels_seen` to make them available to `predict`.
     ...
 
 end
@@ -127,10 +127,10 @@ function LearnAPI.predict(model::MyModel, ::K, observations::Obs)
 
     # Do stuff with `X` and `model` to obtain raw `predictions` (a vector of integer
     # codes for `K = Point`, or an `n x c` matrix of probabilities for `K = Distribution`).
-    # Extract `decoder` or `classes_seen` from `model`.
+    # Extract `decoder` or `levels_seen` from `model`.
     # For `K = Point`, return `decoder.(predictions)`.
     # For `K = Distribution`, return, say,
-    # `CategoricalDistributions.Univariate(classes_seen, predictions)`.
+    # `CategoricalDistributions.Univariate(levels_seen, predictions)`.
     ...
 end
 LearnAPI.predict(model::MyModel, kind_of_proxy, X) = LearnAPI.predict(model,

src/tools.jl

Lines changed: 3 additions & 53 deletions
@@ -112,58 +112,8 @@ function decompose(X, v, _targets::NTuple)
     return swapdims(A, v), collect(names), swapdims(B, v)
 end
 
-"""
-    classes(x)
-
-*Private method.*
-
-Return, as a `CategoricalVector`, all the categorical elements with
-the same pool as `CategoricalValue` `x` (including `x`), with an
-ordering consistent with the pool. Note that `x in classes(x)` is
-always true.
-
-Not to be confused with `levels(x.pool)`. See the example below.
-
-Also, overloaded for `x` a `CategoricalArray`, `CategoricalPool`, and for views of
-`CategoricalArray`.
-
-    julia> v = categorical(['c', 'b', 'c', 'a'])
-    4-element CategoricalArrays.CategoricalArray{Char,1,UInt32}:
-     'c'
-     'b'
-     'c'
-     'a'
-
-    julia> levels(v)
-    3-element Array{Char,1}:
-     'a'
-     'b'
-     'c'
-
-    julia> x = v[4]
-    CategoricalArrays.CategoricalValue{Char,UInt32} 'a'
-
-    julia> classes(x)
-    3-element CategoricalArrays.CategoricalArray{Char,1,UInt32}:
-     'a'
-     'b'
-     'c'
-
-    julia> levels(x.pool)
-    3-element Array{Char,1}:
-     'a'
-     'b'
-     'c'
-
-"""
-classes(p::CategoricalArrays.CategoricalPool) = [p[i] for i in 1:length(p)]
-classes(x::CategoricalArrays.CategoricalValue) = classes(CategoricalArrays.pool(x))
-classes(v::CategoricalArrays.CategoricalArray) = classes(CategoricalArrays.pool(v))
-classes(v::SubArray{<:Any, <:Any, <:CategoricalArrays.CategoricalArray}) = classes(parent(v))
-
-
 struct CategoricalDecoder{V,R}
-    classes::CategoricalArrays.CategoricalVector{
+    levels::CategoricalArrays.CategoricalVector{
         V,
         R,
         V,
@@ -193,7 +143,7 @@ pool as `x`.
 *Warning:* There is no guarantee that `levelcode.(d.(u)) == u` will always holds.
 
 """
-decoder(x) = CategoricalDecoder(classes(x))
+decoder(x) = CategoricalDecoder(CategoricalArrays.levels(x))
 
 (d::CategoricalDecoder{V,R})(i::Integer) where {V,R} =
-    CategoricalArrays.CategoricalValue{V,R}(d.classes[i])
+    CategoricalArrays.CategoricalValue{V,R}(d.levels[i])
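
Finally, an illustrative round trip, not from the commit, through the renamed `CategoricalDecoder` field, echoing the example removed from the `classes` docstring (exact display types may differ):

```julia
using CategoricalArrays
import LearnDataFrontEnds

v = categorical(['c', 'b', 'c', 'a'])
d = LearnDataFrontEnds.decoder(v)  # wraps `levels(v)` in a `CategoricalDecoder`
codes = levelcode.(v)              # [3, 2, 3, 1]
d.(codes)                          # recovers 'c', 'b', 'c', 'a' as categorical values
```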
