Skip to content

Commit b04ad41

Browse files
authored
Merge pull request #34 from JuliaAI/scitype-issue
Resolve scitype name clashes
2 parents 20769cf + 51b0946 commit b04ad41

File tree

10 files changed

+60
-62
lines changed

10 files changed

+60
-62
lines changed

Project.toml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
1414
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
1515
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
1616
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
17+
ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161"
1718
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1819
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
1920
TableOperations = "ab02a1b2-a7df-11e8-156e-fb1833f50b87"
@@ -22,14 +23,15 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
2223
[compat]
2324
BitBasis = "0.9"
2425
CategoricalArrays = "0.10"
25-
MLJModelInterface = "1.11"
2626
Combinatorics = "1"
2727
Dates = "1"
2828
Distributions = "0.25"
2929
LinearAlgebra = "1"
30+
MLJModelInterface = "1.11"
3031
OrderedCollections = "1"
3132
Parameters = "0.12"
32-
ScientificTypes = "3.0"
33+
ScientificTypes = "3.1.0"
34+
ScientificTypesBase = "3.0.0"
3335
Statistics = "1"
3436
StatsBase = "0.34"
3537
TableOperations = "1.2"
@@ -38,11 +40,11 @@ julia = "1.10"
3840

3941
[extras]
4042
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
41-
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
4243
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
4344
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
44-
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
45+
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
4546
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
47+
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
4648

4749
[targets]
4850
test = ["Test", "DataFrames", "MLJBase", "Random", "StableRNGs", "StatsModels"]

src/MLJTransforms.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
module MLJTransforms
22
using Tables
3-
using ScientificTypes
4-
using ScientificTypes: scitype
3+
# Note: The `scitype` in
4+
# MLJModelInterface clashes with the `scitype` in ScientificTypes. See also
5+
# https://github.com/JuliaAI/MLJBase.jl/issues/1002
6+
import ScientificTypes: elscitype, schema, coerce, ScientificTimeType
7+
using MLJModelInterface # exports `scitype`, which will call `ScientificTypes.scitype`,
8+
# once MLJBase is loaded (but this is not a dependency!)
59
using CategoricalArrays
6-
using MLJModelInterface
710
using TableOperations
811
using StatsBase
912
using LinearAlgebra
@@ -15,7 +18,6 @@ using Parameters
1518
using Dates
1619
using OrderedCollections
1720

18-
1921
const MMI = MLJModelInterface
2022

2123
# Functions of generic use across transformers

test/encoders/contrast_encoder.jl

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -195,16 +195,16 @@ end
195195

196196
df = DataFrame(X)
197197

198-
mf = ModelFrame(
199-
@formula(age ~ (name + height + favnum)),
198+
mf = StatsModels.ModelFrame(
199+
StatsModels.@formula(age ~ (name + height + favnum)),
200200
df,
201201
contrasts = Dict(
202202
:name => StatsModels.ContrastsCoding(buildrandomcontrast(nothing, 3)),
203203
:favnum => StatsModels.ContrastsCoding(buildrandomcontrast(nothing, 4)),
204204
),
205205
)
206206

207-
X_tr_sm = ModelMatrix(mf).m[:, 2:end]
207+
X_tr_sm = StatsModels.ModelMatrix(mf).m[:, 2:end]
208208

209209
@test X_tr_mlj == X_tr_sm
210210
end
@@ -221,24 +221,24 @@ end
221221
X_tr_mlj = Tables.matrix(X_tr)[:, 1:end-1]
222222
df = DataFrame(X)
223223

224-
mf = ModelFrame(
225-
@formula(age ~ (name + height + favnum)),
224+
mf = StatsModels.ModelFrame(
225+
StatsModels.@formula(age ~ (name + height + favnum)),
226226
df,
227227
contrasts = Dict(
228-
:name => HypothesisCoding(
228+
:name => StatsModels.HypothesisCoding(
229229
buildrandomhypothesis(nothing, 3);
230230
levels = levels(X.name),
231231
labels = [],
232232
),
233-
:favnum => HypothesisCoding(
233+
:favnum => StatsModels.HypothesisCoding(
234234
buildrandomhypothesis(nothing, 4);
235235
levels = levels(X.favnum),
236236
labels = [],
237237
),
238238
),
239239
)
240240

241-
X_tr_sm = ModelMatrix(mf).m[:, 2:end]
241+
X_tr_sm = StatsModels.ModelMatrix(mf).m[:, 2:end]
242242

243243
@test X_tr_mlj == X_tr_sm
244244
end
@@ -257,11 +257,11 @@ end
257257
for ind in 1:6
258258
stats_models(k, ind) = [
259259
StatsModels.ContrastsCoding(buildrandomcontrast(nothing, k)),
260-
DummyCoding(; base = (k == 3) ? "Mary" : 10),
261-
EffectsCoding(; base = (k == 3) ? "Mary" : 10),
262-
SeqDiffCoding(),
263-
HelmertCoding(),
264-
HypothesisCoding(
260+
StatsModels.DummyCoding(; base = (k == 3) ? "Mary" : 10),
261+
StatsModels.EffectsCoding(; base = (k == 3) ? "Mary" : 10),
262+
StatsModels.SeqDiffCoding(),
263+
StatsModels.HelmertCoding(),
264+
StatsModels.HypothesisCoding(
265265
buildrandomhypothesis(nothing, k);
266266
levels = (k == 3) ? levels(X.name) : levels(X.favnum),
267267
labels = [],
@@ -277,8 +277,8 @@ end
277277

278278
df = DataFrame(X)
279279

280-
mf = ModelFrame(
281-
@formula(age ~ (name + height + favnum)),
280+
mf = StatsModels.ModelFrame(
281+
StatsModels.@formula(age ~ (name + height + favnum)),
282282
df,
283283
contrasts = Dict(
284284
:name => stats_models(3, ind),
@@ -287,7 +287,7 @@ end
287287
)
288288

289289
X_tr_mlj = Tables.matrix(X_tr)[:, 1:end-1]
290-
X_tr_sm = ModelMatrix(mf).m[:, 2:end]
290+
X_tr_sm = StatsModels.ModelMatrix(mf).m[:, 2:end]
291291
@test X_tr_mlj ≈ X_tr_sm
292292
end
293293
end
@@ -298,11 +298,11 @@ end
298298
for ind2 in 2:5
299299
stats_models(k, ind) = [
300300
StatsModels.ContrastsCoding(buildrandomcontrast(nothing, k)),
301-
DummyCoding(; base = (k == 3) ? "Mary" : 10),
302-
EffectsCoding(; base = (k == 3) ? "Mary" : 10),
303-
SeqDiffCoding(),
304-
HelmertCoding(),
305-
HypothesisCoding(
301+
StatsModels.DummyCoding(; base = (k == 3) ? "Mary" : 10),
302+
StatsModels.EffectsCoding(; base = (k == 3) ? "Mary" : 10),
303+
StatsModels.SeqDiffCoding(),
304+
StatsModels.HelmertCoding(),
305+
StatsModels.HypothesisCoding(
306306
buildrandomhypothesis(nothing, k);
307307
levels = (k == 3) ? levels(X.name) : levels(X.favnum),
308308
labels = [],
@@ -331,8 +331,8 @@ end
331331

332332
df = DataFrame(X)
333333

334-
mf = ModelFrame(
335-
@formula(age ~ (name + height + favnum)),
334+
mf = StatsModels.ModelFrame(
335+
StatsModels.@formula(age ~ (name + height + favnum)),
336336
df,
337337
contrasts = Dict(
338338
:name => stats_models(3, ind1),
@@ -341,7 +341,7 @@ end
341341
)
342342

343343
X_tr_mlj = Tables.matrix(X_tr)[:, 1:end-1]
344-
X_tr_sm = ModelMatrix(mf).m[:, 2:end]
344+
X_tr_sm = StatsModels.ModelMatrix(mf).m[:, 2:end]
345345

346346
@test X_tr_mlj ≈ X_tr_sm
347347
end
@@ -358,7 +358,7 @@ end
358358
encoder = ContrastEncoder(ignore = true, ordered_factor = false)
359359
mach = machine(encoder, X)
360360
fit!(mach)
361-
Xnew_transf = MMI.transform(mach, X)
361+
Xnew_transf = MLJBase.transform(mach, X)
362362

363363
# same output
364364
@test X_transf == Xnew_transf
@@ -392,7 +392,7 @@ end
392392
buildmatrix = matrix_func[i],
393393
)
394394
mach = fit!(machine(encoder, X))
395-
Xnew = MMI.transform(mach, X)
395+
Xnew = MLJBase.transform(mach, X)
396396

397397
# Test Consistency with Types
398398
scs = schema(Xnew).scitypes
@@ -406,4 +406,4 @@ end
406406
@test last_type <: Integer && isconcretetype(last_type)
407407
@test last_sctype <: Count
408408
end
409-
end
409+
end

test/encoders/frequency_encoder.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ using MLJTransforms: frequency_encoder_fit, frequency_encoder_transform
55

66
X = dataset_forms[1]
77
normalize = [false, true]
8-
A_col, C_col, D_col, F_col = MMI.selectcols(X, [1, 3, 4, 6])
8+
A_col, C_col, D_col, F_col = selectcols(X, [1, 3, 4, 6])
99
for norm in normalize
1010
result = frequency_encoder_fit(X; normalize = norm)[:statistic_given_feat_val]
1111
enc =
@@ -72,7 +72,7 @@ end
7272
encoder = FrequencyEncoder(ignore = true, ordered_factor = false)
7373
mach = machine(encoder, X)
7474
fit!(mach)
75-
Xnew_transf = MMI.transform(mach, X)
75+
Xnew_transf = MLJBase.transform(mach, X)
7676

7777
# same output
7878
@test X_transf == Xnew_transf
@@ -111,7 +111,7 @@ end
111111

112112
encoder = FrequencyEncoder(ordered_factor = false, normalize = false)
113113
mach = fit!(machine(encoder, X))
114-
Xnew = MMI.transform(mach, X)
114+
Xnew = MLJBase.transform(mach, X)
115115

116116

117117
scs = schema(Xnew).scitypes

test/encoders/missingness_encoding.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ end
170170
encoder = MissingnessEncoder(ignore = true, ordered_factor = false)
171171
mach = machine(encoder, X)
172172
fit!(mach)
173-
Xnew_transf = MMI.transform(mach, X)
173+
Xnew_transf = MLJBase.transform(mach, X)
174174

175175
# same output
176176
@test isequal(X_transf, Xnew_transf)
@@ -197,7 +197,7 @@ end
197197

198198
encoder = MissingnessEncoder()
199199
mach = fit!(machine(encoder, Xm))
200-
Xnew = MMI.transform(mach, Xm)
200+
Xnew = MLJBase.transform(mach, Xm)
201201

202202
schema(Xm)
203203
schema(Xnew)

test/encoders/ordinal_encoding.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ push!(
1515
@test ordinal_encoder_fit(dataset_forms[1]) == ordinal_encoder_fit(dataset_forms[2])
1616
X = dataset_forms[1]
1717
result = ordinal_encoder_fit(X)[:index_given_feat_level]
18-
A_col, C_col, D_col, F_col = MMI.selectcols(X, [1, 3, 4, 6])
18+
A_col, C_col, D_col, F_col = selectcols(X, [1, 3, 4, 6])
1919
true_output = Dict{Symbol, Dict{Any, AbstractFloat}}(
2020
:F => Dict(
2121
"m" => findfirst(==("m"), levels(F_col)),
@@ -70,7 +70,7 @@ end
7070
encoder = OrdinalEncoder(ignore = true, ordered_factor = false)
7171
mach = machine(encoder, X)
7272
fit!(mach)
73-
Xnew_transf = MMI.transform(mach, X)
73+
Xnew_transf = MLJBase.transform(mach, X)
7474

7575
# same output
7676
@test X_transf == Xnew_transf
@@ -108,7 +108,7 @@ end
108108

109109
encoder = OrdinalEncoder(ordered_factor = false)
110110
mach = fit!(machine(encoder, X))
111-
Xnew = MMI.transform(mach, X)
111+
Xnew = MLJBase.transform(mach, X)
112112

113113
scs = schema(Xnew).scitypes
114114
ts = schema(Xnew).types
@@ -123,7 +123,7 @@ end
123123
## Int32 case
124124
encoder = OrdinalEncoder(ordered_factor = false, output_type = Int32)
125125
mach = fit!(machine(encoder, X))
126-
Xnew = MMI.transform(mach, X)
126+
Xnew = MLJBase.transform(mach, X)
127127
scs = schema(Xnew).scitypes
128128
ts = schema(Xnew).types
129129
# Check scitypes for previously categorical features

test/encoders/target_encoding.jl

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ end
6363
X, y = classification_forms[1]
6464
n = length(y)
6565

66-
A_col, C_col, D_col, F_col = MMI.selectcols(X, [1, 3, 4, 6])
66+
A_col, C_col, D_col, F_col = selectcols(X, [1, 3, 4, 6])
6767
true_output = Dict{Symbol, Dict{Any, AbstractFloat}}(
6868
:F => Dict(
6969
"m" => sum(y[F_col.=="m"] .== 0) / length(y[F_col.=="m"]),
@@ -119,7 +119,7 @@ end
119119
n = length(y)
120120
μ̂ = mean(y)
121121

122-
A_col, C_col, D_col, F_col = MMI.selectcols(X, [1, 3, 4, 6])
122+
A_col, C_col, D_col, F_col = selectcols(X, [1, 3, 4, 6])
123123
true_output = Dict{Symbol, Dict{Any, AbstractFloat}}(
124124
:F => Dict(
125125
"m" => mean(y[F_col.=="m"]),
@@ -172,7 +172,7 @@ end
172172
y_classes = classes(y)
173173
n = length(y)
174174

175-
A_col, C_col, D_col, F_col = MMI.selectcols(X, [1, 3, 4, 6])
175+
A_col, C_col, D_col, F_col = selectcols(X, [1, 3, 4, 6])
176176
true_output = Dict{Symbol, Dict{Any, AbstractVector{AbstractFloat}}}(
177177
:F => Dict(
178178
"m" =>
@@ -320,7 +320,7 @@ end
320320
TargetEncoder(ignore = true, ordered_factor = false, lambda = 0.5, m = 1.0)
321321
mach = machine(encoder, X, y)
322322
fit!(mach)
323-
Xnew_transf = MMI.transform(mach, X)
323+
Xnew_transf = MLJBase.transform(mach, X)
324324

325325
# same output
326326
@test X_transf == Xnew_transf
@@ -368,7 +368,7 @@ end
368368
D = [true, false, true, false, true]
369369
E = [1, 2, 3, 4, 5]
370370

371-
# Define the target variable
371+
# Define the target variable
372372
y = ["c1", "c2", "c3", "c1", "c2"]
373373

374374
# Combine into a named tuple
@@ -386,7 +386,7 @@ end
386386

387387
encoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0)
388388
mach = fit!(machine(encoder, X, y))
389-
Xnew = MMI.transform(mach, X)
389+
Xnew = MLJBase.transform(mach, X)
390390

391391
scs = schema(Xnew).scitypes
392392
ts = schema(Xnew).types
@@ -396,4 +396,3 @@ end
396396
@test scs[end] === schema(X).scitypes[end]
397397
@test ts[end] == schema(X).types[end]
398398
end
399-

test/generic.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ end
142142

143143
@testset "Test generic fit output" begin
144144
X = dataset_forms[1]
145-
A_col, C_col, D_col, F_col = MMI.selectcols(X, [1, 3, 4, 6])
145+
A_col, C_col, D_col, F_col = selectcols(X, [1, 3, 4, 6])
146146
result = dummy_encoder_fit(X)[:hash_given_feat_val]
147147
enc = (col, level) -> (hash(level))
148148
true_output = Dict{Symbol, Dict{Any, Any}}(

test/runtests.jl

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,16 @@
11
using MLJTransforms
22
using Test
33
using DataFrames
4-
using ScientificTypes
54
using CategoricalArrays
6-
using MLJModelInterface
75
using MLJBase
86
using StatsBase
97
using LinearAlgebra
10-
using StatsModels
8+
import StatsModels
119
using Random
12-
const MMI = MLJModelInterface
1310
using LinearAlgebra
14-
using StatsModels
1511

1612
# Other transformers
1713
using Tables, CategoricalArrays
18-
using ScientificTypes: scitype, schema
1914
using Statistics
2015
using StableRNGs
2116
stable_rng = StableRNGs.StableRNG(123)
@@ -40,4 +35,4 @@ include("transformers/other_transformers/interaction_transformer.jl")
4035
include("transformers/other_transformers/continuous_encoder.jl")
4136
include("transformers/other_transformers/univariate_boxcox_transformer.jl")
4237
include("transformers/other_transformers/standardizer.jl")
43-
include("transformers/other_transformers/univariate_discretizer.jl")
38+
include("transformers/other_transformers/univariate_discretizer.jl")

0 commit comments

Comments
 (0)