Skip to content

Commit f9d7d4c

Browse files
committed
Merge branch 'Contrast-Encoding' of https://github.com/JuliaAI/MLJTransforms.jl into Contrast-Encoding
2 parents cfa4868 + e45c7fa commit f9d7d4c

File tree

3 files changed

+14
-7
lines changed

3 files changed

+14
-7
lines changed

src/encoders/contrast_encoder/contrast_encoder.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ include("errors.jl")
22

33
"""
44
** Private Method **
5+
56
This and the following four methods implement the contrast matrix for dummy coding, sum coding,
67
backward/forward difference coding and Helmert coding.
78
Where `k` is the number of levels in the feature and the returned contrast matrix has dimensions (k,k-1).
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
MATRIX_SIZE_ERROR(k, matrix_size, feat_name)= "In ContrastEncoder, a categorical variable with $k levels should have a contrast matrix of size ($k, $k-1). However, the given contrast matrix by `buildmatrix` is $matrix_size for feature $feat_name."
2-
MATRIX_SIZE_ERROR_HYP(k, matrix_size, feat_name)= "In ContrastEncoder, a categorical variable with $k levels should have a hypothesis matrix of size ($k-1, $k). However, the given hypothesis matrix by `buildmatrix` is $matrix_size for feature $feat_name."
1+
# Build the error message for a contrast matrix of the wrong size.
#   k           - number of levels of the categorical feature
#   matrix_size - size of the matrix that `buildmatrix` actually returned
#   feat_name   - name of the offending feature
# Returns the message as a `String`; the expected size is reported as (k, k-1).
function MATRIX_SIZE_ERROR(k, matrix_size, feat_name)
    return string(
        "In ContrastEncoder, a categorical variable with ", k,
        " levels should have a contrast matrix of size (", k, ", ", k, "-1). ",
        "However, the contrast matrix returned by `buildmatrix` is ", matrix_size,
        " for feature ", feat_name, ".",
    )
end
2+
# Build the error message for a hypothesis matrix of the wrong size.
#   k           - number of levels of the categorical feature
#   matrix_size - size of the matrix that `buildmatrix` actually returned
#   feat_name   - name of the offending feature
# Returns the message as a `String`; the expected size is reported as (k-1, k).
# The wording mirrors MATRIX_SIZE_ERROR ("... matrix returned by `buildmatrix`"),
# without the redundant "given".
MATRIX_SIZE_ERROR_HYP(k, matrix_size, feat_name)= "In ContrastEncoder, a categorical variable with $k levels should have a hypothesis matrix of size ($k-1, $k). However, the hypothesis matrix returned by `buildmatrix` is $matrix_size for feature $feat_name."
33
# Error message for the case where `mode` is a vector of symbols but `ignore` is not
# `false`. Declared `const` so the module-level binding has a fixed type.
const IGNORE_MUST_FALSE_VEC_MODE = "In ContrastEncoder with mode given as a vector of symbols, the ignore argument must be set to false and features must be explicitly specified in features."
44
# Error message for mode=:contrast or mode=:hypothesis used without a `buildmatrix`
# argument. Declared `const` so the module-level binding has a fixed type.
const BUILDFUNC_MUST_BE_SPECIFIED = "In ContrastEncoder with mode=:contrast or mode=:hypothesis, the `buildmatrix` argument must be specified."
55
# Build the error message for a vector-valued `mode` whose length differs from the
# number of features.
#   len_mode - number of modes received
#   len_feat - number of features received
# Returns the message as a `String`.
function LENGTH_MISMATCH_VEC_MODE(len_mode, len_feat)
    return string(
        "In ContrastEncoder with mode given as a vector of symbols, ",
        "the length of the features argument must match the number of specified modes. ",
        "However, the method received ", len_mode, " modes and ", len_feat, " features.",
    )
end

src/encoders/contrast_encoder/interface_mlj.jl

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,15 +126,21 @@ The fields of `report(mach)` are:
126126
using MLJ
127127
128128
# Define categorical dataset
129-
X = (name = categorical(["Ben", "John", "Mary", "John"]),
130-
height = [1.85, 1.67, 1.5, 1.67],
131-
favnum = categorical([7, 5, 10, 1]),
132-
age = [23, 23, 14, 23])
129+
X = (
130+
name = categorical(["Ben", "John", "Mary", "John"]),
131+
height = [1.85, 1.67, 1.5, 1.67],
132+
favnum = categorical([7, 5, 10, 1]),
133+
age = [23, 23, 14, 23],
134+
)
133135
134136
# Check scitype coercions:
135137
schema(X)
136138
137-
encoder = ContrastEncoder(features=[:name, :favnum]; ignore=false, mode = [:dummy, :helmert])
139+
encoder = ContrastEncoder(
140+
features = [:name, :favnum],
141+
ignore = false,
142+
mode = [:dummy, :helmert],
143+
)
138144
mach = fit!(machine(encoder, X))
139145
Xnew = transform(mach, X)
140146

0 commit comments

Comments
 (0)