Skip to content

Commit 81c319e

Browse files
committed
whitespace
1 parent cadd9a3 commit 81c319e

File tree

1 file changed

+61
-42
lines changed

1 file changed

+61
-42
lines changed

src/generic.jl

Lines changed: 61 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@ generic_fit(X,
1212
)
1313
```
1414
15-
Given a `feature_mapper` (see definition below), this method applies
16-
`feature_mapper` across a specified subset of categorical columns in X and returns a dictionary
17-
whose keys are the feature names, and each value is the corresponding
18-
level‑to‑value mapping produced by `feature_mapper`.
15+
Given a `feature_mapper` (see definition below), this method applies `feature_mapper`
16+
across a specified subset of categorical columns in X and returns a dictionary whose keys
17+
are the feature names, and each value is the corresponding level‑to‑value mapping produced
18+
by `feature_mapper`.
1919
20-
In essence, it spares effort of looping over each column and applying the `feature_mapper` function manually as well as handling the feature selection logic.
20+
In essence, it spares the effort of looping over each column and applying the `feature_mapper`
21+
function manually as well as handling the feature selection logic.
2122
2223
2324
# Arguments
@@ -26,17 +27,22 @@ $X_doc
2627
$features_doc
2728
$ignore_doc
2829
$ordered_factor_doc
29-
- feature_mapper: function that, for a given vector (eg, corresponding to a categorical column from the dataset `X`),
30-
produces a mapping from each category level name in this vector to a scalar or vector according to specified transformation logic.
30+
31+
- `feature_mapper`: function that, for a given vector (e.g., corresponding to a categorical
32+
column from the dataset `X`), produces a mapping from each category level name in this
33+
vector to a scalar or vector according to specified transformation logic.
3134
3235
# Note
3336
34-
- Any additional arguments (whether keyword or not) provided to this function are passed to the `feature_mapper` function which
35-
is helpful when `feature_mapper` requires additional arguments to compute the mapping (eg, hyperparameters).
37+
- Any additional arguments (whether keyword or not) provided to this function are passed
38+
to the `feature_mapper` function, which is helpful when `feature_mapper` requires
39+
additional arguments to compute the mapping (eg, hyperparameters).
3640
3741
# Returns
38-
- `mapping_per_feat_level`: Maps each level for each feature in a subset of the categorical features of
39-
X into a scalar or a vector.
42+
43+
- `mapping_per_feat_level`: Maps each level for each feature in a subset of the
44+
categorical features of X into a scalar or a vector.
45+
4046
$encoded_features_doc
4147
"""
4248
function generic_fit(X,
@@ -50,11 +56,11 @@ function generic_fit(X,
5056
# 1. Get X column types and names
5157
feat_names = Tables.schema(X).names
5258

53-
#2. Modify column_names based on features
59+
#2. Modify column_names based on features
5460
if features isa Symbol
5561
features = [features]
5662
end
57-
63+
5864
if features isa AbstractVector{Symbol}
5965
# Original behavior for vector of symbols
6066
feat_names =
@@ -94,8 +100,9 @@ end
94100
"""
95101
**Private method.**
96102
97-
Function to generate new feature names: feat_name_0, feat_name_1,..., feat_name_n or if possible,
98-
feat_name_level_0, feat_name_level_1,..., feat_name_level_n
103+
Function to generate new feature names: feat_name_1, feat_name_2,..., feat_name_n or, if
104+
possible, feat_name_level_1, feat_name_level_2,..., feat_name_level_n
105+
99106
"""
100107
function generate_new_feat_names(
101108
feat_name,
@@ -115,7 +122,8 @@ function generate_new_feat_names(
115122
suffix = repeat("_", count)
116123
if use_levelnames
117124
# Always use the first num_inds level names
118-
new_column_names = [ Symbol("$(feat_name)$(suffix)$(levels_vec[i])") for i in 1:num_inds ]
125+
new_column_names =
126+
[ Symbol("$(feat_name)$(suffix)$(levels_vec[i])") for i in 1:num_inds ]
119127
else
120128
# Always use numeric indices
121129
new_column_names = [ Symbol("$(feat_name)$(suffix)$i") for i in 1:num_inds ]
@@ -144,34 +152,42 @@ generic_transform(
144152
```
145153
146154
147-
Apply a per‐level feature mapping to selected categorical columns in `X`, returning a new table of the same type.
155+
Apply a per‐level feature mapping to selected categorical columns in `X`, returning a new
156+
table of the same type.
148157
149158
# Arguments
150159
151160
$X_doc
152-
- `mapping_per_feat_level::Dict{Symbol,Dict}`:
153-
A dict whose keys are feature names (`Symbol`) and values are themselves dictionaries
154-
mapping each observed level to either a scalar (if `single_feat=true`) or a fixed‐length vector
155-
(if `single_feat=false`). Only columns whose names appear in `mapping_per_feat_level` are
156-
transformed; others pass through unchanged.
157-
- `single_feat::Bool=true`:
158-
If `true`, each input level is mapped to a single scalar feature; if `false`,
159-
each input level is mapped to a length‑`k` vector, producing `k` output columns.
160-
- `ignore_unknown::Bool=false`:
161-
If `false`, novel levels in `X` (not seen during fit) will raise an error;
162-
if `true`, novel levels will be left unchanged (identity mapping).
163-
- `use_levelnames::Bool=false`:
164-
When `single_feat=false`, controls naming of the expanded columns: `true`: use actual level names (e.g. `:color_red`, `:color_blue`),
165-
`false`: use numeric indices (e.g. `:color_1`, `:color_2`).
166-
- `custom_levels::Union{Nothing,Vector}`:
167-
If not `nothing`, overrides the names of levels used to generate feature names when `single_feat=false`.
168-
- `ensure_categorical::Bool=false`:
169-
Only when `single_feat=true` and if `true`, preserves the categorical type of the column after
170-
recoding (eg, feature should still be recognized as `Multiclass` after transformation)
161+
162+
- `mapping_per_feat_level::Dict{Symbol,Dict}`: A dict whose keys are feature names
163+
(`Symbol`) and values are themselves dictionaries mapping each observed level to either
164+
a scalar (if `single_feat=true`) or a fixed‐length vector (if
165+
`single_feat=false`). Only columns whose names appear in `mapping_per_feat_level` are
166+
transformed; others pass through unchanged.
167+
168+
- `single_feat::Bool=true`: If `true`, each input level is mapped to a single scalar
169+
feature; if `false`, each input level is mapped to a length‑`k` vector, producing `k`
170+
output columns.
171+
172+
- `ignore_unknown::Bool=false`: If `false`, novel levels in `X` (not seen during fit) will
173+
raise an error; if `true`, novel levels will be left unchanged (identity mapping).
174+
175+
- `use_levelnames::Bool=false`: When `single_feat=false`, controls naming of the expanded
176+
columns: `true`: use actual level names (e.g. `:color_red`, `:color_blue`), `false`:
177+
use numeric indices (e.g. `:color_1`, `:color_2`).
178+
179+
- `custom_levels::Union{Nothing,Vector}`: If not `nothing`, overrides the names of levels
180+
used to generate feature names when `single_feat=false`.
181+
182+
- `ensure_categorical::Bool=false`: Only when `single_feat=true` and if `true`, preserves
183+
the categorical type of the column after recoding (eg, feature should still be
184+
recognized as `Multiclass` after transformation)
171185
172186
# Returns
173187
174-
A new table of potentially similar to `X` but with categorical columns transformed according to `mapping_per_feat_level`.
188+
A new table, potentially similar to `X`, but with categorical columns transformed
189+
according to `mapping_per_feat_level`.
190+
175191
"""
176192
function generic_transform(
177193
X,
@@ -197,7 +213,8 @@ function generic_transform(
197213
# get the levels in test that are not in train
198214
lost_levels = setdiff(test_levels, train_levels)
199215
error(
200-
"While transforming, found novel levels for the column $(feat_name): $(lost_levels) that were not seen while training.",
216+
"While transforming, found novel levels for the column "*
217+
"$(feat_name): $(lost_levels) that were not seen while training.",
201218
)
202219
end
203220
end
@@ -206,10 +223,11 @@ function generic_transform(
206223
level2scalar = mapping_per_feat_level[feat_name]
207224
if ensure_categorical
208225
new_col = !isempty(level2scalar) ? recode(col, level2scalar...) : col
209-
else
210-
new_col = !isempty(level2scalar) ? unwrap.(recode(col, level2scalar...)) : col
226+
else
227+
new_col =
228+
!isempty(level2scalar) ? unwrap.(recode(col, level2scalar...)) : col
211229
end
212-
230+
213231
push!(new_cols, new_col)
214232
push!(new_feat_names, feat_name)
215233
else
@@ -221,7 +239,8 @@ function generic_transform(
221239
feat_names_with_inds = generate_new_feat_names(
222240
feat_name,
223241
length(first(mapping_per_feat_level[feat_name])[2]),
224-
(custom_levels === nothing) ? keys(mapping_per_feat_level[feat_name]) : custom_levels,
242+
(custom_levels === nothing) ?
243+
keys(mapping_per_feat_level[feat_name]) : custom_levels,
225244
feat_names;
226245
use_levelnames = use_levelnames,
227246
)

0 commit comments

Comments
 (0)