
Commit ca37c76

✅ Change column and columns to feature and features
1 parent 3e65218 commit ca37c76

13 files changed (+90, -90 lines)

src/encoders/frequency_encoding/frequency_encoding.jl

Lines changed: 9 additions & 9 deletions
@@ -3,20 +3,20 @@
 **Private method.**
 
 Fit an encoder that encodes the categorical values in the specified
-categorical columns with their (normalized or raw) frequencies of occurrence in the dataset.
+categorical features with their (normalized or raw) frequencies of occurrence in the dataset.
 
 # Arguments
 
-- `X`: A table where the elements of the categorical columns have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/) `Multiclass` or `OrderedFactor`
-- `features=[]`: A list of names of categorical columns given as symbols to exclude or include from encoding
-- `ignore=true`: Whether to exclude or includes the columns given in `features`
+- `X`: A table where the elements of the categorical features have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/) `Multiclass` or `OrderedFactor`
+- `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding
+- `ignore=true`: Whether to exclude or includes the features given in `features`
 - `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them
 - `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.
 
 # Returns (in a dict)
 
-- `statistic_given_feat_val`: The frequency of each level of each selected categorical column
-- `encoded_features`: The subset of the categorical columns of X that were encoded
+- `statistic_given_feat_val`: The frequency of each level of each selected categorical feature
+- `encoded_features`: The subset of the categorical features of X that were encoded
 """
 function frequency_encoder_fit(
     X,
@@ -25,7 +25,7 @@ function frequency_encoder_fit(
     ordered_factor::Bool = false,
     normalize::Bool = false,
 )
-    # 1. Define column mapper
+    # 1. Define feature mapper
     function feature_mapper(col, name)
         frequency_map = (!normalize) ? countmap(col) : proportionmap(col)
         statistic_given_feat_val = Dict{Any, Real}(level=>frequency_map[level] for level in levels(col))
@@ -51,12 +51,12 @@ Encode the levels of a categorical variable in a given table with their (normali
 
 # Arguments
 
-- `X`: A table where the elements of the categorical columns have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/) `Multiclass` or `OrderedFactor`
+- `X`: A table where the elements of the categorical features have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/) `Multiclass` or `OrderedFactor`
 - `cache`: The output of `frequency_encoder_fit`
 
 # Returns
 
-- `X_tr`: The table with selected columns after the selected columns are encoded by frequency encoding.
+- `X_tr`: The table with selected features after the selected features are encoded by frequency encoding.
 """
 function frequency_encoder_transform(X, cache::Dict)
     statistic_given_feat_val = cache[:statistic_given_feat_val]
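
The two private functions touched in this file form a fit/transform pair: `frequency_encoder_fit` builds a cache of per-level frequencies and `frequency_encoder_transform` applies it to a table. A minimal sketch of that round trip, assuming the defining module is in scope and that the fit signature mirrors `ordinal_encoder_fit` shown further down; the toy table and the values in the comments are illustrative, not from this commit:

```julia
using CategoricalArrays  # provides `categorical` and `levels`

# Hypothetical toy table; any Tables.jl table whose categorical columns carry
# the Multiclass/OrderedFactor scitypes would do.
X = (grade = categorical(["A", "B", "A", "A"]), height = [1.8, 1.7, 1.6, 1.9])

# Fit returns a cache dict; `normalize = true` stores proportions rather than raw counts.
cache = frequency_encoder_fit(X; ignore = true, ordered_factor = false, normalize = true)

cache[:statistic_given_feat_val]  # per encoded feature: level => frequency ("A" => 0.75, "B" => 0.25 here)
cache[:encoded_features]          # the features that were actually encoded, e.g. [:grade]

# Transform replaces each level of the encoded features by its stored frequency.
X_tr = frequency_encoder_transform(X, cache)
```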

src/encoders/frequency_encoding/interface_mlj.jl

Lines changed: 7 additions & 7 deletions
@@ -35,7 +35,7 @@ function MMI.fit(transformer::FrequencyEncoder, verbosity::Int, X)
     )
     fitresult = generic_cache[:statistic_given_feat_val]
 
-    report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded columns
+    report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded features
     cache = nothing
     return fitresult, cache, report
 end;
@@ -74,7 +74,7 @@ MMI.metadata_model(
 $(MMI.doc_header(FrequencyEncoder))
 
 `FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified
-categorical columns with their (normalized or raw) frequencies of occurrence in the dataset.
+categorical features with their (normalized or raw) frequencies of occurrence in the dataset.
 
 # Training data
 
@@ -92,8 +92,8 @@ Train the machine using `fit!(mach, rows=...)`.
 
 # Hyper-parameters
 
-- `features=[]`: A list of names of categorical columns given as symbols to exclude or include from encoding
-- `ignore=true`: Whether to exclude or include the columns given in `features`
+- `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding
+- `ignore=true`: Whether to exclude or include the features given in `features`
 - `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them
 - `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.
 
@@ -107,20 +107,20 @@ Train the machine using `fit!(mach, rows=...)`.
 
 The fields of `fitted_params(mach)` are:
 
-- `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical columns of X into its frequency.
+- `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.
 
 # Report
 
 The fields of `report(mach)` are:
 
-- `encoded_features`: The subset of the categorical columns of X that were encoded
+- `encoded_features`: The subset of the categorical features of X that were encoded
 
 # Examples
 
 ```julia
 using MLJ
 
-# Define categorical columns
+# Define categorical features
 A = ["g", "b", "g", "r", "r",]
 B = [1.0, 2.0, 3.0, 4.0, 5.0,]
 C = ["f", "f", "f", "m", "f",]
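
The `# Examples` block in this docstring is cut off by the diff context above. As a hedged sketch of the full workflow it points at, using only the hyper-parameters documented here and the standard MLJ machine API, and assuming the package that defines `FrequencyEncoder` (e.g. MLJTransforms) is loaded:

```julia
using MLJ
# assumes the package defining `FrequencyEncoder` (e.g. MLJTransforms) is also loaded

# Same toy data as the docstring; coerce the text columns to Multiclass so the encoder selects them.
A = ["g", "b", "g", "r", "r"]
B = [1.0, 2.0, 3.0, 4.0, 5.0]
C = ["f", "f", "f", "m", "f"]
X = coerce((A = A, B = B, C = C), :A => Multiclass, :C => Multiclass)

encoder = FrequencyEncoder(ignore = true, ordered_factor = false, normalize = true)
mach = machine(encoder, X) |> fit!

Xnew = transform(mach, X)                      # :A and :C replaced by level frequencies; :B left alone
report(mach).encoded_features                  # which features were encoded
fitted_params(mach).statistic_given_feat_val   # the fitted level => frequency maps
```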

src/encoders/ordinal_encoding/interface_mlj.jl

Lines changed: 7 additions & 7 deletions
@@ -32,7 +32,7 @@ function MMI.fit(transformer::OrdinalEncoder, verbosity::Int, X)
     )
     fitresult =
         generic_cache[:index_given_feat_level]
-    report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded columns
+    report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded features
     cache = nothing
     return fitresult, cache, report
 end;
@@ -70,7 +70,7 @@ MMI.metadata_model(
 $(MMI.doc_header(OrdinalEncoder))
 
 `OrdinalEncoder` implements ordinal encoding which replaces the categorical values in the specified
-categorical columns with integers (ordered arbitrarily). This will create an implicit ordering between
+categorical features with integers (ordered arbitrarily). This will create an implicit ordering between
 categories which may not be a proper modelling assumption.
 
 # Training data
@@ -89,8 +89,8 @@ Train the machine using `fit!(mach, rows=...)`.
 
 # Hyper-parameters
 
-- `features=[]`: A list of names of categorical columns given as symbols to exclude or include from encoding
-- `ignore=true`: Whether to exclude or includes the columns given in `features`
+- `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding
+- `ignore=true`: Whether to exclude or includes the features given in `features`
 - `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them
 
 # Operations
@@ -103,20 +103,20 @@ Train the machine using `fit!(mach, rows=...)`.
 
 The fields of `fitted_params(mach)` are:
 
-- `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical columns of X into an integer.
+- `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of X into an integer.
 
 # Report
 
 The fields of `report(mach)` are:
 
-- `encoded_features`: The subset of the categorical columns of X that were encoded
+- `encoded_features`: The subset of the categorical features of X that were encoded
 
 # Examples
 
 ```julia
 using MLJ
 
-# Define categorical columns
+# Define categorical features
 A = ["g", "b", "g", "r", "r",]
 B = [1.0, 2.0, 3.0, 4.0, 5.0,]
 C = ["f", "f", "f", "m", "f",]

src/encoders/ordinal_encoding/ordinal_encoding.jl

Lines changed: 8 additions & 8 deletions
@@ -6,23 +6,23 @@ Fit an encoder to encode the levels of categorical variables in a given table as
 
 # Arguments
 
-- `X`: A table where the elements of the categorical columns have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/) `Multiclass` or `OrderedFactor`
-- `features=[]`: A list of names of categorical columns given as symbols to exclude or include from encoding
-- `ignore=true`: Whether to exclude or includes the columns given in `features`
+- `X`: A table where the elements of the categorical features have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/) `Multiclass` or `OrderedFactor`
+- `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding
+- `ignore=true`: Whether to exclude or includes the features given in `features`
 - `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them
 
 # Returns (in a dict)
 
-- `index_given_feat_level`: Maps each level for each column in a subset of the categorical columns of X into an integer.
-- `encoded_features`: The subset of the categorical columns of X that were encoded
+- `index_given_feat_level`: Maps each level for each column in a subset of the categorical features of X into an integer.
+- `encoded_features`: The subset of the categorical features of X that were encoded
 """
 function ordinal_encoder_fit(
     X,
     features::AbstractVector{Symbol} = Symbol[];
     ignore::Bool = true,
     ordered_factor::Bool = false,
 )
-    # 1. Define column mapper
+    # 1. Define feature mapper
     function feature_mapper(col, name)
         feat_levels = levels(col)
         index_given_feat_val =
@@ -50,12 +50,12 @@ Encode the levels of a categorical variable in a given table as integers.
 
 # Arguments
 
-- `X`: A table where the elements of the categorical columns have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/) `Multiclass` or `OrderedFactor`
+- `X`: A table where the elements of the categorical features have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/) `Multiclass` or `OrderedFactor`
 - `cache`: The output of `ordinal_encoder_fit`
 
 # Returns
 
-- `X_tr`: The table with selected columns after the selected columns are encoded by ordinal encoding.
+- `X_tr`: The table with selected features after the selected features are encoded by ordinal encoding.
 """
 function ordinal_encoder_transform(X, cache::Dict)
     index_given_feat_level = cache[:index_given_feat_level]
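
The full fit signature is visible in this hunk, so the `features`/`ignore` selection logic can be sketched directly. The toy table and expected outputs below are illustrative, and in practice these private functions would be qualified with the defining module's name:

```julia
using CategoricalArrays

X = (grade = categorical(["A", "B", "A", "C"]),
     group = categorical(["x", "x", "y", "y"]),
     score = [1.0, 2.0, 3.0, 4.0])

# `features` plus `ignore` pick which categorical features are encoded:
#   ignore = true  (default) -> encode every categorical feature except those listed
#   ignore = false           -> encode only those listed
cache_all  = ordinal_encoder_fit(X)                            # encodes :grade and :group
cache_some = ordinal_encoder_fit(X, [:group]; ignore = false)  # encodes only :group

cache_some[:index_given_feat_level]   # integer index per level of :group (ordered arbitrarily)
cache_some[:encoded_features]         # [:group]; :score is Continuous, so it is never selected

X_tr = ordinal_encoder_transform(X, cache_some)
```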

src/encoders/target_encoding/interface_mlj.jl

Lines changed: 8 additions & 8 deletions
@@ -47,7 +47,7 @@ struct TargetEncoderResult{
     S <: AbstractString,
     A <: Any # Useless but likely can't do much better
 } <: MMI.MLJType
-    # target statistic for each level of each categorical column
+    # target statistic for each level of each categorical feature
     y_stat_given_feat_level::Dict{A, A}
     task::S # "Regression", "Classification"
     num_classes::I # num_classes in case of classification
@@ -77,7 +77,7 @@ function MMI.fit(transformer::TargetEncoder, verbosity::Int, X, y)
         generic_cache[:task],
         generic_cache[:num_classes],
     )
-    report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded columns
+    report = (encoded_features = generic_cache[:encoded_features],) # report only has list of encoded features
     cache = nothing
     return fitresult, cache, report
 end;
@@ -140,8 +140,8 @@ Train the machine using `fit!(mach, rows=...)`.
 
 # Hyper-parameters
 
-- `features=[]`: A list of names of categorical columns given as symbols to exclude or include from encoding
-- `ignore=true`: Whether to exclude or includes the columns given in `features`
+- `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding
+- `ignore=true`: Whether to exclude or includes the features given in `features`
 - `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them
 - `λ`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]
 - `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then m will be computed using
@@ -158,21 +158,21 @@ Train the machine using `fit!(mach, rows=...)`.
 The fields of `fitted_params(mach)` are:
 
 - `task`: Whether the task is `Classification` or `Regression`
-- `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical column. It maps each
-level in each categorical column to a statistic computed over the target.
+- `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. It maps each
+level in each categorical feature to a statistic computed over the target.
 
 # Report
 
 The fields of `report(mach)` are:
 
-- `encoded_features`: The subset of the categorical columns of X that were encoded
+- `encoded_features`: The subset of the categorical features of X that were encoded
 
 # Examples
 
 ```julia
 using MLJ
 
-# Define categorical columns
+# Define categorical features
 A = ["g", "b", "g", "r", "r",]
 B = [1.0, 2.0, 3.0, 4.0, 5.0,]
 C = ["f", "f", "f", "m", "f",]
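
`TargetEncoder` differs from the other encoders in this commit in that its `MMI.fit` (first hunk above) also receives the target `y`, so the machine is bound to both `X` and `y`. A hedged sketch with illustrative data, again assuming the package that defines `TargetEncoder` (e.g. MLJTransforms) is loaded:

```julia
using MLJ
# assumes the package defining `TargetEncoder` (e.g. MLJTransforms) is loaded

A = ["g", "b", "g", "r", "r"]
B = [1.0, 2.0, 3.0, 4.0, 5.0]
X = coerce((A = A, B = B), :A => Multiclass)
y = coerce(["yes", "no", "yes", "yes", "no"], Multiclass)  # classification target

# λ and m are the shrinkage hyper-parameters documented above; defaults appear in target_encoding.jl.
encoder = TargetEncoder(ignore = true, ordered_factor = false, λ = 1.0, m = 0)
mach = machine(encoder, X, y) |> fit!   # supervised: the target is required at fit time

Xnew = transform(mach, X)               # levels of :A replaced by statistics of y per level
fitted_params(mach).task                # "Classification" for this target
report(mach).encoded_features
```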

src/encoders/target_encoding/target_encoding.jl

Lines changed: 8 additions & 8 deletions
@@ -110,15 +110,15 @@ end
 
     target_encoder_fit(X, y, features=[]; ignore=true, ordered_factor=false, λ = 1.0, m=0)
 
-Fit a target encoder on table X with target y by computing the necessary statistics for every categorical column.
+Fit a target encoder on table X with target y by computing the necessary statistics for every categorical feature.
 
 # Arguments
 
-- `X`: A table where the elements of the categorical columns have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/)
+- `X`: A table where the elements of the categorical features have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/)
 `Multiclass` or `OrderedFactor`
 - `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in X
-- `features=[]`: A list of names of categorical columns given as symbols to exclude or include from encoding
-- `ignore=true`: Whether to exclude or includes the columns given in `features`
+- `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding
+- `ignore=true`: Whether to exclude or includes the features given in `features`
 - `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them
 - `λ`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]
 - `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then m will be computed using
@@ -127,7 +127,7 @@ Fit a target encoder on table X with target y by computing the necessary statist
 # Returns
 
 - `cache`: A dictionary containing a dictionary `y_stat_given_feat_level` with the necessary statistics needed to transform
-every categorical column as well as other metadata needed for transform.
+every categorical feature as well as other metadata needed for transform.
 """
 function target_encoder_fit(
     X,
@@ -229,13 +229,13 @@ end
 Transform given data with fitted target encoder cache.
 
 # Arguments
-- `X`: A table where the elements of the categorical columns have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/)
+- `X`: A table where the elements of the categorical features have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/)
 `Multiclass` or `OrderedFactor`
 - `cache`: A dictionary containing a dictionary `y_stat_given_feat_level` with the necessary statistics for
-every categorical column as well as other metadata needed for transform
+every categorical feature as well as other metadata needed for transform
 
 # Returns
-- `X`: A table where the categorical columns as specified during fitting are transformed by target encoding. Other columns will remain
+- `X`: A table where the categorical features as specified during fitting are transformed by target encoding. Other features will remain
 the same. This will attempt to preserve the type of the table but may not succeed.
 """
 

src/generic.jl

Lines changed: 13 additions & 13 deletions
@@ -6,25 +6,25 @@
 
 A generic function to fit a class of transformers where its convenient to define a single `feature_mapper` function that
 takes the column as a vector and potentially other arguments (as passed in ...args and ...kwargs) and returns
-a dictionary that maps each level of the categorical column to a scalar or vector
+a dictionary that maps each level of the categorical feature to a scalar or vector
 according to the transformation logic. In other words, the `feature_mapper` simply answers the question "For level n of
-the current categorical column c, what should the new value or vector (multiple columns) be as defined by the transformation
+the current categorical feature c, what should the new value or vector (multiple features) be as defined by the transformation
 logic?"
 
 # Arguments
 
-- `X`: A table where the elements of the categorical columns have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/)
+- `X`: A table where the elements of the categorical features have [scitypes](https://juliaai.github.io/ScientificTypes.jl/dev/)
 `Multiclass` or `OrderedFactor`
-- `features=[]`: A list of names of categorical columns given as symbols to exclude or include from encoding
-- `ignore=true`: Whether to exclude or includes the columns given in `features`
+- `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding
+- `ignore=true`: Whether to exclude or includes the features given in `features`
 - `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them
 - `feature_mapper`: Defined above.
 
 # Returns
 
-- `mapping_per_feat_level`: Maps each level for each column in a subset of the categorical columns of
+- `mapping_per_feat_level`: Maps each level for each feature in a subset of the categorical features of
 X into a scalar or a vector.
-- `encoded_features`: The subset of the categorical columns of X that were encoded
+- `encoded_features`: The subset of the categorical features of X that were encoded
 """
 function generic_fit(X,
     features::AbstractVector{Symbol} = Symbol[],
@@ -43,7 +43,7 @@ function generic_fit(X,
     # 3. Define mapping per column per level dictionary
     mapping_per_feat_level = Dict()
 
-    # 4. Use column mapper to compute the mapping of each level in each column
+    # 4. Use feature mapper to compute the mapping of each level in each column
     encoded_features = Symbol[]# to store column that were actually encoded
     for feat_name in feat_names
         feat_col = Tables.getcolumn(X, feat_name)
@@ -64,7 +64,7 @@ end
 """
 **Private method.**
 
-Function to generate new column names: feat_name_0, feat_name_1,..., feat_name_n
+Function to generate new feature names: feat_name_0, feat_name_1,..., feat_name_n
 """
 function generate_new_feat_names(feat_name, num_inds, existing_names)
     conflict = true # will be kept true as long as there is a conflict
@@ -86,18 +86,18 @@ end
 **Private method.**
 
 Given a table `X` and a dictionary `mapping_per_feat_level` which maps each level for each column in
-a subset of categorical columns of X into a scalar or a vector (as specified in single_feat)
+a subset of categorical features of X into a scalar or a vector (as specified in single_feat)
 
 - transforms each value (some level) in each column in `X` using the function in `mapping_per_feat_level`
 into a scalar (single_feat=true)
 
 - transforms each value (some level) in each column in `X` using the function in `mapping_per_feat_level`
-into a set of k columns where k is the length of the vector (single_feat=false)
+into a set of k features where k is the length of the vector (single_feat=false)
 - In both cases it attempts to preserve the type of the table.
 - In the latter case, it assumes that all levels under the same category are mapped to vectors of the same length. Such
-assumption is necessary because any column in X must correspond to a constant number of columns
+assumption is necessary because any column in X must correspond to a constant number of features
 in the output table (which is equal to k).
-- Columns not in the dictionary are mapped to themselves (i.e., not changed).
+- Features not in the dictionary are mapped to themselves (i.e., not changed).
 - Levels not in the nested dictionary are mapped to themselves if `identity_map_unknown` is true else raise an error.
 """
 function generic_transform(X, mapping_per_feat_level; single_feat = true, ignore_unknown = false)
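
The docstrings in this file describe the shared machinery behind all of the encoders above: a `feature_mapper(col, name)` returns a level-to-value dict for one column, `generic_fit` applies it to each selected categorical feature, and `generic_transform` rewrites the table. A hedged illustration of that contract, reusing the frequency logic visible in `frequency_encoding.jl`; the `normalize` keyword and the example column are additions for illustration, and how `generic_fit` forwards extra arguments is not shown in this diff:

```julia
using CategoricalArrays   # `categorical`, `levels`
using StatsBase           # `countmap`, `proportionmap`, as used in frequency_encoding.jl

# The contract: given one categorical column (and its name), return a dict mapping every
# level of that column to its new value (a scalar here, i.e. the single_feat = true case
# described for `generic_transform`).
function frequency_feature_mapper(col, name; normalize = false)
    frequency_map = normalize ? proportionmap(col) : countmap(col)
    return Dict{Any, Real}(level => frequency_map[level] for level in levels(col))
end

col = categorical(["g", "b", "g", "r", "r"])
frequency_feature_mapper(col, :A; normalize = true)
# => Dict mapping "g" => 0.4, "b" => 0.2, "r" => 0.4
```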
