@@ -41,9 +41,17 @@ class TargetEncoder(util.BaseEncoder, util.SupervisedTransformerMixin):
4141 smoothing: float
4242 smoothing effect to balance categorical average vs prior. Higher value means stronger regularization.
4343 The value must be strictly greater than 0. Higher values mean a flatter S-curve (see min_samples_leaf).
44- hierarchy: dict
45- a dictionary of columns to map into hierarchies. Dictionary key(s) should be the column name from X
44+ hierarchy: dict or dataframe
45+ A dictionary or a dataframe to define the hierarchy for mapping.
46+
47+ If a dictionary: a dict of columns to map into hierarchies. Dictionary key(s) should be the name(s) of the column(s) from X
4648 that require mapping. For multiple hierarchical maps, this should be a dictionary of dictionaries.
49+
50+ If a dataframe: a dataframe defining the columns to be used for the hierarchies. Column names must take the form:
51+ HIER_colA_1, ... HIER_colA_N, HIER_colB_1, ... HIER_colB_M, ...
52+ where [colA, colB, ...] are columns given in the cols list.
53+ 1:N and 1:M define the hierarchy levels for each column, where 1 is the highest level (top of the tree). A single column or
54+ multiple columns can be used, as relevant.
4755
4856 Examples
4957 --------
@@ -75,7 +83,8 @@ class TargetEncoder(util.BaseEncoder, util.SupervisedTransformerMixin):
7583 dtypes: float64(13)
7684 memory usage: 51.5 KB
7785 None
78-
86+
87+ >>> from category_encoders.datasets import load_compass
7988 >>> X, y = load_compass()
8089 >>> hierarchical_map = {'compass': {'N': ('N', 'NE'), 'S': ('S', 'SE'), 'W': 'W'}}
8190 >>> enc = TargetEncoder(verbose=1, smoothing=2, min_samples_leaf=2, hierarchy=hierarchical_map, cols=['compass']).fit(X.loc[:,['compass']], y)
0 commit comments