@@ -159,8 +159,8 @@ def wasserstein_distance(
 def density_from_rf(
     df: pd.DataFrame, estimator: BaseEnsemble, df_est: Optional[pd.DataFrame] = None
 ):
162- """Estimates the density of the empirical distribution given by df at the sample points given by
163- df_est. The estimation uses an random forest estimator and relies on the average number of
162+ """Estimates the density of the empirical distribution given by df at the sample points given
163+ by df_est. The estimation uses an random forest estimator and relies on the average number of
     samples in the leaf corresponding to each estimation point.
 
     Parameters
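A minimal sketch of the leaf-occupancy idea this docstring describes, assuming a forest already fitted on `df`; `leaf_density` is a hypothetical helper for illustration, not the function from this diff:

```python
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

def leaf_density(df: pd.DataFrame, estimator, df_est: pd.DataFrame) -> np.ndarray:
    # Leaf index of every training sample in every tree, shape (n_train, n_trees)
    leaves_train = estimator.apply(df)
    # Leaf index of every estimation point in every tree, shape (n_est, n_trees)
    leaves_est = estimator.apply(df_est)
    density = np.zeros(len(df_est))
    for i_tree in range(leaves_train.shape[1]):
        # Number of training samples falling into each leaf of this tree
        counts = pd.Series(leaves_train[:, i_tree]).value_counts()
        # Look up, for each estimation point, how populated its leaf is
        density += counts.reindex(leaves_est[:, i_tree]).fillna(0).to_numpy()
    # Average occupancy across trees, normalized by the training set size
    return density / (leaves_train.shape[1] * len(df))

# Usage: fit a forest on df (here against arbitrary targets) before calling
rng = np.random.default_rng(0)
df = pd.DataFrame({"a": rng.normal(size=500), "b": rng.normal(size=500)})
forest = RandomForestRegressor(n_estimators=50, random_state=0).fit(df, rng.normal(size=500))
print(leaf_density(df, forest, df.head()))
```

Note that raw leaf occupancy is only a relative density score; a proper density would also normalize by leaf volume, which the actual implementation may handle differently.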
@@ -222,17 +222,24 @@ def kl_divergence_1D(df1: pd.Series, df2: pd.Series) -> float:
 def kl_divergence(
     df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame, method: str = "columnwise"
 ) -> pd.Series:
225- """TODO Documentation
226- Kullback-Leibler divergence between distributions
227- If multivariate normal distributions:
228- https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
225+ """
226+ Estimation of the Kullback-Leibler divergence between too empirical distributions. Three
227+ methods are implemented:
228+ - columnwise, relying on a uniform binarization and only taking marginals into account
229+ (https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence),
230+ - gaussian, relying on a Gaussian approximation,
231+ - random_forest, experimental
 
     Parameters
     ----------
     df1 : pd.DataFrame
+        First empirical distribution
     df2 : pd.DataFrame
-    columnwise_evaluation: Optional[bool]
-        if the evalutation is computed column-wise. By default, is set to False
+        Second empirical distribution
+    df_mask : pd.DataFrame
+        Mask indicating on which values the divergence should be computed
+    method : str
+        Method used for the estimation: "columnwise", "gaussian" or "random_forest"
 
     Returns
     -------
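For the columnwise method named in the new docstring, a plausible reading (uniform binning of each marginal, then discrete KL) looks like the sketch below; `kl_divergence_columnwise`, `n_bins`, and the epsilon smoothing are illustrative assumptions, not the library's implementation:

```python
import numpy as np
import pandas as pd
from scipy.special import rel_entr

def kl_divergence_columnwise(df1: pd.DataFrame, df2: pd.DataFrame, n_bins: int = 20) -> pd.Series:
    result = {}
    for col in df1.columns:
        x1, x2 = df1[col].dropna(), df2[col].dropna()
        # Shared uniform bins spanning both samples, so the histograms are comparable
        bins = np.linspace(min(x1.min(), x2.min()), max(x1.max(), x2.max()), n_bins + 1)
        # Small epsilon keeps empty bins from producing an infinite divergence
        p = np.histogram(x1, bins=bins)[0] / len(x1) + 1e-12
        q = np.histogram(x2, bins=bins)[0] / len(x2) + 1e-12
        # rel_entr(p, q) = p * log(p / q); summed over bins, this is the KL estimate
        result[col] = rel_entr(p, q).sum()
    return pd.Series(result)
```

The gaussian method would instead plug fitted means and covariances into the closed-form KL between multivariate normals; only the marginal, columnwise variant is sketched here.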