11"""Script for characterising the holes."""
2+
23from abc import ABC , abstractmethod
34from itertools import combinations
45from typing import List , Optional , Tuple , Union
1617
1718
1819class McarTest (ABC ):
19- """Astract class for MCAR tests.
20+ """Abstract class for MCAR tests.
2021
2122 Parameters
2223 ----------
@@ -25,16 +26,14 @@ class McarTest(ABC):
2526
2627 Methods
2728 -------
28- test(df)
29+ test
2930 Abstract method to perform the MCAR test on the given DataFrame or
3031 NumPy array.
3132
3233 """
3334
3435 def __init__ (
35- self ,
36- random_state : Union [None , int ,
37- np .random .RandomState ] = None
36+ self , random_state : Union [None , int , np .random .RandomState ] = None
3837 ):
3938 """Initialize the McarTest class with a random state.
4039
@@ -48,8 +47,7 @@ def __init__(
4847
4948 @abstractmethod
5049 def test (
51- self ,
52- df : Union [pd .DataFrame , np .ndarray ]
50+ self , df : Union [pd .DataFrame , np .ndarray ]
5351 ) -> Union [float , Tuple [float , List [float ]]]:
5452 """Perform the MCAR test on the input data.
5553
@@ -581,6 +579,27 @@ def _parallel_process_permutation(
581579 target_idx : int ,
582580 oob_probabilities : np .ndarray ,
583581 ) -> float :
582+ """Process a permutation.
583+
584+ Parameters
585+ ----------
586+ X : np.ndarray
587+ input array
588+ M_perm : np.ndarray
589+ permutation array
590+ features_idx : np.ndarray
591+ index of the features
592+ target_idx : int
593+ index of the target
594+ oob_probabilities : np.ndarray
595+ out of bag probabilities
596+
597+ Returns
598+ -------
599+ float
600+ estimated statistic U_hat
601+
602+ """
584603 y = self ._build_label (X , M_perm , features_idx , target_idx )
585604 return self ._U_hat (oob_probabilities , y )
586605
@@ -591,6 +610,25 @@ def _parallel_process_projection(
591610 features_idx : np .ndarray ,
592611 target_idx : int ,
593612 ) -> Tuple [float , List [float ]]:
613+ """Compute statistics for a projection.
614+
615+ Parameters
616+ ----------
617+ X : np.ndarray
618+ input array
619+ list_permutations : List[np.ndarray]
620+ list of permutations
621+ features_idx : np.ndarray
622+ index of the features
623+ target_idx : int
624+ index of the target
625+
626+ Returns
627+ -------
628+ Tuple[float, List[float]]
629+ estimated statistic u_hat and list of u_hat for each permutation
630+
631+ """
594632 X_features , y = self ._build_dataset (X , features_idx , target_idx )
595633 oob_probabilities = self ._get_oob_probabilities (X_features , y )
596634 u_hat = self ._U_hat (oob_probabilities , y )
@@ -663,7 +701,7 @@ def _compute_partial_p_value(
663701
664702 """
665703 U_k = B [k , :] @ U
666- p_v_k = 1.
704+ p_v_k = 1.0
667705
668706 for u_sigma_k in (B [k , :] @ U_sigma ).tolist ():
669707 if u_sigma_k >= U_k :
@@ -721,7 +759,7 @@ def test(
721759 U = U / self .nb_projections
722760 list_U_sigma = [x / self .nb_permutation for x in list_U_sigma ]
723761
724- p_value = 1.
762+ p_value = 1.0
725763 for u_sigma in list_U_sigma :
726764 if u_sigma >= U :
727765 p_value += 1
@@ -732,8 +770,12 @@ def test(
732770 return p_value
733771 else :
734772 B = self ._build_B (list_proj , n_cols )
735- U_matrix = np .array ([np .atleast_1d (item [0 ]) for item in parallel_results ])
736- U_sigma = np .array ([np .atleast_1d (item [1 ]) for item in parallel_results ])
773+ U_matrix = np .array (
774+ [np .atleast_1d (item [0 ]) for item in parallel_results ]
775+ )
776+ U_sigma = np .array (
777+ [np .atleast_1d (item [1 ]) for item in parallel_results ]
778+ )
737779 p_values = [
738780 self ._compute_partial_p_value (B , U_matrix , U_sigma , k )
739781 for k in range (n_cols )
0 commit comments