Skip to content

Commit 5634451

Browse files
author
Hông-Lan Botterman
committed
review docstring
1 parent df10acb commit 5634451

File tree

1 file changed

+53
-11
lines changed

1 file changed

+53
-11
lines changed

qolmat/analysis/holes_characterization.py

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Script for characterising the holes."""
2+
23
from abc import ABC, abstractmethod
34
from itertools import combinations
45
from typing import List, Optional, Tuple, Union
@@ -16,7 +17,7 @@
1617

1718

1819
class McarTest(ABC):
19-
"""Astract class for MCAR tests.
20+
"""Abstract class for MCAR tests.
2021
2122
Parameters
2223
----------
@@ -25,16 +26,14 @@ class McarTest(ABC):
2526
2627
Methods
2728
-------
28-
test(df)
29+
test
2930
Abstract method to perform the MCAR test on the given DataFrame or
3031
NumPy array.
3132
3233
"""
3334

3435
def __init__(
35-
self,
36-
random_state: Union[None, int,
37-
np.random.RandomState] = None
36+
self, random_state: Union[None, int, np.random.RandomState] = None
3837
):
3938
"""Initialize the McarTest class with a random state.
4039
@@ -48,8 +47,7 @@ def __init__(
4847

4948
@abstractmethod
5049
def test(
51-
self,
52-
df: Union[pd.DataFrame, np.ndarray]
50+
self, df: Union[pd.DataFrame, np.ndarray]
5351
) -> Union[float, Tuple[float, List[float]]]:
5452
"""Perform the MCAR test on the input data.
5553
@@ -581,6 +579,27 @@ def _parallel_process_permutation(
581579
target_idx: int,
582580
oob_probabilities: np.ndarray,
583581
) -> float:
582+
"""Process a permutation.
583+
584+
Parameters
585+
----------
586+
X : np.ndarray
587+
input array
588+
M_perm : np.ndarray
589+
permutation array
590+
features_idx : np.ndarray
591+
index of the features
592+
target_idx : int
593+
index of the target
594+
oob_probabilities : np.ndarray
595+
out of bag probabilities
596+
597+
Returns
598+
-------
599+
float
600+
estimated statistic U_hat
601+
602+
"""
584603
y = self._build_label(X, M_perm, features_idx, target_idx)
585604
return self._U_hat(oob_probabilities, y)
586605

@@ -591,6 +610,25 @@ def _parallel_process_projection(
591610
features_idx: np.ndarray,
592611
target_idx: int,
593612
) -> Tuple[float, List[float]]:
613+
"""Compute statistics for a projection.
614+
615+
Parameters
616+
----------
617+
X : np.ndarray
618+
input array
619+
list_permutations : List[np.ndarray]
620+
list of permutations
621+
features_idx : np.ndarray
622+
index of the features
623+
target_idx : int
624+
index of the target
625+
626+
Returns
627+
-------
628+
Tuple[float, List[float]]
629+
estimated statistic u_hat and list of u_hat for each permutation
630+
631+
"""
594632
X_features, y = self._build_dataset(X, features_idx, target_idx)
595633
oob_probabilities = self._get_oob_probabilities(X_features, y)
596634
u_hat = self._U_hat(oob_probabilities, y)
@@ -663,7 +701,7 @@ def _compute_partial_p_value(
663701
664702
"""
665703
U_k = B[k, :] @ U
666-
p_v_k = 1.
704+
p_v_k = 1.0
667705

668706
for u_sigma_k in (B[k, :] @ U_sigma).tolist():
669707
if u_sigma_k >= U_k:
@@ -721,7 +759,7 @@ def test(
721759
U = U / self.nb_projections
722760
list_U_sigma = [x / self.nb_permutation for x in list_U_sigma]
723761

724-
p_value = 1.
762+
p_value = 1.0
725763
for u_sigma in list_U_sigma:
726764
if u_sigma >= U:
727765
p_value += 1
@@ -732,8 +770,12 @@ def test(
732770
return p_value
733771
else:
734772
B = self._build_B(list_proj, n_cols)
735-
U_matrix = np.array([np.atleast_1d(item[0]) for item in parallel_results])
736-
U_sigma = np.array([np.atleast_1d(item[1]) for item in parallel_results])
773+
U_matrix = np.array(
774+
[np.atleast_1d(item[0]) for item in parallel_results]
775+
)
776+
U_sigma = np.array(
777+
[np.atleast_1d(item[1]) for item in parallel_results]
778+
)
737779
p_values = [
738780
self._compute_partial_p_value(B, U_matrix, U_sigma, k)
739781
for k in range(n_cols)

0 commit comments

Comments
 (0)