33import typing
44
55from collections import deque
6+ import warnings
67
78import hpotk
89import pandas as pd
@@ -252,14 +253,14 @@ def verify_term_id(val: typing.Union[str, hpotk.TermId]) -> hpotk.TermId:
252253 raise ValueError (f"{ val } is neither `str` nor `hpotk.TermId`" )
253254
254255
255- class HpoMtcFilter (PhenotypeMtcFilter [hpotk .TermId ]):
256+ class IfHpoFilter (PhenotypeMtcFilter [hpotk .TermId ]):
256257 """
257- `HpoMtcFilter ` decides which phenotypes should be tested and which phenotypes are not worth testing.
258+ `IfHpoFilter ` decides which phenotypes should be tested and which phenotypes are not worth testing.
258259
259260 The class leverages a number of heuristics and domain decisions.
260- See :ref:`hpo-mt -filter` section for more info.
261+ See :ref:`hpo-if -filter` section for more info.
261262
262- We recommend creating an instance using the :func:`default_filter` static factory method.
263+ We recommend creating an instance using the :func:`~gpsea.analysis.mtc_filter.IfHpoFilter. default_filter` static factory method.
263264 """
264265
265266 NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO = PhenotypeMtcResult .fail (
@@ -340,7 +341,7 @@ def default_filter(
340341 general_hpo_term_set .update (second_level_terms )
341342 general_hpo_term_set .update (third_level_terms )
342343
343- return HpoMtcFilter (
344+ return IfHpoFilter (
344345 hpo = hpo ,
345346 term_frequency_threshold = term_frequency_threshold ,
346347 annotation_frequency_threshold = annotation_frequency_threshold ,
@@ -355,13 +356,15 @@ def __init__(
355356 general_hpo_terms : typing .Iterable [hpotk .TermId ],
356357 ):
357358 self ._hpo = hpo
358- assert isinstance (term_frequency_threshold , (int , float )) \
359- and 0. < term_frequency_threshold <= 1. , \
360- "The term_frequency_threshold must be in the range (0, 1]"
359+ assert (
360+ isinstance (term_frequency_threshold , (int , float ))
361+ and 0.0 < term_frequency_threshold <= 1.0
362+ ), "The term_frequency_threshold must be in the range (0, 1]"
361363 self ._hpo_term_frequency_filter = term_frequency_threshold
362- assert isinstance (annotation_frequency_threshold , (int , float )) \
363- and 0. < annotation_frequency_threshold <= 1. , \
364- "The annotation_frequency_threshold must be in the range (0, 1]"
364+ assert (
365+ isinstance (annotation_frequency_threshold , (int , float ))
366+ and 0.0 < annotation_frequency_threshold <= 1.0
367+ ), "The annotation_frequency_threshold must be in the range (0, 1]"
365368 self ._hpo_annotation_frequency_threshold = annotation_frequency_threshold
366369
367370 self ._general_hpo_terms = set (general_hpo_terms )
@@ -429,17 +432,17 @@ def filter(
429432 continue
430433
431434 if term_id in self ._general_hpo_terms :
432- results [idx ] = HpoMtcFilter .SKIPPING_GENERAL_TERM
435+ results [idx ] = IfHpoFilter .SKIPPING_GENERAL_TERM
433436 continue
434437
435438 if not self ._hpo .graph .is_ancestor_of (PHENOTYPIC_ABNORMALITY , term_id ):
436- results [idx ] = HpoMtcFilter .SKIPPING_NON_PHENOTYPE_TERM
439+ results [idx ] = IfHpoFilter .SKIPPING_NON_PHENOTYPE_TERM
437440 continue
438441
439442 ph_clf = pheno_clfs [idx ]
440443 contingency_matrix = counts [idx ]
441444
442- max_freq = HpoMtcFilter .get_maximum_group_observed_HPO_frequency (
445+ max_freq = IfHpoFilter .get_maximum_group_observed_HPO_frequency (
443446 contingency_matrix ,
444447 ph_clf = ph_clf ,
445448 )
@@ -465,19 +468,19 @@ def filter(
465468 results [idx ] = self ._not_powered_for_2_by_3
466469 continue
467470
468- if not HpoMtcFilter .some_cell_has_greater_than_one_count (
471+ if not IfHpoFilter .some_cell_has_greater_than_one_count (
469472 counts = contingency_matrix ,
470473 ph_clf = ph_clf ,
471474 ):
472- results [idx ] = HpoMtcFilter .NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO
475+ results [idx ] = IfHpoFilter .NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO
473476 continue
474477
475- elif HpoMtcFilter .one_genotype_has_zero_hpo_observations (
478+ elif IfHpoFilter .one_genotype_has_zero_hpo_observations (
476479 counts = contingency_matrix ,
477480 gt_clf = gt_clf ,
478481 ):
479482 results [idx ] = (
480- HpoMtcFilter .SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS
483+ IfHpoFilter .SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS
481484 )
482485 continue
483486
@@ -501,7 +504,7 @@ def filter(
501504 axis = None
502505 ) < 1 :
503506 # Do not test if the count is exactly the same to the counts in the only child term.
504- results [idx ] = HpoMtcFilter .SAME_COUNT_AS_THE_ONLY_CHILD
507+ results [idx ] = IfHpoFilter .SAME_COUNT_AS_THE_ONLY_CHILD
505508 continue
506509
507510 # ##
@@ -526,18 +529,18 @@ def possible_results(self) -> typing.Collection[PhenotypeMtcResult]:
526529 return (
527530 PhenotypeMtcFilter .OK ,
528531 self ._below_frequency_threshold , # HMF01
529- HpoMtcFilter .NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO , # HMF02
530- HpoMtcFilter .SAME_COUNT_AS_THE_ONLY_CHILD , # HMF03
531- HpoMtcFilter .SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS , # HMF05
532+ IfHpoFilter .NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO , # HMF02
533+ IfHpoFilter .SAME_COUNT_AS_THE_ONLY_CHILD , # HMF03
534+ IfHpoFilter .SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS , # HMF05
532535 self ._not_powered_for_2_by_2 , # HMF06
533536 self ._not_powered_for_2_by_3 , # HMF06
534- HpoMtcFilter .SKIPPING_NON_PHENOTYPE_TERM , # HMF07
535- HpoMtcFilter .SKIPPING_GENERAL_TERM , # HMF08
537+ IfHpoFilter .SKIPPING_NON_PHENOTYPE_TERM , # HMF07
538+ IfHpoFilter .SKIPPING_GENERAL_TERM , # HMF08
536539 self ._below_annotation_frequency_threshold , # HMF09
537540 )
538541
539542 def filter_method_name (self ) -> str :
540- return "HPO MTC filter"
543+ return "Independent filtering HPO filter"
541544
542545 @staticmethod
543546 def get_number_of_observed_hpo_observations (
@@ -629,3 +632,65 @@ def _get_ordered_terms(
629632
630633 # now, ordered_term_list is ordered from leaves to root
631634 return ordered_term_list
635+
636+
class HpoMtcFilter(IfHpoFilter):
    """
    `HpoMtcFilter` is deprecated and will be removed in `1.0.0`.

    Use :class:`gpsea.analysis.mtc_filter.IfHpoFilter` instead.

    The class only adds a `DeprecationWarning` on top of `IfHpoFilter`;
    all filtering logic is inherited unchanged.
    """

    # Single source of truth for the deprecation text emitted by both entry points.
    _DEPRECATION_MESSAGE = (
        "HpoMtcFilter has been deprecated and will be removed in 1.0.0. Use `IfHpoFilter` instead."
    )

    @staticmethod
    def default_filter(
        hpo: hpotk.MinimalOntology,
        term_frequency_threshold: float = 0.4,
        annotation_frequency_threshold: float = 0.4,
        phenotypic_abnormality: hpotk.TermId = PHENOTYPIC_ABNORMALITY,
    ):
        """
        Deprecated factory that delegates to `IfHpoFilter.default_filter`.

        Emits a `DeprecationWarning` attributed to the caller (`stacklevel=2`).

        Args:
            hpo: HPO
            term_frequency_threshold: a `float` in range :math:`(0, 1]` with the minimum frequency
              for an HPO term to have in at least one of the genotype groups
              (e.g., 22% in missense and 3% in nonsense genotypes would be OK,
              but not 13% missense and 10% nonsense genotypes if the threshold is 0.2).
              The default threshold is `0.4` (40%).
            annotation_frequency_threshold: a `float` in range :math:`(0, 1]` with the minimum frequency of
              annotation in the cohort. For instance, if the cohort consists of 100 individuals, and
              we have explicit observed observations for 20 and excluded for 10 individuals, then the
              annotation frequency is `0.3`. The purpose of this threshold is to omit terms for which
              we simply do not have much data overall. By default, we set a threshold to `0.4` (40%).
            phenotypic_abnormality: a :class:`~hpotk.TermId` corresponding to the root of HPO phenotype hierarchy.
              Having to specify this option should be needed very rarely, if ever.

        Returns:
            the filter created by `IfHpoFilter.default_filter` for the given arguments.
        """
        warnings.warn(
            HpoMtcFilter._DEPRECATION_MESSAGE,
            DeprecationWarning,
            stacklevel=2,
        )
        # The result must be returned; without the `return` the factory
        # silently yields `None` and every caller of the deprecated API breaks.
        return IfHpoFilter.default_filter(
            hpo=hpo,
            term_frequency_threshold=term_frequency_threshold,
            annotation_frequency_threshold=annotation_frequency_threshold,
            phenotypic_abnormality=phenotypic_abnormality,
        )

    def __init__(
        self,
        hpo: hpotk.MinimalOntology,
        term_frequency_threshold: float,
        annotation_frequency_threshold: float,
        general_hpo_terms: typing.Iterable[hpotk.TermId],
    ):
        """
        Initialize the deprecated filter exactly like `IfHpoFilter` and warn about the deprecation.

        All arguments are forwarded positionally to `IfHpoFilter.__init__`.
        """
        super().__init__(
            hpo,
            term_frequency_threshold,
            annotation_frequency_threshold,
            general_hpo_terms,
        )
        # Warn only after the parent has accepted the arguments,
        # and attribute the warning to the code that instantiated the class.
        warnings.warn(
            HpoMtcFilter._DEPRECATION_MESSAGE,
            DeprecationWarning,
            stacklevel=2,
        )
0 commit comments