33import typing
44
55from collections import deque
6+ import warnings
67
78import hpotk
89import pandas as pd
@@ -252,14 +253,14 @@ def verify_term_id(val: typing.Union[str, hpotk.TermId]) -> hpotk.TermId:
252253 raise ValueError (f"{ val } is neither `str` nor `hpotk.TermId`" )
253254
254255
255- class HpoMtcFilter (PhenotypeMtcFilter [hpotk .TermId ]):
256+ class IfHpoFilter (PhenotypeMtcFilter [hpotk .TermId ]):
256257 """
257- `HpoMtcFilter ` decides which phenotypes should be tested and which phenotypes are not worth testing.
258+ `IfHpoFilter ` decides which phenotypes should be tested and which phenotypes are not worth testing.
258259
259260 The class leverages a number of heuristics and domain decisions.
260- See :ref:`hpo-mt -filter` section for more info.
261+ See :ref:`hpo-if -filter` section for more info.
261262
262- We recommend creating an instance using the :func:`default_filter` static factory method.
263+ We recommend creating an instance using the :func:`~gpsea.analysis.mtc_filter.IfHpoFilter. default_filter` static factory method.
263264 """
264265
265266 NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO = PhenotypeMtcResult .fail (
@@ -293,7 +294,7 @@ def default_filter(
293294 (e.g., 22% in missense and 3% in nonsense genotypes would be OK,
294295 but not 13% missense and 10% nonsense genotypes if the threshold is 0.2).
295296 The default threshold is `0.4` (40%).
296- annotation_frequency_threshold: a `float` in range :math:`(0, 1) with the minimum frequency of
297+ annotation_frequency_threshold: a `float` in range :math:`(0, 1]` with the minimum frequency of
297298 annotation in the cohort. For instance, if the cohort consists of 100 individuals, and
298299 we have explicit observed observations for 20 and excluded for 10 individuals, then the
299300 annotation frequency is `0.3`. The purpose of this threshold is to omit terms for which
@@ -340,7 +341,7 @@ def default_filter(
340341 general_hpo_term_set .update (second_level_terms )
341342 general_hpo_term_set .update (third_level_terms )
342343
343- return HpoMtcFilter (
344+ return IfHpoFilter (
344345 hpo = hpo ,
345346 term_frequency_threshold = term_frequency_threshold ,
346347 annotation_frequency_threshold = annotation_frequency_threshold ,
@@ -355,7 +356,15 @@ def __init__(
355356 general_hpo_terms : typing .Iterable [hpotk .TermId ],
356357 ):
357358 self ._hpo = hpo
359+ assert (
360+ isinstance (term_frequency_threshold , (int , float ))
361+ and 0.0 < term_frequency_threshold <= 1.0
362+ ), "The term_frequency_threshold must be in the range (0, 1]"
358363 self ._hpo_term_frequency_filter = term_frequency_threshold
364+ assert (
365+ isinstance (annotation_frequency_threshold , (int , float ))
366+ and 0.0 < annotation_frequency_threshold <= 1.0
367+ ), "The annotation_frequency_threshold must be in the range (0, 1]"
359368 self ._hpo_annotation_frequency_threshold = annotation_frequency_threshold
360369
361370 self ._general_hpo_terms = set (general_hpo_terms )
@@ -423,17 +432,17 @@ def filter(
423432 continue
424433
425434 if term_id in self ._general_hpo_terms :
426- results [idx ] = HpoMtcFilter .SKIPPING_GENERAL_TERM
435+ results [idx ] = IfHpoFilter .SKIPPING_GENERAL_TERM
427436 continue
428437
429438 if not self ._hpo .graph .is_ancestor_of (PHENOTYPIC_ABNORMALITY , term_id ):
430- results [idx ] = HpoMtcFilter .SKIPPING_NON_PHENOTYPE_TERM
439+ results [idx ] = IfHpoFilter .SKIPPING_NON_PHENOTYPE_TERM
431440 continue
432441
433442 ph_clf = pheno_clfs [idx ]
434443 contingency_matrix = counts [idx ]
435444
436- max_freq = HpoMtcFilter .get_maximum_group_observed_HPO_frequency (
445+ max_freq = IfHpoFilter .get_maximum_group_observed_HPO_frequency (
437446 contingency_matrix ,
438447 ph_clf = ph_clf ,
439448 )
@@ -459,19 +468,19 @@ def filter(
459468 results [idx ] = self ._not_powered_for_2_by_3
460469 continue
461470
462- if not HpoMtcFilter .some_cell_has_greater_than_one_count (
471+ if not IfHpoFilter .some_cell_has_greater_than_one_count (
463472 counts = contingency_matrix ,
464473 ph_clf = ph_clf ,
465474 ):
466- results [idx ] = HpoMtcFilter .NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO
475+ results [idx ] = IfHpoFilter .NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO
467476 continue
468477
469- elif HpoMtcFilter .one_genotype_has_zero_hpo_observations (
478+ elif IfHpoFilter .one_genotype_has_zero_hpo_observations (
470479 counts = contingency_matrix ,
471480 gt_clf = gt_clf ,
472481 ):
473482 results [idx ] = (
474- HpoMtcFilter .SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS
483+ IfHpoFilter .SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS
475484 )
476485 continue
477486
@@ -495,7 +504,7 @@ def filter(
495504 axis = None
496505 ) < 1 :
497506 # Do not test if the count is exactly the same to the counts in the only child term.
498- results [idx ] = HpoMtcFilter .SAME_COUNT_AS_THE_ONLY_CHILD
507+ results [idx ] = IfHpoFilter .SAME_COUNT_AS_THE_ONLY_CHILD
499508 continue
500509
501510 # ##
@@ -520,18 +529,18 @@ def possible_results(self) -> typing.Collection[PhenotypeMtcResult]:
520529 return (
521530 PhenotypeMtcFilter .OK ,
522531 self ._below_frequency_threshold , # HMF01
523- HpoMtcFilter .NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO , # HMF02
524- HpoMtcFilter .SAME_COUNT_AS_THE_ONLY_CHILD , # HMF03
525- HpoMtcFilter .SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS , # HMF05
532+ IfHpoFilter .NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO , # HMF02
533+ IfHpoFilter .SAME_COUNT_AS_THE_ONLY_CHILD , # HMF03
534+ IfHpoFilter .SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS , # HMF05
526535 self ._not_powered_for_2_by_2 , # HMF06
527536 self ._not_powered_for_2_by_3 , # HMF06
528- HpoMtcFilter .SKIPPING_NON_PHENOTYPE_TERM , # HMF07
529- HpoMtcFilter .SKIPPING_GENERAL_TERM , # HMF08
537+ IfHpoFilter .SKIPPING_NON_PHENOTYPE_TERM , # HMF07
538+ IfHpoFilter .SKIPPING_GENERAL_TERM , # HMF08
530539 self ._below_annotation_frequency_threshold , # HMF09
531540 )
532541
def filter_method_name(self) -> str:
    """
    Get the human-readable name of this filtering strategy.

    Returns:
        a `str` with the name of the filter.
    """
    method_name = "Independent filtering HPO filter"
    return method_name
535544
536545 @staticmethod
537546 def get_number_of_observed_hpo_observations (
@@ -623,3 +632,65 @@ def _get_ordered_terms(
623632
624633 # now, ordered_term_list is ordered from leaves to root
625634 return ordered_term_list
635+
636+
class HpoMtcFilter(IfHpoFilter):
    """
    `HpoMtcFilter` is deprecated and will be removed in `1.0.0`.

    Use :class:`gpsea.analysis.mtc_filter.IfHpoFilter` instead.

    The class is a thin backward-compatibility shim: both the constructor
    and the :func:`default_filter` factory delegate to `IfHpoFilter`
    and emit a `DeprecationWarning`.
    """

    @staticmethod
    def default_filter(
        hpo: hpotk.MinimalOntology,
        term_frequency_threshold: float = 0.4,
        annotation_frequency_threshold: float = 0.4,
        phenotypic_abnormality: hpotk.TermId = PHENOTYPIC_ABNORMALITY,
    ) -> IfHpoFilter:
        """
        Deprecated factory that warns and then delegates to `IfHpoFilter.default_filter`.

        Args:
            hpo: HPO
            term_frequency_threshold: a `float` in range :math:`(0, 1]` with the minimum frequency
              for an HPO term to have in at least one of the genotype groups
              (e.g., 22% in missense and 3% in nonsense genotypes would be OK,
              but not 13% missense and 10% nonsense genotypes if the threshold is 0.2).
              The default threshold is `0.4` (40%).
            annotation_frequency_threshold: a `float` in range :math:`(0, 1]` with the minimum frequency of
              annotation in the cohort. For instance, if the cohort consists of 100 individuals, and
              we have explicit observed observations for 20 and excluded for 10 individuals, then the
              annotation frequency is `0.3`. The purpose of this threshold is to omit terms for which
              we simply do not have much data overall. By default, we set a threshold to `0.4` (40%).
            phenotypic_abnormality: a :class:`~hpotk.TermId` corresponding to the root of HPO phenotype hierarchy.
              Having to specify this option should be very rarely, if ever.

        Returns:
            the filter created by `IfHpoFilter.default_filter` for the given arguments.
        """
        warnings.warn(
            "HpoMtcFilter has been deprecated and will be removed in 1.0.0. Use `IfHpoFilter` instead.",
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the caller of the factory
        )
        # The result must be returned. Without the `return`, the factory
        # silently yields `None` and every caller of the deprecated API breaks.
        return IfHpoFilter.default_filter(
            hpo=hpo,
            term_frequency_threshold=term_frequency_threshold,
            annotation_frequency_threshold=annotation_frequency_threshold,
            phenotypic_abnormality=phenotypic_abnormality,
        )

    def __init__(
        self,
        hpo: hpotk.MinimalOntology,
        term_frequency_threshold: float,
        annotation_frequency_threshold: float,
        general_hpo_terms: typing.Iterable[hpotk.TermId],
    ):
        """
        Create the deprecated filter; all arguments are forwarded unchanged to `IfHpoFilter`.

        Args:
            hpo: HPO
            term_frequency_threshold: the term frequency threshold passed to the parent class.
            annotation_frequency_threshold: the annotation frequency threshold passed to the parent class.
            general_hpo_terms: an iterable of HPO term IDs passed to the parent class.
        """
        # Initialize the parent first, so that a failed initialization
        # surfaces its own error before any deprecation warning is emitted.
        super().__init__(
            hpo,
            term_frequency_threshold,
            annotation_frequency_threshold,
            general_hpo_terms,
        )
        warnings.warn(
            "HpoMtcFilter has been deprecated and will be removed in 1.0.0. Use `IfHpoFilter` instead.",
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the code instantiating the class
        )
0 commit comments