6464from sqlalchemy import select
6565from sqlalchemy .orm import Session , joinedload
6666
67+ from mavedb .models .score_calibration_functional_classification import ScoreCalibrationFunctionalClassification
6768from mavedb .models .score_set import ScoreSet
6869from mavedb .scripts .environment import with_database_session
69- from mavedb .view_models .score_calibration import FunctionalRange
7070
7171logger = logging .getLogger (__name__ )
7272
7373
74- def score_falls_within_range (score : float , functional_range : dict ) -> bool :
75- """Check if a score falls within a functional range using the view model."""
76- try :
77- range_obj = FunctionalRange .model_validate (functional_range )
78- return range_obj .is_contained_by_range (score )
79- except Exception as e :
80- logger .warning (f"Error validating functional range: { e } " )
81- return False
82-
83-
84- def has_acmg_classification (functional_range : dict ) -> bool :
85- """Check if a functional range has an ACMG classification."""
86- acmg_data = functional_range .get ("acmg_classification" )
87- return acmg_data is not None and (
88- acmg_data .get ("criterion" ) is not None
89- or acmg_data .get ("evidence_strength" ) is not None
90- or acmg_data .get ("points" ) is not None
91- )
92-
93-
9474@click .command ()
9575@with_database_session
9676def main (db : Session ) -> None :
@@ -106,28 +86,26 @@ def main(db: Session) -> None:
10686
10787 score_sets = db .scalars (query ).unique ().all ()
10888
109- total_variants = 0
110- classified_variants = 0
111- score_sets_with_acmg = 0
112- processed_variants : Set [int ] = set ()
89+ total_variants_count = 0
90+ classified_variants_count = 0
91+ score_sets_with_acmg_count = 0
11392 gene_list : Set [str ] = set ()
11493
11594 click .echo (f"Found { len (score_sets )} non-superseded score sets with calibrations" )
11695
11796 for score_set in score_sets :
11897 # Collect all ACMG-classified ranges from this score set's calibrations
119- acmg_ranges = []
98+ acmg_ranges : list [ ScoreCalibrationFunctionalClassification ] = []
12099 for calibration in score_set .score_calibrations :
121- if calibration .functional_ranges :
122- for func_range in calibration .functional_ranges :
123- if has_acmg_classification ( func_range ) :
124- acmg_ranges .append (func_range )
100+ if calibration .functional_classifications :
101+ for func_classification in calibration .functional_classifications :
102+ if func_classification . acmg_classification_id is not None :
103+ acmg_ranges .append (func_classification )
125104
126105 if not acmg_ranges :
127106 continue
128107
129- score_sets_with_acmg += 1
130- score_set_classified_variants = 0
108+ score_sets_with_acmg_count += 1
131109
132110 # Retain a list of unique target genes for reporting
133111 for target in score_set .target_genes :
@@ -137,47 +115,32 @@ def main(db: Session) -> None:
137115
138116 gene_list .add (target_name .strip ().upper ())
139117
140- for variant in score_set .variants :
141- if variant .id in processed_variants :
142- continue
143-
144- variant_data = variant .data
145- if not variant_data :
146- continue
147-
148- score_data = variant_data .get ("score_data" , {})
149- score = score_data .get ("score" )
150-
151- total_variants += 1
152- processed_variants .add (variant .id ) # type: ignore
153-
154- if score is None :
155- continue
156-
157- # Check if score falls within any ACMG-classified range in this score set
158- for func_range in acmg_ranges :
159- if score_falls_within_range (float (score ), func_range ):
160- classified_variants += 1
161- score_set_classified_variants += 1
162- break # Count variant only once per score set
118+ score_set_classified_variants : set [int ] = set ()
119+ for classified_range in acmg_ranges :
120+ variants_classified_by_range : list [int ] = [
121+ variant .id for variant in classified_range .variants if variant .id is not None
122+ ]
123+ score_set_classified_variants .update (variants_classified_by_range )
163124
164- if score_set_classified_variants > 0 :
125+ total_variants_count += score_set .num_variants or 0
126+ classified_variants_count += len (score_set_classified_variants )
127+ if score_set_classified_variants :
165128 click .echo (
166- f"Score set { score_set .urn } : { score_set_classified_variants } classified variants ({ score_set .num_variants } total variants)"
129+ f"Score set { score_set .urn } : { len ( score_set_classified_variants ) } classified variants ({ score_set .num_variants } total variants)"
167130 )
168131
169132 click .echo ("\n " + "=" * 60 )
170133 click .echo ("SUMMARY" )
171134 click .echo ("=" * 60 )
172- click .echo (f"Score sets with ACMG classifications: { score_sets_with_acmg } " )
173- click .echo (f"Total unique variants processed: { total_variants } " )
174- click .echo (f"Variants within ACMG-classified ranges: { classified_variants } " )
135+ click .echo (f"Score sets with ACMG classifications: { score_sets_with_acmg_count } " )
136+ click .echo (f"Total unique variants processed: { total_variants_count } " )
137+ click .echo (f"Variants within ACMG-classified ranges: { classified_variants_count } " )
175138 click .echo (f"Unique target genes covered ({ len (gene_list )} ):" )
176139 for gene in sorted (gene_list ):
177140 click .echo (f" - { gene } " )
178141
179- if total_variants > 0 :
180- percentage = (classified_variants / total_variants ) * 100
142+ if total_variants_count > 0 :
143+ percentage = (classified_variants_count / total_variants_count ) * 100
181144 click .echo (f"Classification rate: { percentage :.1f} %" )
182145
183146
0 commit comments