Skip to content

Commit 4962f4f

Browse files
committed
refactor: update calibrated variant effects script for new classification format
1 parent 969bf45 commit 4962f4f

File tree

1 file changed

+25
-62
lines changed

1 file changed

+25
-62
lines changed

src/mavedb/scripts/calibrated_variant_effects.py

Lines changed: 25 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -64,33 +64,13 @@
6464
from sqlalchemy import select
6565
from sqlalchemy.orm import Session, joinedload
6666

67+
from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification
6768
from mavedb.models.score_set import ScoreSet
6869
from mavedb.scripts.environment import with_database_session
69-
from mavedb.view_models.score_calibration import FunctionalRange
7070

7171
logger = logging.getLogger(__name__)
7272

7373

74-
def score_falls_within_range(score: float, functional_range: dict) -> bool:
75-
"""Check if a score falls within a functional range using the view model."""
76-
try:
77-
range_obj = FunctionalRange.model_validate(functional_range)
78-
return range_obj.is_contained_by_range(score)
79-
except Exception as e:
80-
logger.warning(f"Error validating functional range: {e}")
81-
return False
82-
83-
84-
def has_acmg_classification(functional_range: dict) -> bool:
85-
"""Check if a functional range has an ACMG classification."""
86-
acmg_data = functional_range.get("acmg_classification")
87-
return acmg_data is not None and (
88-
acmg_data.get("criterion") is not None
89-
or acmg_data.get("evidence_strength") is not None
90-
or acmg_data.get("points") is not None
91-
)
92-
93-
9474
@click.command()
9575
@with_database_session
9676
def main(db: Session) -> None:
@@ -106,28 +86,26 @@ def main(db: Session) -> None:
10686

10787
score_sets = db.scalars(query).unique().all()
10888

109-
total_variants = 0
110-
classified_variants = 0
111-
score_sets_with_acmg = 0
112-
processed_variants: Set[int] = set()
89+
total_variants_count = 0
90+
classified_variants_count = 0
91+
score_sets_with_acmg_count = 0
11392
gene_list: Set[str] = set()
11493

11594
click.echo(f"Found {len(score_sets)} non-superseded score sets with calibrations")
11695

11796
for score_set in score_sets:
11897
# Collect all ACMG-classified ranges from this score set's calibrations
119-
acmg_ranges = []
98+
acmg_ranges: list[ScoreCalibrationFunctionalClassification] = []
12099
for calibration in score_set.score_calibrations:
121-
if calibration.functional_ranges:
122-
for func_range in calibration.functional_ranges:
123-
if has_acmg_classification(func_range):
124-
acmg_ranges.append(func_range)
100+
if calibration.functional_classifications:
101+
for func_classification in calibration.functional_classifications:
102+
if func_classification.acmg_classification_id is not None:
103+
acmg_ranges.append(func_classification)
125104

126105
if not acmg_ranges:
127106
continue
128107

129-
score_sets_with_acmg += 1
130-
score_set_classified_variants = 0
108+
score_sets_with_acmg_count += 1
131109

132110
# Retain a list of unique target genes for reporting
133111
for target in score_set.target_genes:
@@ -137,47 +115,32 @@ def main(db: Session) -> None:
137115

138116
gene_list.add(target_name.strip().upper())
139117

140-
for variant in score_set.variants:
141-
if variant.id in processed_variants:
142-
continue
143-
144-
variant_data = variant.data
145-
if not variant_data:
146-
continue
147-
148-
score_data = variant_data.get("score_data", {})
149-
score = score_data.get("score")
150-
151-
total_variants += 1
152-
processed_variants.add(variant.id) # type: ignore
153-
154-
if score is None:
155-
continue
156-
157-
# Check if score falls within any ACMG-classified range in this score set
158-
for func_range in acmg_ranges:
159-
if score_falls_within_range(float(score), func_range):
160-
classified_variants += 1
161-
score_set_classified_variants += 1
162-
break # Count variant only once per score set
118+
score_set_classified_variants: set[int] = set()
119+
for classified_range in acmg_ranges:
120+
variants_classified_by_range: list[int] = [
121+
variant.id for variant in classified_range.variants if variant.id is not None
122+
]
123+
score_set_classified_variants.update(variants_classified_by_range)
163124

164-
if score_set_classified_variants > 0:
125+
total_variants_count += score_set.num_variants or 0
126+
classified_variants_count += len(score_set_classified_variants)
127+
if score_set_classified_variants:
165128
click.echo(
166-
f"Score set {score_set.urn}: {score_set_classified_variants} classified variants ({score_set.num_variants} total variants)"
129+
f"Score set {score_set.urn}: {len(score_set_classified_variants)} classified variants ({score_set.num_variants} total variants)"
167130
)
168131

169132
click.echo("\n" + "=" * 60)
170133
click.echo("SUMMARY")
171134
click.echo("=" * 60)
172-
click.echo(f"Score sets with ACMG classifications: {score_sets_with_acmg}")
173-
click.echo(f"Total unique variants processed: {total_variants}")
174-
click.echo(f"Variants within ACMG-classified ranges: {classified_variants}")
135+
click.echo(f"Score sets with ACMG classifications: {score_sets_with_acmg_count}")
136+
click.echo(f"Total unique variants processed: {total_variants_count}")
137+
click.echo(f"Variants within ACMG-classified ranges: {classified_variants_count}")
175138
click.echo(f"Unique target genes covered ({len(gene_list)}):")
176139
for gene in sorted(gene_list):
177140
click.echo(f" - {gene}")
178141

179-
if total_variants > 0:
180-
percentage = (classified_variants / total_variants) * 100
142+
if total_variants_count > 0:
143+
percentage = (classified_variants_count / total_variants_count) * 100
181144
click.echo(f"Classification rate: {percentage:.1f}%")
182145

183146

0 commit comments

Comments
 (0)