Skip to content

Commit 613718c

Browse files
authored
Merge pull request #506 from VariantEffect/jstone-dev/clinvar-control-query-optimization
ClinVar control query optimization and control response size reduction
2 parents 475b136 + e86db86 commit 613718c

File tree

3 files changed: +34 additions, -21 deletions (lines changed)

src/mavedb/routers/score_sets.py

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from ga4gh.va_spec.base.core import Statement, ExperimentalVariantFunctionalImpactStudyResult
1313
from sqlalchemy import null, or_, select
1414
from sqlalchemy.exc import MultipleResultsFound, NoResultFound
15-
from sqlalchemy.orm import Session
15+
from sqlalchemy.orm import contains_eager, Session
1616

1717
from mavedb import deps
1818
from mavedb.lib.annotation.exceptions import MappingDataDoesntExistException
@@ -1463,30 +1463,27 @@ async def get_clinical_controls_for_score_set(
14631463

14641464
clinical_controls_query = (
14651465
select(ClinicalControl)
1466-
.join(MappedVariant, ClinicalControl.mapped_variants)
1467-
.join(Variant)
1468-
.where(Variant.score_set_id == item.id)
1466+
.join(ClinicalControl.mapped_variants)
1467+
.join(MappedVariant.variant)
1468+
.options(
1469+
contains_eager(ClinicalControl.mapped_variants)
1470+
.contains_eager(MappedVariant.variant)
1471+
)
1472+
.filter(MappedVariant.current.is_(True))
1473+
.filter(Variant.score_set_id == item.id)
14691474
)
14701475

14711476
if db_name is not None:
14721477
save_to_logging_context({"db_name": db_name})
1473-
clinical_controls_query = clinical_controls_query.where(ClinicalControl.db_name == db_name)
1478+
clinical_controls_query = clinical_controls_query.filter(ClinicalControl.db_name == db_name)
14741479

14751480
if db_version is not None:
14761481
save_to_logging_context({"db_version": db_version})
1477-
clinical_controls_query = clinical_controls_query.where(ClinicalControl.db_version == db_version)
1478-
1479-
clinical_controls_for_item: Sequence[ClinicalControl] = _db.scalars(clinical_controls_query).all()
1480-
clinical_controls_with_mapped_variant = []
1481-
for control_variant in clinical_controls_for_item:
1482-
control_variant.mapped_variants = [
1483-
mv for mv in control_variant.mapped_variants if mv.current and mv.variant.score_set_id == item.id
1484-
]
1482+
clinical_controls_query = clinical_controls_query.filter(ClinicalControl.db_version == db_version)
14851483

1486-
if control_variant.mapped_variants:
1487-
clinical_controls_with_mapped_variant.append(control_variant)
1484+
clinical_controls: Sequence[ClinicalControl] = _db.scalars(clinical_controls_query).unique().all()
14881485

1489-
if not clinical_controls_with_mapped_variant:
1486+
if not clinical_controls:
14901487
logger.info(
14911488
msg="No clinical control variants matching the provided filters are associated with the requested score set.",
14921489
extra=logging_context(),
@@ -1496,9 +1493,9 @@ async def get_clinical_controls_for_score_set(
14961493
detail=f"No clinical control variants matching the provided filters associated with score set URN {urn} were found",
14971494
)
14981495

1499-
save_to_logging_context({"resource_count": len(clinical_controls_for_item)})
1496+
save_to_logging_context({"resource_count": len(clinical_controls)})
15001497

1501-
return clinical_controls_for_item
1498+
return clinical_controls
15021499

15031500

15041501
@router.get(

src/mavedb/view_models/clinical_control.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class Config:
3939

4040

4141
class SavedClinicalControlWithMappedVariants(SavedClinicalControl):
42-
mapped_variants: Sequence["SavedMappedVariant"]
42+
mapped_variants: Sequence["MappedVariantForClinicalControl"]
4343

4444

4545
# Properties to return to non-admin clients
@@ -48,7 +48,7 @@ class ClinicalControl(SavedClinicalControl):
4848

4949

5050
class ClinicalControlWithMappedVariants(SavedClinicalControlWithMappedVariants):
51-
mapped_variants: Sequence["MappedVariant"]
51+
pass
5252

5353

5454
class ClinicalControlOptions(BaseModel):
@@ -57,7 +57,7 @@ class ClinicalControlOptions(BaseModel):
5757

5858

5959
# ruff: noqa: E402
60-
from mavedb.view_models.mapped_variant import MappedVariant, SavedMappedVariant, MappedVariantCreate
60+
from mavedb.view_models.mapped_variant import MappedVariantCreate, MappedVariantForClinicalControl
6161

6262
# ClinicalControlUpdate.model_rebuild()
6363
SavedClinicalControlWithMappedVariants.model_rebuild()

src/mavedb/view_models/mapped_variant.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,22 @@ class MappedVariantWithControls(SavedMappedVariantWithControls):
8080
gnomad_variants: Sequence["GnomADVariant"]
8181

8282

83+
class MappedVariantForClinicalControl(BaseModel):
    """Slim mapped-variant view used inside clinical control view models.

    Only the URN of the underlying variant is exposed, instead of a full
    mapped variant record.
    """

    # URN of the variant this mapped variant belongs to.
    variant_urn: str

    class Config:
        # Let the model be built from an object's attributes rather than
        # only from a dict.
        from_attributes = True

    @model_validator(mode="before")
    def generate_score_set_urn_list(cls, data: Any):
        """Derive ``variant_urn`` from ``data.variant`` when it is not already present.

        Despite its name, this validator does not build a list of score set
        URNs: it copies ``data.variant.urn`` onto ``data`` as ``variant_urn``
        so the field can be read during validation. The name is kept
        unchanged for compatibility.

        Note that the input object itself is mutated (the attribute is set on
        ``data``), and the same object is returned.

        Raises:
            ValidationError: if reading the variant's ``urn`` raises
                ``AttributeError``; the original error is chained as the cause.
        """
        if not hasattr(data, "variant_urn") and hasattr(data, "variant"):
            try:
                # NOTE(review): `variant_urn` is declared as a required `str`, so the
                # `None` assigned for a missing variant is presumably rejected later
                # by field validation -- confirm that this is the intended outcome.
                data.variant_urn = data.variant.urn if data.variant else None
            except AttributeError as exc:
                # Chain the original error so the traceback shows which attribute was missing.
                raise ValidationError(f"Unable to create {cls.__name__} without attribute: {exc}.") from exc  # type: ignore
        return data
97+
98+
8399
# ruff: noqa: E402
84100
from mavedb.view_models.clinical_control import ClinicalControlBase, ClinicalControl, SavedClinicalControl
85101
from mavedb.view_models.gnomad_variant import GnomADVariantBase, GnomADVariant, SavedGnomADVariant

0 commit comments

Comments
 (0)