Skip to content

Commit abbc846

Browse files
committed
feat: add ClinGen namespace to CSV export functionality
Additionally, adds a few tests for existing namespaces to increase coverage of namespaced CSV export.
1 parent e75c25f commit abbc846

File tree

3 files changed

+95
-6
lines changed

3 files changed

+95
-6
lines changed

src/mavedb/lib/score_sets.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ def find_publish_or_private_superseded_score_set_tail(
502502
def get_score_set_variants_as_csv(
503503
db: Session,
504504
score_set: ScoreSet,
505-
namespaces: List[Literal["scores", "counts", "vep", "gnomad"]],
505+
namespaces: List[Literal["scores", "counts", "vep", "gnomad", "clingen"]],
506506
namespaced: Optional[bool] = None,
507507
start: Optional[int] = None,
508508
limit: Optional[int] = None,
@@ -519,8 +519,8 @@ def get_score_set_variants_as_csv(
519519
The database session to use.
520520
score_set : ScoreSet
521521
The score set to get the variants from.
522-
namespaces : List[Literal["scores", "counts", "vep", "gnomad"]]
523-
The namespaces for data. Now there are only scores, counts, VEP, and gnomAD. ClinVar will be added in the future.
522+
namespaces : List[Literal["scores", "counts", "vep", "gnomad", "clingen"]]
523+
The namespaces of data to include. Currently supported: scores, counts, VEP, gnomAD, and ClinGen. ClinVar will be added in the future.
524524
namespaced: Optional[bool] = None
525525
Whether to namespace the columns or not.
526526
start : int, optional
@@ -569,6 +569,8 @@ def get_score_set_variants_as_csv(
569569
namespaced_score_set_columns["vep"].append("vep_functional_consequence")
570570
if "gnomad" in namespaced_score_set_columns:
571571
namespaced_score_set_columns["gnomad"].append("gnomad_af")
572+
if "clingen" in namespaced_score_set_columns:
573+
namespaced_score_set_columns["clingen"].append("clingen_allele_id")
572574
variants: Sequence[Variant] = []
573575
mappings: Optional[list[Optional[MappedVariant]]] = None
574576
gnomad_data: Optional[list[Optional[GnomADVariant]]] = None
@@ -841,6 +843,15 @@ def variant_to_csv_row(
841843
value = na_rep
842844
key = f"gnomad.{column_key}" if namespaced else column_key
843845
row[key] = value
846+
for column_key in columns.get("clingen", []):
847+
if column_key == "clingen_allele_id":
848+
clingen_allele_id = mapping.clingen_allele_id if mapping else None
849+
if clingen_allele_id is not None:
850+
value = str(clingen_allele_id)
851+
else:
852+
value = na_rep
853+
key = f"clingen.{column_key}" if namespaced else column_key
854+
row[key] = value
844855
return row
845856

846857

src/mavedb/routers/score_sets.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -706,8 +706,8 @@ def get_score_set_variants_csv(
706706
urn: str,
707707
start: int = Query(default=None, description="Start index for pagination"),
708708
limit: int = Query(default=None, description="Maximum number of variants to return"),
709-
namespaces: List[Literal["scores", "counts", "vep", "gnomad"]] = Query(
710-
default=["scores"], description="One or more data types to include: scores, counts, clinVar, gnomAD, VEP"
709+
namespaces: List[Literal["scores", "counts", "vep", "gnomad", "clingen"]] = Query(
710+
default=["scores"], description="One or more data types to include: scores, counts, ClinGen, gnomAD, VEP"
711711
),
712712
drop_na_columns: Optional[bool] = None,
713713
include_custom_columns: Optional[bool] = None,
@@ -732,7 +732,7 @@ def get_score_set_variants_csv(
732732
The index to start from. If None, starts from the beginning.
733733
limit : Optional[int]
734734
The maximum number of variants to return. If None, returns all variants.
735-
namespaces: List[Literal["scores", "counts", "vep", "gnomad"]]
735+
namespaces: List[Literal["scores", "counts", "vep", "gnomad", "clingen"]]
736736
The namespaces of all columns except for accession, hgvs_nt, hgvs_pro, and hgvs_splice.
737737
We may add ClinVar in the future.
738738
drop_na_columns : bool, optional

tests/routers/test_score_set.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
TEST_SAVED_GENERIC_CLINICAL_CONTROL,
5454
TEST_SAVED_GNOMAD_VARIANT,
5555
TEST_USER,
56+
VALID_CLINGEN_CA_ID,
5657
)
5758
from tests.helpers.dependency_overrider import DependencyOverrider
5859
from tests.helpers.util.common import (
@@ -2853,6 +2854,83 @@ def test_download_scores_counts_and_post_mapped_variants_file(
28532854
)
28542855

28552856

2857+
# Additional namespace export tests: VEP, ClinGen, gnomAD
2858+
def test_download_vep_file_in_variant_data_path(session, data_provider, client, setup_router_db, data_files):
2859+
experiment = create_experiment(client)
2860+
score_set = create_seq_score_set(client, experiment["urn"])
2861+
score_set = mock_worker_variant_insertion(
2862+
client, session, data_provider, score_set, data_files / "scores.csv", data_files / "counts.csv"
2863+
)
2864+
# Create mapped variants with VEP consequence populated
2865+
create_mapped_variants_for_score_set(session, score_set["urn"], TEST_MAPPED_VARIANT_WITH_HGVS_G_EXPRESSION)
2866+
2867+
with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
2868+
published_score_set = publish_score_set(client, score_set["urn"])
2869+
worker_queue.assert_called_once()
2870+
2871+
response = client.get(
2872+
f"/api/v1/score-sets/{published_score_set['urn']}/variants/data?namespaces=vep&include_post_mapped_hgvs=true&drop_na_columns=true"
2873+
)
2874+
assert response.status_code == 200
2875+
reader = csv.DictReader(StringIO(response.text))
2876+
assert "vep.vep_functional_consequence" in reader.fieldnames
2877+
# At least one row should contain the test consequence value
2878+
rows = list(reader)
2879+
assert any(row.get("vep.vep_functional_consequence") == "missense_variant" for row in rows)
2880+
2881+
2882+
def test_download_clingen_file_in_variant_data_path(session, data_provider, client, setup_router_db, data_files):
2883+
experiment = create_experiment(client)
2884+
score_set = create_seq_score_set(client, experiment["urn"])
2885+
score_set = mock_worker_variant_insertion(
2886+
client, session, data_provider, score_set, data_files / "scores.csv", data_files / "counts.csv"
2887+
)
2888+
# Create mapped variants then set ClinGen allele id for first mapped variant
2889+
create_mapped_variants_for_score_set(session, score_set["urn"], TEST_MAPPED_VARIANT_WITH_HGVS_G_EXPRESSION)
2890+
db_score_set = session.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set["urn"]).one()
2891+
first_mapped_variant = db_score_set.variants[0].mapped_variants[0]
2892+
first_mapped_variant.clingen_allele_id = VALID_CLINGEN_CA_ID
2893+
session.add(first_mapped_variant)
2894+
session.commit()
2895+
2896+
with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
2897+
published_score_set = publish_score_set(client, score_set["urn"])
2898+
worker_queue.assert_called_once()
2899+
2900+
response = client.get(
2901+
f"/api/v1/score-sets/{published_score_set['urn']}/variants/data?namespaces=clingen&include_post_mapped_hgvs=true&drop_na_columns=true"
2902+
)
2903+
assert response.status_code == 200
2904+
reader = csv.DictReader(StringIO(response.text))
2905+
assert "clingen.clingen_allele_id" in reader.fieldnames
2906+
rows = list(reader)
2907+
assert rows[0].get("clingen.clingen_allele_id") == VALID_CLINGEN_CA_ID
2908+
2909+
2910+
def test_download_gnomad_file_in_variant_data_path(session, data_provider, client, setup_router_db, data_files):
2911+
experiment = create_experiment(client)
2912+
score_set = create_seq_score_set(client, experiment["urn"])
2913+
score_set = mock_worker_variant_insertion(
2914+
client, session, data_provider, score_set, data_files / "scores.csv", data_files / "counts.csv"
2915+
)
2916+
# Create a score set with mapped variants and link gnomAD variants to them (the linked gnomAD version may not match the export filter)
2917+
score_set = create_seq_score_set_with_mapped_variants(
2918+
client, session, data_provider, experiment["urn"], data_files / "scores.csv"
2919+
)
2920+
link_gnomad_variants_to_mapped_variants(session, score_set)
2921+
2922+
with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
2923+
published_score_set = publish_score_set(client, score_set["urn"])
2924+
worker_queue.assert_called_once()
2925+
2926+
response = client.get(
2927+
f"/api/v1/score-sets/{published_score_set['urn']}/variants/data?namespaces=gnomad&drop_na_columns=true"
2928+
)
2929+
assert response.status_code == 200
2930+
reader = csv.DictReader(StringIO(response.text))
2931+
assert "gnomad.gnomad_af" in reader.fieldnames
2932+
2933+
28562934
########################################################################################################################
28572935
# Fetching clinical controls and control options for a score set
28582936
########################################################################################################################

0 commit comments

Comments
 (0)