11import click
22import logging
3- from typing import Sequence
3+ import re
4+ from typing import Optional , Sequence
45
5- from sqlalchemy import select
6+ from sqlalchemy import and_ , select
67from sqlalchemy .orm import Session
78
89from mavedb .models .score_set import ScoreSet
1617
1718logger = logging .getLogger (__name__ )
1819
20+ intronic_variant_with_reference_regex = re .compile (r":c\..*[+-]" )
21+ variant_with_reference_regex = re .compile (r":" )
1922
20- def submit_urns_to_clingen (db : Session , urns : Sequence [str ], debug : bool ) -> list [str ]:
23+ def submit_urns_to_clingen (db : Session , urns : Sequence [str ], unlinked_only : bool , prefer_unmapped_hgvs : bool , debug : bool ) -> list [str ]:
2124 ldh_service = ClinGenLdhService (url = LDH_SUBMISSION_ENDPOINT )
2225 ldh_service .authenticate ()
2326
@@ -37,13 +40,12 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> lis
3740 continue
3841
3942 logger .info (f"Submitting mapped variants to LDH service for score set with URN: { urn } " )
43+ mapped_variant_join_clause = and_ (MappedVariant .variant_id == Variant .id , MappedVariant .post_mapped .is_not (None ), MappedVariant .current .is_ (True ))
4044 variant_objects = db .execute (
4145 select (Variant , MappedVariant )
42- .join (MappedVariant )
46+ .join (MappedVariant , mapped_variant_join_clause , isouter = True )
4347 .join (ScoreSet )
4448 .where (ScoreSet .urn == urn )
45- .where (MappedVariant .post_mapped .is_not (None ))
46- .where (MappedVariant .current .is_ (True ))
4749 ).all ()
4850
4951 if not variant_objects :
@@ -52,12 +54,48 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> lis
5254
5355 logger .debug (f"Preparing { len (variant_objects )} mapped variants for submission" )
5456
55- variant_content : list [tuple [str , Variant , MappedVariant ]] = []
57+ variant_content : list [tuple [str , Variant , Optional [ MappedVariant ] ]] = []
5658 for variant , mapped_variant in variant_objects :
57- variation = hgvs_from_mapped_variant (mapped_variant )
59+ if mapped_variant is None :
60+ if variant .hgvs_nt is not None and intronic_variant_with_reference_regex .search (variant .hgvs_nt ):
61+ # Use the hgvs_nt string for unmapped intronic variants. This is because our mapper does not yet
62+ # support mapping intronic variants.
63+ variation = [variant .hgvs_nt ]
64+ if variation :
65+ logger .info (f"Using hgvs_nt for unmapped intronic variant { variant .urn } : { variation } " )
66+ elif variant .hgvs_nt is not None and variant_with_reference_regex .search (variant .hgvs_nt ):
67+ # Use the hgvs_nt string for other unmapped NT variants in accession-based score sets.
68+ variation = [variant .hgvs_nt ]
69+ if variation :
70+ logger .info (f"Using hgvs_nt for unmapped non-intronic variant { variant .urn } : { variation } " )
71+ elif variant .hgvs_pro is not None and variant_with_reference_regex .search (variant .hgvs_pro ):
72+ # Use the hgvs_pro string for unmapped PRO variants in accession-based score sets.
73+ variation = [variant .hgvs_pro ]
74+ if variation :
75+ logger .info (f"Using hgvs_pro for unmapped non-intronic variant { variant .urn } : { variation } " )
76+ else :
77+ logger .warning (f"No variation found for unmapped variant { variant .urn } (nt: { variant .hgvs_nt } , aa: { variant .hgvs_pro } , splice: { variant .hgvs_splice } )." )
78+ continue
79+ else :
80+ if unlinked_only and mapped_variant .clingen_allele_id :
81+ continue
82+ # If the script was run with the --prefer-unmapped-hgvs flag, use the hgvs_nt string rather than the
83+ # mapped variant, as long as the variant is accession-based.
84+ if prefer_unmapped_hgvs and variant .hgvs_nt is not None and variant_with_reference_regex .search (variant .hgvs_nt ):
85+ variation = [variant .hgvs_nt ]
86+ if variation :
87+ logger .info (f"Using hgvs_nt for mapped variant { variant .urn } : { variation } " )
88+ elif prefer_unmapped_hgvs and variant .hgvs_pro is not None and variant_with_reference_regex .search (variant .hgvs_pro ):
89+ variation = [variant .hgvs_pro ]
90+ if variation :
91+ logger .info (f"Using hgvs_pro for mapped variant { variant .urn } : { variation } " ) # continue # TEMPORARY. Only submit unmapped variants.
92+ else :
93+ variation = hgvs_from_mapped_variant (mapped_variant )
94+ if variation :
95+ logger .info (f"Using mapped variant for { variant .urn } : { variation } " )
5896
5997 if not variation :
60- logger .warning (f"No variation found for variant { variant .urn } ." )
98+ logger .warning (f"No variation found for mapped variant { variant .urn } (nt: { variant . hgvs_nt } , aa: { variant . hgvs_pro } , splice: { variant . hgvs_splice } ) ." )
6199 continue
62100
63101 for allele in variation :
@@ -90,11 +128,13 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> lis
90128@click .command ()
91129@with_database_session
92130@click .argument ("urns" , nargs = - 1 )
93- @click .option ("--all" , help = "Submit mapped variants for every score set in MaveDB." , is_flag = True )
131+ @click .option ("--all" , help = "Submit variants for every score set in MaveDB." , is_flag = True )
132+ @click .option ("--unlinked" , default = False , help = "Only submit variants that have not already been linked to ClinGen alleles." , is_flag = True )
133+ @click .option ("--prefer-unmapped-hgvs" , default = False , help = "If the unmapped HGVS string is accession-based, use it in the submission instead of the mapped variant." , is_flag = True )
94134@click .option ("--suppress-output" , help = "Suppress final print output to the console." , is_flag = True )
95135@click .option ("--debug" , help = "Enable debug mode. This will send only one request at most to ClinGen" , is_flag = True )
96136def submit_clingen_urns_command (
97- db : Session , urns : Sequence [str ], all : bool , suppress_output : bool , debug : bool
137+ db : Session , urns : Sequence [str ], all : bool , unlinked : bool , prefer_unmapped_hgvs : bool , suppress_output : bool , debug : bool
98138) -> None :
99139 """
100140 Submit data to ClinGen for mapped variant allele ID generation for the given URNs.
@@ -111,7 +151,7 @@ def submit_clingen_urns_command(
111151 logger .error ("No URNs provided. Please provide at least one URN." )
112152 return
113153
114- submitted_variant_urns = submit_urns_to_clingen (db , urns , debug )
154+ submitted_variant_urns = submit_urns_to_clingen (db , urns , unlinked , prefer_unmapped_hgvs , debug )
115155
116156 if not suppress_output :
117157 print (", " .join (submitted_variant_urns ))
0 commit comments