33import logging
44from typing import Any
55
6+ from ga4gh .cat_vrs .models import Constraint
67from ga4gh .va_spec .base .core import Statement
78from ga4gh .vrs .models import Allele , Expression , SequenceLocation
89
@@ -82,27 +83,39 @@ def convert_gks_to_hl7_v2(statement: Statement) -> dict[str, Any]:
8283 # 505 - Discrete Genetic Variant (placeholder until models solidify)
8384 # TODO: need to wait for models for this or find out what expected format is
8485
85- members = subject_variant .members or []
86- genomic_allele , genomic_location = _find_genomic_allele_and_location (members )
86+ constraints = subject_variant .constraints or []
87+ allele , location = None , None
88+ if constraints :
89+ allele , location = _find_genomic_allele_and_location (constraints )
90+ else :
91+ err = "subjectVariant.constraints is missing or empty"
92+ raise ValueError (err )
8793
8894 # Get hgvs.g expression from the allele (e.g., 'NC_000007.13:g.140453136A>T')
8995 # use seqrepo here instead
90- expression = _find_expression (genomic_allele , syntax = "hgvs.g" )
91- hgvs_g = expression .value if expression else None
92- chromosome , g_dot = _parse_hgvs_g (hgvs_g )
96+ genomic_expression = _find_expression (allele , syntax = "hgvs.g" )
97+ hgvs_g = genomic_expression .value if genomic_expression else None
98+ chromosome_ref_seq , g_dot = _parse_hgvs_dot (hgvs_g )
9399
94100 # 511 - Allele start/end
95- allele_start , allele_end = _get_location_interval (genomic_location )
101+ allele_start , allele_end = _get_location_interval (location )
96102
97103 # 513 - DNA Region
98104
99105 # 514 - Gene Studied
106+ gene_studied = proposition .geneContextQualifier .name
100107
101108 # 516 - Transcript Reference Sequence ID
102109
103110 # 518 - DNA Change
111+ coding_expression = _find_expression (allele , syntax = "hgvs.c" )
112+ hgvs_c = coding_expression .value if coding_expression else None
113+ c_dot = _parse_hgvs_dot (hgvs_c )[1 ]
104114
105115 # 520 - Amino Acid Change
116+ protein_expression = _find_expression (allele , syntax = "hgvs.p" )
117+ hgvs_p = protein_expression .value if protein_expression else None
118+ p_dot = _parse_hgvs_dot (hgvs_p )[1 ]
106119
107120 # 521 - Molecular Consequence - on hold until approved
108121
@@ -148,11 +161,17 @@ def convert_gks_to_hl7_v2(statement: Statement) -> dict[str, Any]:
148161
149162 # 575 - Interpretation Note
150163
151- result : dict [str , Any ] = {}
152- result [HL7V2 ["VARIANT_NAME" ]] = variant_name
153- result [HL7V2 ["CHROMOSOME" ]] = chromosome
154- result [HL7V2 ["ALLELE_START" ]] = allele_start
155- result [HL7V2 ["ALLELE_END" ]] = allele_end
164+ result : dict [str , Any ] = HL7V2 .copy () # start with all keys
165+ result ["VARIANT_NAME" ] = variant_name
166+ # TODO: this needs to be converted to shorthand
167+ result ["CHROMOSOME" ] = chromosome_ref_seq
168+ result ["ALLELE_START" ] = allele_start
169+ result ["ALLELE_END" ] = allele_end
170+ result ["GENE_STUDIED" ] = gene_studied
171+ result ["DNA_CHANGE" ] = c_dot
172+ result ["AMINO_ACID_CHANGE" ] = p_dot
173+ result ["GENOMIC_DNA_CHANGE" ] = g_dot
174+ result ["GENOMIC_REFERENCE_SEQUENCE_ID" ] = chromosome_ref_seq
156175
157176 return result
158177
@@ -161,24 +180,19 @@ def convert_gks_to_hl7_v2(statement: Statement) -> dict[str, Any]:
161180
162181
def _find_genomic_allele_and_location(
    constraints: list[Constraint],
) -> tuple[Allele, SequenceLocation] | None:
    """
    From a list of constraints, return the first usable (allele, location) pair.

    A constraint is considered only when ``constraint.root.type`` equals
    ``"DefiningAlleleConstraint"``; every other constraint type is skipped.
    Among those, the first one whose allele exposes a non-None ``location``
    is returned.

    Args:
        constraints: constraints to scan, in priority order.

    Returns:
        ``(allele, location)`` for the first matching constraint, or ``None``
        when no constraint yields an allele with a location. Callers must
        check for ``None`` before unpacking the result.
    """
    for constraint in constraints:
        root = constraint.root
        if root.type != "DefiningAlleleConstraint":
            continue
        allele = root.allele
        # NOTE(review): the allele is presumably allowed to be a bare
        # reference (e.g. an IRI string) rather than an inline Allele --
        # confirm against the cat-vrs model. Such a value has no `.location`,
        # so look it up defensively and skip instead of raising AttributeError.
        location = getattr(allele, "location", None)
        if location is None:
            continue
        return allele, location
    return None
183197
184198
@@ -190,7 +204,7 @@ def _find_expression(allele: Allele, syntax: str) -> Expression | None:
190204 expressions = allele .expressions or []
191205
192206 for expr in expressions :
193- s = expr .get ( " syntax" )
207+ s = expr .syntax
194208 if s == syntax :
195209 return expr
196210 # TODO: raise error?
@@ -209,16 +223,16 @@ def _get_location_interval(location: SequenceLocation) -> tuple[int, int]:
209223# --- Helpers: transformation / parsing ---------------------------------------
210224
211225
def _parse_hgvs_dot(hgvs_value: str | None) -> tuple[str | None, str | None]:
    """
    Parse an hgvs.(g,c,p) expression.

    Expected styles:
      - 'NC_000007.13:g.140453136A>T'

    Args:
        hgvs_value: the full HGVS string, or None/empty when no expression of
            the requested syntax was found for the allele.

    Returns:
        (chromosome, dot) where chromosome is the text left of the first ':',
        and dot is everything after it (g., c., or p. onwards).
        (None, None) when hgvs_value is None or empty, so callers that index
        the result (e.g. ``[1]``) get None instead of an exception.

    Raises:
        ValueError: if hgvs_value is non-empty but contains no ':' separator.
    """
    # A missing expression is an expected case upstream, not an error.
    if not hgvs_value:
        return None, None

    # partition splits on the first ':' only, like split(":", 1), but lets us
    # detect a missing separator explicitly instead of failing on unpacking.
    chromosome, separator, dot = hgvs_value.partition(":")
    if not separator:
        err = f"Invalid HGVS expression (missing ':'): {hgvs_value!r}"
        raise ValueError(err)

    return chromosome, dot
0 commit comments