Skip to content

Commit 8c734b5

Browse files
authored
Merge pull request #15 from VariantEffect/improve-vrs-2-output
Improve VRS 2 output
2 parents 088d8e9 + 8b2b75e commit 8c734b5

File tree

5 files changed

+54
-43
lines changed

5 files changed

+54
-43
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ dependencies = [
3636
"biopython",
3737
"tqdm",
3838
"click",
39-
"cool-seq-tool>=0.4.0.dev1",
40-
"ga4gh.vrs~=2.0.0-a6",
39+
"cool-seq-tool==0.4.0.dev3",
40+
"ga4gh.vrs==2.0.0-a6",
4141
"gene_normalizer[etl,pg]==0.3.0-dev2",
4242
"pydantic>=2",
4343
"python-dotenv",

src/dcd_mapping/annotate.py

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
ScoresetMetadata,
4343
TargetSequenceType,
4444
TxSelectResult,
45+
VrsVersion,
4546
)
4647

4748
_logger = logging.getLogger(__name__)
@@ -243,8 +244,9 @@ def _annotate_allele_mapping(
243244
mapped_score: MappedScore,
244245
tx_results: TxSelectResult | None,
245246
metadata: ScoresetMetadata,
247+
vrs_version: VrsVersion = VrsVersion.V_2,
246248
) -> ScoreAnnotationWithLayer:
247-
"""Perform annotations and create VRS 1.3 equivalents for allele mappings."""
249+
"""Perform annotations and, if necessary, create VRS 1.3 equivalents for allele mappings."""
248250
pre_mapped: Allele = mapped_score.pre_mapped
249251
post_mapped: Allele = mapped_score.post_mapped
250252

@@ -274,24 +276,27 @@ def _annotate_allele_mapping(
274276
hgvs_string, syntax = _get_hgvs_string(post_mapped, accession)
275277
post_mapped.expressions = [Expression(syntax=syntax, value=hgvs_string)]
276278

277-
pre_mapped_vod = _allele_to_vod(pre_mapped)
278-
post_mapped_vod = _allele_to_vod(post_mapped)
279+
if vrs_version == VrsVersion.V_1_3:
280+
pre_mapped = _allele_to_vod(pre_mapped)
281+
post_mapped = _allele_to_vod(post_mapped)
279282

280283
return ScoreAnnotationWithLayer(
281-
pre_mapped=pre_mapped_vod,
282-
post_mapped=post_mapped_vod,
283-
pre_mapped_2_0=pre_mapped,
284-
post_mapped_2_0=post_mapped,
284+
pre_mapped=pre_mapped,
285+
post_mapped=post_mapped,
286+
vrs_version=vrs_version,
285287
mavedb_id=mapped_score.accession_id,
286288
score=float(mapped_score.score) if mapped_score.score else None,
287289
annotation_layer=mapped_score.annotation_layer,
288290
)
289291

290292

291293
def _annotate_haplotype_mapping(
292-
mapping: MappedScore, tx_results: TxSelectResult | None, metadata: ScoresetMetadata
294+
mapping: MappedScore,
295+
tx_results: TxSelectResult | None,
296+
metadata: ScoresetMetadata,
297+
vrs_version: VrsVersion = VrsVersion.V_2,
293298
) -> ScoreAnnotationWithLayer:
294-
"""Perform annotations and create VRS 1.3 equivalents for haplotype mappings."""
299+
"""Perform annotations and, if necessary, create VRS 1.3 equivalents for haplotype mappings."""
295300
pre_mapped: Haplotype = mapping.pre_mapped # type: ignore
296301
post_mapped: Haplotype = mapping.post_mapped # type: ignore
297302
# get vrs_ref_allele_seq for pre-mapped variants
@@ -324,14 +329,14 @@ def _annotate_haplotype_mapping(
324329
hgvs, syntax = _get_hgvs_string(allele, accession)
325330
allele.expressions = [Expression(syntax=syntax, value=hgvs)]
326331

327-
pre_mapped_converted = _haplotype_to_haplotype_1_3(pre_mapped)
328-
post_mapped_converted = _haplotype_to_haplotype_1_3(post_mapped)
332+
if vrs_version == VrsVersion.V_1_3:
333+
pre_mapped = _haplotype_to_haplotype_1_3(pre_mapped)
334+
post_mapped = _haplotype_to_haplotype_1_3(post_mapped)
329335

330336
return ScoreAnnotationWithLayer(
331-
pre_mapped=pre_mapped_converted,
332-
post_mapped=post_mapped_converted,
333-
pre_mapped_2_0=pre_mapped,
334-
post_mapped_2_0=post_mapped,
337+
pre_mapped=pre_mapped,
338+
post_mapped=post_mapped,
339+
vrs_version=vrs_version,
335340
mavedb_id=mapping.accession_id,
336341
score=float(mapping.score) if mapping.score is not None else None,
337342
annotation_layer=mapping.annotation_layer,
@@ -342,6 +347,7 @@ def annotate(
342347
mapped_scores: list[MappedScore],
343348
tx_results: TxSelectResult | None,
344349
metadata: ScoresetMetadata,
350+
vrs_version: VrsVersion = VrsVersion.V_2,
345351
) -> list[ScoreAnnotationWithLayer]:
346352
"""Given a list of mappings, add additional contextual data:
347353
@@ -365,13 +371,17 @@ def annotate(
365371
mapped_score.post_mapped, Haplotype
366372
):
367373
score_annotations.append(
368-
_annotate_haplotype_mapping(mapped_score, tx_results, metadata)
374+
_annotate_haplotype_mapping(
375+
mapped_score, tx_results, metadata, vrs_version
376+
)
369377
)
370378
elif isinstance(mapped_score.pre_mapped, Allele) and isinstance(
371379
mapped_score.post_mapped, Allele
372380
):
373381
score_annotations.append(
374-
_annotate_allele_mapping(mapped_score, tx_results, metadata)
382+
_annotate_allele_mapping(
383+
mapped_score, tx_results, metadata, vrs_version
384+
)
375385
)
376386
else:
377387
ValueError("inconsistent variant structure")
@@ -464,7 +474,6 @@ def save_mapped_output_json(
464474
mappings: list[ScoreAnnotationWithLayer],
465475
align_result: AlignmentResult,
466476
tx_output: TxSelectResult | None,
467-
include_vrs_2: bool = False,
468477
preferred_layer_only: bool = False,
469478
output_path: Path | None = None,
470479
) -> Path:
@@ -474,7 +483,6 @@ def save_mapped_output_json(
474483
:param mappings: A list of annotated score mappings (ScoreAnnotationWithLayer objects)
475484
:param align_result: Alignment information for a score set
476485
:param tx_output: Transcript output for a score set
477-
:param include_vrs_2: if true, also include VRS 2.0 mappings
478486
:param output_path: specific location to save output to. Default to
479487
<dcd_mapping_data_dir>/urn:mavedb:00000XXX-X-X_mapping_<ISO8601 datetime>.json
480488
:return: output location
@@ -523,19 +531,14 @@ def save_mapped_output_json(
523531
mapped_scores=mapped_scores,
524532
)
525533

526-
if not include_vrs_2:
527-
for m in output.mapped_scores:
528-
m.pre_mapped_2_0 = None
529-
m.post_mapped_2_0 = None
530-
531534
if not output_path:
532535
now = datetime.datetime.now(tz=datetime.UTC).isoformat()
533536
output_path = LOCAL_STORE_PATH / f"{urn}_mapping_{now}.json"
534537

535538
_logger.info("Saving mapping output to %s", output_path)
536539
with output_path.open("w") as file:
537540
json.dump(
538-
json.loads(output.model_dump_json(exclude_unset=True, exclude_none=True)),
541+
output.model_dump(exclude_unset=True, exclude_none=True),
539542
file,
540543
indent=4,
541544
)

src/dcd_mapping/cli.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from dcd_mapping.align import AlignmentError
1010
from dcd_mapping.main import map_scoreset_urn
1111
from dcd_mapping.resource_utils import ResourceAcquisitionError
12+
from dcd_mapping.schemas import VrsVersion
1213
from dcd_mapping.transcripts import TxSelectError
1314
from dcd_mapping.vrs_map import VrsMapError
1415

@@ -33,11 +34,12 @@
3334
help="Desired location at which output file should be saved",
3435
)
3536
@click.option(
36-
"--include_vrs_2",
37+
"--vrs_version",
3738
"-v",
38-
is_flag=True,
39-
default=False,
40-
help="Include VRS 2.0 mappings",
39+
type=click.Choice(["1.3", "2"]),
40+
default="2",
41+
show_default=True,
42+
help="Version to use for output VRS objects",
4143
)
4244
@click.option(
4345
"--prefer_genomic",
@@ -49,7 +51,7 @@ def cli(
4951
urn: str,
5052
debug: bool,
5153
output: Path | None,
52-
include_vrs_2: bool,
54+
vrs_version: VrsVersion,
5355
prefer_genomic: bool,
5456
) -> None:
5557
"""Get VRS mapping on preferred transcript for URN.
@@ -72,7 +74,7 @@ def cli(
7274
_logger.debug("debug logging enabled")
7375
try:
7476
asyncio.run(
75-
map_scoreset_urn(urn, output, include_vrs_2, prefer_genomic, silent=False)
77+
map_scoreset_urn(urn, output, vrs_version, prefer_genomic, silent=False)
7678
)
7779
except (
7880
LookupError,

src/dcd_mapping/main.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from dcd_mapping.schemas import (
1616
ScoreRow,
1717
ScoresetMetadata,
18+
VrsVersion,
1819
)
1920
from dcd_mapping.transcripts import TxSelectError, select_transcript
2021
from dcd_mapping.vrs_map import VrsMapError, vrs_map
@@ -123,7 +124,7 @@ async def map_scoreset(
123124
metadata: ScoresetMetadata,
124125
records: list[ScoreRow],
125126
output_path: Path | None = None,
126-
include_vrs_2: bool = False,
127+
vrs_version: VrsVersion = VrsVersion.V_2,
127128
prefer_genomic: bool = False,
128129
silent: bool = True,
129130
) -> None:
@@ -177,13 +178,12 @@ async def map_scoreset(
177178
_emit_info("VRS mapping complete.", silent)
178179

179180
_emit_info("Annotating metadata and saving to file...", silent)
180-
vrs_results = annotate(vrs_results, transcript, metadata)
181+
vrs_results = annotate(vrs_results, transcript, metadata, vrs_version)
181182
final_output = save_mapped_output_json(
182183
metadata.urn,
183184
vrs_results,
184185
alignment_result,
185186
transcript,
186-
include_vrs_2,
187187
prefer_genomic,
188188
output_path,
189189
)
@@ -193,15 +193,15 @@ async def map_scoreset(
193193
async def map_scoreset_urn(
194194
urn: str,
195195
output_path: Path | None = None,
196-
include_vrs_2: bool = False,
196+
vrs_version: VrsVersion = VrsVersion.V_2,
197197
prefer_genomic: bool = False,
198198
silent: bool = True,
199199
) -> None:
200200
"""Perform end-to-end mapping for a scoreset.
201201
202202
:param urn: identifier for a scoreset.
203203
:param output_path: optional path to save output at
204-
:param include_vrs_2: if true, include VRS 2.0 mappings in output JSON
204+
:param vrs_version: version of VRS objects to output (1.3 or 2)
205205
:param silent: if True, suppress console information output
206206
"""
207207
try:
@@ -213,5 +213,5 @@ async def map_scoreset_urn(
213213
click.echo(f"Error: {msg}")
214214
raise e
215215
await map_scoreset(
216-
metadata, records, output_path, include_vrs_2, prefer_genomic, silent
216+
metadata, records, output_path, vrs_version, prefer_genomic, silent
217217
)

src/dcd_mapping/schemas.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ class TargetType(str, Enum):
2424
OTHER_NC = "Other noncoding"
2525

2626

27+
class VrsVersion(str, Enum):
28+
"""Define VRS versions"""
29+
30+
V_1_3 = "1.3"
31+
V_2 = "2"
32+
33+
2734
class UniProtRef(BaseModel):
2835
"""Store metadata associated with MaveDB UniProt reference"""
2936

@@ -157,10 +164,9 @@ class ScoreAnnotation(BaseModel):
157164
This model defines what an individual mapping instance looks like in the final JSON.
158165
"""
159166

160-
pre_mapped: vrs_v1_schemas.VariationDescriptor | vrs_v1_schemas.Haplotype
161-
post_mapped: vrs_v1_schemas.VariationDescriptor | vrs_v1_schemas.Haplotype
162-
pre_mapped_2_0: Allele | Haplotype | None = None
163-
post_mapped_2_0: Allele | Haplotype | None = None
167+
pre_mapped: vrs_v1_schemas.VariationDescriptor | vrs_v1_schemas.Haplotype | Allele | Haplotype
168+
post_mapped: vrs_v1_schemas.VariationDescriptor | vrs_v1_schemas.Haplotype | Allele | Haplotype
169+
vrs_version: VrsVersion
164170
mavedb_id: StrictStr
165171
relation: Literal["SO:is_homologous_to"] = "SO:is_homologous_to"
166172
score: float | None

0 commit comments

Comments
 (0)