Skip to content

Commit 50e1da7

Browse files
committed
refactor variant export
1 parent 25b0fcc commit 50e1da7

File tree

2 files changed

+130
-100
lines changed

2 files changed

+130
-100
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ga4ghphetools"
3-
version = "0.5.14"
3+
version = "0.5.15"
44
edition = "2021"
55
keywords = ["GA4GH", "Phenopacket Schema", "Human Phenotype Ontology"]
66
description = "Generate GA4GH phenopackets from tabular data"

src/ppkt/ppkt_variant_exporter.rs

Lines changed: 129 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::collections::HashMap;
22

33
use phenopackets::ga4gh::vrsatile::v1::Expression;
4+
use phenopackets::ga4gh::vrsatile::v1::Extension;
45
use phenopackets::ga4gh::vrsatile::v1::GeneDescriptor;
56
use phenopackets::ga4gh::vrsatile::v1::MoleculeContext;
67
use phenopackets::ga4gh::vrsatile::v1::VariationDescriptor;
@@ -22,6 +23,59 @@ use crate::dto::hgvs_variant::HgvsVariant;
2223
use crate::dto::intergenic_variant::IntergenicHgvsVariant;
2324
use crate::dto::structural_variant::StructuralVariant;
2425

26+
27+
/// This is a helper ot reduce redundancy
28+
enum VariantRef<'a> {
29+
Hgvs(&'a HgvsVariant),
30+
Sv(&'a StructuralVariant),
31+
Intergenic(&'a IntergenicHgvsVariant),
32+
}
33+
34+
/// A convenience builder structure to simplify creation of the
35+
/// GA4GH VariationDescriptor message
36+
struct VDescBuilder {
37+
id: String,
38+
gene_context: Option<GeneDescriptor>,
39+
expressions: Vec<Expression>,
40+
vcf_record: Option<VcfRecord>,
41+
structural_type: Option<OntologyClass>,
42+
label: String,
43+
allelic_state: OntologyClass,
44+
extensions: Vec<Extension>,
45+
}
46+
47+
impl VDescBuilder {
48+
fn build(self) -> VariationDescriptor {
49+
VariationDescriptor {
50+
id: self.id,
51+
variation: None,
52+
label: self.label,
53+
description: String::default(),
54+
gene_context: self.gene_context,
55+
expressions: self.expressions,
56+
vcf_record: self.vcf_record,
57+
xrefs: vec![],
58+
alternate_labels: vec![],
59+
extensions: self.extensions,
60+
molecule_context: MoleculeContext::Genomic.into(),
61+
structural_type: self.structural_type,
62+
vrs_ref_allele_seq: String::default(),
63+
allelic_state: Some(self.allelic_state),
64+
}
65+
}
66+
67+
fn with_optional_gene(mut self, hgnc_id: Option<String>, symbol: Option<String>) -> Self {
68+
if let (Some(id), Some(sym)) = (hgnc_id, symbol) {
69+
self.gene_context = Some(PpktVariantExporter::gene_descriptor(id, sym));
70+
}
71+
self
72+
}
73+
74+
}
75+
76+
77+
78+
2579
/// Structure to coordinate extraction of Variant (Interpretation) information to export to Phenopacket
2680
2781
pub struct PpktVariantExporter {
@@ -44,6 +98,31 @@ impl PpktVariantExporter {
4498
}
4599
}
46100

101+
/// A helper function to simplify getting the desired Variant object
102+
/// (HGVS, SV, intergenic, mitochondrial) from the allele string
103+
fn lookup_variant<'a>(
104+
&'a self,
105+
allele: &str,
106+
) -> Option<VariantRef<'a>> {
107+
self.hgvs_variants
108+
.get(allele)
109+
.map(VariantRef::Hgvs)
110+
.or_else(|| self.structural_variants.get(allele).map(VariantRef::Sv))
111+
.or_else(|| self.intergenic_variants.get(allele).map(VariantRef::Intergenic))
112+
}
113+
114+
/// We add the codes ACMG Pathogenic and Unknown Therapeutic actionability
115+
/// to each variant description
116+
fn pathogenic_variant(vdesc: VariationDescriptor) -> VariantInterpretation {
117+
VariantInterpretation {
118+
acmg_pathogenicity_classification:
119+
AcmgPathogenicityClassification::Pathogenic.into(),
120+
therapeutic_actionability:
121+
TherapeuticActionability::UnknownActionability.into(),
122+
variation_descriptor: Some(vdesc),
123+
}
124+
}
125+
47126
/// Builds a list of `Interpretation` objects for a given phenopacket row.
48127
///
49128
/// This function performs the following steps:
@@ -74,19 +153,14 @@ impl PpktVariantExporter {
74153
if allele_count == 0 {
75154
return Err(format!("No alleles found in row {:?}", ppkt_row));
76155
}
77-
if let Some(hgvs) = self.hgvs_variants.get(allele) {
78-
let vinterp = self.get_hgvs_variant_interpretation( hgvs, allele_count);
79-
v_interpretation_list.push(vinterp);
80-
} else if let Some(sv) = self.structural_variants.get(allele) {
81-
let vinterp = self.get_sv_variant_interpretation(sv, allele_count);
82-
v_interpretation_list.push(vinterp);
83-
} else if let Some(ig) = self.intergenic_variants.get(allele) {
84-
let vinterp = self.get_intergenic_variant_interpretation(ig, allele_count);
85-
v_interpretation_list.push(vinterp);
86-
87-
} else {
88-
return Err(format!("Could not find validated variant for allele {}", allele));
89-
}
156+
let vinterp = match self.lookup_variant(allele) {
157+
Some(VariantRef::Hgvs(v)) => self.get_hgvs_variant_interpretation(v, allele_count),
158+
Some(VariantRef::Sv(v)) => self.get_sv_variant_interpretation(v, allele_count),
159+
Some(VariantRef::Intergenic(v)) =>
160+
self.get_intergenic_variant_interpretation(v, allele_count),
161+
None => return Err(format!("Could not find validated variant for allele {}", allele)),
162+
};
163+
v_interpretation_list.push(vinterp);
90164
}
91165
if self.disease_list.is_empty() {
92166
return Err(format!("No disease objects found"));
@@ -132,47 +206,41 @@ impl PpktVariantExporter {
132206
Ok(interpretation_list)
133207
}
134208

209+
/// Create a GeneDescriptor message for the Phenopacket
210+
/// The elements are used in the gene context field.
211+
fn gene_descriptor(hgnc_id: impl Into<String>, symbol: impl Into<String>)
212+
-> GeneDescriptor {
213+
GeneDescriptor {
214+
value_id: hgnc_id.into(),
215+
symbol: symbol.into(),
216+
description: String::default(),
217+
alternate_ids: vec![],
218+
alternate_symbols: vec![],
219+
xrefs: vec![],
220+
}
221+
}
135222

136223

137224
fn get_sv_variant_interpretation(
138225
&self,
139226
sv: &StructuralVariant,
140227
allele_count: usize
141228
) -> VariantInterpretation {
142-
let gene_ctxt = GeneDescriptor{
143-
value_id: sv.hgnc_id().to_string(),
144-
symbol: sv.gene_symbol().to_string(),
145-
description: String::default(),
146-
alternate_ids: vec![] ,
147-
alternate_symbols: vec![] ,
148-
xrefs: vec![]
149-
};
229+
let gene_ctxt = Self::gene_descriptor(sv.hgnc_id(), sv.gene_symbol());
150230
let is_x = sv.is_x_chromosomal();
151231
let sv_class = sv.get_sequence_ontology_term();
152232
let allelic_state = self.get_genotype_term(allele_count, sv.is_x_chromosomal());
153-
154-
let vdesc = VariationDescriptor {
233+
let vdesc = VDescBuilder {
155234
id: sv.variant_key().to_string(),
156-
variation: None,
157-
label: sv.label().to_string(),
158-
description: String::default(),
159235
gene_context: Some(gene_ctxt),
160236
expressions: vec![],
161237
vcf_record: None,
162-
xrefs: vec![],
163-
alternate_labels: vec![],
164-
extensions: vec![],
165-
molecule_context: MoleculeContext::Genomic.into(),
166238
structural_type: Some(sv_class),
167-
vrs_ref_allele_seq: String::default(),
168-
allelic_state: Some(allelic_state),
169-
};
170-
let vi = VariantInterpretation{
171-
acmg_pathogenicity_classification: AcmgPathogenicityClassification::Pathogenic.into(),
172-
therapeutic_actionability: TherapeuticActionability::UnknownActionability.into(),
173-
variation_descriptor: Some(vdesc)
174-
};
175-
vi
239+
label: sv.label().to_string(),
240+
allelic_state,
241+
extensions: vec![],
242+
}.build();
243+
Self::pathogenic_variant(vdesc)
176244
}
177245

178246

@@ -181,14 +249,7 @@ impl PpktVariantExporter {
181249
hgvs: &HgvsVariant,
182250
allele_count: usize)
183251
-> VariantInterpretation {
184-
let gene_ctxt = GeneDescriptor{
185-
value_id: hgvs.hgnc_id().to_string(),
186-
symbol: hgvs.symbol().to_string(),
187-
description: String::default(),
188-
alternate_ids: vec![] ,
189-
alternate_symbols: vec![] ,
190-
xrefs: vec![]
191-
};
252+
let gene_ctxt = Self::gene_descriptor(hgvs.hgnc_id(), hgvs.symbol());
192253
let vcf_record = Self::get_vcf_record(
193254
hgvs.assembly(),
194255
hgvs.chr(),
@@ -216,28 +277,17 @@ impl PpktVariantExporter {
216277
expression_list.push(hgvs_p);
217278
};
218279
let allelic_state = self.get_genotype_term(allele_count, hgvs.is_x_chromosomal());
219-
let vdesc = VariationDescriptor{
220-
id: hgvs.variant_key().to_string(),
221-
variation: None,
222-
label: String::default(),
223-
description: String::default(),
280+
let vdesc = VDescBuilder {
281+
id: hgvs.variant_key(),
224282
gene_context: Some(gene_ctxt),
225283
expressions: expression_list,
226284
vcf_record: Some(vcf_record),
227-
xrefs: vec![],
228-
alternate_labels: vec![],
229-
extensions: vec![],
230-
molecule_context: MoleculeContext::Genomic.into(),
231285
structural_type: None,
232-
vrs_ref_allele_seq: String::default(),
233-
allelic_state: Some(allelic_state)
234-
};
235-
let vi = VariantInterpretation{
236-
acmg_pathogenicity_classification: AcmgPathogenicityClassification::Pathogenic.into(),
237-
therapeutic_actionability: TherapeuticActionability::UnknownActionability.into(),
238-
variation_descriptor: Some(vdesc)
239-
};
240-
vi
286+
label: String::default(),
287+
allelic_state,
288+
extensions: vec![]
289+
}.build();
290+
Self::pathogenic_variant(vdesc)
241291
}
242292

243293

@@ -305,40 +355,20 @@ impl PpktVariantExporter {
305355
};
306356
let expression_list = vec![hgvs_g];
307357
let allelic_state = self.get_genotype_term(allele_count, ig.is_x_chromosomal());
308-
let mut vdesc = VariationDescriptor{
309-
id: ig.variant_key().to_string(),
310-
variation: None,
311-
label: String::default(),
312-
description: String::default(),
313-
gene_context: None,
314-
expressions: expression_list,
315-
vcf_record: Some(vcf_record),
316-
xrefs: vec![],
317-
alternate_labels: vec![],
318-
extensions: vec![],
319-
molecule_context: MoleculeContext::Genomic.into(),
320-
structural_type: None,
321-
vrs_ref_allele_seq: String::default(),
322-
allelic_state: Some(allelic_state)
323-
};
324-
if let (Some(hgnc_id), Some(symbol)) = (ig.hgnc_id(), ig.symbol()) {
325-
let gene_ctxt = GeneDescriptor {
326-
value_id: hgnc_id,
327-
symbol: symbol,
328-
description: String::default(),
329-
alternate_ids: vec![],
330-
alternate_symbols: vec![],
331-
xrefs: vec![]
332-
};
333-
vdesc.gene_context = Some(gene_ctxt);
334-
};
335-
let vi = VariantInterpretation{
336-
acmg_pathogenicity_classification: AcmgPathogenicityClassification::Pathogenic.into(),
337-
therapeutic_actionability: TherapeuticActionability::UnknownActionability.into(),
338-
variation_descriptor: Some(vdesc)
339-
};
340-
vi
341-
358+
359+
let vdesc = VDescBuilder {
360+
id: ig.variant_key().to_string(),
361+
gene_context: None,
362+
expressions: expression_list,
363+
vcf_record: Some(vcf_record),
364+
structural_type: None,
365+
label: String::default(),
366+
allelic_state,
367+
extensions: vec![],
368+
}
369+
.with_optional_gene(ig.hgnc_id(), ig.symbol())
370+
.build();
371+
Self::pathogenic_variant(vdesc)
342372
}
343373

344374

0 commit comments

Comments
 (0)