Skip to content

Commit 38ee7ff

Browse files
committed
correcting & QC MOI/allele count
1 parent 1d0da51 commit 38ee7ff

File tree

15 files changed

+918
-223
lines changed

15 files changed

+918
-223
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ga4ghphetools"
3-
version = "0.4.156"
3+
version = "0.5.1"
44
edition = "2021"
55
keywords = ["GA4GH", "Phenopacket Schema", "Human Phenotype Ontology"]
66
description = "Generate GA4GH phenopackets from tabular data"
@@ -44,6 +44,7 @@ tera = "1.20.1"
4444
uuid = { version = "1.18", features = ["v4"] }
4545
rust_xlsxwriter = { version = "0.92.2", optional = true }
4646
urlencoding = "2.1.3"
47+
walkdir = "2.5"
4748

4849
[features]
4950
cli = ["dep:clap"]

src/dto/cohort_dto.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,20 @@ pub struct ModeOfInheritance {
143143
pub citation: String
144144
}
145145

146+
impl ModeOfInheritance {
147+
pub fn is_autosomal_dominant(&self) -> bool {
148+
self.hpo_id == "HP:0000006"
149+
}
150+
151+
pub fn is_autosomal_recessive(&self) -> bool {
152+
self.hpo_id == "HP:0000007"
153+
}
154+
155+
pub fn is_x_chromosomal(&self) -> bool {
156+
self.hpo_id == "HP:0001417" || self.hpo_id == "HP:0001423" || self.hpo_id == "HP:0001419"
157+
}
158+
}
159+
146160

147161

148162

src/dto/intergenic_variant.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ impl IntergenicHgvsVariant {
133133

134134
}
135135

136-
136+
#[cfg(test)]
137137
mod tests {
138138
use super::*;
139139

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub mod hpoa;
2929
pub mod factory;
3030
pub mod persistence;
3131
pub mod ppkt;
32+
pub mod repo;
3233
pub mod variant;
3334

3435
#[cfg(test)]

src/ppkt/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use phenopackets::schema::v2::Phenopacket;
1010

1111
use crate::{dto::cohort_dto::CohortData, ppkt::ppkt_exporter::PpktExporter};
1212

13+
mod ppkt_variant_exporter;
1314
pub mod ppkt_exporter;
1415
pub mod ppkt_row;
1516

src/ppkt/ppkt_exporter.rs

Lines changed: 13 additions & 221 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ use crate::dto::cohort_dto::{CohortData, DiseaseData, RowData};
2424

2525
use crate::dto::hgvs_variant::HgvsVariant;
2626
use crate::dto::hpo_term_dto::HpoTermDuplet;
27+
use crate::dto::intergenic_variant::IntergenicHgvsVariant;
2728
use crate::dto::structural_variant::StructuralVariant;
29+
use crate::ppkt::ppkt_variant_exporter::PpktVariantExporter;
2830
use phenopacket_tools;
2931
use phenopacket_tools::builders::builder::Builder;
3032

@@ -239,133 +241,9 @@ impl PpktExporter {
239241

240242

241243

242-
fn get_sv_variant_interpretation(
243-
sv: &StructuralVariant,
244-
allele_count: usize
245-
) -> VariantInterpretation {
246-
let gene_ctxt = GeneDescriptor{
247-
value_id: sv.hgnc_id().to_string(),
248-
symbol: sv.gene_symbol().to_string(),
249-
description: String::default(),
250-
alternate_ids: vec![] ,
251-
alternate_symbols: vec![] ,
252-
xrefs: vec![]
253-
};
254-
let is_x = sv.is_x_chromosomal();
255-
let sv_class = sv.get_sequence_ontology_term();
256-
let allelic_state = Self::get_allele_term(allele_count, sv.is_x_chromosomal());
257-
258-
let vdesc = VariationDescriptor {
259-
id: sv.variant_key().to_string(),
260-
variation: None,
261-
label: sv.label().to_string(),
262-
description: String::default(),
263-
gene_context: Some(gene_ctxt),
264-
expressions: vec![],
265-
vcf_record: None,
266-
xrefs: vec![],
267-
alternate_labels: vec![],
268-
extensions: vec![],
269-
molecule_context: MoleculeContext::Genomic.into(),
270-
structural_type: Some(sv_class),
271-
vrs_ref_allele_seq: String::default(),
272-
allelic_state: Some(allelic_state),
273-
};
274-
let vi = VariantInterpretation{
275-
acmg_pathogenicity_classification: AcmgPathogenicityClassification::Pathogenic.into(),
276-
therapeutic_actionability: TherapeuticActionability::UnknownActionability.into(),
277-
variation_descriptor: Some(vdesc)
278-
};
279-
vi
280-
}
281244

282-
fn get_allele_term(allele_count: usize, is_x: bool) -> OntologyClass {
283-
if allele_count == 2 {
284-
return OntologyClass {
285-
id: "GENO:0000136".to_string(),
286-
label: "homozygous".to_string(),
287-
};
288-
} else if is_x {
289-
return OntologyClass {
290-
id: "GENO:0000134".to_string(),
291-
label: "hemizygous".to_string(),
292-
};
293-
} else {
294-
return OntologyClass {
295-
id: "GENO:0000135".to_string(),
296-
label: "heterozygous".to_string(),
297-
};
298-
}
299-
}
300-
301-
fn get_hgvs_variant_interpretation(
302-
hgvs: &HgvsVariant,
303-
allele_count: usize)
304-
-> VariantInterpretation {
305-
let gene_ctxt = GeneDescriptor{
306-
value_id: hgvs.hgnc_id().to_string(),
307-
symbol: hgvs.symbol().to_string(),
308-
description: String::default(),
309-
alternate_ids: vec![] ,
310-
alternate_symbols: vec![] ,
311-
xrefs: vec![]
312-
};
313-
let vcf_record = VcfRecord{
314-
genome_assembly: hgvs.assembly().to_string(),
315-
chrom: hgvs.chr().to_string(),
316-
pos: hgvs.position() as u64,
317-
id: String::default(),
318-
r#ref: hgvs.ref_allele().to_string(),
319-
alt: hgvs.alt_allele().to_string(),
320-
qual: String::default(),
321-
filter: String::default(),
322-
info: String::default(),
323-
};
324245

325-
let hgvs_c = Expression{
326-
syntax: "hgvs.c".to_string(),
327-
value: format!("{}:{}", hgvs.transcript(), hgvs.hgvs()),
328-
version: String::default()
329-
};
330-
let mut expression_list = vec![hgvs_c];
331-
let hgvs_g = Expression{
332-
syntax: "hgvs.g".to_string(),
333-
value: hgvs.g_hgvs().to_string(),
334-
version: String::default(),
335-
};
336-
expression_list.push(hgvs_g);
337-
if let Some(hgsvp) = hgvs.p_hgvs() {
338-
let hgvs_p = Expression{
339-
syntax: "hgvs.p".to_string(),
340-
value: hgsvp,
341-
version: String::default(),
342-
};
343-
expression_list.push(hgvs_p);
344-
};
345-
let allelic_state = Self::get_allele_term(allele_count, hgvs.is_x_chromosomal());
346-
let vdesc = VariationDescriptor{
347-
id: hgvs.variant_key().to_string(),
348-
variation: None,
349-
label: String::default(),
350-
description: String::default(),
351-
gene_context: Some(gene_ctxt),
352-
expressions: expression_list,
353-
vcf_record: Some(vcf_record),
354-
xrefs: vec![],
355-
alternate_labels: vec![],
356-
extensions: vec![],
357-
molecule_context: MoleculeContext::Genomic.into(),
358-
structural_type: None,
359-
vrs_ref_allele_seq: String::default(),
360-
allelic_state: Some(allelic_state)
361-
};
362-
let vi = VariantInterpretation{
363-
acmg_pathogenicity_classification: AcmgPathogenicityClassification::Pathogenic.into(),
364-
therapeutic_actionability: TherapeuticActionability::UnknownActionability.into(),
365-
variation_descriptor: Some(vdesc)
366-
};
367-
vi
368-
}
246+
369247

370248
/// Generate a random identifier (used in this struct for Interpretation objects).
371249
pub fn generate_id() -> String {
@@ -376,100 +254,9 @@ impl PpktExporter {
376254
.collect()
377255
}
378256

257+
379258

380-
fn extract_gene_symbol(vi: &VariantInterpretation) -> Result<String, String> {
381-
vi
382-
.variation_descriptor
383-
.as_ref()
384-
.and_then(|vd| vd.gene_context.as_ref())
385-
.map(|gc| gc.symbol.clone())
386-
.ok_or_else(|| format!(
387-
"Missing gene symbol for variant interpretation: {:?}",
388-
vi.variation_descriptor
389-
))
390-
}
391-
392-
/// Builds a list of `Interpretation` objects for a given phenopacket row.
393-
///
394-
/// This function performs the following steps:
395-
/// 1. Iterates through each allele in the input `RowData` and constructs corresponding
396-
/// `VariantInterpretation` objects based on HGVS or structural variant information.
397-
/// 2. Ensures allele counts are valid (1 or 2). Returns an error if invalid or if a matching
398-
/// validated variant cannot be found.
399-
/// 3. Validates that only one disease is present (melded/multiple diseases not implemented yet).
400-
/// 4. Extracts disease information and maps `GenomicInterpretation` objects to gene symbols.
401-
/// 5. For each disease, builds a `Diagnosis` linking its known genes to the corresponding
402-
/// genomic interpretations (if available).
403-
/// 6. Wraps all constructed diagnoses into `Interpretation` objects.
404-
///
405-
/// # Arguments
406-
/// * `ppkt_row` - A `RowData` object containing per-patient genotype and phenotype information.
407-
///
408-
/// # Returns
409-
/// * `Ok(Vec<Interpretation>)` if all data were valid and interpretable.
410-
/// * `Err(String)` if any validation, mapping, or extraction step failed (e.g., missing allele,
411-
/// missing gene symbol, inconsistent disease data).
412-
pub fn get_interpretation_list(
413-
&self,
414-
ppkt_row: &RowData)
415-
-> std::result::Result<Vec<Interpretation>, String> {
416-
let mut v_interpretation_list: Vec<VariantInterpretation> = Vec::new();
417-
for (allele, count) in &ppkt_row.allele_count_map {
418-
let allele_count = *count;
419-
if allele_count == 0 {
420-
return Err(format!("No alleles found in row {:?}", ppkt_row));
421-
}
422-
if let Some(hgvs) = self.cohort_dto.hgvs_variants.get(allele) {
423-
let vinterp = Self::get_hgvs_variant_interpretation( hgvs, allele_count);
424-
v_interpretation_list.push(vinterp);
425-
} else if let Some(sv) = self.cohort_dto.structural_variants.get(allele) {
426-
let vinterp = Self::get_sv_variant_interpretation(sv, allele_count);
427-
v_interpretation_list.push(vinterp);
428-
} else {
429-
return Err(format!("Could not find validated variant for allele {}", allele));
430-
}
431-
}
432-
if self.cohort_dto.disease_list.is_empty() {
433-
return Err(format!("No disease objects found"));
434-
}
435-
436-
let mut g_interpretation_map: HashMap<String, GenomicInterpretation> = HashMap::new();
437-
for vi in v_interpretation_list {
438-
let gi = GenomicInterpretation{
439-
subject_or_biosample_id: ppkt_row.individual_data.individual_id.to_string(),
440-
interpretation_status: InterpretationStatus::Causative.into(),
441-
call: Some(Call::VariantInterpretation(vi.clone()))
442-
};
443-
let symbol = Self::extract_gene_symbol(&vi)?;
444-
g_interpretation_map.insert(symbol, gi);
445-
}
446-
let mut interpretation_list: Vec<Interpretation> = vec![];
447-
for disease in &self.cohort_dto.disease_list {
448-
let disease_clz = OntologyClass{
449-
id: disease.disease_id.clone(),
450-
label: disease.disease_label.clone(),
451-
};
452-
let mut diagnosis = Diagnosis{
453-
disease: Some(disease_clz),
454-
genomic_interpretations: vec![],
455-
};
456-
for gene in &disease.gene_transcript_list {
457-
let symbol = gene.gene_symbol.to_string();
458-
if let Some(g_interp) = g_interpretation_map.get(&symbol) {
459-
diagnosis.genomic_interpretations.push(g_interp.clone());
460-
}
461-
}
462-
let i = Interpretation{
463-
id: Self::generate_id(),
464-
progress_status: ProgressStatus::Solved.into(),
465-
diagnosis: Some(diagnosis),
466-
summary: String::default(),
467-
};
468-
interpretation_list.push(i);
469-
}
470-
Ok(interpretation_list)
471-
}
472-
259+
473260

474261
fn get_ontology_class(&self, term: &HpoTermDuplet) -> Result<OntologyClass, String> {
475262
let hpo_id = term.hpo_id();
@@ -524,11 +311,16 @@ impl PpktExporter {
524311
}
525312

526313

527-
pub fn extract_phenopacket_from_dto(
314+
fn extract_phenopacket_from_row(
528315
&self,
529316
ppkt_row_dto: &RowData,
530317
) -> Result<Phenopacket, String> {
531-
let interpretation_list = self.get_interpretation_list(ppkt_row_dto)?;
318+
let individual = self.extract_individual(ppkt_row_dto)?;
319+
let is_male = &ppkt_row_dto.individual_data.sex == "M";
320+
321+
let ppkt_var_exporter = PpktVariantExporter::new(is_male,&self.cohort_dto);
322+
let interpretation_list = ppkt_var_exporter.get_interpretation_list(ppkt_row_dto)?;
323+
532324
let ppkt = Phenopacket{
533325
id: self.get_phenopacket_id(ppkt_row_dto),
534326
subject: Some(self.extract_individual(ppkt_row_dto)?),
@@ -585,7 +377,7 @@ pub fn strip_phenopacket_defaults(root: &mut Value) {
585377
pub fn get_all_phenopackets(&self) -> Result<Vec<Phenopacket>, String> {
586378
let mut ppkt_list: Vec<Phenopacket> = Vec::new();
587379
for row in &self.cohort_dto.rows {
588-
let ppkt = self.extract_phenopacket_from_dto(row)?;
380+
let ppkt = self.extract_phenopacket_from_row(row)?;
589381
ppkt_list.push(ppkt);
590382
}
591383

0 commit comments

Comments
 (0)