Skip to content

Commit 0cc4143

Browse files
committed
Updating template variant handling
1 parent 4d6ae98 commit 0cc4143

File tree

4 files changed

+34
-24
lines changed

4 files changed

+34
-24
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ga4ghphetools"
3-
version = "0.5.9"
3+
version = "0.5.10"
44
edition = "2021"
55
keywords = ["GA4GH", "Phenopacket Schema", "Human Phenotype Ontology"]
66
description = "Generate GA4GH phenopackets from tabular data"

src/etl/etl_tools.rs

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -423,28 +423,37 @@ impl EtlTools {
423423
Ok(())
424424
}
425425

426-
/// Check that the alleles in the rows have full variant objects in the maps
427-
/// Note that we allow na because some cohorts have a mix of mono- and biallelic cases, meaning that
428-
/// one of the allele columns may contain "na" (not available).
429-
fn qc_variants(&self) -> Result<(), String> {
430-
let allele_set: HashSet<String> = self
431-
.raw_table()
426+
fn is_mapped_variant(&self, allele: &str) -> bool {
427+
self.raw_table().hgvs_variants.contains_key(allele)
428+
|| self.raw_table().structural_variants.contains_key(allele)
429+
|| self.raw_table().intergenic_variants.contains_key(allele)
430+
}
431+
432+
/// Our strategy for dealing with variants here is to
433+
/// mark each cell whose allele is found in one of the variant maps as `Transformed` and every other cell (including "na") as `Error`.
434+
/// We do not need to return an error to the GUI, so this function always returns `Ok(())`.
435+
fn qc_variants(&mut self) -> Result<(), String> {
436+
let table = &mut self.dto;
437+
438+
table
432439
.table
433440
.columns
434-
.iter()
435-
.filter(|col| col.header.column_type == EtlColumnType::Variant)
436-
.flat_map(|col| col.values.iter().cloned())
437-
.map(|etl_val| etl_val.current)
438-
.collect();
439-
// These alleles must be in either the HGVS or the SV map (i.e., validated)
440-
for allele in &allele_set {
441-
if allele != "na" &&
442-
! self.raw_table().hgvs_variants.contains_key(allele) &&
443-
! self.raw_table().structural_variants.contains_key(allele)
444-
{
445-
return Err(format!("Unmapped allele: '{allele}'"));
446-
}
447-
}
441+
.iter_mut()
442+
.filter(|c| c.header.column_type == EtlColumnType::Variant)
443+
.flat_map(|c| &mut c.values)
444+
.for_each(|cell| {
445+
let allele = cell.current.as_str();
446+
let valid = allele != "na"
447+
&& (table.hgvs_variants.contains_key(allele)
448+
|| table.structural_variants.contains_key(allele)
449+
|| table.intergenic_variants.contains_key(allele));
450+
451+
cell.status = if valid {
452+
EtlCellStatus::Transformed
453+
} else {
454+
EtlCellStatus::Error
455+
};
456+
});
448457

449458
Ok(())
450459
}
@@ -529,7 +538,7 @@ impl EtlTools {
529538
}
530539

531540

532-
fn qc(&self) -> Result<(), String> {
541+
fn qc(&mut self) -> Result<(), String> {
533542
if self.raw_table().table.columns.is_empty() {
534543
return Err("EtlDto table with no columns".to_string());
535544
}
@@ -549,7 +558,7 @@ impl EtlTools {
549558

550559
/// Note that only Mendelian is supported for Excel file bulk imports
551560
/// Other modes of inheritance (MOIs) are too complicated to be reliably imported in this way.
552-
pub fn get_cohort_data(&self) -> Result<CohortData, String> {
561+
pub fn get_cohort_data(&mut self) -> Result<CohortData, String> {
553562
self.check_is_completely_transformed()?;
554563
self.qc()?;
555564
let hpo_duplets = Self::all_hpo_duplets(&self);

src/etl/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ pub fn get_cohort_data_from_etl_dto(
7171
hpo: Arc<FullCsrOntology>,
7272
dto: EtlDto,
7373
) -> Result<CohortData, String> {
74-
let etl_tools = EtlTools::from_dto(hpo, &dto);
74+
let mut etl_tools = EtlTools::from_dto(hpo, &dto);
7575
etl_tools.get_cohort_data()
7676
}
7777

src/factory/cohort_factory.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,7 @@ impl CohortFactory {
671671
// but they should be the same from the way we construct the map
672672
updated_cohort.hgvs_variants.extend(transformed.hgvs_variants);
673673
updated_cohort.structural_variants.extend(transformed.structural_variants);
674+
updated_cohort.intergenic_variants.extend(transformed.intergenic_variants);
674675

675676
Ok(updated_cohort)
676677
}

0 commit comments

Comments
 (0)