Skip to content

Commit 03bc6bf

Browse files
committed
adding repo qc
1 parent 38ee7ff commit 03bc6bf

File tree

8 files changed

+38
-138
lines changed

8 files changed

+38
-138
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ga4ghphetools"
3-
version = "0.5.1"
3+
version = "0.5.2"
44
edition = "2021"
55
keywords = ["GA4GH", "Phenopacket Schema", "Human Phenotype Ontology"]
66
description = "Generate GA4GH phenopackets from tabular data"

src/ppkt/ppkt_exporter.rs

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,14 @@ use ontolius::{Identified, TermId};
99
use ontolius::ontology::csr::FullCsrOntology;
1010
use ontolius::ontology::{MetadataAware, OntologyTerms};
1111
use phenopacket_tools::builders::time_elements::time_element_from_str;
12-
use phenopackets::ga4gh::vrsatile::v1::{Expression, GeneDescriptor, MoleculeContext, VariationDescriptor, VcfRecord};
13-
use phenopackets::schema::v2::core::genomic_interpretation::{Call, InterpretationStatus};
14-
use phenopackets::schema::v2::core::interpretation::ProgressStatus;
15-
use phenopackets::schema::v2::core::{Diagnosis, KaryotypicSex, OntologyClass};
12+
use phenopackets::schema::v2::core::{KaryotypicSex, OntologyClass};
1613
use phenopackets::schema::v2::core::vital_status::Status;
17-
use phenopackets::schema::v2::core::{AcmgPathogenicityClassification, Disease, ExternalReference, GenomicInterpretation, Individual, Interpretation, MetaData, PhenotypicFeature, Sex, TherapeuticActionability, VariantInterpretation, VitalStatus};
14+
use phenopackets::schema::v2::core::{Disease, ExternalReference, Individual, MetaData, PhenotypicFeature, Sex, VitalStatus};
1815
use phenopackets::schema::v2::Phenopacket;
19-
20-
use rand::Rng;
2116
use regex::Regex;
2217
use serde_json::Value;
2318
use crate::dto::cohort_dto::{CohortData, DiseaseData, RowData};
24-
25-
use crate::dto::hgvs_variant::HgvsVariant;
2619
use crate::dto::hpo_term_dto::HpoTermDuplet;
27-
use crate::dto::intergenic_variant::IntergenicHgvsVariant;
28-
use crate::dto::structural_variant::StructuralVariant;
2920
use crate::ppkt::ppkt_variant_exporter::PpktVariantExporter;
3021
use phenopacket_tools;
3122
use phenopacket_tools::builders::builder::Builder;
@@ -200,7 +191,6 @@ impl PpktExporter {
200191
if disease_id_list.is_empty() {
201192
return Err("No disease data found".to_string());
202193
}
203-
println!("{}{}: ppkt:row-{:?}", file!(), line!(), ppkt_row);
204194
let has_multiple_dx = disease_id_list.len() > 1;
205195
let mut disease_list: Vec<Disease> = Vec::new();
206196
for dx_id in disease_id_list {
@@ -240,23 +230,6 @@ impl PpktExporter {
240230
}
241231

242232

243-
244-
245-
246-
247-
248-
/// Generate a random identifier (used in this struct for Interpretation objects).
249-
pub fn generate_id() -> String {
250-
rand::rng()
251-
.sample_iter(&rand::distr::Alphanumeric)
252-
.take(24)
253-
.map(char::from)
254-
.collect()
255-
}
256-
257-
258-
259-
260233

261234
fn get_ontology_class(&self, term: &HpoTermDuplet) -> Result<OntologyClass, String> {
262235
let hpo_id = term.hpo_id();

src/repo/cohort_qc.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
use std::{collections::HashMap, fs, path::PathBuf};
1+
use std::collections::HashMap;
22

33
use phenopackets::schema::v2::Phenopacket;
44

5-
use crate::{dto::cohort_dto::{CohortData, CohortType, DiseaseData}, repo::{disease_qc::DiseaseQc, qc_report::QcReport}};
5+
use crate::{dto::cohort_dto::{CohortData, CohortType}, repo::{disease_qc::DiseaseQc, qc_report::QcReport}};
66

77

88

src/repo/disease_qc.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,19 @@ impl DiseaseQc {
4646
for ppkt in &self.ppkt_list {
4747
let ac = Self::get_allele_count(ppkt);
4848
if ! allowable_allele_counts.contains(&ac) {
49-
return Some(format!("Expected counts of {:?} but got {} for {} ({})", allowable_allele_counts, ac, self.disease_data.disease_label, self.disease_data.disease_id))
49+
return Some(format!("{}: Expected counts of {:?} but got {} for {}.", ppkt.id,allowable_allele_counts, ac, self.disease_data_display()))
5050
}
5151
}
5252
None
5353
}
5454

55+
fn disease_data_display(&self) -> String {
56+
format!("{} ({}/{})",
57+
self.disease_data.disease_label,
58+
self.disease_data.disease_id,
59+
self.disease_data.gene_transcript_list[0].gene_symbol)
60+
}
61+
5562

5663
fn get_allele_count(ppkt: &Phenopacket) -> usize {
5764
let mut ac = 0 as usize;

src/variant/intergenic_hgvs_validator.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ response
320320
assert_eq!("NC_000019.10:g.12887294G>A", intergen.g_hgvs());
321321
assert!(intergen.gene_hgvs().is_some());
322322
assert_eq!("NG_009292.1:g.1135G>A", intergen.gene_hgvs().unwrap());
323-
let expected_var_key= "NC_000019_10_g_12887294GtoA";
323+
let expected_var_key= "NC_000019_10_g12887294GtoA";
324324
assert_eq!(expected_var_key, intergen.variant_key());
325325

326326
}

tests/integration_cohort.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,26 @@ pub fn acvr1_cohort_with_repeated_term(
102102
assert!(result.is_err());
103103
let err_str = result.err().unwrap();
104104
assert_eq!("Duplicate entry in HPO Header: Ectopic ossification in muscle tissue (HP:0011987)", err_str);
105+
}
106+
107+
108+
#[rstest]
109+
fn export_with_het(
110+
hpo: Arc<FullCsrOntology>
111+
) {
112+
let cohort = "/Users/robin/GIT/phenopacket-store/notebooks/KLF1/KLF1_CDAN4B_individuals.json";
113+
let file_data = std::fs::read_to_string(cohort)
114+
.map_err(|e|
115+
format!("Could not extract string data from {}: {}", cohort, e.to_string())).unwrap();
116+
let cohort: CohortData = serde_json::from_str(&file_data)
117+
.map_err(|e| format!("Could not transform string {} to CohortDto: {}",
118+
file_data, e.to_string())).unwrap();
119+
let orcid = "0000-0000-0000-0001".to_string();
120+
let ppkt_list = ga4ghphetools::ppkt::get_phenopackets(cohort, orcid, hpo).unwrap();
121+
let interp = ppkt_list[2].interpretations[0].clone();
122+
if let Some(dx) = interp.diagnosis {
123+
println!("# genomic interpretations = {}", dx.genomic_interpretations.len());
124+
}
105125

106126
}
107127

tests/integration_combine_cohort_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use ga4ghphetools::dto::{cohort_dto::{CohortData, RowData}, hpo_term_dto::{CellV
44
use ontolius::ontology::csr::FullCsrOntology;
55
use rstest::{rstest};
66
mod common;
7-
use common::cohort_data_fixtures::{cohort_data_1, cohort_data_2, hpo_term_pool};
7+
use common::cohort_data_fixtures::{cohort_data_1, cohort_data_2};
88

99
use crate::common::hpo_fixture::hpo;
1010

tests/integration_new_row_tests.rs

Lines changed: 3 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@ use serde_json::Value;
1515
/// Make sure that our test matrix is valid before we start changing fields to check if we pick up errors
1616
#[rstest]
1717
fn test_valid_input(matrix: Vec<Vec<String>>, hpo: Arc<FullCsrOntology>) {
18-
let res = CohortFactory::dto_from_mendelian_template(matrix, hpo.clone(), false, |p,q|{// no progress bar for test
19-
});
18+
let res = CohortFactory::dto_from_mendelian_template(matrix, hpo.clone(), false, |_p:u32,_q:u32|{/* no progress bar for test*/});
2019
assert!(res.is_ok());
2120
}
21+
22+
2223
pub fn strip_phenopacket_defaults(root: &mut Value) {
2324
// Top-level `subject`
2425
if let Value::Object(root_map) = root {
@@ -155,104 +156,3 @@ fn validity_of_previous_column(
155156
}
156157
return Ok(());
157158
}
158-
159-
/*
160-
#[rstest]
161-
fn add_new_row_test_1(
162-
one_case_matrix: Vec<Vec<String>>,
163-
hpo: Arc<FullCsrOntology>,
164-
case_5_dto: CaseDto
165-
) {
166-
let mut phetools = PheTools::new(hpo);
167-
assert_eq!(3, one_case_matrix.len()); // original matrix has headers and four data rows
168-
let original_matrix = one_case_matrix.clone();
169-
let res = phetools.load_matrix(one_case_matrix);
170-
assert!(res.is_ok());
171-
let dto_cloned = case_5_dto.clone(); // needed only for testing
172-
let strab = HpoTermDto::new("HP:0000486", "Strabismus", "observed");
173-
let hpo_dto_list = vec![strab];
174-
let res = phetools.add_row_with_hpo_data(case_5_dto, hpo_dto_list);
175-
assert!(res.is_ok());
176-
// Check that the constant items are what we want
177-
let new_matrix = phetools.get_string_matrix().expect("Could not unwrap matrix with added row");
178-
assert_eq!(4, new_matrix.len());
179-
// Check that the first six rows are identical to the original matrix
180-
// Note that we can only do this for the constant columns, because we have added new HPO columns
181-
let first_four_new_rows: Vec<Vec<String>> = new_matrix.iter().take(4).cloned().collect();
182-
//debug_matrix_comparison(&original_matrix, &new_matrix, 17);
183-
let are_equal = first_n_columns_equal(&original_matrix, &first_four_new_rows, 17);
184-
assert!(are_equal);
185-
// now check that the non-HPO entries in the new line are OK
186-
let fourth_row = new_matrix[3].clone();
187-
assert_eq!(dto_cloned.pmid, fourth_row[0]);
188-
assert_eq!(dto_cloned.title, fourth_row[1]);
189-
assert_eq!(dto_cloned.individual_id, fourth_row[2]);
190-
assert_eq!(dto_cloned.comment, fourth_row[3]);
191-
assert_eq!(dto_cloned.allele_1, fourth_row[9]);
192-
assert_eq!(dto_cloned.allele_2, fourth_row[10]);
193-
assert_eq!(dto_cloned.variant_comment, fourth_row[11]);
194-
assert_eq!(dto_cloned.age_of_onset, fourth_row[12]);
195-
assert_eq!(dto_cloned.age_at_last_encounter, fourth_row[13]);
196-
assert_eq!(dto_cloned.deceased, fourth_row[14]);
197-
assert_eq!(dto_cloned.sex, fourth_row[15]);
198-
println!("VII: {:?}\n", &fourth_row);
199-
assert_eq!("na", fourth_row[16]); // constant HPO separator column
200-
// Now check the HPO columns
201-
// The DTO added: thick_eye_brow: excluded; grand_mal: observed; strabismus: observed; esotropia_observed
202-
validity_of_new_column("Strabismus", "HP:0000486", "observed", &new_matrix).expect("Strabismus observed in DTO");
203-
// If we get here, we have passed all tests!
204-
assert!(true);
205-
}
206-
207-
208-
///Check that all entries in a column are the same
209-
fn check_data_entries_unique(
210-
new_matrix: &Vec<Vec<String>>,
211-
colname: &str,
212-
new_entry: &str) -> Result<(), String>
213-
{
214-
let col = get_index_of_column(&new_matrix, colname)?;
215-
for i in 2..new_matrix.len() {
216-
if new_matrix[i][col] != new_entry {
217-
return Err(format!("Expected new_matrix[{}][{}]={} but got {}",
218-
i, col, new_entry, new_matrix[i][col]));
219-
}
220-
}
221-
Ok(())
222-
}
223-
224-
225-
226-
227-
/// Check that all entries in the constant, disease-gene-bundle block are identical
228-
/// If so, then the new row contains the same values for
229-
/// disease_id, disease_label, HGNC_id, gene_symbol, and transcript
230-
#[rstest]
231-
fn add_new_row_check_disease_gene_bundle(
232-
matrix: Vec<Vec<String>>,
233-
hpo: Arc<FullCsrOntology>,
234-
case_5_dto: CaseDto,
235-
hpo_dto_list_1: Vec<HpoTermDto>
236-
) {
237-
let mut phetools = PheTools::new(hpo);
238-
assert_eq!(6, matrix.len()); // original matrix has headers and four data rows
239-
let original_matrix = matrix.clone();
240-
let res = phetools.load_matrix(matrix);
241-
assert!(res.is_ok());
242-
let dto_cloned = case_5_dto.clone(); // needed only for testing
243-
let res = phetools.add_row_with_hpo_data(case_5_dto, hpo_dto_list_1);
244-
assert!(res.is_ok());
245-
// Check that the constant items are what we want
246-
let new_matrix = phetools.get_string_matrix().expect("Could not unwrap matrix with added row");
247-
assert_eq!(7, new_matrix.len());
248-
check_data_entries_unique(&new_matrix, "disease_id", "OMIM:617865").expect("Expected all entries to be 'OMIM:617865'");
249-
check_data_entries_unique(&new_matrix, "disease_label", "Neurodevelopmental disorder with movement abnormalities, abnormal gait, and autistic features")
250-
.expect("Expected all entries to be 'Neurodevelopmental disorder with movement abnormalities, abnormal gait, and autistic features'");
251-
check_data_entries_unique(&new_matrix, "HGNC_id", "HGNC:29316").expect("Expected all entries to be 'HGNC:29316'");
252-
check_data_entries_unique(&new_matrix, "gene_symbol", "ZSWIM6").expect("Expected all entries to be 'ZSWIM6'");
253-
check_data_entries_unique(&new_matrix, "transcript", "NM_020928.2").expect("Expected all entries to be 'NM_020928.2'");
254-
// if we get here, all tests were OK!
255-
assert!(true);
256-
}
257-
258-
*/

0 commit comments

Comments
 (0)