@@ -24,7 +24,9 @@ use crate::dto::cohort_dto::{CohortData, DiseaseData, RowData};
2424
2525use crate :: dto:: hgvs_variant:: HgvsVariant ;
2626use crate :: dto:: hpo_term_dto:: HpoTermDuplet ;
27+ use crate :: dto:: intergenic_variant:: IntergenicHgvsVariant ;
2728use crate :: dto:: structural_variant:: StructuralVariant ;
29+ use crate :: ppkt:: ppkt_variant_exporter:: PpktVariantExporter ;
2830use phenopacket_tools;
2931use phenopacket_tools:: builders:: builder:: Builder ;
3032
@@ -239,133 +241,9 @@ impl PpktExporter {
239241
240242
241243
242- fn get_sv_variant_interpretation (
243- sv : & StructuralVariant ,
244- allele_count : usize
245- ) -> VariantInterpretation {
246- let gene_ctxt = GeneDescriptor {
247- value_id : sv. hgnc_id ( ) . to_string ( ) ,
248- symbol : sv. gene_symbol ( ) . to_string ( ) ,
249- description : String :: default ( ) ,
250- alternate_ids : vec ! [ ] ,
251- alternate_symbols : vec ! [ ] ,
252- xrefs : vec ! [ ]
253- } ;
254- let is_x = sv. is_x_chromosomal ( ) ;
255- let sv_class = sv. get_sequence_ontology_term ( ) ;
256- let allelic_state = Self :: get_allele_term ( allele_count, sv. is_x_chromosomal ( ) ) ;
257-
258- let vdesc = VariationDescriptor {
259- id : sv. variant_key ( ) . to_string ( ) ,
260- variation : None ,
261- label : sv. label ( ) . to_string ( ) ,
262- description : String :: default ( ) ,
263- gene_context : Some ( gene_ctxt) ,
264- expressions : vec ! [ ] ,
265- vcf_record : None ,
266- xrefs : vec ! [ ] ,
267- alternate_labels : vec ! [ ] ,
268- extensions : vec ! [ ] ,
269- molecule_context : MoleculeContext :: Genomic . into ( ) ,
270- structural_type : Some ( sv_class) ,
271- vrs_ref_allele_seq : String :: default ( ) ,
272- allelic_state : Some ( allelic_state) ,
273- } ;
274- let vi = VariantInterpretation {
275- acmg_pathogenicity_classification : AcmgPathogenicityClassification :: Pathogenic . into ( ) ,
276- therapeutic_actionability : TherapeuticActionability :: UnknownActionability . into ( ) ,
277- variation_descriptor : Some ( vdesc)
278- } ;
279- vi
280- }
281244
282- fn get_allele_term ( allele_count : usize , is_x : bool ) -> OntologyClass {
283- if allele_count == 2 {
284- return OntologyClass {
285- id : "GENO:0000136" . to_string ( ) ,
286- label : "homozygous" . to_string ( ) ,
287- } ;
288- } else if is_x {
289- return OntologyClass {
290- id : "GENO:0000134" . to_string ( ) ,
291- label : "hemizygous" . to_string ( ) ,
292- } ;
293- } else {
294- return OntologyClass {
295- id : "GENO:0000135" . to_string ( ) ,
296- label : "heterozygous" . to_string ( ) ,
297- } ;
298- }
299- }
300-
301- fn get_hgvs_variant_interpretation (
302- hgvs : & HgvsVariant ,
303- allele_count : usize )
304- -> VariantInterpretation {
305- let gene_ctxt = GeneDescriptor {
306- value_id : hgvs. hgnc_id ( ) . to_string ( ) ,
307- symbol : hgvs. symbol ( ) . to_string ( ) ,
308- description : String :: default ( ) ,
309- alternate_ids : vec ! [ ] ,
310- alternate_symbols : vec ! [ ] ,
311- xrefs : vec ! [ ]
312- } ;
313- let vcf_record = VcfRecord {
314- genome_assembly : hgvs. assembly ( ) . to_string ( ) ,
315- chrom : hgvs. chr ( ) . to_string ( ) ,
316- pos : hgvs. position ( ) as u64 ,
317- id : String :: default ( ) ,
318- r#ref : hgvs. ref_allele ( ) . to_string ( ) ,
319- alt : hgvs. alt_allele ( ) . to_string ( ) ,
320- qual : String :: default ( ) ,
321- filter : String :: default ( ) ,
322- info : String :: default ( ) ,
323- } ;
324245
325- let hgvs_c = Expression {
326- syntax : "hgvs.c" . to_string ( ) ,
327- value : format ! ( "{}:{}" , hgvs. transcript( ) , hgvs. hgvs( ) ) ,
328- version : String :: default ( )
329- } ;
330- let mut expression_list = vec ! [ hgvs_c] ;
331- let hgvs_g = Expression {
332- syntax : "hgvs.g" . to_string ( ) ,
333- value : hgvs. g_hgvs ( ) . to_string ( ) ,
334- version : String :: default ( ) ,
335- } ;
336- expression_list. push ( hgvs_g) ;
337- if let Some ( hgsvp) = hgvs. p_hgvs ( ) {
338- let hgvs_p = Expression {
339- syntax : "hgvs.p" . to_string ( ) ,
340- value : hgsvp,
341- version : String :: default ( ) ,
342- } ;
343- expression_list. push ( hgvs_p) ;
344- } ;
345- let allelic_state = Self :: get_allele_term ( allele_count, hgvs. is_x_chromosomal ( ) ) ;
346- let vdesc = VariationDescriptor {
347- id : hgvs. variant_key ( ) . to_string ( ) ,
348- variation : None ,
349- label : String :: default ( ) ,
350- description : String :: default ( ) ,
351- gene_context : Some ( gene_ctxt) ,
352- expressions : expression_list,
353- vcf_record : Some ( vcf_record) ,
354- xrefs : vec ! [ ] ,
355- alternate_labels : vec ! [ ] ,
356- extensions : vec ! [ ] ,
357- molecule_context : MoleculeContext :: Genomic . into ( ) ,
358- structural_type : None ,
359- vrs_ref_allele_seq : String :: default ( ) ,
360- allelic_state : Some ( allelic_state)
361- } ;
362- let vi = VariantInterpretation {
363- acmg_pathogenicity_classification : AcmgPathogenicityClassification :: Pathogenic . into ( ) ,
364- therapeutic_actionability : TherapeuticActionability :: UnknownActionability . into ( ) ,
365- variation_descriptor : Some ( vdesc)
366- } ;
367- vi
368- }
246+
369247
370248 /// Generate a random identifier (used in this struct for Interpretation objects).
371249 pub fn generate_id ( ) -> String {
@@ -376,100 +254,9 @@ impl PpktExporter {
376254 . collect ( )
377255 }
378256
257+
379258
380- fn extract_gene_symbol ( vi : & VariantInterpretation ) -> Result < String , String > {
381- vi
382- . variation_descriptor
383- . as_ref ( )
384- . and_then ( |vd| vd. gene_context . as_ref ( ) )
385- . map ( |gc| gc. symbol . clone ( ) )
386- . ok_or_else ( || format ! (
387- "Missing gene symbol for variant interpretation: {:?}" ,
388- vi. variation_descriptor
389- ) )
390- }
391-
392- /// Builds a list of `Interpretation` objects for a given phenopacket row.
393- ///
394- /// This function performs the following steps:
395- /// 1. Iterates through each allele in the input `RowData` and constructs corresponding
396- /// `VariantInterpretation` objects based on HGVS or structural variant information.
397- /// 2. Ensures allele counts are valid (1 or 2). Returns an error if invalid or if a matching
398- /// validated variant cannot be found.
399- /// 3. Validates that only one disease is present (melded/multiple diseases not implemented yet).
400- /// 4. Extracts disease information and maps `GenomicInterpretation` objects to gene symbols.
401- /// 5. For each disease, builds a `Diagnosis` linking its known genes to the corresponding
402- /// genomic interpretations (if available).
403- /// 6. Wraps all constructed diagnoses into `Interpretation` objects.
404- ///
405- /// # Arguments
406- /// * `ppkt_row` - A `RowData` object containing per-patient genotype and phenotype information.
407- ///
408- /// # Returns
409- /// * `Ok(Vec<Interpretation>)` if all data were valid and interpretable.
410- /// * `Err(String)` if any validation, mapping, or extraction step failed (e.g., missing allele,
411- /// missing gene symbol, inconsistent disease data).
412- pub fn get_interpretation_list (
413- & self ,
414- ppkt_row : & RowData )
415- -> std:: result:: Result < Vec < Interpretation > , String > {
416- let mut v_interpretation_list: Vec < VariantInterpretation > = Vec :: new ( ) ;
417- for ( allele, count) in & ppkt_row. allele_count_map {
418- let allele_count = * count;
419- if allele_count == 0 {
420- return Err ( format ! ( "No alleles found in row {:?}" , ppkt_row) ) ;
421- }
422- if let Some ( hgvs) = self . cohort_dto . hgvs_variants . get ( allele) {
423- let vinterp = Self :: get_hgvs_variant_interpretation ( hgvs, allele_count) ;
424- v_interpretation_list. push ( vinterp) ;
425- } else if let Some ( sv) = self . cohort_dto . structural_variants . get ( allele) {
426- let vinterp = Self :: get_sv_variant_interpretation ( sv, allele_count) ;
427- v_interpretation_list. push ( vinterp) ;
428- } else {
429- return Err ( format ! ( "Could not find validated variant for allele {}" , allele) ) ;
430- }
431- }
432- if self . cohort_dto . disease_list . is_empty ( ) {
433- return Err ( format ! ( "No disease objects found" ) ) ;
434- }
435-
436- let mut g_interpretation_map: HashMap < String , GenomicInterpretation > = HashMap :: new ( ) ;
437- for vi in v_interpretation_list {
438- let gi = GenomicInterpretation {
439- subject_or_biosample_id : ppkt_row. individual_data . individual_id . to_string ( ) ,
440- interpretation_status : InterpretationStatus :: Causative . into ( ) ,
441- call : Some ( Call :: VariantInterpretation ( vi. clone ( ) ) )
442- } ;
443- let symbol = Self :: extract_gene_symbol ( & vi) ?;
444- g_interpretation_map. insert ( symbol, gi) ;
445- }
446- let mut interpretation_list: Vec < Interpretation > = vec ! [ ] ;
447- for disease in & self . cohort_dto . disease_list {
448- let disease_clz = OntologyClass {
449- id : disease. disease_id . clone ( ) ,
450- label : disease. disease_label . clone ( ) ,
451- } ;
452- let mut diagnosis = Diagnosis {
453- disease : Some ( disease_clz) ,
454- genomic_interpretations : vec ! [ ] ,
455- } ;
456- for gene in & disease. gene_transcript_list {
457- let symbol = gene. gene_symbol . to_string ( ) ;
458- if let Some ( g_interp) = g_interpretation_map. get ( & symbol) {
459- diagnosis. genomic_interpretations . push ( g_interp. clone ( ) ) ;
460- }
461- }
462- let i = Interpretation {
463- id : Self :: generate_id ( ) ,
464- progress_status : ProgressStatus :: Solved . into ( ) ,
465- diagnosis : Some ( diagnosis) ,
466- summary : String :: default ( ) ,
467- } ;
468- interpretation_list. push ( i) ;
469- }
470- Ok ( interpretation_list)
471- }
472-
259+
473260
474261 fn get_ontology_class ( & self , term : & HpoTermDuplet ) -> Result < OntologyClass , String > {
475262 let hpo_id = term. hpo_id ( ) ;
@@ -524,11 +311,16 @@ impl PpktExporter {
524311 }
525312
526313
527- pub fn extract_phenopacket_from_dto (
314+ fn extract_phenopacket_from_row (
528315 & self ,
529316 ppkt_row_dto : & RowData ,
530317 ) -> Result < Phenopacket , String > {
531- let interpretation_list = self . get_interpretation_list ( ppkt_row_dto) ?;
318+ let individual = self . extract_individual ( ppkt_row_dto) ?;
319+ let is_male = & ppkt_row_dto. individual_data . sex == "M" ;
320+
321+ let ppkt_var_exporter = PpktVariantExporter :: new ( is_male, & self . cohort_dto ) ;
322+ let interpretation_list = ppkt_var_exporter. get_interpretation_list ( ppkt_row_dto) ?;
323+
532324 let ppkt = Phenopacket {
533325 id : self . get_phenopacket_id ( ppkt_row_dto) ,
534326 subject : Some ( self . extract_individual ( ppkt_row_dto) ?) ,
@@ -585,7 +377,7 @@ pub fn strip_phenopacket_defaults(root: &mut Value) {
585377 pub fn get_all_phenopackets ( & self ) -> Result < Vec < Phenopacket > , String > {
586378 let mut ppkt_list: Vec < Phenopacket > = Vec :: new ( ) ;
587379 for row in & self . cohort_dto . rows {
588- let ppkt = self . extract_phenopacket_from_dto ( row) ?;
380+ let ppkt = self . extract_phenopacket_from_row ( row) ?;
589381 ppkt_list. push ( ppkt) ;
590382 }
591383
0 commit comments