1010
1111from cmat .clinvar_xml_io import ClinVarDataset
1212from cmat .clinvar_xml_io .clinical_classification import MultipleClinicalClassificationsError
13+ from cmat .clinvar_xml_io .filtering import filter_by_submission_name
1314from cmat .output_generation import consequence_type as CT
1415from cmat .output_generation .report import Report
1516
@@ -64,7 +65,8 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
6465
6566 logger .info ('Processing ClinVar records' )
6667 i = - 1
67- for clinvar_record in ClinVarDataset (clinvar_xml ):
68+ dataset = ClinVarDataset (clinvar_xml )
69+ for clinvar_set in dataset .iter_cvs ():
6870 # If start & end provided, only process records in the range [start, end)
6971 i += 1
7072 if start and i < start :
@@ -78,7 +80,13 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
7880
7981 # Catch any exceptions for this record so we can continue processing.
8082 try :
81- # Failure mode 0 (skip). Contains multiple clinical classification annotations.
83+ # Failure mode 1 (fatal). Record is only supported by submissions deemed to be unusable.
84+ if not filter_by_submission_name (clinvar_set ):
85+ report .clinvar_fatal_excluded_submission += 1
86+ continue
87+ clinvar_record = clinvar_set .rcv
88+
89+ # Failure mode 2 (skip). Contains multiple clinical classification annotations.
8290 # This is new as of V2 of the ClinVar XSD and should definitely be supported at some point,
8391 # but as it can cause parsing complications we catch these cases first.
8492 # See GH issue for context: https://github.com/EBIvariation/CMAT/issues/396
@@ -87,18 +95,18 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
8795 report .clinvar_skip_multiple_clinical_classifications += 1
8896 continue
8997
90- # Failure mode 1 (fatal). A ClinVar record contains no valid traits (traits which have at least one valid,
98+ # Failure mode 3 (fatal). A ClinVar record contains no valid traits (traits which have at least one valid,
9199 # potentially mappable name).
92100 if not clinvar_record .traits_with_valid_names :
93101 report .clinvar_fatal_no_valid_traits += 1
94102 continue
95- # Failure mode 2 (fatal). A ClinVar record contains no valid clinical significance terms, likely due to
103+ # Failure mode 4 (fatal). A ClinVar record contains no valid clinical significance terms, likely due to
96104 # submissions being flagged.
97105 if not clinvar_record .valid_clinical_significances :
98106 report .clinvar_fatal_no_clinical_significance += 1
99107 continue
100108
101- # Failure mode 3 (skip). A ClinVar record contains an unsupported variation type.
109+ # Failure mode 5 (skip). A ClinVar record contains an unsupported variation type.
102110 if clinvar_record .measure is None :
103111 report .clinvar_skip_unsupported_variation += 1
104112 continue
@@ -110,7 +118,7 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
110118 grouped_diseases = group_diseases_by_efo_mapping (clinvar_record .traits_with_valid_names ,
111119 string_to_efo_mappings )
112120
113- # Failure mode 4 (skip). No functional consequences are available.
121+ # Failure mode 6 (skip). No functional consequences are available.
114122 if not consequence_types :
115123 report .clinvar_skip_no_functional_consequences += 1
116124 continue
@@ -121,7 +129,7 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
121129 if is_structural_variant (clinvar_record .measure ):
122130 report .structural_variants += len (consequence_types )
123131
124- # Failure mode 5 (skip). A ClinVar record has at least one trait with at least one valid name, but no
132+ # Failure mode 7 (skip). A ClinVar record has at least one trait with at least one valid name, but no
125133 # suitable EFO mappings were found in the database. This will still generate an evidence string, but is
126134 # tracked as a failure so we can continue to measure mapping coverage.
127135 if not contains_mapping (grouped_diseases ):
@@ -175,8 +183,8 @@ def clinvar_to_evidence_strings(string_to_efo_mappings, variant_to_gene_mappings
175183 except Exception as e :
176184 # We catch exceptions but record when one is thrown, so that the pipeline will crash after processing all
177185 # records and printing the report.
178- logger .error (f'Problem generating evidence for { clinvar_record .accession } ' )
179- logger .error (f'Error: { e } ' )
186+ logger .error (f'Problem generating evidence for { clinvar_set . rcv .accession } ' )
187+ logger .error (f'Error: { repr ( e ) } ' )
180188 exception_raised = True
181189 continue
182190
0 commit comments