@@ -73,29 +73,39 @@ def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets:
7373
7474 # if there is more than one target, we expect variants to be fully qualified
7575 if fully_qualified :
76- invalid_fully_qualified = {v for v in variants if len (str (v ).split (":" )) != 2 }
76+ invalid_fully_qualified = [f"{ len (str (v ).split (':' ))} invalid fully qualified found from row { idx } "
77+ for idx , v in enumerate (variants ) if len (str (v ).split (":" )) != 2 ]
7778 if invalid_fully_qualified :
7879 raise ValidationError (
79- f"variants in the provided column '{ column .name } ' were expected to be fully qualified, "
80- "but are not described in relation to an accession. "
81- "Validation errors found: \n " + " \n " . join ( invalid_fully_qualified ) )
80+ f"variant column '{ column .name } ' has { len ( invalid_fully_qualified ) } unqualified variants." ,
81+ triggers = invalid_fully_qualified
82+ )
8283
83- inconsistent_prefixes = {v for v in variants if len (set (str (v ).split (":" )[1 ][:2 ] for v in variants )) > 1 }
84+ inconsistent_prefixes = [f"row { idx } : '{ v } ' uses inconsistent prefix '{ str (v ).split (':' )[1 ][:2 ]} '"
85+ for idx , v in enumerate (variants )
86+ if len (set (str (v ).split (":" )[1 ][:2 ] for v in variants )) > 1 ]
8487 if inconsistent_prefixes :
8588 raise ValidationError (
86- f"variant column '{ column .name } ' has inconsistent variant prefixes':\n " + "\n " .join (inconsistent_prefixes ))
89+ f"variant column '{ column .name } ' has { len (inconsistent_prefixes )} inconsistent variant prefixes." ,
90+ triggers = inconsistent_prefixes
91+ )
8792
88- invalid_prefixes = {v for v in variants if str (v ).split (":" )[1 ][:2 ] not in prefixes }
93+ invalid_prefixes = [f"row { idx } : '{ v } ' uses invalid prefix '{ str (v ).split (':' )[1 ][:2 ]} '"
94+ for idx , v in enumerate (variants ) if str (v ).split (":" )[1 ][:2 ] not in prefixes ]
8995 if invalid_prefixes :
9096 raise ValidationError (
91- f"variant column '{ column .name } ' has invalid variant prefixes':\n " + "\n " .join (invalid_prefixes ))
97+ f"variant column '{ column .name } ' has { len (invalid_prefixes )} invalid variant prefixes." ,
98+ triggers = invalid_prefixes
99+ )
92100
93- invalid_accessions = {v for v in variants if str (v ).split (":" )[0 ] not in targets }
101+ invalid_accessions = [f"accession identifier { str (v ).split (':' )[0 ]} from row { idx } , variant { v } not found"
102+ for idx , v in enumerate (variants ) if str (v ).split (":" )[0 ] not in targets ]
94103 if invalid_accessions :
95104 raise ValidationError (
96105 f"variant column '{ column .name } ' has invalid accession identifiers; "
97- "some accession identifiers present in the score file were not added as targets."
98- "Validation errors found:\n " + "\n " .join (invalid_accessions ))
106+ f"{ len (invalid_accessions )} accession identifiers present in the score file were not added as targets." ,
107+ triggers = invalid_accessions
108+ )
99109
100110 else :
101111 if len (set (v [:2 ] for v in variants )) > 1 :
0 commit comments