@@ -73,17 +73,38 @@ def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets:
7373
7474 # if there is more than one target, we expect variants to be fully qualified
7575 if fully_qualified :
76- if not all (len (str (v ).split (":" )) == 2 for v in variants ):
76+ invalid_fully_qualified = [f"{ len (str (v ).split (':' ))} invalid fully qualified found from row { idx } "
77+ for idx , v in enumerate (variants ) if len (str (v ).split (":" )) != 2 ]
78+ if invalid_fully_qualified :
7779 raise ValidationError (
78- f"variants in the provided column '{ column .name } ' were expected to be fully qualified, but are not described in relation to an accession"
80+ f"variant column '{ column .name } ' has { len (invalid_fully_qualified )} unqualified variants." ,
81+ triggers = invalid_fully_qualified
7982 )
80- if len (set (str (v ).split (":" )[1 ][:2 ] for v in variants )) > 1 :
81- raise ValidationError (f"variant column '{ column .name } ' has inconsistent variant prefixes" )
82- if not all (str (v ).split (":" )[1 ][:2 ] in prefixes for v in variants ):
83- raise ValidationError (f"variant column '{ column .name } ' has invalid variant prefixes" )
84- if not all (str (v ).split (":" )[0 ] in targets for v in variants ):
83+
84+ inconsistent_prefixes = [f"row { idx } : '{ v } ' uses inconsistent prefix '{ str (v ).split (':' )[1 ][:2 ]} '"
85+ for idx , v in enumerate (variants )
86+ if len (set (str (v ).split (":" )[1 ][:2 ] for v in variants )) > 1 ]
87+ if inconsistent_prefixes :
88+ raise ValidationError (
89+ f"variant column '{ column .name } ' has { len (inconsistent_prefixes )} inconsistent variant prefixes." ,
90+ triggers = inconsistent_prefixes
91+ )
92+
93+ invalid_prefixes = [f"row { idx } : '{ v } ' uses invalid prefix '{ str (v ).split (':' )[1 ][:2 ]} '"
94+ for idx , v in enumerate (variants ) if str (v ).split (":" )[1 ][:2 ] not in prefixes ]
95+ if invalid_prefixes :
96+ raise ValidationError (
97+ f"variant column '{ column .name } ' has { len (invalid_prefixes )} invalid variant prefixes." ,
98+ triggers = invalid_prefixes
99+ )
100+
101+ invalid_accessions = [f"accession identifier { str (v ).split (':' )[0 ]} from row { idx } , variant { v } not found"
102+ for idx , v in enumerate (variants ) if str (v ).split (":" )[0 ] not in targets ]
103+ if invalid_accessions :
85104 raise ValidationError (
86- f"variant column '{ column .name } ' has invalid accession identifiers; some accession identifiers present in the score file were not added as targets"
105+ f"variant column '{ column .name } ' has invalid accession identifiers; "
106+ f"{ len (invalid_accessions )} accession identifiers present in the score file were not added as targets." ,
107+ triggers = invalid_accessions
87108 )
88109
89110 else :
0 commit comments