Skip to content

Commit a16e7b3

Browse files
authored
Merge pull request #420 from VariantEffect/improve/estelle/350/surfaceValidationErrors
Show the variants that have problems in the error message.
2 parents 2977ea6 + 489c59c commit a16e7b3

File tree

1 file changed

+29
-8
lines changed

1 file changed

+29
-8
lines changed

src/mavedb/lib/validation/dataframe/column.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,38 @@ def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets:
7373

7474
# if there is more than one target, we expect variants to be fully qualified
7575
if fully_qualified:
76-
if not all(len(str(v).split(":")) == 2 for v in variants):
76+
invalid_fully_qualified = [f"{len(str(v).split(':'))} invalid fully qualified found from row {idx}"
77+
for idx, v in enumerate(variants) if len(str(v).split(":")) != 2]
78+
if invalid_fully_qualified:
7779
raise ValidationError(
78-
f"variants in the provided column '{column.name}' were expected to be fully qualified, but are not described in relation to an accession"
80+
f"variant column '{column.name}' has {len(invalid_fully_qualified)} unqualified variants.",
81+
triggers=invalid_fully_qualified
7982
)
80-
if len(set(str(v).split(":")[1][:2] for v in variants)) > 1:
81-
raise ValidationError(f"variant column '{column.name}' has inconsistent variant prefixes")
82-
if not all(str(v).split(":")[1][:2] in prefixes for v in variants):
83-
raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes")
84-
if not all(str(v).split(":")[0] in targets for v in variants):
83+
84+
inconsistent_prefixes = [f"row {idx}: '{v}' uses inconsistent prefix '{str(v).split(':')[1][:2]}'"
85+
for idx, v in enumerate(variants)
86+
if len(set(str(v).split(":")[1][:2] for v in variants)) > 1]
87+
if inconsistent_prefixes:
88+
raise ValidationError(
89+
f"variant column '{column.name}' has {len(inconsistent_prefixes)} inconsistent variant prefixes.",
90+
triggers=inconsistent_prefixes
91+
)
92+
93+
invalid_prefixes = [f"row {idx}: '{v}' uses invalid prefix '{str(v).split(':')[1][:2]}'"
94+
for idx, v in enumerate(variants) if str(v).split(":")[1][:2] not in prefixes]
95+
if invalid_prefixes:
96+
raise ValidationError(
97+
f"variant column '{column.name}' has {len(invalid_prefixes)} invalid variant prefixes.",
98+
triggers=invalid_prefixes
99+
)
100+
101+
invalid_accessions = [f"accession identifier {str(v).split(':')[0]} from row {idx}, variant {v} not found"
102+
for idx, v in enumerate(variants) if str(v).split(":")[0] not in targets]
103+
if invalid_accessions:
85104
raise ValidationError(
86-
f"variant column '{column.name}' has invalid accession identifiers; some accession identifiers present in the score file were not added as targets"
105+
f"variant column '{column.name}' has invalid accession identifiers; "
106+
f"{len(invalid_accessions)} accession identifiers present in the score file were not added as targets.",
107+
triggers=invalid_accessions
87108
)
88109

89110
else:

0 commit comments

Comments
 (0)