Skip to content

Commit 633dcf5

Browse files
committed
Modify validation errors to report per-row failures via triggers
1 parent 21fc0dc commit 633dcf5

File tree

1 file changed

+21
-11
lines changed

1 file changed

+21
-11
lines changed

src/mavedb/lib/validation/dataframe/column.py

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -73,29 +73,39 @@ def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets:
7373

7474
# if there is more than one target, we expect variants to be fully qualified
7575
if fully_qualified:
76-
invalid_fully_qualified = {v for v in variants if len(str(v).split(":")) != 2}
76+
invalid_fully_qualified = [f"{len(str(v).split(':'))} invalid fully qualified found from row {idx}"
77+
for idx, v in enumerate(variants) if len(str(v).split(":")) != 2]
7778
if invalid_fully_qualified:
7879
raise ValidationError(
79-
f"variants in the provided column '{column.name}' were expected to be fully qualified, "
80-
"but are not described in relation to an accession. "
81-
"Validation errors found:\n" + "\n".join(invalid_fully_qualified))
80+
f"variant column '{column.name}' has {len(invalid_fully_qualified)} unqualified variants.",
81+
triggers=invalid_fully_qualified
82+
)
8283

83-
inconsistent_prefixes = {v for v in variants if len(set(str(v).split(":")[1][:2] for v in variants)) > 1}
84+
inconsistent_prefixes = [f"row {idx}: '{v}' uses inconsistent prefix '{str(v).split(':')[1][:2]}'"
85+
for idx, v in enumerate(variants)
86+
if len(set(str(v).split(":")[1][:2] for v in variants)) > 1]
8487
if inconsistent_prefixes:
8588
raise ValidationError(
86-
f"variant column '{column.name}' has inconsistent variant prefixes':\n" + "\n".join(inconsistent_prefixes))
89+
f"variant column '{column.name}' has {len(inconsistent_prefixes)} inconsistent variant prefixes.",
90+
triggers=inconsistent_prefixes
91+
)
8792

88-
invalid_prefixes = {v for v in variants if str(v).split(":")[1][:2] not in prefixes}
93+
invalid_prefixes = [f"row {idx}: '{v}' uses invalid prefix '{str(v).split(':')[1][:2]}'"
94+
for idx, v in enumerate(variants) if str(v).split(":")[1][:2] not in prefixes]
8995
if invalid_prefixes:
9096
raise ValidationError(
91-
f"variant column '{column.name}' has invalid variant prefixes':\n" + "\n".join(invalid_prefixes))
97+
f"variant column '{column.name}' has {len(invalid_prefixes)} invalid variant prefixes.",
98+
triggers=invalid_prefixes
99+
)
92100

93-
invalid_accessions = {v for v in variants if str(v).split(":")[0] not in targets}
101+
invalid_accessions = [f"accession identifier {str(v).split(':')[0]} from row {idx}, variant {v} not found"
102+
for idx, v in enumerate(variants) if str(v).split(":")[0] not in targets]
94103
if invalid_accessions:
95104
raise ValidationError(
96105
f"variant column '{column.name}' has invalid accession identifiers; "
97-
"some accession identifiers present in the score file were not added as targets."
98-
"Validation errors found:\n" + "\n".join(invalid_accessions))
106+
f"{len(invalid_accessions)} accession identifiers present in the score file were not added as targets.",
107+
triggers=invalid_accessions
108+
)
99109

100110
else:
101111
if len(set(v[:2] for v in variants)) > 1:

0 commit comments

Comments
 (0)