Skip to content

Commit 88809c2

Browse files
Raise an error for all string values on input
1 parent fa44513 commit 88809c2

File tree

3 files changed

+53
-9
lines changed

3 files changed

+53
-9
lines changed

tests/test_bcftools_validation.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ def run_vcztools(args: str, expect_error=False) -> tuple[str, str]:
4646
("view --no-version", "sample.vcf.gz"),
4747
("view --no-version", "chr22.vcf.gz"),
4848
("view --no-version", "msprime_diploid.vcf.gz"),
49-
("view --no-version -i 'ID == \"rs6054257\"'", "sample.vcf.gz"),
49+
# Filters based on strings disabled:
50+
# https://github.com/sgkit-dev/vcztools/issues/189
51+
# ("view --no-version -i 'ID == \"rs6054257\"'", "sample.vcf.gz"),
5052
("view --no-version -i 'INFO/DP > 10'", "sample.vcf.gz"),
5153
# Filters based on FMT values are currently disabled.
5254
# https://github.com/sgkit-dev/vcztools/issues/180

tests/test_filter.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,12 @@ def test_invalid_expressions(self, parser, expression):
3131
@pytest.mark.parametrize(
3232
("expression", "exception_class"),
3333
[
34-
('INFO/HAYSTACK ~ "needle"', filter_mod.UnsupportedRegexError),
34+
# NOTE: using an integer here so that we don't trigger the
35+
# generic string issue. Can fix this later when we've gotten
36+
# some partial string handling implemented
37+
("INFO/HAYSTACK ~ 0", filter_mod.UnsupportedRegexError),
38+
('CHROM="1"', filter_mod.UnsupportedStringsError),
39+
('FILTER="PASS"', filter_mod.UnsupportedStringsError),
3540
("INFO/X[0] == 1", filter_mod.UnsupportedArraySubscriptError),
3641
("INFO/AF[0] > 0.3", filter_mod.UnsupportedArraySubscriptError),
3742
("FORMAT/AD[0:0] > 30", filter_mod.UnsupportedArraySubscriptError),
@@ -135,7 +140,9 @@ def test_referenced_fields(self, expr, expected):
135140
("a + 1 + 2", "(variant_a)+(1)+(2)"),
136141
("a + (1 + 2)", "(variant_a)+((1)+(2))"),
137142
("POS<10", "(variant_position)<(10)"),
138-
('CHROM=="chr1"', "(variant_contig)==('chr1')"),
143+
# Filters based on strings disabled:
144+
# https://github.com/sgkit-dev/vcztools/issues/189
145+
# ('CHROM=="chr1"', "(variant_contig)==('chr1')"),
139146
],
140147
)
141148
def test_repr(self, expr, expected):
@@ -148,8 +155,6 @@ class TestBcftoolsParser:
148155
"expr",
149156
[
150157
"2",
151-
'"x"',
152-
'"INFO/STRING"',
153158
"2 + 2",
154159
"(2 + 3) / 2",
155160
"2 / (2 + 3)",
@@ -167,8 +172,12 @@ class TestBcftoolsParser:
167172
"1 + 2 == 1 + 2 + 3",
168173
"(1 + 2) == (1 + 2 + 3)",
169174
"(1 == 1) != (2 == 2)",
170-
'("x" == "x")',
171175
"-1 == 1 + 2 - 4",
176+
# Filters based on strings disabled:
177+
# https://github.com/sgkit-dev/vcztools/issues/189
178+
# '("x" == "x")',
179+
# '"x"',
180+
# '"INFO/STRING"',
172181
],
173182
)
174183
def test_python_arithmetic_expressions(self, expr):
@@ -177,6 +186,24 @@ def test_python_arithmetic_expressions(self, expr):
177186
result = parsed[0].eval({})
178187
assert result == eval(expr)
179188

189+
@pytest.mark.parametrize(
190+
("expr", "data"),
191+
[
192+
('("x" == "x")', {}),
193+
('"x"', {}),
194+
('"INFO/STRING"', {}),
195+
('a == "string"', {"a": "string"}),
196+
],
197+
)
198+
def test_python_string_expressions_data(self, expr, data):
199+
# Filters based on strings disabled:
200+
# https://github.com/sgkit-dev/vcztools/issues/189
201+
parser = filter_mod.make_bcftools_filter_parser()
202+
with pytest.raises(filter_mod.UnsupportedStringsError):
203+
parser.parse_string(expr, parse_all=True)
204+
# result = parsed[0].eval({})
205+
# assert result == eval(expr)
206+
180207
@pytest.mark.parametrize(
181208
("expr", "data"),
182209
[
@@ -189,7 +216,6 @@ def test_python_arithmetic_expressions(self, expr):
189216
("a == a", {"a": 1}),
190217
("-a == -a", {"a": 1}),
191218
("-a == b", {"a": 1, "b": -1}),
192-
('a == "string"', {"a": "string"}),
193219
],
194220
)
195221
def test_python_arithmetic_expressions_data(self, expr, data):

vcztools/filter.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ class UnsupportedArraySubscriptError(UnsupportedFilteringFeatureError):
3737
feature = "Array subscripts"
3838

3939

40+
class UnsupportedStringsError(UnsupportedFilteringFeatureError):
41+
issue = "189"
42+
feature = "String values currently not supported"
43+
44+
4045
# The parser and evaluation model here are based on the eval_arith example
4146
# in the pyparsing docs:
4247
# https://github.com/pyparsing/pyparsing/blob/master/examples/eval_arith.py
@@ -63,6 +68,15 @@ def eval(self, data):
6368
return self.tokens
6469

6570

71+
class Number(Constant):
72+
pass
73+
74+
75+
class String(Constant):
76+
def __init__(self, tokens):
77+
raise UnsupportedStringsError()
78+
79+
6680
class Identifier(EvaluationNode):
6781
def __init__(self, mapper, tokens):
6882
self.field_name = mapper(tokens[0])
@@ -189,9 +203,11 @@ def make_bcftools_filter_parser(all_fields=None, map_vcf_identifiers=True):
189203
if all_fields is None:
190204
all_fields = set()
191205

192-
constant = (pp.common.number | pp.QuotedString('"')).set_parse_action(Constant)
193-
identifier = pp.common.identifier()
206+
number = pp.common.number.set_parse_action(Number)
207+
string = pp.QuotedString('"').set_parse_action(String)
208+
constant = number | string
194209

210+
identifier = pp.common.identifier()
195211
vcf_prefixes = pp.Literal("INFO/") | pp.Literal("FORMAT/") | pp.Literal("FMT/")
196212
vcf_identifier = pp.Combine(vcf_prefixes + identifier) | identifier
197213

0 commit comments

Comments
 (0)