Skip to content

Commit fa44513

Browse files
Raise error for array subscripts
Matching full grammar
1 parent d90fab2 commit fa44513

File tree

2 files changed

+41
-14
lines changed

2 files changed

+41
-14
lines changed

tests/test_filter.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,26 @@ def test_invalid_expressions(self, parser, expression):
3131
@pytest.mark.parametrize(
3232
("expression", "exception_class"),
3333
[
34-
# ('INFO/HAYSTACK ~ "needle"', filter_mod.UnsupportedRegexError),
34+
('INFO/HAYSTACK ~ "needle"', filter_mod.UnsupportedRegexError),
3535
("INFO/X[0] == 1", filter_mod.UnsupportedArraySubscriptError),
36+
("INFO/AF[0] > 0.3", filter_mod.UnsupportedArraySubscriptError),
37+
("FORMAT/AD[0:0] > 30", filter_mod.UnsupportedArraySubscriptError),
38+
("DP4[*] == 0", filter_mod.UnsupportedArraySubscriptError),
39+
("FORMAT/DP[1-3] > 10", filter_mod.UnsupportedArraySubscriptError),
40+
("FORMAT/DP[1-] < 7", filter_mod.UnsupportedArraySubscriptError),
41+
("FORMAT/DP[0,2-4] > 20", filter_mod.UnsupportedArraySubscriptError),
42+
("FORMAT/AD[0:*]", filter_mod.UnsupportedArraySubscriptError),
43+
("FORMAT/AD[0:]", filter_mod.UnsupportedArraySubscriptError),
44+
("FORMAT/AD[*:1]", filter_mod.UnsupportedArraySubscriptError),
45+
(
46+
"(DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3",
47+
filter_mod.UnsupportedArraySubscriptError,
48+
),
3649
],
3750
)
3851
def test_unsupported_syntax(self, parser, expression, exception_class):
39-
# with pytest.raises(exception_class):
40-
parser.parse_string(expression, parse_all=True)
52+
with pytest.raises(exception_class):
53+
parser.parse_string(expression, parse_all=True)
4154

4255

4356
class TestFilterExpressionSample:

vcztools/filter.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,15 @@ def referenced_fields(self):
7979
return frozenset([self.field_name])
8080

8181

82+
class IndexedIdentifier(Identifier):
83+
def __init__(self, mapper, tokens):
84+
super().__init__(mapper, tokens[0])
85+
# Only literal integers are supported as indexes in bcftools
86+
# assert isinstance(self.index, str)
87+
self.index = tokens[0][1]
88+
raise UnsupportedArraySubscriptError()
89+
90+
8291
class RegexOperator(EvaluationNode):
8392
def __init__(self, tokens):
8493
raise UnsupportedRegexError()
@@ -186,26 +195,31 @@ def make_bcftools_filter_parser(all_fields=None, map_vcf_identifiers=True):
186195
vcf_prefixes = pp.Literal("INFO/") | pp.Literal("FORMAT/") | pp.Literal("FMT/")
187196
vcf_identifier = pp.Combine(vcf_prefixes + identifier) | identifier
188197

189-
# indexed_identifier = pp.Forward()
190-
# indexed_identifier <<= identifier + (
191-
# pp.Literal("[") + pp.common.integer + pp.Literal("]"))
192-
# fn_call = (ident + lpar - Group(expr_list) + rpar).setParseAction(
193-
# insert_fn_argcount_tuple
194-
# )
195-
196-
# lbrack, rbrack = map(pp.Suppress, "[]")
197-
# indexed_identifier = identifier + lbrack - pp.Group(pp.common.integer) + rbrack
198-
# print(indexed_identifier)
198+
lbracket, rbracket = map(pp.Suppress, "[]")
199+
# TODO we need to define the indexing grammar more carefully, but
200+
# this at least lets us match correct strings and raise an informative
201+
# error
202+
index_expr = pp.OneOrMore(
203+
pp.common.number
204+
| pp.Literal("*")
205+
| pp.Literal(":")
206+
| pp.Literal("-")
207+
| pp.Literal(",")
208+
)
209+
indexed_identifier = pp.Group(vcf_identifier + (lbracket + index_expr + rbracket))
199210

200211
name_mapper = _identity
201212
if map_vcf_identifiers:
202213
name_mapper = functools.partial(vcf_name_to_vcz_name, all_fields)
203214
identifier = vcf_identifier.set_parse_action(
204215
functools.partial(Identifier, name_mapper)
205216
)
217+
indexed_identifier = indexed_identifier.set_parse_action(
218+
functools.partial(IndexedIdentifier, name_mapper)
219+
)
206220
comp_op = pp.oneOf("< = == > >= <= !=")
207221
filter_expression = pp.infix_notation(
208-
constant | identifier,
222+
constant | indexed_identifier | identifier,
209223
[
210224
("-", 1, pp.OpAssoc.RIGHT, UnaryMinus),
211225
(pp.one_of("* /"), 2, pp.OpAssoc.LEFT, BinaryOperator),

0 commit comments

Comments
 (0)