Skip to content

Commit dda712e

Browse files
Copilot and williambdean committed
Add support for boolean column filtering with is: and has: syntax
Co-authored-by: williambdean <57733339+williambdean@users.noreply.github.com>
1 parent 9b5ab7c commit dda712e

File tree

3 files changed

+53
-26
lines changed

3 files changed

+53
-26
lines changed

src/frame_search/search.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
Operator = Literal[":", ">", "<", ">=", "<="]
1010

11-
Value = Union[str, float, int, datetime]
11+
Value = Union[str, float, int, datetime, bool]
1212

1313

1414
@dataclass
@@ -39,20 +39,28 @@ def is_standalone(self) -> bool:
3939
def parse_query(query: str):
4040
# Regex to capture:
4141
# 1. Optional negation operator (NOT or -)
42-
# 2. key:operator:value (e.g., name:alice, age:">30", city:"New York")
43-
# - Group 2: key (\w+)
44-
# - Group 3: operator (:|>|<)
45-
# - Group 4: value (either "[^"]*" for quoted strings or \S+ for non-whitespace)
46-
# 3. standalone value (\S+)
47-
# - Group 5: standalone value
48-
pattern = r'(?:(NOT|-)\s*)?(\w+):("[^"]*"|\S+)|(?:(NOT|-)\s*)?(\S+)'
42+
# 2. is:column or has:column for boolean columns (must come before key:value)
43+
# - Group 2: is or has
44+
# - Group 3: column name
45+
# 3. key:operator:value (e.g., name:alice, age:">30", city:"New York")
46+
# - Group 5: key (\w+)
46+
# - Group 6: value (either "[^"]*" for quoted strings or \S+ for non-whitespace)
47+
# 4. standalone value (\S+)
48+
# - Group 8: standalone value
50+
pattern = r'(?:(NOT|-)\s*)?(is|has):(\w+)|(?:(NOT|-)\s*)?(\w+):("[^"]*"|\S+)|(?:(NOT|-)\s*)?(\S+)'
4951
return list(re.findall(pattern, query))
5052

5153

5254
def _parse_value(value: str) -> Value:
5355
if value.startswith('"') and value.endswith('"'):
5456
value = value[1:-1]
5557

58+
# Handle boolean values
59+
if value.lower() == "true":
60+
return True
61+
elif value.lower() == "false":
62+
return False
63+
5664
try:
5765
return datetime.fromisoformat(value)
5866
except ValueError:
@@ -84,10 +92,24 @@ def get_search_parts(query: str) -> list[SearchPart]:
8492

8593
search_parts = []
8694
for match in matches:
87-
negation, key, value, standalone_negation, standalone_value = match
88-
negated = bool(negation or standalone_negation)
89-
90-
if standalone_value: # This is the standalone value group
95+
(
96+
is_has_negation,
97+
is_has_keyword,
98+
is_has_column,
99+
negation,
100+
key,
101+
value,
102+
standalone_negation,
103+
standalone_value,
104+
) = match
105+
negated = bool(negation or standalone_negation or is_has_negation)
106+
107+
if is_has_keyword: # This is the is:column or has:column group
108+
# For is: and has:, we treat them as column:True
109+
search_parts.append(
110+
SearchPart(key=is_has_column, operator=":", value=True, negated=negated)
111+
)
112+
elif standalone_value: # This is the standalone value group
91113
search_parts.append(
92114
SearchPart(
93115
key=None, operator=None, value=standalone_value, negated=negated

tests/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ def search():
5959
"Hobby": nw.String,
6060
"City of Interest": nw.String,
6161
"First Visit": nw.Datetime,
62+
"seen_movie": nw.Boolean,
63+
"older_than_30": nw.Boolean,
6264
},
6365
),
6466
)

tests/test_search.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,29 +39,36 @@ def test_range_different_type_raises(lower, upper) -> None:
3939
"query, expected",
4040
[
4141
pytest.param("", [], id="empty-query"),
42-
pytest.param("bob", [("", "", "", "", "bob")], id="default"),
42+
pytest.param("bob", [("", "", "", "", "", "", "", "bob")], id="default"),
4343
pytest.param(
44-
"name:alice", [("", "name", "alice", "", "")], id="contains-match"
44+
"name:alice",
45+
[("", "", "", "", "name", "alice", "", "")],
46+
id="contains-match",
47+
),
48+
pytest.param("age:>=30", [("", "", "", "", "age", ">=30", "", "")], id="ge"),
49+
pytest.param("age:>30", [("", "", "", "", "age", ">30", "", "")], id="gt"),
50+
pytest.param("age:<30", [("", "", "", "", "age", "<30", "", "")], id="lt"),
51+
pytest.param(
52+
"age:35.5", [("", "", "", "", "age", "35.5", "", "")], id="exact-numeric"
4553
),
46-
pytest.param("age:>=30", [("", "age", ">=30", "", "")], id="ge"),
47-
pytest.param("age:>30", [("", "age", ">30", "", "")], id="gt"),
48-
pytest.param("age:<30", [("", "age", "<30", "", "")], id="lt"),
49-
pytest.param("age:35.5", [("", "age", "35.5", "", "")], id="exact-numeric"),
5054
pytest.param(
5155
"opening_date:<2023-01-01",
52-
[("", "opening_date", "<2023-01-01", "", "")],
56+
[("", "", "", "", "opening_date", "<2023-01-01", "", "")],
5357
id="date-lt",
5458
),
5559
pytest.param(
5660
"bob age:>30",
57-
[("", "", "", "", "bob"), ("", "age", ">30", "", "")],
61+
[
62+
("", "", "", "", "", "", "", "bob"),
63+
("", "", "", "", "age", ">30", "", ""),
64+
],
5865
id="mixed",
5966
),
6067
pytest.param(
6168
'hobby:reading city:"New York"',
6269
[
63-
("", "hobby", "reading", "", ""),
64-
("", "city", '"New York"', "", ""),
70+
("", "", "", "", "hobby", "reading", "", ""),
71+
("", "", "", "", "city", '"New York"', "", ""),
6572
],
6673
id="multiple-conditions",
6774
),
@@ -162,25 +169,21 @@ def test_get_search_parts(query, expected) -> None:
162169
"is:older_than_30",
163170
[2, 3],
164171
id="boolean-is",
165-
marks=pytest.mark.xfail,
166172
),
167173
pytest.param(
168174
"older_than_30:True",
169175
[2, 3],
170176
id="boolean-True",
171-
marks=pytest.mark.xfail,
172177
),
173178
pytest.param(
174179
"older_than_30:False",
175180
[0, 1],
176181
id="boolean-False",
177-
marks=pytest.mark.xfail,
178182
),
179183
pytest.param(
180184
"has:seen_movie",
181185
[0, 2],
182186
id="boolean-has",
183-
marks=pytest.mark.xfail,
184187
),
185188
],
186189
)

0 commit comments

Comments
 (0)