Skip to content

Commit 8b43eb8

Browse files
authored
Add between operator (#2335)
<!-- Thanks for opening a pull request! --> <!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. --> <!-- Closes #${GITHUB_ISSUE_ID} --> # Rationale for this change Fixes #792 # Are these changes tested? Yes: `test_valid_between` and `test_invalid_between` are added to `tests/expressions/test_parser.py`. # Are there any user-facing changes? Adds the `BETWEEN` operator to the row-filter syntax. <!-- In the case of user-facing changes, please add the changelog label. -->
1 parent f8ccd82 commit 8b43eb8

File tree

4 files changed

+50
-2
lines changed

4 files changed

+50
-2
lines changed

mkdocs/docs/expression-dsl.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ age_greater_than_18 = GreaterThan("age", 18)
6060

6161
# Greater than or equal to
6262
age_greater_than_or_equal_18 = GreaterThanOrEqual("age", 18)
63+
64+
6365
```
6466

6567
#### Set Predicates

mkdocs/docs/row-filter-syntax.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,15 @@ column NOT LIKE 'prefix%'
100100
!!! important
101101
The `%` wildcard is only supported at the end of the pattern. Using it in the middle or beginning of the pattern will raise an error.
102102

103+
## BETWEEN
104+
105+
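The `BETWEEN` operator filters a numeric column against an inclusive range: `a BETWEEN 1 AND 2` is equivalent to `a >= 1 AND a <= 2`. Both bounds must be numeric literals (integer or decimal); string or boolean bounds raise a parse error. The keywords are case-insensitive.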
106+
107+
```sql
108+
column BETWEEN 1 AND 2
109+
column BETWEEN 1.0 AND 2.0
110+
```
111+
103112
## Logical Operations
104113

105114
Combine multiple conditions using logical operators:

pyiceberg/expressions/parser.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
NULL = CaselessKeyword("null")
8080
NAN = CaselessKeyword("nan")
8181
LIKE = CaselessKeyword("like")
82+
BETWEEN = CaselessKeyword("between")
8283

8384
unquoted_identifier = Word(alphas + "_", alphanums + "_$")
8485
quoted_identifier = QuotedString('"', escChar="\\", unquoteResults=True)
@@ -106,6 +107,7 @@ def _(result: ParseResults) -> Reference:
106107
string = sgl_quoted_string.set_results_name("raw_quoted_string")
107108
decimal = common.real().set_results_name("decimal")
108109
integer = common.signed_integer().set_results_name("integer")
110+
number = common.number().set_results_name("number")
109111
literal = Group(string | decimal | integer | boolean).set_results_name("literal")
110112
literal_set = Group(
111113
DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer) | DelimitedList(boolean)
@@ -149,6 +151,12 @@ def _(result: ParseResults) -> Literal[L]:
149151
left_ref = column + comparison_op + literal
150152
right_ref = literal + comparison_op + column
151153
comparison = left_ref | right_ref
154+
between = column + BETWEEN + number + AND + number
155+
156+
157+
@between.set_parse_action
158+
def _(result: ParseResults) -> BooleanExpression:
159+
return And(GreaterThanOrEqual(result.column, result[2]), LessThanOrEqual(result.column, result[4]))
152160

153161

154162
@left_ref.set_parse_action
@@ -258,7 +266,7 @@ def _evaluate_like_statement(result: ParseResults) -> BooleanExpression:
258266
return EqualTo(result.column, StringLiteral(literal_like.value.replace("\\%", "%")))
259267

260268

261-
predicate = (comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate")
269+
predicate = (between | comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate")
262270

263271

264272
def handle_not(result: ParseResults) -> Not:

tests/expressions/test_parser.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,10 @@
3939
NotNull,
4040
NotStartsWith,
4141
Or,
42+
Reference,
4243
StartsWith,
4344
)
44-
from pyiceberg.expressions.literals import DecimalLiteral
45+
from pyiceberg.expressions.literals import DecimalLiteral, LongLiteral
4546

4647

4748
def test_always_true() -> None:
@@ -238,3 +239,31 @@ def test_quoted_column_with_dots() -> None:
238239

239240
def test_quoted_column_with_spaces() -> None:
240241
assert EqualTo("Foo Bar", "data") == parser.parse("\"Foo Bar\" = 'data'")
242+
243+
244+
def test_valid_between() -> None:
245+
assert And(
246+
left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)),
247+
right=LessThanOrEqual(Reference(name="foo"), LongLiteral(3)),
248+
) == parser.parse("foo between 1 and 3")
249+
assert And(
250+
left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)),
251+
right=LessThanOrEqual(Reference(name="foo"), LongLiteral(1)),
252+
) == parser.parse("foo between 1 and 1")
253+
assert And(
254+
left=GreaterThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(1.0))),
255+
right=LessThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(4.0))),
256+
) == parser.parse("foo between 1.0 and 4.0")
257+
assert parser.parse("foo between 1 and 3") == parser.parse("1 <= foo and foo <= 3")
258+
259+
260+
def test_invalid_between() -> None:
261+
# boolean
262+
with pytest.raises(ParseException) as exc_info:
263+
parser.parse("foo between true and false")
264+
assert "Expected number, found 'true'" in str(exc_info)
265+
266+
# string
267+
with pytest.raises(ParseException) as exc_info:
268+
parser.parse("foo between 'a' and 'b'")
269+
assert 'Expected number, found "\'"' in str(exc_info)

0 commit comments

Comments
 (0)