From 53ae6c9cd70ebffd79eac35e0ff60c537d486f16 Mon Sep 17 00:00:00 2001 From: Jesse Tuglu Date: Fri, 15 Aug 2025 19:19:34 -0700 Subject: [PATCH 1/4] Add between keyword --- pyiceberg/expressions/parser.py | 12 +++++++++++- tests/expressions/test_parser.py | 20 ++++++++++++++++++-- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py index b9b6f9aba7..1c2586672b 100644 --- a/pyiceberg/expressions/parser.py +++ b/pyiceberg/expressions/parser.py @@ -79,6 +79,7 @@ NULL = CaselessKeyword("null") NAN = CaselessKeyword("nan") LIKE = CaselessKeyword("like") +BETWEEN = CaselessKeyword("between") unquoted_identifier = Word(alphas + "_", alphanums + "_$") quoted_identifier = QuotedString('"', escChar="\\", unquoteResults=True) @@ -106,6 +107,7 @@ def _(result: ParseResults) -> Reference: string = sgl_quoted_string.set_results_name("raw_quoted_string") decimal = common.real().set_results_name("decimal") integer = common.signed_integer().set_results_name("integer") +number = common.number().set_results_name("number") literal = Group(string | decimal | integer | boolean).set_results_name("literal") literal_set = Group( DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer) | DelimitedList(boolean) @@ -149,8 +151,16 @@ def _(result: ParseResults) -> Literal[L]: left_ref = column + comparison_op + literal right_ref = literal + comparison_op + column comparison = left_ref | right_ref +between = column + BETWEEN + number + AND + number +@between.set_parse_action +def _(result: ParseResults) -> BooleanExpression: + return And( + GreaterThanOrEqual(result.column, result[2]), + LessThanOrEqual(result.column, result[4]) + ) + @left_ref.set_parse_action def _(result: ParseResults) -> BooleanExpression: if result.op == "<": @@ -258,7 +268,7 @@ def _evaluate_like_statement(result: ParseResults) -> BooleanExpression: return EqualTo(result.column, StringLiteral(literal_like.value.replace("\\%", "%"))) 
-predicate = (comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate") +predicate = (between | comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate") def handle_not(result: ParseResults) -> Not: diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py index 064fdb8f68..85c475c28e 100644 --- a/tests/expressions/test_parser.py +++ b/tests/expressions/test_parser.py @@ -39,9 +39,9 @@ NotNull, NotStartsWith, Or, - StartsWith, + StartsWith, Reference, ) -from pyiceberg.expressions.literals import DecimalLiteral +from pyiceberg.expressions.literals import DecimalLiteral, LongLiteral def test_always_true() -> None: @@ -238,3 +238,19 @@ def test_quoted_column_with_dots() -> None: def test_quoted_column_with_spaces() -> None: assert EqualTo("Foo Bar", "data") == parser.parse("\"Foo Bar\" = 'data'") + +def test_valid_between() -> None: + assert And(left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)), right=LessThanOrEqual(Reference(name="foo"), LongLiteral(3))) == parser.parse("foo between 1 and 3") + assert And(left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)), right=LessThanOrEqual(Reference(name="foo"), LongLiteral(1))) == parser.parse("foo between 1 and 1") + assert And(left=GreaterThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(1.0))), right=LessThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(4.0)))) == parser.parse("foo between 1.0 and 4.0") + +def test_invalid_between() -> None: + # boolean + with pytest.raises(ParseException) as exc_info: + parser.parse("foo between true and false") + assert "Expected number, found 'true'" in str(exc_info) + + # string + with pytest.raises(ParseException) as exc_info: + parser.parse("foo between 'a' and 'b'") + assert "Expected number, found \"\'\"" in str(exc_info) From 376ca2b13fc8a4f12dec0b8d7312550aed14c36e Mon Sep 17 00:00:00 2001 From: Jesse Tuglu Date: Sat, 16 Aug 
2025 14:36:54 -0700 Subject: [PATCH 2/4] Fix formatting and add extra test --- pyiceberg/expressions/parser.py | 6 ++---- tests/expressions/test_parser.py | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py index 1c2586672b..1966363829 100644 --- a/pyiceberg/expressions/parser.py +++ b/pyiceberg/expressions/parser.py @@ -156,10 +156,8 @@ def _(result: ParseResults) -> Literal[L]: @between.set_parse_action def _(result: ParseResults) -> BooleanExpression: - return And( - GreaterThanOrEqual(result.column, result[2]), - LessThanOrEqual(result.column, result[4]) - ) + return And(GreaterThanOrEqual(result.column, result[2]), LessThanOrEqual(result.column, result[4])) + @left_ref.set_parse_action def _(result: ParseResults) -> BooleanExpression: diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py index 85c475c28e..1988c340a6 100644 --- a/tests/expressions/test_parser.py +++ b/tests/expressions/test_parser.py @@ -39,7 +39,8 @@ NotNull, NotStartsWith, Or, - StartsWith, Reference, + Reference, + StartsWith, ) from pyiceberg.expressions.literals import DecimalLiteral, LongLiteral @@ -239,10 +240,22 @@ def test_quoted_column_with_dots() -> None: def test_quoted_column_with_spaces() -> None: assert EqualTo("Foo Bar", "data") == parser.parse("\"Foo Bar\" = 'data'") + def test_valid_between() -> None: - assert And(left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)), right=LessThanOrEqual(Reference(name="foo"), LongLiteral(3))) == parser.parse("foo between 1 and 3") - assert And(left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)), right=LessThanOrEqual(Reference(name="foo"), LongLiteral(1))) == parser.parse("foo between 1 and 1") - assert And(left=GreaterThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(1.0))), right=LessThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(4.0)))) == parser.parse("foo between 1.0 
and 4.0") + assert And( + left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)), + right=LessThanOrEqual(Reference(name="foo"), LongLiteral(3)), + ) == parser.parse("foo between 1 and 3") + assert And( + left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)), + right=LessThanOrEqual(Reference(name="foo"), LongLiteral(1)), + ) == parser.parse("foo between 1 and 1") + assert And( + left=GreaterThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(1.0))), + right=LessThanOrEqual(Reference(name="foo"), DecimalLiteral(Decimal(4.0))), + ) == parser.parse("foo between 1.0 and 4.0") + assert parser.parse("foo between 1 and 3") == parser.parse("1 <= foo and foo <= 3") + def test_invalid_between() -> None: # boolean @@ -253,4 +266,4 @@ def test_invalid_between() -> None: # string with pytest.raises(ParseException) as exc_info: parser.parse("foo between 'a' and 'b'") - assert "Expected number, found \"\'\"" in str(exc_info) + assert 'Expected number, found "\'"' in str(exc_info) From 8bc8c763f2fa85c34cc3a54647978f52aba1b040 Mon Sep 17 00:00:00 2001 From: Jesse Tuglu Date: Sun, 17 Aug 2025 15:15:52 -0700 Subject: [PATCH 3/4] Fix weird markdown errors --- mkdocs/docs/api.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 89d5692d0b..0e0dc375de 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -1031,6 +1031,7 @@ Expert Iceberg users may choose to commit existing parquet files to the Iceberg ### Example Add files to Iceberg table: + ```python # Given that these parquet files have schema consistent with the Iceberg table @@ -1047,6 +1048,7 @@ tbl.add_files(file_paths=file_paths) ``` Add files to Iceberg table with custom snapshot properties: + ```python # Assume an existing Iceberg table object `tbl` From 82239368d6e17402e58b068296d8d4d6bcbf8991 Mon Sep 17 00:00:00 2001 From: Jesse Tuglu Date: Mon, 18 Aug 2025 08:30:04 -0700 Subject: [PATCH 4/4] Add docs --- mkdocs/docs/expression-dsl.md | 2 ++ 
mkdocs/docs/row-filter-syntax.md | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/mkdocs/docs/expression-dsl.md b/mkdocs/docs/expression-dsl.md index cf5784dd2e..420043bd55 100644 --- a/mkdocs/docs/expression-dsl.md +++ b/mkdocs/docs/expression-dsl.md @@ -60,6 +60,8 @@ age_greater_than_18 = GreaterThan("age", 18) # Greater than or equal to age_greater_than_or_equal_18 = GreaterThanOrEqual("age", 18) + + ``` #### Set Predicates diff --git a/mkdocs/docs/row-filter-syntax.md b/mkdocs/docs/row-filter-syntax.md index 2191b9fd4a..ce3b46c092 100644 --- a/mkdocs/docs/row-filter-syntax.md +++ b/mkdocs/docs/row-filter-syntax.md @@ -100,6 +100,15 @@ column NOT LIKE 'prefix%' !!! important The `%` wildcard is only supported at the end of the pattern. Using it in the middle or beginning of the pattern will raise an error. +## BETWEEN + +The BETWEEN operator filters a column against an inclusive range with numeric bounds. For example, `a between 1 and 2` is equivalent to `a >= 1 and a <= 2`. Non-numeric bounds, such as strings or booleans, raise a parse error. + +```sql +column BETWEEN 1 AND 2 +column BETWEEN 1.0 AND 2.0 +``` + ## Logical Operations Combine multiple conditions using logical operators: