Skip to content

Commit a09de69

Browse files
danielcweeks and Fokko committed
Update like statements to reflect sql behaviors (#91)
* Update like statements to reflect sql behaviors * Codestyle * Codestyle * Handle NotStartsWith * Update pyiceberg/expressions/parser.py Co-authored-by: Fokko Driesprong <[email protected]> * Update tests/expressions/test_parser.py Co-authored-by: Fokko Driesprong <[email protected]> --------- Co-authored-by: Fokko Driesprong <[email protected]>
1 parent 1b186d6 commit a09de69

File tree

2 files changed

+38
-5
lines changed

2 files changed

+38
-5
lines changed

pyiceberg/expressions/parser.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
import re
1718
from decimal import Decimal
1819

1920
from pyparsing import (
@@ -51,7 +52,6 @@
5152
NotIn,
5253
NotNaN,
5354
NotNull,
54-
NotStartsWith,
5555
Or,
5656
Reference,
5757
StartsWith,
@@ -78,6 +78,8 @@
7878
identifier = Word(alphas, alphanums + "_$").set_results_name("identifier")
7979
column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column")
8080

81+
# Classifies the first '%' in a LIKE pattern that is not preceded by a backslash:
# "valid_wildcard" when it is the very last character of the string,
# "invalid_wildcard" when it appears anywhere else.
# `\Z` is used rather than `$` because `$` also matches just before a trailing
# newline, which would let 'data%\n' be treated as ending in a wildcard.
like_regex = r'(?P<valid_wildcard>(?<!\\)%\Z)|(?P<invalid_wildcard>(?<!\\)%)'
82+
8183

8284
@column.set_parse_action
8385
def _(result: ParseResults) -> Reference:
@@ -217,12 +219,25 @@ def _(result: ParseResults) -> BooleanExpression:
217219

218220
@starts_with.set_parse_action
219221
def _(result: ParseResults) -> BooleanExpression:
220-
return StartsWith(result.column, result.raw_quoted_string)
222+
return _evaluate_like_statement(result)
221223

222224

223225
@not_starts_with.set_parse_action
224226
def _(result: ParseResults) -> BooleanExpression:
225-
return NotStartsWith(result.column, result.raw_quoted_string)
227+
return ~_evaluate_like_statement(result)
228+
229+
230+
def _evaluate_like_statement(result: ParseResults) -> BooleanExpression:
    """Convert a parsed LIKE predicate into a StartsWith or EqualTo expression.

    The quoted pattern is searched with ``like_regex``:

    * a wildcard flagged as invalid raises ``ValueError``;
    * a wildcard flagged as valid yields ``StartsWith`` on the pattern with its
      last character dropped;
    * otherwise the pattern yields ``EqualTo``.

    In the returned literal every backslash-escaped percent sign is replaced
    by a plain percent sign.
    """
    pattern: StringLiteral = result.raw_quoted_string
    text = pattern.value
    found = re.search(like_regex, text)

    if found is not None:
        groups = found.groupdict()
        if groups['invalid_wildcard']:
            raise ValueError("LIKE expressions only supports wildcard, '%', at the end of a string")
        if groups['valid_wildcard']:
            # Drop the last character (the wildcard) before unescaping.
            prefix = text[:-1].replace('\\%', '%')
            return StartsWith(result.column, StringLiteral(prefix))

    # No wildcard matched: the whole pattern is an exact value.
    exact = text.replace('\\%', '%')
    return EqualTo(result.column, StringLiteral(exact))
226241

227242

228243
predicate = (comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate")

tests/expressions/test_parser.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,12 +168,30 @@ def test_multiple_and_or() -> None:
168168
) == parser.parse("foo is not null and foo < 5 or (foo > 10 and foo < 100 and bar is null)")
169169

170170

171+
def test_like_equality() -> None:
    """LIKE patterns without a trailing wildcard parse to an equality predicate."""
    # No percent sign at all: the pattern is used verbatim.
    expected_plain = EqualTo("foo", "data")
    assert expected_plain == parser.parse("foo LIKE 'data'")

    # A backslash-escaped percent sign is a literal '%', not a wildcard.
    expected_escaped = EqualTo("foo", "data%")
    assert expected_escaped == parser.parse("foo LIKE 'data\\%'")
174+
175+
171176
def test_starts_with() -> None:
172-
assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data'")
177+
assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data%'")
178+
assert StartsWith("foo", "some % data") == parser.parse("foo LIKE 'some \\% data%'")
179+
assert StartsWith("foo", "some data%") == parser.parse("foo LIKE 'some data\\%%'")
180+
181+
182+
def test_invalid_likes() -> None:
    """LIKE patterns with a '%' anywhere but the end must raise ValueError."""
    invalid_statements = ["foo LIKE '%data%'", "foo LIKE 'da%ta'", "foo LIKE '%data'"]

    for statement in invalid_statements:
        with pytest.raises(ValueError) as exc_info:
            parser.parse(statement)

        # Check the raised exception's own message via `.value`; `exc_info` is
        # pytest's ExceptionInfo wrapper, whose string form is not the message.
        assert "LIKE expressions only supports wildcard, '%', at the end of a string" in str(exc_info.value)
173190

174191

175192
def test_not_starts_with() -> None:
176-
assert NotStartsWith("foo", "data") == parser.parse("foo NOT LIKE 'data'")
193+
assert NotEqualTo("foo", "data") == parser.parse("foo NOT LIKE 'data'")
194+
assert NotStartsWith("foo", "data") == parser.parse("foo NOT LIKE 'data%'")
177195

178196

179197
def test_with_function() -> None:

0 commit comments

Comments
 (0)