Skip to content

Commit 5d53c56

Browse files
committed
str_find
1 parent d30d5bb commit 5d53c56

File tree

6 files changed

+90
-0
lines changed

6 files changed

+90
-0
lines changed

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,29 @@ def _(op: ops.StrContainsRegexOp, expr: TypedExpr) -> sge.Expression:
182182
return sge.RegexpLike(this=expr.expr, expression=sge.convert(op.pat))
183183

184184

185+
@UNARY_OP_REGISTRATION.register(ops.StrFindOp)
def _(op: ops.StrFindOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``StrFindOp`` to a 0-based ``INSTR`` lookup.

    The result follows ``str.find`` semantics: the 0-based index of the first
    occurrence of ``op.substr`` inside ``[op.start, op.end)`` measured from the
    beginning of the *full* string, or ``-1`` when there is no match.

    Args:
        op: The find operation; reads ``op.substr``, ``op.start`` and ``op.end``.
        expr: The typed string expression to search in.

    Returns:
        A sqlglot expression evaluating to the 0-based match position or -1.
    """
    # NOTE(review): negative start/end (offsets relative to the end of the
    # string in Python) are not translated here -- confirm callers never pass
    # them.
    offset = op.start or 0
    # INSTR and SUBSTRING are 1-based. Always pass an explicit position: a
    # Python ``None`` is not a valid sqlglot function argument, and an omitted
    # SUBSTRING start would shift ``length`` into the start slot.
    position = sge.convert(offset + 1)
    needle = sge.convert(op.substr)

    if op.end is None:
        # INSTR yields 0 when the substring is missing, so "- 1" maps that to
        # -1 and every hit to its 0-based index.
        return sge.func("INSTR", expr.expr, needle, position) - sge.convert(1)

    # BigQuery's INSTR doesn't support `end`, so search a SUBSTRING window
    # covering [start, end) instead.
    window = sge.Substring(
        this=expr.expr,
        start=position,
        length=sge.convert(op.end - offset),
    )
    found = sge.func("INSTR", window, needle)

    if offset == 0:
        # The window starts at the beginning of the string, so positions in
        # the window are already positions in the full string.
        return found - sge.convert(1)

    # The match position is relative to the window: shift hits back by the
    # window start, but keep "not found" (INSTR == 0) as -1.
    return sge.If(
        this=sge.EQ(this=found, expression=sge.convert(0)),
        true=sge.convert(-1),
        false=found.copy() + sge.convert(offset - 1),
    )
206+
207+
185208
@UNARY_OP_REGISTRATION.register(ops.StrContainsOp)
def _(op: ops.StrContainsOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``StrContainsOp`` to a ``LIKE`` test against ``%<pat>%``."""
    # NOTE(review): ``op.pat`` is embedded verbatim, so any ``%`` or ``_`` in
    # it presumably acts as a LIKE wildcard -- confirm whether that is intended.
    wildcard_pattern = sge.convert(f"%{op.pat}%")
    return sge.Like(this=expr.expr, expression=wildcard_pattern)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
INSTR(`bfcol_0`, 'e', 1) - 1 AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
INSTR(SUBSTRING(`bfcol_0`, 1, 5), 'e') - 1 AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
INSTR(`bfcol_0`, 'e', 3) - 1 AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
INSTR(SUBSTRING(`bfcol_0`, 3, 3), 'e') - 1 AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`

tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,21 @@ def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot):
533533
snapshot.assert_match(sql, "out.sql")
534534

535535

536+
def test_str_find(scalar_types_df: bpd.DataFrame, snapshot):
    """Snapshot the SQL compiled for ``StrFindOp`` across start/end combinations."""
    string_frame = scalar_types_df[["string_col"]]
    # (start, end, snapshot file), asserted in this order.
    cases = (
        (0, None, "out.sql"),
        (2, None, "out_with_start.sql"),
        (0, 5, "out_with_end.sql"),
        (2, 5, "out_with_start_and_end.sql"),
    )
    for start, end, snapshot_name in cases:
        find_op = ops.StrFindOp("e", start=start, end=end)
        compiled_sql = _apply_unary_op(string_frame, find_op, "string_col")
        snapshot.assert_match(compiled_sql, snapshot_name)
549+
550+
536551
def test_strip(scalar_types_df: bpd.DataFrame, snapshot):
537552
bf_df = scalar_types_df[["string_col"]]
538553
sql = _apply_unary_op(bf_df, ops.StrStripOp(" "), "string_col")

0 commit comments

Comments
 (0)