Skip to content

Commit 5be74df

Browse files
committed
chore: implement StrExtractOp and StrRepeatOp
1 parent 32a6c74 commit 5be74df

File tree

4 files changed

+50
-0
lines changed

4 files changed

+50
-0
lines changed

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,13 @@ def _(op: ops.StrContainsRegexOp, expr: TypedExpr) -> sge.Expression:
182182
return sge.RegexpLike(this=expr.expr, expression=sge.convert(op.pat))
183183

184184

# Compiler for ops.StrExtractOp: builds a sqlglot RegexpExtract node over the
# operand expression, passing op.pat as the regex and op.n as the `group`
# argument (both converted to SQL literals via sge.convert).
# NOTE(review): the snapshot recorded in this commit for
# StrExtractOp(r"([a-z]*)", 1) renders as REGEXP_EXTRACT(`bfcol_0`, '([a-z]*)')
# with no group argument -- confirm that other values of op.n are rendered as
# intended for the target dialect.
185+
@UNARY_OP_REGISTRATION.register(ops.StrExtractOp)
186+
def _(op: ops.StrExtractOp, expr: TypedExpr) -> sge.Expression:
187+
return sge.RegexpExtract(
188+
this=expr.expr, expression=sge.convert(op.pat), group=sge.convert(op.n)
189+
)
190+
191+
185192
@UNARY_OP_REGISTRATION.register(ops.StrFindOp)
186193
def _(op: ops.StrFindOp, expr: TypedExpr) -> sge.Expression:
187194
# INSTR is 1-based, so we need to adjust the start position.
@@ -211,6 +218,10 @@ def _(op: ops.StrContainsOp, expr: TypedExpr) -> sge.Expression:
211218
return sge.Like(this=expr.expr, expression=sge.convert(f"%{op.pat}%"))
212219

213220

# Compiler for ops.StrRepeatOp: builds a sqlglot Repeat node with the operand
# expression as `this` and op.repeats (converted to a SQL literal) as `times`.
# The snapshot recorded in this commit for StrRepeatOp(2) renders it as
# REPEAT(`bfcol_0`, 2).
221+
@UNARY_OP_REGISTRATION.register(ops.StrRepeatOp)
222+
def _(op: ops.StrRepeatOp, expr: TypedExpr) -> sge.Expression:
223+
return sge.Repeat(this=expr.expr, times=sge.convert(op.repeats))
224+
# Compiler registered for ops.date_op: wraps the operand expression in a
# sqlglot Date node. The `op` argument itself is not read.
214225
@UNARY_OP_REGISTRATION.register(ops.date_op)
215226
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
216227
return sge.Date(this=expr.expr)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
REGEXP_EXTRACT(`bfcol_0`, '([a-z]*)') AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
REPEAT(`bfcol_0`, 2) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`

tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,19 @@ def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot):
524524
snapshot.assert_match(sql, "out.sql")
525525

526526

# Snapshot test for StrExtractOp: selects `string_col`, applies
# StrExtractOp(r"([a-z]*)", 1) to it through _apply_unary_op, and compares the
# generated SQL with the stored "out.sql" snapshot.
527+
def test_str_extract(scalar_types_df: bpd.DataFrame, snapshot):
528+
bf_df = scalar_types_df[["string_col"]]
529+
sql = _apply_unary_op(bf_df, ops.StrExtractOp(r"([a-z]*)", 1), "string_col")
530+
531+
snapshot.assert_match(sql, "out.sql")
532+
533+
# Snapshot test for StrRepeatOp: selects `string_col`, applies StrRepeatOp(2)
# to it through _apply_unary_op, and compares the generated SQL with the
# stored "out.sql" snapshot.
534+
def test_str_repeat(scalar_types_df: bpd.DataFrame, snapshot):
535+
bf_df = scalar_types_df[["string_col"]]
536+
sql = _apply_unary_op(bf_df, ops.StrRepeatOp(2), "string_col")
537+
snapshot.assert_match(sql, "out.sql")
538+
539+
527540
def test_str_find(scalar_types_df: bpd.DataFrame, snapshot):
528541
bf_df = scalar_types_df[["string_col"]]
529542
sql = _apply_unary_op(bf_df, ops.StrFindOp("e", start=None, end=None), "string_col")

0 commit comments

Comments
 (0)