Skip to content

Commit d30d5bb

Browse files
committed
str_pad
1 parent 26df6e6 commit d30d5bb

File tree

5 files changed

+115
-0
lines changed

5 files changed

+115
-0
lines changed

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,47 @@ def _(op: ops.StrLstripOp, expr: TypedExpr) -> sge.Expression:
449449
return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="LEFT")
450450

451451

452+
@UNARY_OP_REGISTRATION.register(ops.StrPadOp)
def _(op: ops.StrPadOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``StrPadOp`` into ``LPAD``/``RPAD`` function calls.

    The pad width handed to SQL is ``GREATEST(LENGTH(expr), op.length)``, so
    it is never smaller than the length of the string being padded.

    Args:
        op: Padding operator; ``length``, ``fillchar`` and ``side`` are read.
        expr: Typed wrapper whose ``expr`` attribute is the string expression.

    Returns:
        ``LPAD(...)`` when ``op.side == "left"``, ``RPAD(...)`` when
        ``op.side == "right"``, and ``RPAD(LPAD(...), ...)`` for any other
        value of ``op.side`` (treated as "both").
    """
    target_width = sge.func(
        "GREATEST", sge.Length(this=expr.expr), sge.convert(op.length)
    )

    # One-sided padding differs only in which SQL function is called.
    if op.side in ("left", "right"):
        pad_function = "LPAD" if op.side == "left" else "RPAD"
        return sge.func(
            pad_function,
            expr.expr,
            target_width,
            sge.convert(op.fillchar),
        )

    # side == both: pad the left up to (half of the missing width + current
    # length), then let RPAD fill whatever remains up to the target width.
    missing_width = sge.Sub(
        this=target_width, expression=sge.Length(this=expr.expr)
    )
    # NOTE(review): SAFE_DIVIDE yields a floating-point quotient and the
    # snapshot targets BigQuery, where CAST(... AS INT64) presumably rounds
    # halves away from zero -- so an odd missing width would put the extra
    # fill character on the left. Confirm this is the intended split.
    half_missing = sge.Cast(
        this=sge.func("SAFE_DIVIDE", missing_width, sge.convert(2)),
        to="INT64",
    )
    left_width = half_missing + sge.Length(this=expr.expr)
    left_padded = sge.func(
        "LPAD",
        expr.expr,
        left_width,
        sge.convert(op.fillchar),
    )
    return sge.func(
        "RPAD",
        left_padded,
        target_width,
        sge.convert(op.fillchar),
    )
491+
492+
452493
@UNARY_OP_REGISTRATION.register(ops.neg_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``neg_op`` by wrapping the operand in an ``sge.Neg`` node."""
    operand = expr.expr
    return sge.Neg(this=operand)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
-- Snapshot query: pads `string_col` on both sides with '-'.
-- LPAD first extends the value by half of the missing width (the SAFE_DIVIDE
-- result cast to INT64) on top of its current length; RPAD then fills the
-- remainder up to GREATEST(LENGTH(value), 10).
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
RPAD(
9+
LPAD(
10+
`bfcol_0`,
11+
CAST(SAFE_DIVIDE(GREATEST(LENGTH(`bfcol_0`), 10) - LENGTH(`bfcol_0`), 2) AS INT64) + LENGTH(`bfcol_0`),
12+
'-'
13+
),
14+
GREATEST(LENGTH(`bfcol_0`), 10),
15+
'-'
16+
) AS `bfcol_1`
17+
FROM `bfcte_0`
18+
)
19+
SELECT
20+
`bfcol_1` AS `string_col`
21+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
-- Snapshot query: left-pads `string_col` with '-' via LPAD, using a width of
-- GREATEST(LENGTH(value), 10) so the width is never below the value's length.
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
LPAD(`bfcol_0`, GREATEST(LENGTH(`bfcol_0`), 10), '-') AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
-- Snapshot query: right-pads `string_col` with '-' via RPAD, using a width of
-- GREATEST(LENGTH(value), 10) so the width is never below the value's length.
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
RPAD(`bfcol_0`, GREATEST(LENGTH(`bfcol_0`), 10), '-') AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`

tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,33 @@ def test_str_get(scalar_types_df: bpd.DataFrame, snapshot):
466466
snapshot.assert_match(sql, "out.sql")
467467

468468

469+
def test_str_pad_left(scalar_types_df: bpd.DataFrame, snapshot):
    """Check the ``side="left"`` ``StrPadOp`` output against ``out.sql``.

    The op (length 10, fill character "-") is applied to ``string_col`` via
    ``_apply_unary_op`` and the result is matched against the snapshot.
    """
    string_frame = scalar_types_df[["string_col"]]
    pad_op = ops.StrPadOp(length=10, fillchar="-", side="left")
    compiled = _apply_unary_op(string_frame, pad_op, "string_col")

    snapshot.assert_match(compiled, "out.sql")
476+
477+
478+
def test_str_pad_right(scalar_types_df: bpd.DataFrame, snapshot):
    """Check the ``side="right"`` ``StrPadOp`` output against ``out.sql``.

    The op (length 10, fill character "-") is applied to ``string_col`` via
    ``_apply_unary_op`` and the result is matched against the snapshot.
    """
    string_frame = scalar_types_df[["string_col"]]
    pad_op = ops.StrPadOp(length=10, fillchar="-", side="right")
    compiled = _apply_unary_op(string_frame, pad_op, "string_col")

    snapshot.assert_match(compiled, "out.sql")
485+
486+
487+
def test_str_pad_both(scalar_types_df: bpd.DataFrame, snapshot):
    """Check the ``side="both"`` ``StrPadOp`` output against ``out.sql``.

    The op (length 10, fill character "-") is applied to ``string_col`` via
    ``_apply_unary_op`` and the result is matched against the snapshot.
    """
    string_frame = scalar_types_df[["string_col"]]
    pad_op = ops.StrPadOp(length=10, fillchar="-", side="both")
    compiled = _apply_unary_op(string_frame, pad_op, "string_col")

    snapshot.assert_match(compiled, "out.sql")
494+
495+
469496
def test_str_slice(scalar_types_df: bpd.DataFrame, snapshot):
470497
bf_df = scalar_types_df[["string_col"]]
471498
sql = _apply_unary_op(bf_df, ops.StrSliceOp(1, 3), "string_col")

0 commit comments

Comments
 (0)