str_pad

chelsea-lin · chelsea-lin · commit d30d5bb46f42 · 2025-08-21T18:44:14.000Z
diff --git a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py
@@ -449,6 +449,47 @@ def _(op: ops.StrLstripOp, expr: TypedExpr) -> sge.Expression:
     return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="LEFT")
 
 
+@UNARY_OP_REGISTRATION.register(ops.StrPadOp)
+def _(op: ops.StrPadOp, expr: TypedExpr) -> sge.Expression:
+    pad_length = sge.func(
+        "GREATEST", sge.Length(this=expr.expr), sge.convert(op.length)
+    )
+    if op.side == "left":
+        return sge.func(
+            "LPAD",
+            expr.expr,
+            pad_length,
+            sge.convert(op.fillchar),
+        )
+    elif op.side == "right":
+        return sge.func(
+            "RPAD",
+            expr.expr,
+            pad_length,
+            sge.convert(op.fillchar),
+        )
+    else:  # side == both
+        lpad_amount = sge.Cast(
+            this=sge.func(
+                "SAFE_DIVIDE",
+                sge.Sub(this=pad_length, expression=sge.Length(this=expr.expr)),
+                sge.convert(2),
+            ),
+            to="INT64",
+        ) + sge.Length(this=expr.expr)
+        return sge.func(
+            "RPAD",
+            sge.func(
+                "LPAD",
+                expr.expr,
+                lpad_amount,
+                sge.convert(op.fillchar),
+            ),
+            pad_length,
+            sge.convert(op.fillchar),
+        )
+
+
 @UNARY_OP_REGISTRATION.register(ops.neg_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Neg(this=expr.expr)
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad_both/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad_both/out.sql
@@ -0,0 +1,21 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    RPAD(
+      LPAD(
+        `bfcol_0`,
+        CAST(SAFE_DIVIDE(GREATEST(LENGTH(`bfcol_0`), 10) - LENGTH(`bfcol_0`), 2) AS INT64) + LENGTH(`bfcol_0`),
+        '-'
+      ),
+      GREATEST(LENGTH(`bfcol_0`), 10),
+      '-'
+    ) AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad_left/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad_left/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    LPAD(`bfcol_0`, GREATEST(LENGTH(`bfcol_0`), 10), '-') AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad_right/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad_right/out.sql
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    RPAD(`bfcol_0`, GREATEST(LENGTH(`bfcol_0`), 10), '-') AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
@@ -466,6 +466,33 @@ def test_str_get(scalar_types_df: bpd.DataFrame, snapshot):
     snapshot.assert_match(sql, "out.sql")
 
 
+def test_str_pad_left(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(
+        bf_df, ops.StrPadOp(length=10, fillchar="-", side="left"), "string_col"
+    )
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_str_pad_right(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(
+        bf_df, ops.StrPadOp(length=10, fillchar="-", side="right"), "string_col"
+    )
+
+    snapshot.assert_match(sql, "out.sql")
+
+
+def test_str_pad_both(scalar_types_df: bpd.DataFrame, snapshot):
+    bf_df = scalar_types_df[["string_col"]]
+    sql = _apply_unary_op(
+        bf_df, ops.StrPadOp(length=10, fillchar="-", side="both"), "string_col"
+    )
+
+    snapshot.assert_match(sql, "out.sql")
+
+
 def test_str_slice(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df = scalar_types_df[["string_col"]]
     sql = _apply_unary_op(bf_df, ops.StrSliceOp(1, 3), "string_col")