googleapis
diff --git a/‎bigframes/core/compile/sqlglot/expressions/unary_compiler.py
Lines changed: 90 additions & 0 deletions b/‎bigframes/core/compile/sqlglot/expressions/unary_compiler.py
Lines changed: 90 additions & 0 deletions
diff --git a/‎bigframes/dataframe.py
Lines changed: 16 additions & 11 deletions b/‎bigframes/dataframe.py
Lines changed: 16 additions & 11 deletions
diff --git a/‎tests/system/large/functions/test_managed_function.py
Lines changed: 32 additions & 6 deletions b/‎tests/system/large/functions/test_managed_function.py
Lines changed: 32 additions & 6 deletions
diff --git a/‎tests/system/large/functions/test_remote_function.py
Lines changed: 21 additions & 4 deletions b/‎tests/system/large/functions/test_remote_function.py
Lines changed: 21 additions & 4 deletions
diff --git a/‎tests/system/small/test_dataframe.py
Lines changed: 12 additions & 0 deletions b/‎tests/system/small/test_dataframe.py
Lines changed: 12 additions & 0 deletions
diff --git a/‎tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/multiple_patterns.sql
Lines changed: 13 additions & 0 deletions b/‎tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/multiple_patterns.sql
Lines changed: 13 additions & 0 deletions
diff --git a/‎tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/no_pattern.sql
Lines changed: 13 additions & 0 deletions b/‎tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/no_pattern.sql
Lines changed: 13 additions & 0 deletions
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+import functools
 import typing
 
 import pandas as pd
@@ -292,6 +293,18 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Extract(this=sge.Identifier(this="DAYOFYEAR"), expression=expr.expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.EndsWithOp)
+def _(op: ops.EndsWithOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``EndsWithOp`` into an OR-chain of ``ENDS_WITH`` calls.
+
+    One ``ENDS_WITH(<operand>, <pattern>)`` call is built per entry of
+    ``op.pat``; when ``op.pat`` is empty the boolean literal false is returned.
+    """
+    suffixes = op.pat
+    if not suffixes:
+        return sge.false()
+
+    def either(lhs: sge.Expression, rhs: sge.Expression) -> sge.Expression:
+        return sge.Or(this=lhs, expression=rhs)
+
+    suffix_checks = [
+        sge.func("ENDS_WITH", expr.expr, sge.convert(suffix)) for suffix in suffixes
+    ]
+    # Left fold: ((c1 OR c2) OR c3) ...
+    return functools.reduce(either, suffix_checks)
+
+
 @UNARY_OP_REGISTRATION.register(ops.exp_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Case(
@@ -344,6 +357,27 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.func("ST_BOUNDARY", expr.expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.GeoStBufferOp)
+def _(op: ops.GeoStBufferOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``GeoStBufferOp`` into a positional ``ST_BUFFER`` call."""
+    # Positional order after the operand: buffer radius, segments per quarter
+    # circle, spheroid flag.
+    literal_args = [
+        sge.convert(setting)
+        for setting in (op.buffer_radius, op.num_seg_quarter_circle, op.use_spheroid)
+    ]
+    return sge.func("ST_BUFFER", expr.expr, *literal_args)
+
+
+@UNARY_OP_REGISTRATION.register(ops.geo_st_centroid_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    """Compile the centroid op into an ``ST_CENTROID`` call on the operand."""
+    operand = expr.expr
+    return sge.func("ST_CENTROID", operand)
+
+
+@UNARY_OP_REGISTRATION.register(ops.geo_st_convexhull_op)
+def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    """Compile the convex-hull op into an ``ST_CONVEXHULL`` call on the operand."""
+    operand = expr.expr
+    return sge.func("ST_CONVEXHULL", operand)
+
+
 @UNARY_OP_REGISTRATION.register(ops.geo_st_geogfromtext_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    # Builds a call to the function named "SAFE.ST_GEOGFROMTEXT" with the
+    # operand as its only argument; ``op`` itself is not consulted.
     return sge.func("SAFE.ST_GEOGFROMTEXT", expr.expr)
@@ -516,6 +550,17 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     return sge.Lower(this=expr.expr)
 
 
+@UNARY_OP_REGISTRATION.register(ops.MapOp)
+def _(op: ops.MapOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``MapOp`` into a ``CASE <operand> WHEN <key> THEN <value>`` expression.
+
+    Each ``(key, value)`` pair of ``op.mappings`` becomes one ``WHEN`` branch.
+    Operand values that match no key are returned unchanged, and an empty
+    mapping compiles to the operand itself.
+    """
+    # A CASE without any WHEN branch is not valid SQL; with nothing to remap
+    # the operand is already the result.
+    if not op.mappings:
+        return expr.expr
+
+    return sge.Case(
+        this=expr.expr,
+        ifs=[
+            sge.If(this=sge.convert(key), true=sge.convert(value))
+            for key, value in op.mappings
+        ],
+        # Without an ELSE, every row that matches no key would turn into NULL.
+        default=expr.expr,
+    )
+
+
 @UNARY_OP_REGISTRATION.register(ops.minute_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    # Builds an Extract node whose part is the identifier MINUTE and whose
+    # source is the operand expression.
     return sge.Extract(this=sge.Identifier(this="MINUTE"), expression=expr.expr)
@@ -601,6 +646,18 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     )
 
 
+@UNARY_OP_REGISTRATION.register(ops.StartsWithOp)
+def _(op: ops.StartsWithOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``StartsWithOp`` into an OR-chain of ``STARTS_WITH`` calls.
+
+    One ``STARTS_WITH(<operand>, <pattern>)`` call is built per entry of
+    ``op.pat``; when ``op.pat`` is empty the boolean literal false is returned.
+    """
+    prefixes = op.pat
+    if not prefixes:
+        return sge.false()
+
+    def either(lhs: sge.Expression, rhs: sge.Expression) -> sge.Expression:
+        return sge.Or(this=lhs, expression=rhs)
+
+    prefix_checks = [
+        sge.func("STARTS_WITH", expr.expr, sge.convert(prefix))
+        for prefix in prefixes
+    ]
+    # Left fold: ((c1 OR c2) OR c3) ...
+    return functools.reduce(either, prefix_checks)
+
+
 @UNARY_OP_REGISTRATION.register(ops.StrStripOp)
 def _(op: ops.StrStripOp, expr: TypedExpr) -> sge.Expression:
-    return sge.Trim(this=sge.convert(op.to_strip), expression=expr.expr)
+    # ``Trim.this`` is the value being trimmed and ``Trim.expression`` the set
+    # of characters to remove, so the operand goes first and the
+    # ``op.to_strip`` literal second. The reverse order would trim the literal
+    # using the column's characters.
+    return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip))
@@ -624,6 +681,11 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
     )
 
 
+@UNARY_OP_REGISTRATION.register(ops.StringSplitOp)
+def _(op: ops.StringSplitOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``StringSplitOp`` into a ``Split`` of the operand on ``op.pat``."""
+    delimiter = sge.convert(op.pat)
+    return sge.Split(this=expr.expr, expression=delimiter)
+
+
 @UNARY_OP_REGISTRATION.register(ops.StrGetOp)
 def _(op: ops.StrGetOp, expr: TypedExpr) -> sge.Expression:
     return sge.Substring(
@@ -776,3 +838,31 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
 @UNARY_OP_REGISTRATION.register(ops.year_op)
 def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
+    # Builds an Extract node whose part is the identifier YEAR and whose
+    # source is the operand expression.
     return sge.Extract(this=sge.Identifier(this="YEAR"), expression=expr.expr)
+
+
+@UNARY_OP_REGISTRATION.register(ops.ZfillOp)
+def _(op: ops.ZfillOp, expr: TypedExpr) -> sge.Expression:
+    """Compile ``ZfillOp`` into sign-aware zero left-padding.
+
+    Values whose first character is ``-`` are rebuilt as ``'-'`` followed by
+    the zero-padded remainder; every other value is padded directly. The pad
+    target is never shorter than the value itself, so values already longer
+    than ``op.width`` come back unchanged instead of being cut.
+    """
+
+    def target_length() -> sge.Expression:
+        # LPAD shortens its input when the requested length is smaller than
+        # the input length, whereas zfill must leave long values untouched.
+        # A fresh node is built per call so the two CASE branches do not share
+        # one expression object.
+        return sge.func(
+            "GREATEST", sge.func("LENGTH", expr.expr), sge.convert(op.width)
+        )
+
+    starts_with_minus = sge.EQ(
+        this=sge.Substring(
+            this=expr.expr, start=sge.convert(1), length=sge.convert(1)
+        ),
+        expression=sge.convert("-"),
+    )
+    # SUBSTRING positions are 1-based: position 1 is the sign itself, so the
+    # part to pad starts at position 2.
+    unsigned_part = sge.Substring(this=expr.expr, start=sge.convert(2))
+    padded_unsigned = sge.func(
+        "LPAD",
+        unsigned_part,
+        # One position is reserved for the sign that is concatenated back on.
+        sge.Sub(this=target_length(), expression=sge.convert(1)),
+        sge.convert("0"),
+    )
+    return sge.Case(
+        ifs=[
+            sge.If(
+                this=starts_with_minus,
+                true=sge.Concat(expressions=[sge.convert("-"), padded_unsigned]),
+            )
+        ],
+        default=sge.func("LPAD", expr.expr, target_length(), sge.convert("0")),
+    )
@@ -2828,6 +2828,19 @@ def itertuples(
             for item in df.itertuples(index=index, name=name):
                 yield item
 
+    def _apply_callable(self, condition):
+        """Resolve ``condition`` to a value, invoking it when it is callable.
+
+        A non-callable is returned untouched. A callable that carries a
+        ``bigframes_bigquery_function`` attribute is passed to
+        ``self.apply(..., axis=1)``; any other callable is called with this
+        DataFrame as its only argument.
+        """
+        if not callable(condition):
+            return condition
+
+        is_bigframes_function = hasattr(condition, "bigframes_bigquery_function")
+        if is_bigframes_function:
+            return self.apply(condition, axis=1)
+
+        # Plain Python function.
+        return condition(self)
+
     def where(self, cond, other=None):
         if isinstance(other, bigframes.series.Series):
             raise ValueError("Seires is not a supported replacement type!")
@@ -2839,16 +2852,8 @@ def where(self, cond, other=None):
 
         # Execute it with the DataFrame when cond or/and other is callable.
         # It can be either a plain python function or remote/managed function.
-        if callable(cond):
-            if hasattr(cond, "bigframes_bigquery_function"):
-                cond = self.apply(cond, axis=1)
-            else:
-                cond = cond(self)
-        if callable(other):
-            if hasattr(other, "bigframes_bigquery_function"):
-                other = self.apply(other, axis=1)
-            else:
-                other = other(self)
+        cond = self._apply_callable(cond)
+        other = self._apply_callable(other)
 
         aligned_block, (_, _) = self._block.join(cond._block, how="left")
         # No left join is needed when 'other' is None or constant.
@@ -2899,7 +2904,7 @@ def where(self, cond, other=None):
         return result
 
     def mask(self, cond, other=None):
-        return self.where(~cond, other=other)
+        # Resolve a callable ``cond`` first: ``~`` has to be applied to the
+        # evaluated condition, not to the function object itself.
+        return self.where(~self._apply_callable(cond), other=other)
 
     def dropna(
         self,
 
@@ -965,7 +965,7 @@ def float_parser(row):
         )
 
 
-def test_managed_function_df_where(session, dataset_id, scalars_dfs):
+def test_managed_function_df_where_mask(session, dataset_id, scalars_dfs):
     try:
 
         # The return type has to be bool type for callable where condition.
@@ -987,15 +987,15 @@ def is_sum_positive(a, b):
         pd_int64_df = scalars_pandas_df[int64_cols]
         pd_int64_df_filtered = pd_int64_df.dropna()
 
-        # Use callable condition in dataframe.where method.
+        # Test callable condition in dataframe.where method.
         bf_result = bf_int64_df_filtered.where(is_sum_positive_mf).to_pandas()
         # Pandas doesn't support such case, use following as workaround.
         pd_result = pd_int64_df_filtered.where(pd_int64_df_filtered.sum(axis=1) > 0)
 
         # Ignore any dtype difference.
         pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
 
-        # Make sure the read_gbq_function path works for this function.
+        # Make sure the read_gbq_function path works for dataframe.where method.
         is_sum_positive_ref = session.read_gbq_function(
             function_name=is_sum_positive_mf.bigframes_bigquery_function
         )
@@ -1012,14 +1012,27 @@ def is_sum_positive(a, b):
             bf_result_gbq, pd_result_gbq, check_dtype=False
         )
 
+        # Test callable condition in dataframe.mask method.
+        bf_result_gbq = bf_int64_df_filtered.mask(
+            is_sum_positive_ref, -bf_int64_df_filtered
+        ).to_pandas()
+        pd_result_gbq = pd_int64_df_filtered.mask(
+            pd_int64_df_filtered.sum(axis=1) > 0, -pd_int64_df_filtered
+        )
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(
+            bf_result_gbq, pd_result_gbq, check_dtype=False
+        )
+
     finally:
         # Clean up the gcp assets created for the managed function.
         cleanup_function_assets(
             is_sum_positive_mf, session.bqclient, ignore_failures=False
         )
 
 
-def test_managed_function_df_where_series(session, dataset_id, scalars_dfs):
+def test_managed_function_df_where_mask_series(session, dataset_id, scalars_dfs):
     try:
 
         # The return type has to be bool type for callable where condition.
@@ -1041,14 +1054,14 @@ def is_sum_positive_series(s):
         pd_int64_df = scalars_pandas_df[int64_cols]
         pd_int64_df_filtered = pd_int64_df.dropna()
 
-        # Use callable condition in dataframe.where method.
+        # Test callable condition in dataframe.where method.
         bf_result = bf_int64_df_filtered.where(is_sum_positive_series).to_pandas()
         pd_result = pd_int64_df_filtered.where(is_sum_positive_series)
 
         # Ignore any dtype difference.
         pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
 
-        # Make sure the read_gbq_function path works for this function.
+        # Make sure the read_gbq_function path works for dataframe.where method.
         is_sum_positive_series_ref = session.read_gbq_function(
             function_name=is_sum_positive_series_mf.bigframes_bigquery_function,
             is_row_processor=True,
@@ -1070,6 +1083,19 @@ def func_for_other(x):
             bf_result_gbq, pd_result_gbq, check_dtype=False
         )
 
+        # Test callable condition in dataframe.mask method.
+        bf_result_gbq = bf_int64_df_filtered.mask(
+            is_sum_positive_series_ref, func_for_other
+        ).to_pandas()
+        pd_result_gbq = pd_int64_df_filtered.mask(
+            is_sum_positive_series, func_for_other
+        )
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(
+            bf_result_gbq, pd_result_gbq, check_dtype=False
+        )
+
     finally:
         # Clean up the gcp assets created for the managed function.
         cleanup_function_assets(
 
@@ -2850,7 +2850,7 @@ def foo(x: int) -> int:
 
 
 @pytest.mark.flaky(retries=2, delay=120)
-def test_remote_function_df_where(session, dataset_id, scalars_dfs):
+def test_remote_function_df_where_mask(session, dataset_id, scalars_dfs):
     try:
 
         # The return type has to be bool type for callable where condition.
@@ -2873,14 +2873,22 @@ def is_sum_positive(a, b):
         pd_int64_df = scalars_pandas_df[int64_cols]
         pd_int64_df_filtered = pd_int64_df.dropna()
 
-        # Use callable condition in dataframe.where method.
+        # Test callable condition in dataframe.where method.
         bf_result = bf_int64_df_filtered.where(is_sum_positive_mf, 0).to_pandas()
         # Pandas doesn't support such case, use following as workaround.
         pd_result = pd_int64_df_filtered.where(pd_int64_df_filtered.sum(axis=1) > 0, 0)
 
         # Ignore any dtype difference.
         pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
 
+        # Test callable condition in dataframe.mask method.
+        bf_result = bf_int64_df_filtered.mask(is_sum_positive_mf, 0).to_pandas()
+        # Pandas doesn't support such case, use following as workaround.
+        pd_result = pd_int64_df_filtered.mask(pd_int64_df_filtered.sum(axis=1) > 0, 0)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
     finally:
         # Clean up the gcp assets created for the remote function.
         cleanup_function_assets(
@@ -2889,7 +2897,7 @@ def is_sum_positive(a, b):
 
 
 @pytest.mark.flaky(retries=2, delay=120)
-def test_remote_function_df_where_series(session, dataset_id, scalars_dfs):
+def test_remote_function_df_where_mask_series(session, dataset_id, scalars_dfs):
     try:
 
         # The return type has to be bool type for callable where condition.
@@ -2916,7 +2924,7 @@ def is_sum_positive_series(s):
         def func_for_other(x):
             return -x
 
-        # Use callable condition in dataframe.where method.
+        # Test callable condition in dataframe.where method.
         bf_result = bf_int64_df_filtered.where(
             is_sum_positive_series, func_for_other
         ).to_pandas()
@@ -2925,6 +2933,15 @@ def func_for_other(x):
         # Ignore any dtype difference.
         pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
 
+        # Test callable condition in dataframe.mask method.
+        bf_result = bf_int64_df_filtered.mask(
+            is_sum_positive_series_mf, func_for_other
+        ).to_pandas()
+        pd_result = pd_int64_df_filtered.mask(is_sum_positive_series, func_for_other)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
     finally:
         # Clean up the gcp assets created for the remote function.
         cleanup_function_assets(
 
@@ -406,6 +406,18 @@ def test_mask_series_cond(scalars_df_index, scalars_pandas_df_index):
     pandas.testing.assert_frame_equal(bf_result, pd_result)
 
 
+def test_mask_callable(scalars_df_index, scalars_pandas_df_index):
+    """``DataFrame.mask`` with callable ``cond`` and ``other`` matches pandas."""
+    columns = ["int64_too", "int64_col", "float64_col"]
+
+    def is_positive(x):
+        return x > 0
+
+    def plus_one(x):
+        return x + 1
+
+    bf_masked = scalars_df_index[columns].mask(cond=is_positive, other=plus_one)
+    pd_result = scalars_pandas_df_index[columns].mask(
+        cond=is_positive, other=plus_one
+    )
+    bf_result = bf_masked.to_pandas()
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
 def test_where_multi_column(scalars_df_index, scalars_pandas_df_index):
     # Test when a dataframe has multi-columns.
     columns = ["int64_col", "float64_col"]
 
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    ENDS_WITH(`bfcol_0`, 'ab') OR ENDS_WITH(`bfcol_0`, 'cd') AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`
@@ -0,0 +1,13 @@
+WITH `bfcte_0` AS (
+  SELECT
+    `string_col` AS `bfcol_0`
+  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
+), `bfcte_1` AS (
+  SELECT
+    *,
+    FALSE AS `bfcol_1`
+  FROM `bfcte_0`
+)
+SELECT
+  `bfcol_1` AS `string_col`
+FROM `bfcte_1`