diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/no_pattern.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/no_pattern.sql
deleted file mode 100644
index e9f61ddd7c..0000000000
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/no_pattern.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-WITH `bfcte_0` AS (
-  SELECT
-    `string_col` AS `bfcol_0`
-  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
-), `bfcte_1` AS (
-  SELECT
-    *,
-    FALSE AS `bfcol_1`
-  FROM `bfcte_0`
-)
-SELECT
-  `bfcol_1` AS `string_col`
-FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/multiple_patterns.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/out.sql
similarity index 62%
rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/multiple_patterns.sql
rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/out.sql
index f224471e79..e3ac5ec033 100644
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/multiple_patterns.sql
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/out.sql
@@ -5,9 +5,13 @@ WITH `bfcte_0` AS (
 ), `bfcte_1` AS (
   SELECT
     *,
-    ENDS_WITH(`bfcol_0`, 'ab') OR ENDS_WITH(`bfcol_0`, 'cd') AS `bfcol_1`
+    ENDS_WITH(`bfcol_0`, 'ab') AS `bfcol_1`,
+    ENDS_WITH(`bfcol_0`, 'ab') OR ENDS_WITH(`bfcol_0`, 'cd') AS `bfcol_2`,
+    FALSE AS `bfcol_3`
   FROM `bfcte_0`
 )
 SELECT
-  `bfcol_1` AS `string_col`
+  `bfcol_1` AS `single`,
+  `bfcol_2` AS `double`,
+  `bfcol_3` AS `empty`
 FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/single_pattern.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/single_pattern.sql
deleted file mode 100644
index a4e259f0b2..0000000000
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/single_pattern.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-WITH `bfcte_0` AS (
-  SELECT
-    `string_col` AS `bfcol_0`
-  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
-), `bfcte_1` AS (
-  SELECT
-    *,
-    ENDS_WITH(`bfcol_0`, 'ab') AS `bfcol_1`
-  FROM `bfcte_0`
-)
-SELECT
-  `bfcol_1` AS `string_col`
-FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/no_pattern.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/no_pattern.sql
deleted file mode 100644
index e9f61ddd7c..0000000000
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/no_pattern.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-WITH `bfcte_0` AS (
-  SELECT
-    `string_col` AS `bfcol_0`
-  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
-), `bfcte_1` AS (
-  SELECT
-    *,
-    FALSE AS `bfcol_1`
-  FROM `bfcte_0`
-)
-SELECT
-  `bfcol_1` AS `string_col`
-FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/multiple_patterns.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/out.sql
similarity index 61%
rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/multiple_patterns.sql
rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/out.sql
index 061b57e208..9679c95f75 100644
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/multiple_patterns.sql
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/out.sql
@@ -5,9 +5,13 @@ WITH `bfcte_0` AS (
 ), `bfcte_1` AS (
   SELECT
     *,
-    STARTS_WITH(`bfcol_0`, 'ab') OR STARTS_WITH(`bfcol_0`, 'cd') AS `bfcol_1`
+    STARTS_WITH(`bfcol_0`, 'ab') AS `bfcol_1`,
+    STARTS_WITH(`bfcol_0`, 'ab') OR STARTS_WITH(`bfcol_0`, 'cd') AS `bfcol_2`,
+    FALSE AS `bfcol_3`
   FROM `bfcte_0`
 )
 SELECT
-  `bfcol_1` AS `string_col`
+  `bfcol_1` AS `single`,
+  `bfcol_2` AS `double`,
+  `bfcol_3` AS `empty`
 FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/single_pattern.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/single_pattern.sql
deleted file mode 100644
index 726ce05b8c..0000000000
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/single_pattern.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-WITH `bfcte_0` AS (
-  SELECT
-    `string_col` AS `bfcol_0`
-  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
-), `bfcte_1` AS (
-  SELECT
-    *,
-    STARTS_WITH(`bfcol_0`, 'ab') AS `bfcol_1`
-  FROM `bfcte_0`
-)
-SELECT
-  `bfcol_1` AS `string_col`
-FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out.sql
index dfc100e413..b850262d80 100644
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out.sql
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out.sql
@@ -5,9 +5,15 @@ WITH `bfcte_0` AS (
 ), `bfcte_1` AS (
   SELECT
     *,
-    INSTR(`bfcol_0`, 'e', 1) - 1 AS `bfcol_1`
+    INSTR(`bfcol_0`, 'e', 1) - 1 AS `bfcol_1`,
+    INSTR(`bfcol_0`, 'e', 3) - 1 AS `bfcol_2`,
+    INSTR(SUBSTRING(`bfcol_0`, 1, 5), 'e') - 1 AS `bfcol_3`,
+    INSTR(SUBSTRING(`bfcol_0`, 3, 3), 'e') - 1 AS `bfcol_4`
   FROM `bfcte_0`
 )
 SELECT
-  `bfcol_1` AS `string_col`
+  `bfcol_1` AS `none_none`,
+  `bfcol_2` AS `start_none`,
+  `bfcol_3` AS `none_end`,
+  `bfcol_4` AS `start_end`
 FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out_with_end.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out_with_end.sql
deleted file mode 100644
index 78edf662b9..0000000000
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out_with_end.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-WITH `bfcte_0` AS (
-  SELECT
-    `string_col` AS `bfcol_0`
-  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
-), `bfcte_1` AS (
-  SELECT
-    *,
-    INSTR(SUBSTRING(`bfcol_0`, 1, 5), 'e') - 1 AS `bfcol_1`
-  FROM `bfcte_0`
-)
-SELECT
-  `bfcol_1` AS `string_col`
-FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out_with_start.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out_with_start.sql
deleted file mode 100644
index d0dfc11a53..0000000000
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out_with_start.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-WITH `bfcte_0` AS (
-  SELECT
-    `string_col` AS `bfcol_0`
-  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
-), `bfcte_1` AS (
-  SELECT
-    *,
-    INSTR(`bfcol_0`, 'e', 3) - 1 AS `bfcol_1`
-  FROM `bfcte_0`
-)
-SELECT
-  `bfcol_1` AS `string_col`
-FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out_with_start_and_end.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out_with_start_and_end.sql
deleted file mode 100644
index a91ab32946..0000000000
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out_with_start_and_end.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-WITH `bfcte_0` AS (
-  SELECT
-    `string_col` AS `bfcol_0`
-  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
-), `bfcte_1` AS (
-  SELECT
-    *,
-    INSTR(SUBSTRING(`bfcol_0`, 3, 3), 'e') - 1 AS `bfcol_1`
-  FROM `bfcte_0`
-)
-SELECT
-  `bfcol_1` AS `string_col`
-FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/left.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/left.sql
deleted file mode 100644
index ee95900b3e..0000000000
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/left.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-WITH `bfcte_0` AS (
-  SELECT
-    `string_col` AS `bfcol_0`
-  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
-), `bfcte_1` AS (
-  SELECT
-    *,
-    LPAD(`bfcol_0`, GREATEST(LENGTH(`bfcol_0`), 10), '-') AS `bfcol_1`
-  FROM `bfcte_0`
-)
-SELECT
-  `bfcol_1` AS `string_col`
-FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/both.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/out.sql
similarity index 63%
rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/both.sql
rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/out.sql
index 4701b0237a..4226843122 100644
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/both.sql
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/out.sql
@@ -5,6 +5,8 @@ WITH `bfcte_0` AS (
 ), `bfcte_1` AS (
   SELECT
     *,
+    LPAD(`bfcol_0`, GREATEST(LENGTH(`bfcol_0`), 10), '-') AS `bfcol_1`,
+    RPAD(`bfcol_0`, GREATEST(LENGTH(`bfcol_0`), 10), '-') AS `bfcol_2`,
     RPAD(
       LPAD(
         `bfcol_0`,
@@ -13,9 +15,11 @@
       ),
       GREATEST(LENGTH(`bfcol_0`), 10),
       '-'
-    ) AS `bfcol_1`
+    ) AS `bfcol_3`
   FROM `bfcte_0`
 )
 SELECT
-  `bfcol_1` AS `string_col`
+  `bfcol_1` AS `left`,
+  `bfcol_2` AS `right`,
+  `bfcol_3` AS `both`
 FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/right.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/right.sql
deleted file mode 100644
index 17e59c553f..0000000000
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/right.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-WITH `bfcte_0` AS (
-  SELECT
-    `string_col` AS `bfcol_0`
-  FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
-), `bfcte_1` AS (
-  SELECT
-    *,
-    RPAD(`bfcol_0`, GREATEST(LENGTH(`bfcol_0`), 10), '-') AS `bfcol_1`
-  FROM `bfcte_0`
-)
-SELECT
-  `bfcol_1` AS `string_col`
-FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_struct_field/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_struct_field/out.sql
index b3e8fde0b2..60ae78b755 100644
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_struct_field/out.sql
+++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_struct_field/out.sql
@@ -5,9 +5,11 @@ WITH `bfcte_0` AS (
 ), `bfcte_1` AS (
   SELECT
     *,
-    `bfcol_0`.`name` AS `bfcol_1`
+    `bfcol_0`.`name` AS `bfcol_1`,
+    `bfcol_0`.`name` AS `bfcol_2`
   FROM `bfcte_0`
 )
 SELECT
-  `bfcol_1` AS `people`
+  `bfcol_1` AS `string`,
+  `bfcol_2` AS `int`
 FROM `bfcte_1`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
index 8f3af11842..815bb84a9a 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py
@@ -12,437 +12,525 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import typing
+
 import pytest
 
 from bigframes import operations as ops
+from bigframes.core import expression as expr
 from bigframes.operations._op_converters import convert_index, convert_slice
 import bigframes.pandas as bpd
 
 pytest.importorskip("pytest_snapshot")
 
 
-def _apply_unary_op(obj: bpd.DataFrame, op: ops.UnaryOp, arg: str) -> str:
+def _apply_unary_ops(
+    obj: bpd.DataFrame,
+    ops_list: typing.Sequence[expr.Expression],
+    new_names: typing.Sequence[str],
+) -> str:
     array_value = obj._block.expr
-    op_expr = op.as_expr(arg)
-    result, col_ids = array_value.compute_values([op_expr])
+    result, old_names = array_value.compute_values(ops_list)
 
     # Rename columns for deterministic golden SQL results.
-    assert len(col_ids) == 1
-    result = result.rename_columns({col_ids[0]: arg}).select_columns([arg])
+    assert len(old_names) == len(new_names)
+    col_ids = {old_name: new_name for old_name, new_name in zip(old_names, new_names)}
+    result = result.rename_columns(col_ids).select_columns(new_names)
 
     sql = result.session._executor.to_sql(result, enable_cache=False)
     return sql
 
 
 def test_arccosh(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.arccosh_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.arccosh_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_arccos(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.arccos_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.arccos_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_arcsin(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.arcsin_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.arcsin_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_arcsinh(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.arcsinh_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.arcsinh_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_arctan(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.arctan_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.arctan_op.as_expr(col_name)], [col_name])
    snapshot.assert_match(sql, "out.sql")
 
 
 def test_arctanh(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.arctanh_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.arctanh_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_abs(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.abs_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.abs_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_capitalize(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.capitalize_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.capitalize_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_ceil(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.ceil_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.ceil_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_date(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.date_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.date_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_day(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.day_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.day_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_dayofweek(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.dayofweek_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.dayofweek_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_dayofyear(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.dayofyear_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.dayofyear_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_endswith(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.EndsWithOp(pat=("ab",)), "string_col")
-    snapshot.assert_match(sql, "single_pattern.sql")
-
-    sql = _apply_unary_op(bf_df, ops.EndsWithOp(pat=("ab", "cd")), "string_col")
-    snapshot.assert_match(sql, "multiple_patterns.sql")
-
-    sql = _apply_unary_op(bf_df, ops.EndsWithOp(pat=()), "string_col")
-    snapshot.assert_match(sql, "no_pattern.sql")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    ops_map = {
+        "single": ops.EndsWithOp(pat=("ab",)).as_expr(col_name),
+        "double": ops.EndsWithOp(pat=("ab", "cd")).as_expr(col_name),
+        "empty": ops.EndsWithOp(pat=()).as_expr(col_name),
+    }
+    sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    snapshot.assert_match(sql, "out.sql")
 
 
 def test_exp(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.exp_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.exp_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_expm1(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.expm1_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.expm1_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_floor_dt(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.FloorDtOp("D"), "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.FloorDtOp("D").as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_floor(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.floor_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.floor_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_area(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["geography_col"]]
-    sql = _apply_unary_op(bf_df, ops.geo_area_op, "geography_col")
+    col_name = "geography_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.geo_area_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["geography_col"]]
-    sql = _apply_unary_op(bf_df, ops.geo_st_astext_op, "geography_col")
+    col_name = "geography_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.geo_st_astext_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_st_boundary(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["geography_col"]]
-    sql = _apply_unary_op(bf_df, ops.geo_st_boundary_op, "geography_col")
+    col_name = "geography_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.geo_st_boundary_op.as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_st_buffer(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["geography_col"]]
-    sql = _apply_unary_op(bf_df, ops.GeoStBufferOp(1.0, 8.0, False), "geography_col")
+    col_name = "geography_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.GeoStBufferOp(1.0, 8.0, False).as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_st_centroid(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["geography_col"]]
-    sql = _apply_unary_op(bf_df, ops.geo_st_centroid_op, "geography_col")
+    col_name = "geography_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.geo_st_centroid_op.as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_st_convexhull(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["geography_col"]]
-    sql = _apply_unary_op(bf_df, ops.geo_st_convexhull_op, "geography_col")
+    col_name = "geography_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.geo_st_convexhull_op.as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_st_geogfromtext(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.geo_st_geogfromtext_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.geo_st_geogfromtext_op.as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_st_isclosed(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["geography_col"]]
-    sql = _apply_unary_op(bf_df, ops.geo_st_isclosed_op, "geography_col")
+    col_name = "geography_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.geo_st_isclosed_op.as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_st_length(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["geography_col"]]
-    sql = _apply_unary_op(bf_df, ops.GeoStLengthOp(True), "geography_col")
+    col_name = "geography_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.GeoStLengthOp(True).as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_x(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["geography_col"]]
-    sql = _apply_unary_op(bf_df, ops.geo_x_op, "geography_col")
+    col_name = "geography_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.geo_x_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_geo_y(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["geography_col"]]
-    sql = _apply_unary_op(bf_df, ops.geo_y_op, "geography_col")
+    col_name = "geography_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.geo_y_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot):
-    bf_df = repeated_types_df[["string_list_col"]]
-    sql = _apply_unary_op(bf_df, ops.ArrayToStringOp(delimiter="."), "string_list_col")
+    col_name = "string_list_col"
+    bf_df = repeated_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.ArrayToStringOp(delimiter=".").as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_array_index(repeated_types_df: bpd.DataFrame, snapshot):
-    bf_df = repeated_types_df[["string_list_col"]]
-    sql = _apply_unary_op(bf_df, convert_index(1), "string_list_col")
+    col_name = "string_list_col"
+    bf_df = repeated_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [convert_index(1).as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot):
-    bf_df = repeated_types_df[["string_list_col"]]
-    sql = _apply_unary_op(bf_df, convert_slice(slice(1, None)), "string_list_col")
+    col_name = "string_list_col"
+    bf_df = repeated_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [convert_slice(slice(1, None)).as_expr(col_name)], [col_name]
+    )
    snapshot.assert_match(sql, "out.sql")
 
 
 def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snapshot):
-    bf_df = repeated_types_df[["string_list_col"]]
-    sql = _apply_unary_op(bf_df, convert_slice(slice(1, 5)), "string_list_col")
+    col_name = "string_list_col"
+    bf_df = repeated_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [convert_slice(slice(1, 5)).as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_cos(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.cos_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.cos_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_cosh(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.cosh_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.cosh_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_hash(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.hash_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.hash_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_hour(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.hour_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.hour_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_invert(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["int64_col"]]
-    sql = _apply_unary_op(bf_df, ops.invert_op, "int64_col")
+    col_name = "int64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.invert_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_is_in(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["int64_col"]]
-    sql = _apply_unary_op(bf_df, ops.IsInOp(values=(1, 2, 3)), "int64_col")
+    col_name = "int64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.IsInOp(values=(1, 2, 3)).as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_isalnum(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.isalnum_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.isalnum_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_isalpha(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.isalpha_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.isalpha_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_isdecimal(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.isdecimal_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.isdecimal_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_isdigit(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.isdigit_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.isdigit_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_islower(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.islower_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.islower_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_isnumeric(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.isnumeric_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.isnumeric_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_isspace(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.isspace_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.isspace_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_isupper(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.isupper_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.isupper_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_len(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.len_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.len_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_ln(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.ln_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.ln_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_log10(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.log10_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.log10_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_log1p(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.log1p_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.log1p_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_lower(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.lower_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.lower_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_map(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(
-        bf_df, ops.MapOp(mappings=(("value1", "mapped1"),)), "string_col"
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df,
+        [ops.MapOp(mappings=(("value1", "mapped1"),)).as_expr(col_name)],
+        [col_name],
     )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_lstrip(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrLstripOp(" "), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.StrLstripOp(" ").as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_minute(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.minute_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.minute_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_month(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.month_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.month_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_neg(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.neg_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.neg_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def
 test_normalize(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.normalize_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.normalize_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
@@ -460,257 +548,297 @@ def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot):
 def test_pos(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.pos_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.pos_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_quarter(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.quarter_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_replace_str(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.ReplaceStrOp("e", "a"), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.ReplaceStrOp("e", "a").as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_regex_replace_str(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.RegexReplaceStrOp(r"e", "a"), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.RegexReplaceStrOp(r"e", "a").as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_reverse(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.reverse_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.reverse_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_second(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.second_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.second_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_rstrip(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrRstripOp(" "), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.StrRstripOp(" ").as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_sqrt(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.sqrt_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.sqrt_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_startswith(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StartsWithOp(pat=("ab",)), "string_col")
-    snapshot.assert_match(sql, "single_pattern.sql")
-
-    sql = _apply_unary_op(bf_df, ops.StartsWithOp(pat=("ab", "cd")), "string_col")
-    snapshot.assert_match(sql, "multiple_patterns.sql")
-    sql = _apply_unary_op(bf_df, ops.StartsWithOp(pat=()), "string_col")
-    snapshot.assert_match(sql, "no_pattern.sql")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    ops_map = {
+        "single": ops.StartsWithOp(pat=("ab",)).as_expr(col_name),
+        "double": ops.StartsWithOp(pat=("ab", "cd")).as_expr(col_name),
+        "empty": ops.StartsWithOp(pat=()).as_expr(col_name),
+    }
+    sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    snapshot.assert_match(sql, "out.sql")
 
 
 def test_str_get(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrGetOp(1), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.StrGetOp(1).as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_str_pad(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(
-        bf_df, ops.StrPadOp(length=10, fillchar="-", side="left"), "string_col"
-    )
-    snapshot.assert_match(sql, "left.sql")
-
-    sql = _apply_unary_op(
-        bf_df, ops.StrPadOp(length=10, fillchar="-", side="right"), "string_col"
-    )
-    snapshot.assert_match(sql, "right.sql")
-
-    sql = _apply_unary_op(
-        bf_df, ops.StrPadOp(length=10, fillchar="-", side="both"), "string_col"
-    )
-    snapshot.assert_match(sql, "both.sql")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    ops_map = {
+        "left": ops.StrPadOp(length=10, fillchar="-", side="left").as_expr(col_name),
+        "right": ops.StrPadOp(length=10, fillchar="-", side="right").as_expr(col_name),
+        "both": ops.StrPadOp(length=10, fillchar="-", side="both").as_expr(col_name),
+    }
+    sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    snapshot.assert_match(sql, "out.sql")
 
 
 def test_str_slice(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrSliceOp(1, 3), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.StrSliceOp(1, 3).as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_strftime(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrftimeOp("%Y-%m-%d"), "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.StrftimeOp("%Y-%m-%d").as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot):
-    bf_df = nested_structs_types_df[["people"]]
+    col_name = "people"
+    bf_df = nested_structs_types_df[[col_name]]
 
-    # When a name string is provided.
-    sql = _apply_unary_op(bf_df, ops.StructFieldOp("name"), "people")
-    snapshot.assert_match(sql, "out.sql")
+    ops_map = {
+        # When a name string is provided.
+        "string": ops.StructFieldOp("name").as_expr(col_name),
+        # When an index integer is provided.
+        "int": ops.StructFieldOp(0).as_expr(col_name),
+    }
+    sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
-    # When an index integer is provided.
-    sql = _apply_unary_op(bf_df, ops.StructFieldOp(0), "people")
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_str_contains(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrContainsOp("e"), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.StrContainsOp("e").as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrContainsRegexOp("e"), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.StrContainsRegexOp("e").as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_str_extract(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrExtractOp(r"([a-z]*)", 1), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.StrExtractOp(r"([a-z]*)", 1).as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_str_repeat(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrRepeatOp(2), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.StrRepeatOp(2).as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_str_find(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrFindOp("e", start=None, end=None), "string_col")
-    snapshot.assert_match(sql, "out.sql")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    ops_map = {
+        "none_none": ops.StrFindOp("e", start=None, end=None).as_expr(col_name),
+        "start_none": ops.StrFindOp("e", start=2, end=None).as_expr(col_name),
+        "none_end": ops.StrFindOp("e", start=None, end=5).as_expr(col_name),
+        "start_end": ops.StrFindOp("e", start=2, end=5).as_expr(col_name),
+    }
+    sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
-    sql = _apply_unary_op(bf_df, ops.StrFindOp("e", start=2, end=None), "string_col")
-    snapshot.assert_match(sql, "out_with_start.sql")
-
-    sql = _apply_unary_op(bf_df, ops.StrFindOp("e", start=None, end=5), "string_col")
-    snapshot.assert_match(sql, "out_with_end.sql")
-
-    sql = _apply_unary_op(bf_df, ops.StrFindOp("e", start=2, end=5), "string_col")
-    snapshot.assert_match(sql, "out_with_start_and_end.sql")
+    snapshot.assert_match(sql, "out.sql")
 
 
 def test_strip(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StrStripOp(" "), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.StrStripOp(" ").as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.iso_day_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.iso_week_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_iso_year(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.iso_year_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_isnull(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.isnull_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.isnull_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_notnull(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.notnull_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.notnull_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_sin(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.sin_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.sin_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_sinh(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.sinh_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.sinh_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_string_split(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.StringSplitOp(pat=","), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.StringSplitOp(pat=",").as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_tan(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.tan_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.tan_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_tanh(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["float64_col"]]
-    sql = _apply_unary_op(bf_df, ops.tanh_op, "float64_col")
+    col_name = "float64_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.tanh_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_time(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.time_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.time_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["int64_col"]]
-    sql = _apply_unary_op(bf_df, ops.ToDatetimeOp(), "int64_col")
= "int64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot): - bf_df = scalar_types_df[["int64_col"]] - sql = _apply_unary_op(bf_df, ops.ToTimestampOp(), "int64_col") + col_name = "int64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.ToTimestampOp().as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -725,104 +853,133 @@ def test_to_timedelta(scalar_types_df: bpd.DataFrame, snapshot): def test_unix_micros(scalar_types_df: bpd.DataFrame, snapshot): - bf_df = scalar_types_df[["timestamp_col"]] - sql = _apply_unary_op(bf_df, ops.UnixMicros(), "timestamp_col") + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.UnixMicros().as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") def test_unix_millis(scalar_types_df: bpd.DataFrame, snapshot): - bf_df = scalar_types_df[["timestamp_col"]] - sql = _apply_unary_op(bf_df, ops.UnixMillis(), "timestamp_col") + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.UnixMillis().as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") def test_unix_seconds(scalar_types_df: bpd.DataFrame, snapshot): - bf_df = scalar_types_df[["timestamp_col"]] - sql = _apply_unary_op(bf_df, ops.UnixSeconds(), "timestamp_col") + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.UnixSeconds().as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") def test_timedelta_floor(scalar_types_df: bpd.DataFrame, snapshot): - bf_df = scalar_types_df[["int64_col"]] - sql = _apply_unary_op(bf_df, ops.timedelta_floor_op, "int64_col") + col_name = "int64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.timedelta_floor_op.as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") def test_json_extract(json_types_df: bpd.DataFrame, snapshot): - bf_df = json_types_df[["json_col"]] - sql = _apply_unary_op(bf_df, ops.JSONExtract(json_path="$"), "json_col") + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.JSONExtract(json_path="$").as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") def test_json_extract_array(json_types_df: bpd.DataFrame, snapshot): - bf_df = json_types_df[["json_col"]] - sql = _apply_unary_op(bf_df, ops.JSONExtractArray(json_path="$"), "json_col") + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.JSONExtractArray(json_path="$").as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot): - bf_df = json_types_df[["json_col"]] - sql = _apply_unary_op(bf_df, ops.JSONExtractStringArray(json_path="$"), "json_col") + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.JSONExtractStringArray(json_path="$").as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") def test_json_query(json_types_df: bpd.DataFrame, snapshot): - bf_df = json_types_df[["json_col"]] - sql = _apply_unary_op(bf_df, ops.JSONQuery(json_path="$"), "json_col") + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, 
+        bf_df, [ops.JSONQuery(json_path="$").as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_json_query_array(json_types_df: bpd.DataFrame, snapshot):
-    bf_df = json_types_df[["json_col"]]
-    sql = _apply_unary_op(bf_df, ops.JSONQueryArray(json_path="$"), "json_col")
+    col_name = "json_col"
+    bf_df = json_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.JSONQueryArray(json_path="$").as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_json_value(json_types_df: bpd.DataFrame, snapshot):
-    bf_df = json_types_df[["json_col"]]
-    sql = _apply_unary_op(bf_df, ops.JSONValue(json_path="$"), "json_col")
+    col_name = "json_col"
+    bf_df = json_types_df[[col_name]]
+    sql = _apply_unary_ops(
+        bf_df, [ops.JSONValue(json_path="$").as_expr(col_name)], [col_name]
+    )
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.ParseJSON(), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.ParseJSON().as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_to_json_string(json_types_df: bpd.DataFrame, snapshot):
-    bf_df = json_types_df[["json_col"]]
-    sql = _apply_unary_op(bf_df, ops.ToJSONString(), "json_col")
+    col_name = "json_col"
+    bf_df = json_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.ToJSONString().as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_upper(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.upper_op, "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.upper_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_year(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["timestamp_col"]]
-    sql = _apply_unary_op(bf_df, ops.year_op, "timestamp_col")
+    col_name = "timestamp_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.year_op.as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_zfill(scalar_types_df: bpd.DataFrame, snapshot):
-    bf_df = scalar_types_df[["string_col"]]
-    sql = _apply_unary_op(bf_df, ops.ZfillOp(width=10), "string_col")
+    col_name = "string_col"
+    bf_df = scalar_types_df[[col_name]]
+    sql = _apply_unary_ops(bf_df, [ops.ZfillOp(width=10).as_expr(col_name)], [col_name])
     snapshot.assert_match(sql, "out.sql")