
Commit 0ff1395

refactor: rename _apply_unary_ops to _apply_ops_to_sql (#2214)

* refactor: rename _apply_unary_ops to _apply_ops_to_sql
* fix lint
* fix mypy

1 parent b23cf83 · commit 0ff1395

File tree: 12 files changed, +142 / -139 lines


bigframes/testing/utils.py
Lines changed: 3 additions & 10 deletions

```diff
@@ -448,12 +448,12 @@ def get_function_name(func, package_requirements=None, is_row_processor=False):
     return f"bigframes_{function_hash}"
 
 
-def _apply_unary_ops(
+def _apply_ops_to_sql(
     obj: bpd.DataFrame,
     ops_list: Sequence[ex.Expression],
     new_names: Sequence[str],
 ) -> str:
-    """Applies a list of unary ops to the given DataFrame and returns the SQL
+    """Applies a list of ops to the given DataFrame and returns the SQL
     representing the resulting DataFrame."""
     array_value = obj._block.expr
     result, old_names = array_value.compute_values(ops_list)
@@ -485,13 +485,6 @@ def _apply_nary_op(
 ) -> str:
     """Applies a nary op to the given DataFrame and return the SQL representing
     the resulting DataFrame."""
-    array_value = obj._block.expr
     op_expr = op.as_expr(*args)
-    result, col_ids = array_value.compute_values([op_expr])
-
-    # Rename columns for deterministic golden SQL results.
-    assert len(col_ids) == 1
-    result = result.rename_columns({col_ids[0]: args[0]}).select_columns([args[0]])
-
-    sql = result.session._executor.to_sql(result, enable_cache=False)
+    sql = _apply_ops_to_sql(obj, [op_expr], [args[0]])  # type: ignore
     return sql
```
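For orientation, here is a minimal sketch of the shared helper as it plausibly reads after this commit. The hunks above show only the signature, docstring, and first two body lines; everything after `compute_values()` is inferred from the deleted `_apply_nary_op` body, generalized from a single column to a list, so it may not match the real file line for line (`bpd`, `ex`, and `Sequence` come from the module's existing imports).

```python
# Sketch, not the verbatim file: the rename/select/to_sql tail below is
# inferred from the deleted _apply_nary_op body shown in this diff.
def _apply_ops_to_sql(
    obj: bpd.DataFrame,
    ops_list: Sequence[ex.Expression],
    new_names: Sequence[str],
) -> str:
    """Applies a list of ops to the given DataFrame and returns the SQL
    representing the resulting DataFrame."""
    array_value = obj._block.expr
    result, old_names = array_value.compute_values(ops_list)

    # Rename columns for deterministic golden SQL results (inferred step).
    assert len(old_names) == len(new_names)
    result = result.rename_columns(dict(zip(old_names, new_names)))
    result = result.select_columns(list(new_names))
    return result.session._executor.to_sql(result, enable_cache=False)
```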

tests/unit/core/compile/sqlglot/expressions/test_ai_ops.py
Lines changed: 12 additions & 12 deletions

```diff
@@ -39,7 +39,7 @@ def test_ai_generate(scalar_types_df: dataframe.DataFrame, snapshot):
         output_schema=None,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -58,7 +58,7 @@ def test_ai_generate_with_output_schema(scalar_types_df: dataframe.DataFrame, sn
         output_schema="x INT64, y FLOAT64",
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -82,7 +82,7 @@ def test_ai_generate_with_model_param(scalar_types_df: dataframe.DataFrame, snap
         output_schema=None,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -100,7 +100,7 @@ def test_ai_generate_bool(scalar_types_df: dataframe.DataFrame, snapshot):
         model_params=None,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -125,7 +125,7 @@ def test_ai_generate_bool_with_model_param(
         model_params=json.dumps(dict()),
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -144,7 +144,7 @@ def test_ai_generate_int(scalar_types_df: dataframe.DataFrame, snapshot):
         model_params=None,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -170,7 +170,7 @@ def test_ai_generate_int_with_model_param(
         model_params=json.dumps(dict()),
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -189,7 +189,7 @@ def test_ai_generate_double(scalar_types_df: dataframe.DataFrame, snapshot):
         model_params=None,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -215,7 +215,7 @@ def test_ai_generate_double_with_model_param(
         model_params=json.dumps(dict()),
    )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -230,7 +230,7 @@ def test_ai_if(scalar_types_df: dataframe.DataFrame, snapshot):
         connection_id=CONNECTION_ID,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -246,7 +246,7 @@ def test_ai_classify(scalar_types_df: dataframe.DataFrame, snapshot):
         connection_id=CONNECTION_ID,
     )
 
-    sql = utils._apply_unary_ops(scalar_types_df, [op.as_expr(col_name)], ["result"])
+    sql = utils._apply_ops_to_sql(scalar_types_df, [op.as_expr(col_name)], ["result"])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -259,7 +259,7 @@ def test_ai_score(scalar_types_df: dataframe.DataFrame, snapshot):
         connection_id=CONNECTION_ID,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
```
tests/unit/core/compile/sqlglot/expressions/test_array_ops.py
Lines changed: 4 additions & 4 deletions

```diff
@@ -25,7 +25,7 @@
 def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.ArrayToStringOp(delimiter=".").as_expr(col_name)], [col_name]
     )
 
@@ -35,7 +35,7 @@ def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot):
 def test_array_index(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [convert_index(1).as_expr(col_name)], [col_name]
     )
 
@@ -45,7 +45,7 @@ def test_array_index(repeated_types_df: bpd.DataFrame, snapshot):
 def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [convert_slice(slice(1, None)).as_expr(col_name)], [col_name]
     )
 
@@ -55,7 +55,7 @@ def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot)
 def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [convert_slice(slice(1, 5)).as_expr(col_name)], [col_name]
     )
```

tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py
Lines changed: 1 addition & 1 deletion

```diff
@@ -40,7 +40,7 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot):
         "float_in_ints": ops.IsInOp(values=(1, 2, 3, None)).as_expr(float_col),
     }
 
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
     snapshot.assert_match(sql, "out.sql")
 
 
```
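The dict pattern above, mapping each output column name to an expression, is what keeps the golden SQL deterministic when several ops are applied at once. A hypothetical call after the rename might look like the following sketch (the column names are illustrative, not taken from the diff):

```python
# Hypothetical illustration of the multi-op pattern; "int64_col" and
# "float64_col" are example column names, not verified against the fixture.
ops_map = {
    "int_in_ints": ops.IsInOp(values=(1, 2, 3)).as_expr("int64_col"),
    "float_in_ints": ops.IsInOp(values=(1.0, 2.0)).as_expr("float64_col"),
}
# Keys become the output column names; values are the expressions to compute.
sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
snapshot.assert_match(sql, "out.sql")
```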

tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py
Lines changed: 27 additions & 23 deletions

```diff
@@ -25,15 +25,15 @@
 def test_date(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.date_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.date_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_day(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.day_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.day_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -43,14 +43,14 @@ def test_dayofweek(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df = scalar_types_df[col_names]
     ops_map = {col_name: ops.dayofweek_op.as_expr(col_name) for col_name in col_names}
 
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_dayofyear(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.dayofyear_op.as_expr(col_name)], [col_name]
     )
 
@@ -75,7 +75,7 @@ def test_floor_dt(scalar_types_df: bpd.DataFrame, snapshot):
         "datetime_col_us": ops.FloorDtOp("us").as_expr("datetime_col"),
     }
 
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
     snapshot.assert_match(sql, "out.sql")
 
 
@@ -85,7 +85,7 @@ def test_floor_dt_op_invalid_freq(scalar_types_df: bpd.DataFrame):
     with pytest.raises(
         NotImplementedError, match="Unsupported freq paramater: invalid"
     ):
-        utils._apply_unary_ops(
+        utils._apply_ops_to_sql(
             bf_df,
             [ops.FloorDtOp(freq="invalid").as_expr(col_name)],  # type:ignore
             [col_name],
@@ -95,31 +95,31 @@
 def test_hour(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.hour_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.hour_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_minute(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.minute_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.minute_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_month(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.month_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.month_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_normalize(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.normalize_op.as_expr(col_name)], [col_name]
     )
 
@@ -129,23 +129,23 @@ def test_normalize(scalar_types_df: bpd.DataFrame, snapshot):
 def test_quarter(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_second(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.second_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.second_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_strftime(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StrftimeOp("%Y-%m-%d").as_expr(col_name)], [col_name]
     )
 
@@ -155,15 +155,15 @@ def test_strftime(scalar_types_df: bpd.DataFrame, snapshot):
 def test_time(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.time_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.time_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "int64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name]
     )
 
@@ -173,7 +173,7 @@ def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
 def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "int64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.ToTimestampOp().as_expr(col_name)], [col_name]
     )
 
@@ -183,7 +183,7 @@ def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot):
 def test_unix_micros(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.UnixMicros().as_expr(col_name)], [col_name]
     )
 
@@ -193,7 +193,7 @@ def test_unix_micros(scalar_types_df: bpd.DataFrame, snapshot):
 def test_unix_millis(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.UnixMillis().as_expr(col_name)], [col_name]
     )
 
@@ -203,7 +203,7 @@ def test_unix_millis(scalar_types_df: bpd.DataFrame, snapshot):
 def test_unix_seconds(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.UnixSeconds().as_expr(col_name)], [col_name]
     )
 
@@ -213,31 +213,35 @@ def test_unix_seconds(scalar_types_df: bpd.DataFrame, snapshot):
 def test_year(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.year_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.year_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(
+        bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_iso_year(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(
+        bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
```