32
32
NamedTuple ,
33
33
Optional ,
34
34
Sequence ,
35
+ Tuple ,
35
36
TYPE_CHECKING ,
36
37
Union ,
37
38
)
38
39
import warnings
39
40
40
41
import ibis
41
42
import pandas
43
+ import pyarrow
42
44
import requests
43
45
44
46
if TYPE_CHECKING :
@@ -182,15 +184,11 @@ def create_bq_remote_function(
182
184
# Create BQ function
183
185
# https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_remote_function_2
184
186
bq_function_args = []
185
- bq_function_return_type = third_party_ibis_bqtypes .BigQueryType .from_ibis (
186
- output_type
187
- )
187
+ bq_function_return_type = output_type
188
188
189
189
# We are expecting the input type annotations to be 1:1 with the input args
190
- for idx , name in enumerate (input_args ):
191
- bq_function_args .append (
192
- f"{ name } { third_party_ibis_bqtypes .BigQueryType .from_ibis (input_types [idx ])} "
193
- )
190
+ for name , type_ in zip (input_args , input_types ):
191
+ bq_function_args .append (f"{ name } { type_ } " )
194
192
195
193
remote_function_options = {
196
194
"endpoint" : endpoint ,
@@ -259,16 +257,31 @@ def get_cloud_function_endpoint(self, name):
259
257
return None
260
258
261
259
def generate_cloud_function_code (
262
- self , def_ , directory , package_requirements = None , is_row_processor = False
260
+ self ,
261
+ def_ ,
262
+ directory ,
263
+ * ,
264
+ input_types : Tuple [str ],
265
+ output_type : str ,
266
+ package_requirements = None ,
267
+ is_row_processor = False ,
263
268
):
264
- """Generate the cloud function code for a given user defined function."""
269
+ """Generate the cloud function code for a given user defined function.
270
+
271
+ Args:
272
+ input_types (tuple[str]):
273
+ Types of the input arguments in BigQuery SQL data type names.
274
+ output_type (str):
275
+ Type of the output scalar as a BigQuery SQL data type name.
276
+ """
265
277
266
278
# requirements.txt
267
279
requirements = ["cloudpickle >= 2.1.0" ]
268
280
if is_row_processor :
269
281
# bigframes remote function will send an entire row of data as json,
270
282
# which would be converted to a pandas series and processed
271
283
requirements .append (f"pandas=={ pandas .__version__ } " )
284
+ requirements .append (f"pyarrow=={ pyarrow .__version__ } " )
272
285
if package_requirements :
273
286
requirements .extend (package_requirements )
274
287
requirements = sorted (requirements )
@@ -278,26 +291,45 @@ def generate_cloud_function_code(
278
291
279
292
# main.py
280
293
entry_point = bigframes .functions .remote_function_template .generate_cloud_function_main_code (
281
- def_ , directory , is_row_processor
294
+ def_ ,
295
+ directory ,
296
+ input_types = input_types ,
297
+ output_type = output_type ,
298
+ is_row_processor = is_row_processor ,
282
299
)
283
300
return entry_point
284
301
285
302
def create_cloud_function (
286
303
self ,
287
304
def_ ,
288
305
cf_name ,
306
+ * ,
307
+ input_types : Tuple [str ],
308
+ output_type : str ,
289
309
package_requirements = None ,
290
310
timeout_seconds = 600 ,
291
311
max_instance_count = None ,
292
312
is_row_processor = False ,
293
313
vpc_connector = None ,
294
314
):
295
- """Create a cloud function from the given user defined function."""
315
+ """Create a cloud function from the given user defined function.
316
+
317
+ Args:
318
+ input_types (tuple[str]):
319
+ Types of the input arguments in BigQuery SQL data type names.
320
+ output_type (str):
321
+ Type of the output scalar as a BigQuery SQL data type name.
322
+ """
296
323
297
324
# Build and deploy folder structure containing cloud function
298
325
with tempfile .TemporaryDirectory () as directory :
299
326
entry_point = self .generate_cloud_function_code (
300
- def_ , directory , package_requirements , is_row_processor
327
+ def_ ,
328
+ directory ,
329
+ package_requirements = package_requirements ,
330
+ input_types = input_types ,
331
+ output_type = output_type ,
332
+ is_row_processor = is_row_processor ,
301
333
)
302
334
archive_path = shutil .make_archive (directory , "zip" , directory )
303
335
@@ -444,11 +476,13 @@ def provision_bq_remote_function(
444
476
cf_endpoint = self .create_cloud_function (
445
477
def_ ,
446
478
cloud_function_name ,
447
- package_requirements ,
448
- cloud_function_timeout ,
449
- cloud_function_max_instance_count ,
450
- is_row_processor ,
451
- cloud_function_vpc_connector ,
479
+ input_types = input_types ,
480
+ output_type = output_type ,
481
+ package_requirements = package_requirements ,
482
+ timeout_seconds = cloud_function_timeout ,
483
+ max_instance_count = cloud_function_max_instance_count ,
484
+ is_row_processor = is_row_processor ,
485
+ vpc_connector = cloud_function_vpc_connector ,
452
486
)
453
487
else :
454
488
logger .info (f"Cloud function { cloud_function_name } already exists." )
@@ -957,16 +991,21 @@ def try_delattr(attr):
957
991
958
992
rf_name , cf_name = remote_function_client .provision_bq_remote_function (
959
993
func ,
960
- ibis_signature .input_types ,
961
- ibis_signature .output_type ,
962
- reuse ,
963
- name ,
964
- packages ,
965
- max_batching_rows ,
966
- cloud_function_timeout ,
967
- cloud_function_max_instances ,
968
- is_row_processor ,
969
- cloud_function_vpc_connector ,
994
+ input_types = tuple (
995
+ third_party_ibis_bqtypes .BigQueryType .from_ibis (type_ )
996
+ for type_ in ibis_signature .input_types
997
+ ),
998
+ output_type = third_party_ibis_bqtypes .BigQueryType .from_ibis (
999
+ ibis_signature .output_type
1000
+ ),
1001
+ reuse = reuse ,
1002
+ name = name ,
1003
+ package_requirements = packages ,
1004
+ max_batching_rows = max_batching_rows ,
1005
+ cloud_function_timeout = cloud_function_timeout ,
1006
+ cloud_function_max_instance_count = cloud_function_max_instances ,
1007
+ is_row_processor = is_row_processor ,
1008
+ cloud_function_vpc_connector = cloud_function_vpc_connector ,
970
1009
)
971
1010
972
1011
# TODO: Move ibis logic to compiler step
0 commit comments