Commit 93353c3

refactor: use unit testable real python functions for most templated remote_functions code (#751)
* refactor: use unit testable real python functions for most templated remote_functions code
* revert changes and add unit tests
* Update tests/unit/functions/test_remote_function_template.py
* mypy failure
* Update bigframes/functions/remote_function_template.py
1 parent fb8cf8f commit 93353c3
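
The gist of the change, sketched below under assumed names: handler logic that previously existed only inside a generated source-code string (and so could only be exercised by actually deploying a cloud function) now lives as real, importable Python in bigframes/functions/remote_function_template.py, where unit tests can call it directly. `udf_http_sketch` and its signature are illustrative assumptions, not the actual bigframes API.

```python
# Illustrative sketch of the refactor's idea; names here are hypothetical.

# Before: logic embedded in a template string. It only becomes executable
# inside the generated main.py of a deployed cloud function, so a unit
# test cannot reach it.
CODE_TEMPLATE = """
def udf_http(request):
    calls = request.get_json(silent=True)["calls"]
    return {"replies": [udf(*call) for call in calls]}
"""

# After: the same logic written as a real function that a test can import
# and call with a stub request object.
def udf_http_sketch(request, udf):
    calls = request.get_json(silent=True)["calls"]
    return {"replies": [udf(*call) for call in calls]}
```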

File tree

4 files changed: +284 −171


bigframes/functions/remote_function.py

Lines changed: 8 additions & 171 deletions
```diff
@@ -24,7 +24,6 @@
 import string
 import sys
 import tempfile
-import textwrap
 from typing import (
     Any,
     cast,
@@ -61,6 +60,7 @@
 from bigframes import clients
 import bigframes.constants as constants
 import bigframes.dtypes
+import bigframes.functions.remote_function_template
 
 logger = logging.getLogger(__name__)
 
@@ -258,171 +258,8 @@ def get_cloud_function_endpoint(self, name):
             pass
         return None
 
-    def generate_udf_code(self, def_, dir):
-        """Generate serialized bytecode using cloudpickle given a udf."""
-        udf_code_file_name = "udf.py"
-        udf_bytecode_file_name = "udf.cloudpickle"
-
-        # original code, only for debugging purpose
-        udf_code = textwrap.dedent(inspect.getsource(def_))
-        udf_code_file_path = os.path.join(dir, udf_code_file_name)
-        with open(udf_code_file_path, "w") as f:
-            f.write(udf_code)
-
-        # serialized bytecode
-        udf_bytecode_file_path = os.path.join(dir, udf_bytecode_file_name)
-        with open(udf_bytecode_file_path, "wb") as f:
-            cloudpickle.dump(def_, f, protocol=_pickle_protocol_version)
-
-        return udf_code_file_name, udf_bytecode_file_name
-
-    def generate_cloud_function_main_code(self, def_, dir, is_row_processor=False):
-        """Get main.py code for the cloud function for the given user defined function."""
-
-        # Pickle the udf with all its dependencies
-        udf_code_file, udf_bytecode_file = self.generate_udf_code(def_, dir)
-        handler_func_name = "udf_http"
-
-        # We want to build a cloud function that works for BQ remote functions,
-        # where we receive `calls` in json which is a batch of rows from BQ SQL.
-        # The number and the order of values in each row is expected to exactly
-        # match to the number and order of arguments in the udf, e.g. if the udf is
-        #   def foo(x: int, y: str):
-        #     ...
-        # then the http request body could look like
-        # {
-        #   ...
-        #   "calls" : [
-        #     [123, "hello"],
-        #     [456, "world"]
-        #   ]
-        #   ...
-        # }
-        # https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#input_format
-        code = """\
-import cloudpickle
-import functions_framework
-from flask import jsonify
-import json
-"""
-        if is_row_processor:
-            code += """\
-import ast
-import math
-import pandas as pd
-
-def get_pd_series(row):
-    row_json = json.loads(row)
-    col_names = row_json["names"]
-    col_types = row_json["types"]
-    col_values = row_json["values"]
-    index_length = row_json["indexlength"]
-    dtype = row_json["dtype"]
-
-    # At this point we are assuming that col_names, col_types and col_values are
-    # arrays of the same length, representing column names, types and values for
-    # one row of data
-
-    # column names are not necessarily strings
-    # they are serialized as repr(name) at source
-    evaluated_col_names = []
-    for col_name in col_names:
-        try:
-            col_name = ast.literal_eval(col_name)
-        except Exception as ex:
-            raise NameError(f"Failed to evaluate column name from '{col_name}': {ex}")
-        evaluated_col_names.append(col_name)
-    col_names = evaluated_col_names
-
-    # Supported converters for pandas to python types
-    value_converters = {
-        "boolean": lambda val: val == "true",
-        "Int64": int,
-        "Float64": float,
-        "string": str,
-    }
-
-    def convert_value(value, value_type):
-        value_converter = value_converters.get(value_type)
-        if value_converter is None:
-            raise ValueError(f"Don't know how to handle type '{value_type}'")
-        if value is None:
-            return None
-        return value_converter(value)
-
-    index_values = [
-        pd.Series([convert_value(col_values[i], col_types[i])], dtype=col_types[i])[0]
-        for i in range(index_length)
-    ]
-
-    data_col_names = col_names[index_length:]
-    data_col_types = col_types[index_length:]
-    data_col_values = col_values[index_length:]
-    data_col_values = [
-        pd.Series([convert_value(a, data_col_types[i])], dtype=data_col_types[i])[0]
-        for i, a in enumerate(data_col_values)
-    ]
-
-    row_index = index_values[0] if len(index_values) == 1 else tuple(index_values)
-    row_series = pd.Series(data_col_values, index=data_col_names, name=row_index, dtype=dtype)
-    return row_series
-"""
-        code += f"""\
-
-# original udf code is in {udf_code_file}
-# serialized udf code is in {udf_bytecode_file}
-with open("{udf_bytecode_file}", "rb") as f:
-    udf = cloudpickle.load(f)
-
-def {handler_func_name}(request):
-    try:
-        request_json = request.get_json(silent=True)
-        calls = request_json["calls"]
-        replies = []
-        for call in calls:
-"""
-
-        if is_row_processor:
-            code += """\
-            reply = udf(get_pd_series(call[0]))
-            if isinstance(reply, float) and (math.isnan(reply) or math.isinf(reply)):
-                # json serialization of the special float values (nan, inf, -inf)
-                # is not in strict compliance of the JSON specification
-                # https://docs.python.org/3/library/json.html#basic-usage.
-                # Let's convert them to a quoted string representation ("NaN",
-                # "Infinity", "-Infinity" respectively) which is handled by
-                # BigQuery
-                reply = json.dumps(reply)
-            elif pd.isna(reply):
-                # Pandas N/A values are not json serializable, so use a python
-                # equivalent instead
-                reply = None
-            elif hasattr(reply, "item"):
-                # Numpy types are not json serializable, so use its Python
-                # value instead
-                reply = reply.item()
-"""
-        else:
-            code += """\
-            reply = udf(*call)
-"""
-        code += """\
-            replies.append(reply)
-        return_json = json.dumps({"replies" : replies})
-        return return_json
-    except Exception as e:
-        return jsonify( { "errorMessage": str(e) } ), 400
-"""
-
-        main_py = os.path.join(dir, "main.py")
-        with open(main_py, "w") as f:
-            f.write(code)
-        logger.debug(f"Wrote {os.path.abspath(main_py)}:\n{open(main_py).read()}")
-
-        return handler_func_name
-
     def generate_cloud_function_code(
-        self, def_, dir, package_requirements=None, is_row_processor=False
+        self, def_, directory, package_requirements=None, is_row_processor=False
     ):
         """Generate the cloud function code for a given user defined function."""
 
```
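The removed template above encodes the BigQuery remote function contract documented at the URL in its comments: the HTTP body carries a `calls` array of rows, each row being the argument list for one UDF invocation, and the response is JSON with a `replies` array of the same length. A minimal self-contained sketch of that round trip, with `double` standing in for a deployed UDF:

```python
import json

def double(x: int) -> int:
    return x * 2

# Shape of the request body BigQuery sends to the cloud function:
# each inner list is one row of UDF arguments.
request_json = {"calls": [[123], [456]]}

# The generated handler applies the unpickled UDF to every row and
# returns the results under "replies".
replies = [double(*call) for call in request_json["calls"]]
print(json.dumps({"replies": replies}))  # {"replies": [246, 912]}
```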
```diff
@@ -435,13 +272,13 @@ def generate_cloud_function_code(
         if package_requirements:
             requirements.extend(package_requirements)
         requirements = sorted(requirements)
-        requirements_txt = os.path.join(dir, "requirements.txt")
+        requirements_txt = os.path.join(directory, "requirements.txt")
         with open(requirements_txt, "w") as f:
             f.write("\n".join(requirements))
 
         # main.py
-        entry_point = self.generate_cloud_function_main_code(
-            def_, dir, is_row_processor
+        entry_point = bigframes.functions.remote_function_template.generate_cloud_function_main_code(
+            def_, directory, is_row_processor
         )
         return entry_point
 
```
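With main.py generation moved onto the importable remote_function_template module, it can be exercised in a plain unit test with no GCP resources. A hypothetical sketch of such a test (the real tests live in tests/unit/functions/test_remote_function_template.py; the assertion below is an illustrative assumption, not the repo's actual test code):

```python
import os
import tempfile

import bigframes.functions.remote_function_template as template


def test_generate_cloud_function_main_code_writes_handler():
    def add_one(x: int) -> int:
        return x + 1

    with tempfile.TemporaryDirectory() as directory:
        entry_point = template.generate_cloud_function_main_code(
            add_one, directory, is_row_processor=False
        )
        # The generated main.py should define the returned entry point
        # (named "udf_http" in the code this commit replaces).
        with open(os.path.join(directory, "main.py")) as f:
            assert f"def {entry_point}" in f.read()
```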
```diff
@@ -458,11 +295,11 @@ def create_cloud_function(
         """Create a cloud function from the given user defined function."""
 
         # Build and deploy folder structure containing cloud function
-        with tempfile.TemporaryDirectory() as dir:
+        with tempfile.TemporaryDirectory() as directory:
             entry_point = self.generate_cloud_function_code(
-                def_, dir, package_requirements, is_row_processor
+                def_, directory, package_requirements, is_row_processor
             )
-            archive_path = shutil.make_archive(dir, "zip", dir)
+            archive_path = shutil.make_archive(directory, "zip", directory)
 
             # We are creating cloud function source code from the currently running
             # python version. Use the same version to deploy. This is necessary
```
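
For context on the packaging step above: shutil.make_archive(base_name, format, root_dir) writes base_name + ".zip" containing the contents of root_dir, so passing the temporary directory for both arguments zips the generated sources into an archive next to the directory itself. A minimal sketch:

```python
import pathlib
import shutil
import tempfile

with tempfile.TemporaryDirectory() as directory:
    # Stand-ins for the generated cloud function sources.
    pathlib.Path(directory, "main.py").write_text("def udf_http(request): ...\n")
    pathlib.Path(directory, "requirements.txt").write_text("cloudpickle\n")

    # Writes "<directory>.zip" with the directory's contents at the archive
    # root, the layout cloud function source deployment expects.
    archive_path = shutil.make_archive(directory, "zip", directory)
    print(archive_path)  # e.g. /tmp/tmpabc123.zip
```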
