From 1d03b5ae85a20aa6746af0c30756bc1c69a9de2f Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 10:45:08 -0400 Subject: [PATCH 01/31] created new file with arrow and modified base function --- tfx_bsl/beam/run_inference_arrow.py | 1166 ++++++++++++++++++++++ tfx_bsl/beam/run_inference_arrow_test.py | 581 +++++++++++ 2 files changed, 1747 insertions(+) create mode 100644 tfx_bsl/beam/run_inference_arrow.py create mode 100644 tfx_bsl/beam/run_inference_arrow_test.py diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py new file mode 100644 index 00000000..316b65a5 --- /dev/null +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -0,0 +1,1166 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Run batch inference on saved model.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import abc +import base64 +import collections +import os +import platform +import sys +import time +try: + import resource +except ImportError: + resource = None + +from absl import logging +import apache_beam as beam +import pyarrow as pa +from apache_beam.options.pipeline_options import GoogleCloudOptions +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.utils import retry +import googleapiclient +from googleapiclient import discovery +from googleapiclient import http +import numpy as np +import six +import tensorflow as tf +from tfx_bsl.beam import shared +from tfx_bsl.public.proto import model_spec_pb2 +from tfx_bsl.telemetry import util +from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ + Tuple, Union + +# TODO(b/140306674): stop using the internal TF API. +from tensorflow.python.saved_model import loader_impl +from tensorflow_serving.apis import classification_pb2 +from tensorflow_serving.apis import inference_pb2 +from tensorflow_serving.apis import prediction_log_pb2 +from tensorflow_serving.apis import regression_pb2 + + +# TODO(b/131873699): Remove once 1.x support is dropped. +# pylint: disable=g-import-not-at-top +try: + # We need to import this in order to register all quantiles ops, even though + # it's not directly used. + from tensorflow.contrib.boosted_trees.python.ops import quantile_ops as _ # pylint: disable=unused-import +except ImportError: + pass + +_DEFAULT_INPUT_KEY = 'examples' +_METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' +_METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' +_METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION = 'CloudAIPlatformPrediction' +_MILLISECOND_TO_MICROSECOND = 1000 +_MICROSECOND_TO_NANOSECOND = 1000 +_SECOND_TO_MICROSECOND = 1000000 +_REMOTE_INFERENCE_NUM_RETRIES = 5 + +# We define the following aliases of Any because the actual types are not +# public. +_SignatureDef = Any +_MetaGraphDef = Any +_SavedModel = Any + +# TODO (Maxine): what is this? 
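[Editor's note] A minimal usage sketch of the Arrow-based RunInference API introduced in this file, assuming serialized tf.train.Examples are packed into a one-column RecordBatch; the column name, model path, and the _make_record_batch helper below are illustrative, not part of this change:

import apache_beam as beam
import pyarrow as pa
import tensorflow as tf
from tfx_bsl.beam import run_inference_arrow
from tfx_bsl.public.proto import model_spec_pb2

def _make_record_batch(serialized_examples):
  # Pack serialized tf.train.Examples into a single binary column; the DoFns
  # in this module read the serialized records from column 0.
  return pa.RecordBatch.from_arrays(
      [pa.array(serialized_examples, type=pa.binary())], ['__raw_record__'])

spec = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/path/to/saved_model'))

with beam.Pipeline() as pipeline:
  _ = (pipeline
       | 'CreateBatches' >> beam.Create(
           [_make_record_batch([tf.train.Example().SerializeToString()])])
       | 'RunInference' >> run_inference_arrow.RunInferenceImpl(spec))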
+_BulkInferResult = Union[prediction_log_pb2.PredictLog, + Tuple[tf.train.Example, regression_pb2.Regression], + Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse], + Tuple[tf.train.Example, + classification_pb2.Classifications]] + + +# TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 +class OperationType(object): + CLASSIFICATION = 'CLASSIFICATION' + REGRESSION = 'REGRESSION' + PREDICTION = 'PREDICTION' + MULTIHEAD = 'MULTIHEAD' + + +# TODO (Me): pTransform from examples/sequence example here + +# TODO (Me): Union[bytes, pa.RecordBatch]? +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceImpl( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType +) -> beam.pvalue.PCollection: + """Implementation of RunInference API. + + Args: + examples: A PCollection containing RecordBatch. + inference_spec_type: Model inference endpoint. + + Returns: + A PCollection containing prediction logs. + + Raises: + ValueError; when operation is not supported. + """ + logging.info('RunInference on model: %s', inference_spec_type) + + batched_examples = examples | 'BatchExamples' >> beam.BatchElements() + operation_type = _get_operation_type(inference_spec_type) + if operation_type == OperationType.CLASSIFICATION: + return batched_examples | 'Classify' >> _Classify(inference_spec_type) + elif operation_type == OperationType.REGRESSION: + return batched_examples | 'Regress' >> _Regress(inference_spec_type) + elif operation_type == OperationType.PREDICTION: + return batched_examples | 'Predict' >> _Predict(inference_spec_type) + elif operation_type == OperationType.MULTIHEAD: + return (batched_examples + | 'MultiInference' >> _MultiInference(inference_spec_type)) + else: + raise ValueError('Unsupported operation_type %s' % operation_type) + + +_IOTensorSpec = collections.namedtuple( + '_IOTensorSpec', + ['input_tensor_alias', 'input_tensor_name', 'output_alias_tensor_names']) + +_Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs classify PTransform.""" + if _using_in_process_inference(inference_spec_type): + return (pcoll + | 'Classify' >> beam.ParDo( + _BatchClassifyDoFn(inference_spec_type, shared.Shared())) + | 'BuildPredictionLogForClassifications' >> beam.ParDo( + _BuildPredictionLogForClassificationsDoFn())) + else: + raise NotImplementedError + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs regress PTransform.""" + if _using_in_process_inference(inference_spec_type): + return (pcoll + | 'Regress' >> beam.ParDo( + _BatchRegressDoFn(inference_spec_type, shared.Shared())) + | 'BuildPredictionLogForRegressions' >> beam.ParDo( + _BuildPredictionLogForRegressionsDoFn())) + else: + raise NotImplementedError + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) 
+@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs predict PTransform.""" + if _using_in_process_inference(inference_spec_type): + predictions = ( + pcoll + | 'Predict' >> beam.ParDo( + _BatchPredictDoFn(inference_spec_type, shared.Shared()))) + else: + predictions = ( + pcoll + | 'RemotePredict' >> beam.ParDo( + _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options))) + return (predictions + | 'BuildPredictionLogForPredictions' >> beam.ParDo( + _BuildPredictionLogForPredictionsDoFn())) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs multi inference PTransform.""" + if _using_in_process_inference(inference_spec_type): + return ( + pcoll + | 'MultiInference' >> beam.ParDo( + _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())) + | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) + else: + raise NotImplementedError + + +@six.add_metaclass(abc.ABCMeta) +class _BaseDoFn(beam.DoFn): + """Base DoFn that performs bulk inference.""" + + class _MetricsCollector(object): + """A collector for beam metrics.""" + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + operation_type = _get_operation_type(inference_spec_type) + proximity_descriptor = ( + _METRICS_DESCRIPTOR_IN_PROCESS + if _using_in_process_inference(inference_spec_type) else + _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) + namespace = util.MakeTfxNamespace( + [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor]) + + # Metrics + self._inference_counter = beam.metrics.Metrics.counter( + namespace, 'num_inferences') + self._num_instances = beam.metrics.Metrics.counter( + namespace, 'num_instances') + self._inference_request_batch_size = beam.metrics.Metrics.distribution( + namespace, 'inference_request_batch_size') + self._inference_request_batch_byte_size = ( + beam.metrics.Metrics.distribution( + namespace, 'inference_request_batch_byte_size')) + # Batch inference latency in microseconds. + self._inference_batch_latency_micro_secs = ( + beam.metrics.Metrics.distribution( + namespace, 'inference_batch_latency_micro_secs')) + self._model_byte_size = beam.metrics.Metrics.distribution( + namespace, 'model_byte_size') + # Model load latency in milliseconds. 
+ self._load_model_latency_milli_secs = beam.metrics.Metrics.distribution( + namespace, 'load_model_latency_milli_secs') + + # Metrics cache + self.load_model_latency_milli_secs_cache = None + self.model_byte_size_cache = None + + def update_metrics_with_cache(self): + if self.load_model_latency_milli_secs_cache is not None: + self._load_model_latency_milli_secs.update( + self.load_model_latency_milli_secs_cache) + self.load_model_latency_milli_secs_cache = None + if self.model_byte_size_cache is not None: + self._model_byte_size.update(self.model_byte_size_cache) + self.model_byte_size_cache = None + + def update(self, elements: List[str], latency_micro_secs: int) -> None: + self._inference_batch_latency_micro_secs.update(latency_micro_secs) + self._num_instances.inc(len(elements)) + self._inference_counter.inc(len(elements)) + self._inference_request_batch_size.update(len(elements)) + self._inference_request_batch_byte_size.update( + sum(element.ByteSize() for element in elements)) + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + super(_BaseDoFn, self).__init__() + self._clock = None + self._metrics_collector = self._MetricsCollector(inference_spec_type) + + def setup(self): + self._clock = _ClockFactory.make_clock() + + def process( + self, elements: pa.RecordBatch + ) -> Iterable[Any]: + batch_start_time = self._clock.get_current_time_in_microseconds() + # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter + # extract record batch from here, assuming first column + serialized_examples = elements.column(0) + outputs = self.run_inference(serialized_examples) + result = self._post_process(serialized_examples, outputs) + self._metrics_collector.update( + elements, + self._clock.get_current_time_in_microseconds() - batch_start_time) + return result + + def finish_bundle(self): + self._metrics_collector.update_metrics_with_cache() + + @abc.abstractmethod + def run_inference( + self, elements: List[str] + ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: + raise NotImplementedError + + @abc.abstractmethod + def _post_process(self, elements: List[str], outputs: Any) -> Iterable[Any]: + raise NotImplementedError + + +def _retry_on_unavailable_and_resource_error_filter(exception: Exception): + """Retries for HttpError. + + Retries if error is unavailable (503) or resource exhausted (429). + Resource exhausted may happen when qps or bandwidth exceeds quota. + + Args: + exception: Exception from inference http request execution. + Returns: + A boolean of whether retry. + """ + + return (isinstance(exception, googleapiclient.errors.HttpError) and + exception.resp.status in (503, 429)) + +# TODO (Maxine): change all example to serialized +@beam.typehints.with_input_types(List[str]) +# Using output typehints triggers NotImplementedError('BEAM-2717)' on +# streaming mode on Dataflow runner. +# TODO(b/151468119): Consider to re-batch with online serving request size +# limit, and re-batch with RPC failures(InvalidArgument) regarding request size. +# @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) +class _RemotePredictDoFn(_BaseDoFn): + """A DoFn that performs predictions from a cloud-hosted TensorFlow model. + + Supports both batch and streaming processing modes. + NOTE: Does not work on DirectRunner for streaming jobs [BEAM-7885]. + + In order to request predictions, you must deploy your trained model to AI + Platform Prediction in the TensorFlow SavedModel format. 
See + [Exporting a SavedModel for prediction] + (https://cloud.google.com/ai-platform/prediction/docs/exporting-savedmodel-for-prediction) + for more details. + + To send binary data, you have to make sure that the name of an input ends in + `_bytes`. + + NOTE: The returned `PredictLog` instances do not have `PredictRequest` part + filled. The reason is that it is difficult to determine the input tensor name + without having access to cloud-hosted model's signatures. + """ + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, + pipeline_options: PipelineOptions): + super(_RemotePredictDoFn, self).__init__(inference_spec_type) + self._api_client = None + + project_id = ( + inference_spec_type.ai_platform_prediction_model_spec.project_id or + pipeline_options.view_as(GoogleCloudOptions).project) + if not project_id: + raise ValueError('Either a non-empty project id or project flag in ' + ' beam pipeline options needs be provided.') + + model_name = ( + inference_spec_type.ai_platform_prediction_model_spec.model_name) + if not model_name: + raise ValueError('A non-empty model name must be provided.') + + version_name = ( + inference_spec_type.ai_platform_prediction_model_spec.version_name) + name_spec = 'projects/{}/models/{}' + # If version is not specified, the default version for a model is used. + if version_name: + name_spec += '/versions/{}' + self._full_model_name = name_spec.format(project_id, model_name, + version_name) + + def setup(self): + super(_RemotePredictDoFn, self).setup() + # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to + # user agent once custom header is supported in googleapiclient. + self._api_client = discovery.build('ml', 'v1') + + # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. + @retry.with_exponential_backoff( + initial_delay_secs=1.0, + num_retries=_REMOTE_INFERENCE_NUM_RETRIES, + retry_filter=_retry_on_unavailable_and_resource_error_filter) + def _execute_request( + self, + request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: + result = request.execute() + if 'error' in result: + raise ValueError(result['error']) + return result + + def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: + return self._api_client.projects().predict( + name=self._full_model_name, body=body) + + @classmethod + def _prepare_instances( + cls, elements: List[tf.train.Example] + ) -> Generator[Mapping[Text, Any], None, None]: + for example in elements: + # TODO(b/151468119): support tf.train.SequenceExample + if not isinstance(example, tf.train.Example): + raise ValueError('Remote prediction only supports tf.train.Example') + + instance = {} + for input_name, feature in example.features.feature.items(): + attr_name = feature.WhichOneof('kind') + if attr_name is None: + continue + attr = getattr(feature, attr_name) + values = cls._parse_feature_content(attr.value, attr_name, + cls._sending_as_binary(input_name)) + # Flatten a sequence if its length is 1 + values = (values[0] if len(values) == 1 else values) + instance[input_name] = values + yield instance + + @staticmethod + def _sending_as_binary(input_name: Text) -> bool: + """Whether data should be sent as binary.""" + return input_name.endswith('_bytes') + + @staticmethod + def _parse_feature_content(values: Sequence[Any], attr_name: Text, + as_binary: bool) -> Sequence[Any]: + """Parse the content of tf.train.Feature object. 
+ + If bytes_list, parse a list of bytes-like objects to a list of strings so + that it would be JSON serializable. + + If float_list or int64_list, do nothing. + + If data should be sent as binary, mark it as binary by replacing it with + a single attribute named 'b64'. + """ + if as_binary: + return [{'b64': base64.b64encode(x).decode()} for x in values] + elif attr_name == 'bytes_list': + return [x.decode() for x in values] + else: + return values + + def run_inference( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Sequence[Mapping[Text, Any]]: + body = {'instances': list(self._prepare_instances(elements))} + request = self._make_request(body) + response = self._execute_request(request) + return response['predictions'] + + def _post_process( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]], + outputs: Sequence[Mapping[Text, Any]] + ) -> Iterable[prediction_log_pb2.PredictLog]: + result = [] + for output in outputs: + predict_log = prediction_log_pb2.PredictLog() + for output_alias, values in output.items(): + values = np.array(values) + tensor_proto = tf.make_tensor_proto( + values=values, + dtype=tf.as_dtype(values.dtype).as_datatype_enum, + shape=np.expand_dims(values, axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) + return result + + +# TODO(b/131873699): Add typehints once +# [BEAM-8381](https://issues.apache.org/jira/browse/BEAM-8381) +# is fixed. +# TODO(b/143484017): Add batch_size back off in the case there are functional +# reasons large batch sizes cannot be handled. +class _BaseBatchSavedModelDoFn(_BaseDoFn): + """A DoFn that runs in-process batch inference with a model. + + Models need to have the required serving signature as mentioned in + [Tensorflow Serving](https://www.tensorflow.org/tfx/serving/signature_defs) + + This function will check model signatures first. Then it will load and run + model inference in batch. + """ + + def __init__( + self, + inference_spec_type: model_spec_pb2.InferenceSpecType, + shared_model_handle: shared.Shared, + ): + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) + self._inference_spec_type = inference_spec_type + self._shared_model_handle = shared_model_handle + self._model_path = inference_spec_type.saved_model_spec.model_path + self._tags = None + self._signatures = _get_signatures( + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) + self._session = None + self._io_tensor_spec = None + + def setup(self): + """Load the model. + + Note that worker may crash if exception is thrown in setup due + to b/139207285. + """ + + super(_BaseBatchSavedModelDoFn, self).setup() + self._tags = _get_tags(self._inference_spec_type) + self._io_tensor_spec = self._pre_process() + + if self._has_tpu_tag(): + # TODO(b/131873699): Support TPU inference. + raise ValueError('TPU inference is not supported yet.') + self._session = self._load_model() + + def _load_model(self): + """Load a saved model into memory. + + Returns: + Session instance. + """ + + def load(): + """Function for constructing shared LoadedModel.""" + # TODO(b/143484017): Do warmup and other heavy model construction here. 
+ result = tf.compat.v1.Session(graph=tf.compat.v1.Graph()) + memory_before = _get_current_process_memory_in_bytes() + start_time = self._clock.get_current_time_in_microseconds() + tf.compat.v1.saved_model.loader.load(result, self._tags, self._model_path) + end_time = self._clock.get_current_time_in_microseconds() + memory_after = _get_current_process_memory_in_bytes() + self._metrics_collector.load_model_latency_milli_secs_cache = ( + (end_time - start_time) / _MILLISECOND_TO_MICROSECOND) + self._metrics_collector.model_byte_size_cache = ( + memory_after - memory_before) + return result + + if not self._model_path: + raise ValueError('Model path is not valid.') + return self._shared_model_handle.acquire(load) + + def _pre_process(self) -> _IOTensorSpec: + # Pre process functions will validate for each signature. + io_tensor_specs = [] + for signature in self._signatures: + if len(signature.signature_def.inputs) != 1: + raise ValueError('Signature should have 1 and only 1 inputs') + if (list(signature.signature_def.inputs.values())[0].dtype != + tf.string.as_datatype_enum): + raise ValueError( + 'Input dtype is expected to be %s, got %s' % + tf.string.as_datatype_enum, + list(signature.signature_def.inputs.values())[0].dtype) + io_tensor_specs.append(_signature_pre_process(signature.signature_def)) + input_tensor_name = '' + input_tensor_alias = '' + output_alias_tensor_names = {} + for io_tensor_spec in io_tensor_specs: + if not input_tensor_name: + input_tensor_name = io_tensor_spec.input_tensor_name + input_tensor_alias = io_tensor_spec.input_tensor_alias + elif input_tensor_name != io_tensor_spec.input_tensor_name: + raise ValueError('Input tensor must be the same for all Signatures.') + for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items( + ): + output_alias_tensor_names[alias] = tensor_name + if (not output_alias_tensor_names or not input_tensor_name or + not input_tensor_alias): + raise ValueError('No valid fetch tensors or feed tensors.') + return _IOTensorSpec(input_tensor_alias, input_tensor_name, + output_alias_tensor_names) + + def _has_tpu_tag(self) -> bool: + return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and + tf.saved_model.TPU in self._tags) + + def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + self._check_elements(elements) + outputs = self._run_tf_operations(elements) + return outputs + + def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + result = self._session.run( + self._io_tensor_spec.output_alias_tensor_names, + feed_dict={self._io_tensor_spec.input_tensor_name: elements}) + if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): + raise RuntimeError('Output length does not match fetches') + return result + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + """Unimplemented.""" + + raise NotImplementedError + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + classification_pb2.Classifications]) +class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that run inference on classification model.""" + + def setup(self): + signature_def = self._signatures[0].signature_def + if signature_def.method_name != tf.saved_model.CLASSIFY_METHOD_NAME: + raise ValueError( + 'BulkInferrerClassifyDoFn requires signature method ' + 'name %s, got: %s' % tf.saved_model.CLASSIFY_METHOD_NAME, + signature_def.method_name) + super(_BatchClassifyDoFn, 
self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Classify only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> Iterable[Tuple[tf.train.Example, classification_pb2.Classifications]]: + classifications = _post_process_classify( + self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + return zip(elements, classifications) + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + regression_pb2.Regression]) +class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that run inference on regression model.""" + + def setup(self): + super(_BatchRegressDoFn, self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Regress only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> Iterable[Tuple[tf.train.Example, regression_pb2.Regression]]: + regressions = _post_process_regress(elements, outputs) + return zip(elements, regressions) + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictLog) +class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that runs inference on predict model.""" + + def setup(self): + signature_def = self._signatures[0].signature_def + if signature_def.method_name != tf.saved_model.PREDICT_METHOD_NAME: + raise ValueError( + 'BulkInferrerPredictDoFn requires signature method ' + 'name %s, got: %s' % tf.saved_model.PREDICT_METHOD_NAME, + signature_def.method_name) + super(_BatchPredictDoFn, self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + pass + + def _post_process( + self, elements: Union[Sequence[tf.train.Example], + Sequence[tf.train.SequenceExample]], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[prediction_log_pb2.PredictLog]: + input_tensor_alias = self._io_tensor_spec.input_tensor_alias + signature_name = self._signatures[0].name + batch_size = len(elements) + for output_alias, output in outputs.items(): + if len(output.shape) < 1 or output.shape[0] != batch_size: + raise ValueError( + 'Expected output tensor %s to have at least one ' + 'dimension, with the first having a size equal to the input batch ' + 'size %s. 
Instead found %s' % + (output_alias, batch_size, output.shape)) + predict_log_tmpl = prediction_log_pb2.PredictLog() + predict_log_tmpl.request.model_spec.signature_name = signature_name + predict_log_tmpl.response.model_spec.signature_name = signature_name + input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias] + input_tensor_proto.dtype = tf.string.as_datatype_enum + input_tensor_proto.tensor_shape.dim.add().size = 1 + + result = [] + for i in range(batch_size): + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) + predict_log.request.inputs[input_tensor_alias].string_val.append( + elements[i].SerializeToString()) + for output_alias, output in outputs.items(): + # Mimic tensor::Split + tensor_proto = tf.make_tensor_proto( + values=output[i], + dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, + shape=np.expand_dims(output[i], axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) + return result + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse]) +class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that runs inference on multi-head model.""" + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Multi inference only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> Iterable[Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]]: + classifications = None + regressions = None + for signature in self._signatures: + signature_def = signature.signature_def + if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + classifications = _post_process_classify( + self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: + regressions = _post_process_regress(elements, outputs) + else: + raise ValueError('Signature method %s is not supported for ' + 'multi inference' % signature_def.method_name) + result = [] + for i in range(len(elements)): + response = inference_pb2.MultiInferenceResponse() + for signature in self._signatures: + signature_def = signature.signature_def + inference_result = response.results.add() + if (signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME and + classifications): + inference_result.classification_result.classifications.add().CopyFrom( + classifications[i]) + elif ( + signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME and + regressions): + inference_result.regression_result.regressions.add().CopyFrom( + regressions[i]) + else: + raise ValueError('Signature method %s is not supported for ' + 'multi inference' % signature_def.method_name) + inference_result.model_spec.signature_name = signature.name + if len(response.results) != len(self._signatures): + raise RuntimeError('Multi inference response result length does not ' + 'match the number of signatures') + result.append((elements[i], response)) + return result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + classification_pb2.Classifications]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): + """A DoFn that builds prediction log 
from classifications.""" + + def process( + self, element: Tuple[tf.train.Example, classification_pb2.Classifications] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, classifications) = element + result = prediction_log_pb2.PredictionLog() + result.classify_log.request.input.example_list.examples.add().CopyFrom( + train_example) + result.classify_log.response.result.classifications.add().CopyFrom( + classifications) + yield result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + regression_pb2.Regression]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): + """A DoFn that builds prediction log from regressions.""" + + def process( + self, element: Tuple[tf.train.Example, regression_pb2.Regression] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, regression) = element + result = prediction_log_pb2.PredictionLog() + result.regress_log.request.input.example_list.examples.add().CopyFrom( + train_example) + result.regress_log.response.result.regressions.add().CopyFrom(regression) + yield result + + +@beam.typehints.with_input_types(prediction_log_pb2.PredictLog) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForPredictionsDoFn(beam.DoFn): + """A DoFn that builds prediction log from predictions.""" + + def process( + self, element: prediction_log_pb2.PredictLog + ) -> Iterable[prediction_log_pb2.PredictionLog]: + result = prediction_log_pb2.PredictionLog() + result.predict_log.CopyFrom(element) + yield result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildMultiInferenceLogDoFn(beam.DoFn): + """A DoFn that builds prediction log from multi-head inference result.""" + + def process( + self, element: Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, multi_inference_response) = element + result = prediction_log_pb2.PredictionLog() + (result.multi_inference_log.request.input.example_list.examples.add() + .CopyFrom(train_example)) + result.multi_inference_log.response.CopyFrom(multi_inference_response) + yield result + + +# TODO (Maxine): moving these into class? +def _post_process_classify( + output_alias_tensor_names: Mapping[Text, Text], + elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] +) -> Sequence[classification_pb2.Classifications]: + """Returns classifications from inference output.""" + + # This is to avoid error "The truth value of an array with + # more than one element is ambiguous." + has_classes = False + has_scores = False + if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in output_alias_tensor_names: + classes = outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] + has_classes = True + if tf.saved_model.CLASSIFY_OUTPUT_SCORES in output_alias_tensor_names: + scores = outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES] + has_scores = True + if has_classes: + if classes.ndim != 2: + raise ValueError('Expected Tensor shape: [batch_size num_classes] but ' + 'got %s' % classes.shape) + if classes.dtype != tf.string.as_numpy_dtype: + raise ValueError('Expected classes Tensor of %s. 
Got: %s' % + (tf.string.as_numpy_dtype, classes.dtype)) + if classes.shape[0] != len(elements): + raise ValueError('Expected classes output batch size of %s, got %s' % + (len(elements), classes.shape[0])) + if has_scores: + if scores.ndim != 2: + raise ValueError("""Expected Tensor shape: [batch_size num_classes] but + got %s""" % scores.shape) + if scores.dtype != tf.float32.as_numpy_dtype: + raise ValueError('Expected classes Tensor of %s. Got: %s' % + (tf.float32.as_numpy_dtype, scores.dtype)) + if scores.shape[0] != len(elements): + raise ValueError('Expected classes output batch size of %s, got %s' % + (len(elements), scores.shape[0])) + num_classes = 0 + if has_classes and has_scores: + if scores.shape[1] != classes.shape[1]: + raise ValueError('Tensors class and score should match in shape[1]. ' + 'Got %s vs %s' % (classes.shape[1], scores.shape[1])) + num_classes = classes.shape[1] + elif has_classes: + num_classes = classes.shape[1] + elif has_scores: + num_classes = scores.shape[1] + + result = [] + for i in range(len(elements)): + a_classification = classification_pb2.Classifications() + for c in range(num_classes): + a_class = a_classification.classes.add() + if has_classes: + a_class.label = classes[i][c] + if has_scores: + a_class.score = scores[i][c] + result.append(a_classification) + if len(result) != len(elements): + raise RuntimeError('Classifications length does not match elements') + return result + + +def _post_process_regress( + elements: Sequence[tf.train.Example], + outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: + """Returns regressions from inference output.""" + + if tf.saved_model.REGRESS_OUTPUTS not in outputs: + raise ValueError('No regression outputs found in outputs: %s' % + outputs.keys()) + output = outputs[tf.saved_model.REGRESS_OUTPUTS] + batch_size = len(elements) + if not (output.ndim == 1 or (output.ndim == 2 and output.shape[1] == 1)): + raise ValueError("""Expected output Tensor shape to be either [batch_size] + or [batch_size, 1] but got %s""" % output.shape) + if batch_size != output.shape[0]: + raise ValueError( + 'Input batch size did not match output batch size: %s vs %s' % + (batch_size, output.shape[0])) + if output.dtype != tf.float32.as_numpy_dtype: + raise ValueError('Expected output Tensor of %s. Got: %s' % + (tf.float32.as_numpy_dtype, output.dtype)) + if output.size != batch_size: + raise ValueError('Expected output batch size to be %s. Got: %s' % + (batch_size, output.size)) + flatten_output = output.flatten() + result = [] + for regression_result in flatten_output: + regression = regression_pb2.Regression() + regression.value = regression_result + result.append(regression) + + # Add additional check to save downstream consumer checks. 
+ if len(result) != len(elements): + raise RuntimeError('Regression length does not match elements') + return result + + +def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: + """Returns IOTensorSpec from signature.""" + + if len(signature.inputs) != 1: + raise ValueError('Signature should have 1 and only 1 inputs') + input_tensor_alias = list(signature.inputs.keys())[0] + if list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum: + raise ValueError( + 'Input dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, + list(signature.inputs.values())[0].dtype) + if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_classify(signature)) + elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_predict(signature)) + elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_regress(signature)) + else: + raise ValueError('Signature method %s is not supported' % + signature.method_name) + return _IOTensorSpec(input_tensor_alias, input_tensor_name, + output_alias_tensor_names) + + +def _signature_pre_process_classify( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. + + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. + """ + + if len(signature.outputs) != 1 and len(signature.outputs) != 2: + raise ValueError('Classify signature should have 1 or 2 outputs') + if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: + raise ValueError('No classification inputs found in SignatureDef: %s' % + signature.inputs) + input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name + output_alias_tensor_names = {} + if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and + tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): + raise ValueError( + """Expected classification signature outputs to contain at + least one of %s or %s. Signature was: %s""" % + tf.saved_model.CLASSIFY_OUTPUT_CLASSES, + tf.saved_model.CLASSIFY_OUTPUT_SCORES, signature) + if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in signature.outputs: + output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] = ( + signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES].name) + if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: + output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( + signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) + return input_tensor_name, output_alias_tensor_names + + +def _signature_pre_process_predict( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. + + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. + """ + + input_tensor_name = list(signature.inputs.values())[0].name + output_alias_tensor_names = dict([ + (key, output.name) for key, output in signature.outputs.items() + ]) + return input_tensor_name, output_alias_tensor_names + + +def _signature_pre_process_regress( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. 
+ + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. + """ + + if len(signature.outputs) != 1: + raise ValueError('Regress signature should have 1 output') + if tf.saved_model.REGRESS_INPUTS not in signature.inputs: + raise ValueError('No regression inputs found in SignatureDef: %s' % + signature.inputs) + input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name + if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: + raise ValueError('No regression outputs found in SignatureDef: %s' % + signature.outputs) + output_alias_tensor_names = { + tf.saved_model.REGRESS_OUTPUTS: + signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name + } + return input_tensor_name, output_alias_tensor_names + + +def _using_in_process_inference( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> bool: + return inference_spec_type.WhichOneof('type') == 'saved_model_spec' + + +def _get_signatures(model_path: Text, signatures: Sequence[Text], + tags: Sequence[Text]) -> Sequence[_Signature]: + """Returns a sequence of {model_signature_name: signature}.""" + + if signatures: + signature_names = signatures + else: + signature_names = [tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + + saved_model_pb = loader_impl.parse_saved_model(model_path) + meta_graph_def = _get_meta_graph_def(saved_model_pb, tags) + result = [] + for signature_name in signature_names: + if signature_name in meta_graph_def.signature_def: + result.append( + _Signature(signature_name, + meta_graph_def.signature_def[signature_name])) + else: + raise RuntimeError('Signature %s could not be found in SavedModel' % + signature_name) + return result + + +def _get_operation_type( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: + if _using_in_process_inference(inference_spec_type): + signatures = _get_signatures( + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) + if not signatures: + raise ValueError('Model does not have valid signature to use') + + if len(signatures) == 1: + method_name = signatures[0].signature_def.method_name + if method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + return OperationType.CLASSIFICATION + elif method_name == tf.saved_model.REGRESS_METHOD_NAME: + return OperationType.REGRESSION + elif method_name == tf.saved_model.PREDICT_METHOD_NAME: + return OperationType.PREDICTION + else: + raise ValueError('Unsupported signature method_name %s' % method_name) + else: + for signature in signatures: + method_name = signature.signature_def.method_name + if (method_name != tf.saved_model.CLASSIFY_METHOD_NAME and + method_name != tf.saved_model.REGRESS_METHOD_NAME): + raise ValueError('Unsupported signature method_name for multi-head ' + 'model inference: %s' % method_name) + return OperationType.MULTIHEAD + else: + # Remote inference supports predictions only. 
+ return OperationType.PREDICTION + + +def _get_meta_graph_def(saved_model_pb: _SavedModel, + tags: Sequence[Text]) -> _MetaGraphDef: + """Returns MetaGraphDef from SavedModel.""" + + for meta_graph_def in saved_model_pb.meta_graphs: + if set(meta_graph_def.meta_info_def.tags) == set(tags): + return meta_graph_def + raise RuntimeError('MetaGraphDef associated with tags %s could not be ' + 'found in SavedModel' % tags) + + +def _get_current_process_memory_in_bytes(): + """Returns memory usage in bytes.""" + + if resource is not None: + usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if _is_darwin(): + return usage + return usage * 1024 + else: + logging.warning('Resource module is not available for current platform, ' + 'memory usage cannot be fetched.') + return 0 + + +def _get_tags( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> Sequence[Text]: + """Returns tags from ModelSpec.""" + + if inference_spec_type.saved_model_spec.tag: + return list(inference_spec_type.saved_model_spec.tag) + else: + return [tf.saved_model.SERVING] + + +def _is_darwin() -> bool: + return sys.platform == 'darwin' + + +def _is_windows() -> bool: + return platform.system() == 'Windows' or os.name == 'nt' + + +def _is_cygwin() -> bool: + return platform.system().startswith('CYGWIN_NT') + + +class _Clock(object): + + def get_current_time_in_microseconds(self) -> int: + return int(time.time() * _SECOND_TO_MICROSECOND) + + +class _FineGrainedClock(_Clock): + + def get_current_time_in_microseconds(self) -> int: + return int( + time.clock_gettime_ns(time.CLOCK_REALTIME) / # pytype: disable=module-attr + _MICROSECOND_TO_NANOSECOND) + + +class _ClockFactory(object): + + @staticmethod + def make_clock() -> _Clock: + if (hasattr(time, 'clock_gettime_ns') and not _is_windows() + and not _is_cygwin()): + return _FineGrainedClock() + return _Clock() diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py new file mode 100644 index 00000000..a4eed521 --- /dev/null +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -0,0 +1,581 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
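[Editor's note] A minimal sketch, for the tests below, of turning a TFRecord file of serialized tf.train.Examples into the RecordBatch input that RunInferenceImpl now expects; the helper name and batch sizes are illustrative, and the column name mirrors the ARROW_INPUT_COLUMN constant defined later in this file:

import apache_beam as beam
import pyarrow as pa

def _read_examples_as_record_batches(pipeline, example_path):
  # Read raw serialized records, group them, and pack each group into a
  # one-column RecordBatch keyed by the raw-record column.
  return (pipeline
          | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
          | 'BatchExamples' >> beam.BatchElements(
              min_batch_size=2, max_batch_size=2)
          | 'ToRecordBatch' >> beam.Map(
              lambda batch: pa.RecordBatch.from_arrays(
                  [pa.array(batch, type=pa.binary())], ['__raw_record__'])))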
+"""Tests for tfx_bsl.run_inference.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import json +import os +try: + import unittest.mock as mock +except ImportError: + import mock + +import apache_beam as beam +from apache_beam.metrics.metric import MetricsFilter +from apache_beam.testing.util import assert_that +from apache_beam.testing.util import equal_to +from googleapiclient import discovery +from googleapiclient import http +from six.moves import http_client +import tensorflow as tf +from tfx_bsl.beam import run_inference +from tfx_bsl.public.proto import model_spec_pb2 + +from google.protobuf import text_format + +from tensorflow_serving.apis import prediction_log_pb2 + + +class RunInferenceFixture(tf.test.TestCase): + + def setUp(self): + super(RunInferenceFixture, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + ] + + def _get_output_data_dir(self, sub_dir=None): + test_dir = self._testMethodName + path = os.path.join( + os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), + test_dir) + if not tf.io.gfile.exists(path): + tf.io.gfile.makedirs(path) + if sub_dir is not None: + path = os.path.join(path, sub_dir) + return path + + def _prepare_predict_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._predict_examples: + output_file.write(example.SerializeToString()) + + +ARROW_INPUT_COLUMN = '__raw_record__' +class RunOfflineInferenceTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceTest, self).setUp() + + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + # TODO: Ask if these example can directly transform to recordBatch + + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _build_predict_model(self, model_path): + """Exports the dummy sum predict model.""" + + with tf.compat.v1.Graph().as_default(): + input_tensors = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0) + } + serving_receiver = ( + tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( + input_tensors)()) + output_tensors = {'y': serving_receiver.features['x'] * 2} + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def = tf.compat.v1.estimator.export.PredictOutput( + output_tensors).as_signature_def(serving_receiver.receiver_tensors) + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, 
[tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature_def, + }) + builder.save() + + def _build_regression_signature(self, input_tensor, output_tensor): + """Helper function for building a regression SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + output_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) + + def _build_classification_signature(self, input_tensor, scores_tensor): + """Helper function for building a classification SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + scores_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + + def _build_multihead_model(self, model_path): + with tf.compat.v1.Graph().as_default(): + input_example = tf.compat.v1.placeholder( + tf.string, name='input_examples_tensor') + config = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + 'y': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + } + features = tf.compat.v1.parse_example(input_example, config) + x = features['x'] + y = features['y'] + sum_pred = x + y + diff_pred = tf.abs(x - y) + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def_map = { + 'regress_diff': + self._build_regression_signature(input_example, diff_pred), + 'classify_sum': + self._build_classification_signature(input_example, sum_pred), + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + self._build_regression_signature(input_example, sum_pred) + } + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map) + builder.save() + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path): + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | + 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + 
prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + def testModelPathInvalid(self): + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=self._get_output_data_dir())), prediction_log_path) + + def testEstimatorModelPredict(self): + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_predict_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + self.assertEqual( + results[0].predict_log.request.inputs[ + run_inference._DEFAULT_INPUT_KEY].string_val[0], + self._predict_examples[0].SerializeToString()) + self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, + tf.float32) + self.assertLen( + results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, + 1) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, + 1) + + def testClassifyModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + classify_log = results[0].classify_log + self.assertLen(classify_log.request.input.example_list.examples, 1) + self.assertEqual(classify_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(classify_log.response.result.classifications, 1) + self.assertLen(classify_log.response.result.classifications[0].classes, 1) + self.assertAlmostEqual( + classify_log.response.result.classifications[0].classes[0].score, 1.0) + + def testRegressModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['regress_diff'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + regress_log = results[0].regress_log + self.assertLen(regress_log.request.input.example_list.examples, 1) + self.assertEqual(regress_log.request.input.example_list.examples[0], + 
self._multihead_examples[0]) + self.assertLen(regress_log.response.result.regressions, 1) + self.assertAlmostEqual(regress_log.response.result.regressions[0].value, + 0.6) + + def testMultiInferenceModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, + signature_name=['regress_diff', 'classify_sum'])), + prediction_log_path) + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + multi_inference_log = results[0].multi_inference_log + self.assertLen(multi_inference_log.request.input.example_list.examples, 1) + self.assertEqual(multi_inference_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(multi_inference_log.response.results, 2) + signature_names = [] + for result in multi_inference_log.response.results: + signature_names.append(result.model_spec.signature_name) + self.assertIn('regress_diff', signature_names) + self.assertIn('classify_sum', signature_names) + result = multi_inference_log.response.results[0] + self.assertEqual(result.model_spec.signature_name, 'regress_diff') + self.assertLen(result.regression_result.regressions, 1) + self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) + result = multi_inference_log.response.results[1] + self.assertEqual(result.model_spec.signature_name, 'classify_sum') + self.assertLen(result.classification_result.classifications, 1) + self.assertLen(result.classification_result.classifications[0].classes, 1) + self.assertAlmostEqual( + result.classification_result.classifications[0].classes[0].score, 1.0) + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = 
self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testTelemetry(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + inference_spec_type = model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])) + pipeline = beam.Pipeline() + _ = ( + pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + run_result = pipeline.run() + run_result.wait_until_finish() + + num_inferences = run_result.metrics().query( + MetricsFilter().with_name('num_inferences')) + self.assertTrue(num_inferences['counters']) + self.assertEqual(num_inferences['counters'][0].result, 2) + num_instances = run_result.metrics().query( + MetricsFilter().with_name('num_instances')) + self.assertTrue(num_instances['counters']) + self.assertEqual(num_instances['counters'][0].result, 2) + inference_request_batch_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_size')) + self.assertTrue(inference_request_batch_size['distributions']) + self.assertEqual( + inference_request_batch_size['distributions'][0].result.sum, 2) + inference_request_batch_byte_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_byte_size')) + self.assertTrue(inference_request_batch_byte_size['distributions']) + self.assertEqual( + inference_request_batch_byte_size['distributions'][0].result.sum, + sum(element.ByteSize() for element in self._multihead_examples)) + inference_batch_latency_micro_secs = run_result.metrics().query( + MetricsFilter().with_name('inference_batch_latency_micro_secs')) + self.assertTrue(inference_batch_latency_micro_secs['distributions']) + self.assertGreaterEqual( + inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) + load_model_latency_milli_secs = run_result.metrics().query( + MetricsFilter().with_name('load_model_latency_milli_secs')) + self.assertTrue(load_model_latency_milli_secs['distributions']) + self.assertGreaterEqual( + load_model_latency_milli_secs['distributions'][0].result.sum, 0) + + +class RunRemoteInferenceTest(RunInferenceFixture): + + def setUp(self): + super(RunRemoteInferenceTest, self).setUp() + self.example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self.example_path) + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
+ self._discovery_testdata_dir = os.path.join( + os.path.join(os.path.dirname(__file__), 'testdata'), + 'ml_discovery.json') + + @staticmethod + def _make_response_body(content, successful): + if successful: + response_dict = {'predictions': content} + else: + response_dict = {'error': content} + return json.dumps(response_dict) + + def _set_up_pipeline(self, inference_spec_type): + self.pipeline = beam.Pipeline() + self.pcoll = ( + self.pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + + def _run_inference_with_beam(self): + self.pipeline_result = self.pipeline.run() + self.pipeline_result.wait_until_finish() + + def test_model_predict(self): + predictions = [{'output_1': [0.901], 'output_2': [0.997]}] + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(predictions, successful=True)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.predict_log.response.outputs['output_1'].CopyFrom( + tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) + prediction_log.predict_log.response.outputs['output_2'].CopyFrom( + tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + + self._set_up_pipeline(inference_spec_type) + assert_that(self.pcoll, equal_to([prediction_log])) + self._run_inference_with_beam() + + def test_exception_raised_when_response_body_contains_error_entry(self): + error_msg = 'Base64 decode failed.' + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(error_msg, successful=False)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + try: + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def test_exception_raised_when_project_id_is_empty(self): + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec(model_name='test-model',)) + + with self.assertRaises(ValueError): + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + + def test_request_body_with_binary_data(self): + example = text_format.Parse( + """ + features { + feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} + feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} + feature { key: "y" value { int64_list { value: [1, 2] }}} + } + """, tf.train.Example()) + result = list( + run_inference._RemotePredictDoFn._prepare_instances([example])) + self.assertEqual([ + { + 'x_bytes': { + 'b64': 'QVNhOGFzZGY=' + }, + 'x': 'JLK7ljk3', + 'y': [1, 2] + }, + ], result) + + +if __name__ == '__main__': + tf.test.main() From 1d553019cc0d48409553716f3e3389825d633203 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 11:57:39 -0400 Subject: [PATCH 02/31] make master the same as before for comparison --- tfx_bsl/beam/run_inference_arrow.py | 72 ++++++++++++++---------- tfx_bsl/beam/run_inference_arrow_test.py | 6 +- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 316b65a5..98e45148 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -32,7 +32,6 @@ from absl import logging import apache_beam as beam -import pyarrow as pa from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.utils import retry @@ -80,7 +79,6 @@ _MetaGraphDef = Any _SavedModel = Any -# TODO (Maxine): what is this? _BulkInferResult = Union[prediction_log_pb2.PredictLog, Tuple[tf.train.Example, regression_pb2.Regression], Tuple[tf.train.Example, @@ -97,11 +95,9 @@ class OperationType(object): MULTIHEAD = 'MULTIHEAD' -# TODO (Me): pTransform from examples/sequence example here - -# TODO (Me): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, @@ -110,7 +106,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name """Implementation of RunInference API. Args: - examples: A PCollection containing RecordBatch. + examples: A PCollection containing examples. inference_spec_type: Model inference endpoint. 
Returns: @@ -144,7 +140,8 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -160,7 +157,8 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -176,7 +174,8 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -197,7 +196,8 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -261,7 +261,9 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None - def update(self, elements: List[str], latency_micro_secs: int) -> None: + def update(self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]], + latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) self._num_instances.inc(len(elements)) self._inference_counter.inc(len(elements)) @@ -278,14 +280,11 @@ def setup(self): self._clock = _ClockFactory.make_clock() def process( - self, elements: pa.RecordBatch + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] ) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter - # extract record batch from here, assuming first column - serialized_examples = elements.column(0) - outputs = self.run_inference(serialized_examples) - result = self._post_process(serialized_examples, outputs) + outputs = self.run_inference(elements) + result = self._post_process(elements, outputs) self._metrics_collector.update( elements, self._clock.get_current_time_in_microseconds() - batch_start_time) @@ -296,12 +295,14 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, elements: List[str] + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: raise NotImplementedError @abc.abstractmethod - def _post_process(self, elements: List[str], outputs: Any) -> Iterable[Any]: + def _post_process(self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]], + outputs: 
Any) -> Iterable[Any]: raise NotImplementedError @@ -320,8 +321,9 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): return (isinstance(exception, googleapiclient.errors.HttpError) and exception.resp.status in (503, 429)) -# TODO (Maxine): change all example to serialized -@beam.typehints.with_input_types(List[str]) + +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) # Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. # TODO(b/151468119): Consider to re-batch with online serving request size @@ -578,15 +580,22 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def run_inference( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Mapping[Text, np.ndarray]: self._check_elements(elements) outputs = self._run_tf_operations(elements) return outputs - def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def _run_tf_operations( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Mapping[Text, np.ndarray]: + input_values = [] + for element in elements: + input_values.append(element.SerializeToString()) result = self._session.run( self._io_tensor_spec.output_alias_tensor_names, - feed_dict={self._io_tensor_spec.input_tensor_name: elements}) + feed_dict={self._io_tensor_spec.input_tensor_name: input_values}) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): raise RuntimeError('Output length does not match fetches') return result @@ -599,7 +608,8 @@ def _check_elements( raise NotImplementedError -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): @@ -629,7 +639,8 @@ def _post_process( return zip(elements, classifications) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, regression_pb2.Regression]) class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): @@ -652,7 +663,8 @@ def _post_process( return zip(elements, regressions) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on predict model.""" @@ -710,7 +722,8 @@ def _post_process( return result -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): @@ -830,7 +843,6 @@ def process( yield result -# TODO (Maxine): moving these into class? 
def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index a4eed521..ce9ac4d0 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -71,12 +71,10 @@ def _prepare_predict_examples(self, example_path): output_file.write(example.SerializeToString()) -ARROW_INPUT_COLUMN = '__raw_record__' class RunOfflineInferenceTest(RunInferenceFixture): - + def setUp(self): super(RunOfflineInferenceTest, self).setUp() - self._predict_examples = [ text_format.Parse( """ @@ -108,8 +106,6 @@ def setUp(self): """, tf.train.Example()), ] - # TODO: Ask if these example can directly transform to recordBatch - def _prepare_multihead_examples(self, example_path): with tf.io.TFRecordWriter(example_path) as output_file: From c39a82db1d712fd6d2e817752d6fe4621188c653 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 12:05:06 -0400 Subject: [PATCH 03/31] add changes for base class --- tfx_bsl/beam/run_inference_arrow.py | 72 ++++++++++++----------------- 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 98e45148..316b65a5 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -32,6 +32,7 @@ from absl import logging import apache_beam as beam +import pyarrow as pa from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.utils import retry @@ -79,6 +80,7 @@ _MetaGraphDef = Any _SavedModel = Any +# TODO (Maxine): what is this? _BulkInferResult = Union[prediction_log_pb2.PredictLog, Tuple[tf.train.Example, regression_pb2.Regression], Tuple[tf.train.Example, @@ -95,9 +97,11 @@ class OperationType(object): MULTIHEAD = 'MULTIHEAD' +# TODO (Me): pTransform from examples/sequence example here + +# TODO (Me): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, @@ -106,7 +110,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name """Implementation of RunInference API. Args: - examples: A PCollection containing examples. + examples: A PCollection containing RecordBatch. inference_spec_type: Model inference endpoint. 
Returns: @@ -140,8 +144,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -157,8 +160,7 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -174,8 +176,7 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -196,8 +197,7 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -261,9 +261,7 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None - def update(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - latency_micro_secs: int) -> None: + def update(self, elements: List[str], latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) self._num_instances.inc(len(elements)) self._inference_counter.inc(len(elements)) @@ -280,11 +278,14 @@ def setup(self): self._clock = _ClockFactory.make_clock() def process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + self, elements: pa.RecordBatch ) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - outputs = self.run_inference(elements) - result = self._post_process(elements, outputs) + # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter + # extract record batch from here, assuming first column + serialized_examples = elements.column(0) + outputs = self.run_inference(serialized_examples) + result = self._post_process(serialized_examples, outputs) self._metrics_collector.update( elements, self._clock.get_current_time_in_microseconds() - batch_start_time) @@ -295,14 +296,12 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + self, elements: List[str] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: raise NotImplementedError @abc.abstractmethod - def _post_process(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - outputs: Any) -> Iterable[Any]: + def _post_process(self, elements: List[str], outputs: 
Any) -> Iterable[Any]: raise NotImplementedError @@ -321,9 +320,8 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): return (isinstance(exception, googleapiclient.errors.HttpError) and exception.resp.status in (503, 429)) - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +# TODO (Maxine): change all example to serialized +@beam.typehints.with_input_types(List[str]) # Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. # TODO(b/151468119): Consider to re-batch with online serving request size @@ -580,22 +578,15 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: + def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: self._check_elements(elements) outputs = self._run_tf_operations(elements) return outputs - def _run_tf_operations( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: - input_values = [] - for element in elements: - input_values.append(element.SerializeToString()) + def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: result = self._session.run( self._io_tensor_spec.output_alias_tensor_names, - feed_dict={self._io_tensor_spec.input_tensor_name: input_values}) + feed_dict={self._io_tensor_spec.input_tensor_name: elements}) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): raise RuntimeError('Output length does not match fetches') return result @@ -608,8 +599,7 @@ def _check_elements( raise NotImplementedError -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +@beam.typehints.with_input_types(List[str]) @beam.typehints.with_output_types(Tuple[tf.train.Example, classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): @@ -639,8 +629,7 @@ def _post_process( return zip(elements, classifications) -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +@beam.typehints.with_input_types(List[str]) @beam.typehints.with_output_types(Tuple[tf.train.Example, regression_pb2.Regression]) class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): @@ -663,8 +652,7 @@ def _post_process( return zip(elements, regressions) -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +@beam.typehints.with_input_types(List[str]) @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on predict model.""" @@ -722,8 +710,7 @@ def _post_process( return result -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +@beam.typehints.with_input_types(List[str]) @beam.typehints.with_output_types(Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): @@ -843,6 +830,7 @@ def process( yield result +# TODO (Maxine): moving these into class? 
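A note on the `serialized_examples = elements.column(0)` slice in the `process()` hunk earlier in this patch: with the pyarrow versions current here, indexing or iterating an Arrow array yields pyarrow scalar objects rather than Python values, and `to_pylist()` on a binary column yields `bytes` under Python 3 (not `str`), which is the conversion a later commit in this series adds. A minimal illustration, assuming only that pyarrow and TensorFlow are importable; the `__raw_record__` column name is used here only for the sketch:

    import pyarrow as pa
    import tensorflow as tf

    serialized = tf.train.Example().SerializeToString()  # an empty but valid Example
    batch = pa.RecordBatch.from_arrays(
        [pa.array([serialized], type=pa.binary())], ['__raw_record__'])

    column = batch.column(0)
    print(isinstance(column[0], bytes))            # False: a pyarrow scalar
    values = column.to_pylist()
    print(isinstance(values[0], bytes))            # True under Python 3
    print(tf.train.Example.FromString(values[0]))  # parses back successfully
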
def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] From ace3f73fe66f822853307e7b24c53db93d6772d8 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 12:09:35 -0400 Subject: [PATCH 04/31] add more changes --- tfx_bsl/beam/run_inference_arrow.py | 33 ++++++++++++++++------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 316b65a5..793b1532 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -80,7 +80,7 @@ _MetaGraphDef = Any _SavedModel = Any -# TODO (Maxine): what is this? +# TODO (Maxine): Change this to serialized? _BulkInferResult = Union[prediction_log_pb2.PredictLog, Tuple[tf.train.Example, regression_pb2.Regression], Tuple[tf.train.Example, @@ -97,9 +97,9 @@ class OperationType(object): MULTIHEAD = 'MULTIHEAD' -# TODO (Me): pTransform from examples/sequence example here +# TODO (Maxine): pTransform from examples/sequence example here -# TODO (Me): Union[bytes, pa.RecordBatch]? +# TODO (Maxine): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) @@ -320,7 +320,7 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): return (isinstance(exception, googleapiclient.errors.HttpError) and exception.resp.status in (503, 429)) -# TODO (Maxine): change all example to serialized + @beam.typehints.with_input_types(List[str]) # Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. @@ -398,13 +398,15 @@ def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: @classmethod def _prepare_instances( - cls, elements: List[tf.train.Example] + cls, elements: List[str] ) -> Generator[Mapping[Text, Any], None, None]: for example in elements: # TODO(b/151468119): support tf.train.SequenceExample - if not isinstance(example, tf.train.Example): - raise ValueError('Remote prediction only supports tf.train.Example') + if not isinstance(example, str): + # raise ValueError('Remote prediction only supports tf.train.Example') + raise ValueError('Example should be serialized before calling remote prediction') + # TODO (Maxine): Fix this part with serialized example instance = {} for input_name, feature in example.features.feature.items(): attr_name = feature.WhichOneof('kind') @@ -443,17 +445,14 @@ def _parse_feature_content(values: Sequence[Any], attr_name: Text, else: return values - def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Sequence[Mapping[Text, Any]]: + def run_inference(self, elements: List[str]) -> Sequence[Mapping[Text, Any]]: body = {'instances': list(self._prepare_instances(elements))} request = self._make_request(body) response = self._execute_request(request) return response['predictions'] def _post_process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]], - outputs: Sequence[Mapping[Text, Any]] + self, elements: List[str], outputs: Sequence[Mapping[Text, Any]] ) -> Iterable[prediction_log_pb2.PredictLog]: result = [] for output in outputs: @@ -474,6 +473,9 @@ def _post_process( # is fixed. # TODO(b/143484017): Add batch_size back off in the case there are functional # reasons large batch sizes cannot be handled. 
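One possible direction for the `_prepare_instances` TODO above ("Fix this part with serialized example") is to parse each serialized payload back into a `tf.train.Example` before building the JSON instances. The sketch below is not the approach this series ultimately takes; the helper name `_prepare_instances_from_serialized` and the `_bytes`-suffix convention for base64 fields are assumptions, the latter borrowed from `test_request_body_with_binary_data`:

    import base64
    from typing import Any, Generator, List, Mapping, Text

    import tensorflow as tf
    from google.protobuf import message

    def _prepare_instances_from_serialized(
        serialized_examples: List[bytes]
    ) -> Generator[Mapping[Text, Any], None, None]:
      """Sketch: parse serialized payloads, then build JSON-able instances."""
      for serialized in serialized_examples:
        try:
          example = tf.train.Example.FromString(serialized)
        except message.DecodeError:
          raise ValueError('Remote prediction only supports tf.train.Example')
        instance = {}
        for input_name, feature in example.features.feature.items():
          attr_name = feature.WhichOneof('kind')
          if attr_name is None:
            continue
          values = getattr(feature, attr_name).value
          if attr_name == 'bytes_list' and input_name.endswith('_bytes'):
            # The '_bytes' suffix convention for base64 fields is an assumption
            # taken from test_request_body_with_binary_data.
            instance[input_name] = {'b64': base64.b64encode(values[0]).decode()}
          elif attr_name == 'bytes_list':
            instance[input_name] = values[0].decode('utf-8')
          elif len(values) == 1:
            instance[input_name] = values[0]
          else:
            instance[input_name] = list(values)
        yield instance
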
+ +# TODO (Maxine): Anything I can do to check that the serialized string is an example or sequence example? +# converting it and then check? class _BaseBatchSavedModelDoFn(_BaseDoFn): """A DoFn that runs in-process batch inference with a model. @@ -592,13 +594,14 @@ def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: return result def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: + self, elements: List[str]) -> None: """Unimplemented.""" raise NotImplementedError +# TODO (Maxine): Haven't change other than typeints beyond this point +# should I change these to example inside the functions or keep them as serialized @beam.typehints.with_input_types(List[str]) @beam.typehints.with_output_types(Tuple[tf.train.Example, classification_pb2.Classifications]) @@ -834,7 +837,7 @@ def process( def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] -) -> Sequence[classification_pb2.Classifications]: + ) -> Sequence[classification_pb2.Classifications]: """Returns classifications from inference output.""" # This is to avoid error "The truth value of an array with From 56357a0423bb4ffc68fffa467414690bfdf5910b Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 19:07:16 -0400 Subject: [PATCH 05/31] modify batch functions --- .bazelrc | 2 + tfx_bsl/beam/run_inference_arrow.py | 137 ++++++++++++++-------------- 2 files changed, 73 insertions(+), 66 deletions(-) create mode 100644 .bazelrc diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 00000000..23842f86 --- /dev/null +++ b/.bazelrc @@ -0,0 +1,2 @@ +build --action_env ARROW_HEADER_DIR="/home/zhangmaxine/tfx-venv/lib/python3.7/site-packages/pyarrow/include" +build --action_env ARROW_SHARED_LIBRARY_DIR="/home/zhangmaxine/tfx-venv/lib/python3.7/site-packages/pyarrow" diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 793b1532..9316988e 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -74,19 +74,15 @@ _SECOND_TO_MICROSECOND = 1000000 _REMOTE_INFERENCE_NUM_RETRIES = 5 -# We define the following aliases of Any because the actual types are not -# public. +# We define the following aliases of Any because the actual types are not public. _SignatureDef = Any _MetaGraphDef = Any _SavedModel = Any -# TODO (Maxine): Change this to serialized? _BulkInferResult = Union[prediction_log_pb2.PredictLog, - Tuple[tf.train.Example, regression_pb2.Regression], - Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse], - Tuple[tf.train.Example, - classification_pb2.Classifications]] + Tuple[str, regression_pb2.Regression], + Tuple[str, inference_pb2.MultiInferenceResponse], + Tuple[str, classification_pb2.Classifications]] # TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 @@ -98,6 +94,12 @@ class OperationType(object): # TODO (Maxine): pTransform from examples/sequence example here +# remember input type? +# if (isinstance(element, tf.train.Example)) +# elif isinstance(element, tf.train.SequenceExample) + + +# TODO (Maxine): Work on remote # TODO (Maxine): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn @@ -110,7 +112,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name """Implementation of RunInference API. Args: - examples: A PCollection containing RecordBatch. + examples: A PCollection containing RecordBatch of serialized examples. 
inference_spec_type: Model inference endpoint. Returns: @@ -284,6 +286,20 @@ def process( # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter # extract record batch from here, assuming first column serialized_examples = elements.column(0) + + for element in serialized_examples: + if not isinstance(element, str): + raise ValueError('Expected a list of serialized examples (string type)') + + try: + example = tf.train.Example.FromString(element) + sequenceExample = tf.train.sequenceExample.FromString(element) + except: + raise ValueError( + 'RecordBatch should contain a serialized example of the type \ + tf.Train.Example or tf.Train.SequenceExample' + ) + outputs = self.run_inference(serialized_examples) result = self._post_process(serialized_examples, outputs) self._metrics_collector.update( @@ -402,9 +418,10 @@ def _prepare_instances( ) -> Generator[Mapping[Text, Any], None, None]: for example in elements: # TODO(b/151468119): support tf.train.SequenceExample - if not isinstance(example, str): - # raise ValueError('Remote prediction only supports tf.train.Example') - raise ValueError('Example should be serialized before calling remote prediction') + try: + example = tf.train.Example.FromString(example) + except: + raise ValueError('Remote prediction only supports tf.train.Example') # TODO (Maxine): Fix this part with serialized example instance = {} @@ -474,8 +491,6 @@ def _post_process( # TODO(b/143484017): Add batch_size back off in the case there are functional # reasons large batch sizes cannot be handled. -# TODO (Maxine): Anything I can do to check that the serialized string is an example or sequence example? -# converting it and then check? class _BaseBatchSavedModelDoFn(_BaseDoFn): """A DoFn that runs in-process batch inference with a model. @@ -547,6 +562,7 @@ def load(): def _pre_process(self) -> _IOTensorSpec: # Pre process functions will validate for each signature. 
+ # TODO (Maxine): having more than 1 input io_tensor_specs = [] for signature in self._signatures: if len(signature.signature_def.inputs) != 1: @@ -600,11 +616,8 @@ def _check_elements( raise NotImplementedError -# TODO (Maxine): Haven't change other than typeints beyond this point -# should I change these to example inside the functions or keep them as serialized @beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) +@beam.typehints.with_output_types(Tuple[str, classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on classification model.""" @@ -617,40 +630,40 @@ def setup(self): signature_def.method_name) super(_BatchClassifyDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Classify only supports tf.train.Example') + def _check_elements(self, elements: List[str]) -> None: + for element in serialized_examples: + try: + example = tf.train.Example.FromString(element) + except: + raise ValueError('Classify only supports tf.train.Example') def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, classification_pb2.Classifications]]: + self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[str, classification_pb2.Classifications]]: classifications = _post_process_classify( self._io_tensor_spec.output_alias_tensor_names, elements, outputs) return zip(elements, classifications) @beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - regression_pb2.Regression]) +@beam.typehints.with_output_types(Tuple[str, regression_pb2.Regression]) class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on regression model.""" def setup(self): super(_BatchRegressDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Regress only supports tf.train.Example') + def _check_elements(self, elements: List[str]) -> None: + for element in serialized_examples: + try: + example = tf.train.Example.FromString(element) + except: + raise ValueError('Regress only supports tf.train.Example') + def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, regression_pb2.Regression]]: + self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[str, regression_pb2.Regression]]: regressions = _post_process_regress(elements, outputs) return zip(elements, regressions) @@ -669,14 +682,11 @@ def setup(self): signature_def.method_name) super(_BatchPredictDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: + def _check_elements(self, elements: List[str]) -> None: pass def _post_process( - self, elements: Union[Sequence[tf.train.Example], - Sequence[tf.train.SequenceExample]], + self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] ) -> Iterable[prediction_log_pb2.PredictLog]: input_tensor_alias = self._io_tensor_spec.input_tensor_alias @@ -700,8 +710,7 @@ def 
_post_process( for i in range(batch_size): predict_log = prediction_log_pb2.PredictLog() predict_log.CopyFrom(predict_log_tmpl) - predict_log.request.inputs[input_tensor_alias].string_val.append( - elements[i].SerializeToString()) + predict_log.request.inputs[input_tensor_alias].string_val.append(elements[i]) for output_alias, output in outputs.items(): # Mimic tensor::Split tensor_proto = tf.make_tensor_proto( @@ -714,21 +723,21 @@ def _post_process( @beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_output_types(Tuple[str, inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on multi-head model.""" - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Multi inference only supports tf.train.Example') + def _check_elements(self, elements: List[str]) -> None: + for element in serialized_examples: + try: + example = tf.train.Example.FromString(element) + except: + raise ValueError('Multi-inference only supports tf.train.Example') + def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]]: + self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[str, inference_pb2.MultiInferenceResponse]]: classifications = None regressions = None for signature in self._signatures: @@ -767,14 +776,14 @@ def _post_process( return result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) +# TODO (Maxine): need to replace train example from this point on +@beam.typehints.with_input_types(Tuple[str, classification_pb2.Classifications]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): """A DoFn that builds prediction log from classifications.""" def process( - self, element: Tuple[tf.train.Example, classification_pb2.Classifications] + self, element: Tuple[str, classification_pb2.Classifications] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, classifications) = element result = prediction_log_pb2.PredictionLog() @@ -785,14 +794,13 @@ def process( yield result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - regression_pb2.Regression]) +@beam.typehints.with_input_types(Tuple[str, regression_pb2.Regression]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): """A DoFn that builds prediction log from regressions.""" def process( - self, element: Tuple[tf.train.Example, regression_pb2.Regression] + self, element: Tuple[str, regression_pb2.Regression] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, regression) = element result = prediction_log_pb2.PredictionLog() @@ -815,15 +823,13 @@ def process( yield result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_input_types(Tuple[str,inference_pb2.MultiInferenceResponse]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildMultiInferenceLogDoFn(beam.DoFn): """A DoFn that builds prediction log from multi-head inference result.""" def process( - 
self, element: Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse] + self, element: Tuple[str, inference_pb2.MultiInferenceResponse] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, multi_inference_response) = element result = prediction_log_pb2.PredictionLog() @@ -833,10 +839,9 @@ def process( yield result -# TODO (Maxine): moving these into class? def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], - elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] + elements: Sequence[str], outputs: Mapping[Text, np.ndarray] ) -> Sequence[classification_pb2.Classifications]: """Returns classifications from inference output.""" @@ -897,7 +902,7 @@ def _post_process_classify( def _post_process_regress( - elements: Sequence[tf.train.Example], + elements: Sequence[str], outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: """Returns regressions from inference output.""" From ee6e928db951dba2c9dc4a00ae9ffe4a9faef374 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 19:11:10 -0400 Subject: [PATCH 06/31] remove extra files --- .bazelrc | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 .bazelrc diff --git a/.bazelrc b/.bazelrc deleted file mode 100644 index 23842f86..00000000 --- a/.bazelrc +++ /dev/null @@ -1,2 +0,0 @@ -build --action_env ARROW_HEADER_DIR="/home/zhangmaxine/tfx-venv/lib/python3.7/site-packages/pyarrow/include" -build --action_env ARROW_SHARED_LIBRARY_DIR="/home/zhangmaxine/tfx-venv/lib/python3.7/site-packages/pyarrow" From 622fbcf129696de67f4a70ab9f18ca34af05191f Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Mon, 1 Jun 2020 19:28:37 -0400 Subject: [PATCH 07/31] add column choice (so far only support one column still) --- tfx_bsl/beam/run_inference_arrow.py | 78 +++++--- tfx_bsl/beam/run_inference_arrow_test.py | 245 +++++++++++------------ 2 files changed, 173 insertions(+), 150 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 9316988e..ceaa46ef 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -46,7 +46,7 @@ from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ - Tuple, Union + Tuple, Union, Optional # TODO(b/140306674): stop using the internal TF API. from tensorflow.python.saved_model import loader_impl @@ -99,15 +99,14 @@ class OperationType(object): # elif isinstance(element, tf.train.SequenceExample) -# TODO (Maxine): Work on remote - # TODO (Maxine): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType + inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None ) -> beam.pvalue.PCollection: """Implementation of RunInference API. 
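With `process_column` threaded through the transforms in this commit, a usage sketch of the intended entry point may help. It reflects the end state of the migration rather than this intermediate commit, assumes the module is imported as `run_inference` (as the tests do), and treats the paths and the `__raw_record__` column name as placeholders:

    import apache_beam as beam
    import pyarrow as pa
    from tensorflow_serving.apis import prediction_log_pb2
    from tfx_bsl.beam import run_inference_arrow as run_inference
    from tfx_bsl.public.proto import model_spec_pb2

    # Placeholders: point these at a real SavedModel and a TFRecord of examples.
    model_path = '/tmp/saved_model'
    example_path = '/tmp/examples.tfrecord'
    output_path = '/tmp/predictions'
    _EXAMPLES_COLUMN = '__raw_record__'  # arbitrary name, only for this sketch

    def _to_record_batch(serialized_example):
      # One-row RecordBatch per serialized example; a real pipeline would batch
      # more coarsely before converting.
      return pa.RecordBatch.from_arrays(
          [pa.array([serialized_example], type=pa.binary())], [_EXAMPLES_COLUMN])

    inference_spec_type = model_spec_pb2.InferenceSpecType(
        saved_model_spec=model_spec_pb2.SavedModelSpec(model_path=model_path))

    with beam.Pipeline() as p:
      _ = (
          p
          | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
          | 'ToRecordBatch' >> beam.Map(_to_record_batch)
          | 'RunInference' >> run_inference.RunInferenceImpl(
              inference_spec_type, process_column=_EXAMPLES_COLUMN)
          | 'WritePredictions' >> beam.io.WriteToTFRecord(
              output_path,
              coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))
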
@@ -126,14 +125,14 @@ def RunInferenceImpl( # pylint: disable=invalid-name batched_examples = examples | 'BatchExamples' >> beam.BatchElements() operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: - return batched_examples | 'Classify' >> _Classify(inference_spec_type) + return batched_examples | 'Classify' >> _Classify(inference_spec_type, process_column) elif operation_type == OperationType.REGRESSION: - return batched_examples | 'Regress' >> _Regress(inference_spec_type) + return batched_examples | 'Regress' >> _Regress(inference_spec_type, process_column) elif operation_type == OperationType.PREDICTION: - return batched_examples | 'Predict' >> _Predict(inference_spec_type) + return batched_examples | 'Predict' >> _Predict(inference_spec_type, process_column) elif operation_type == OperationType.MULTIHEAD: return (batched_examples - | 'MultiInference' >> _MultiInference(inference_spec_type)) + | 'MultiInference' >> _MultiInference(inference_spec_type, process_column)) else: raise ValueError('Unsupported operation_type %s' % operation_type) @@ -149,12 +148,13 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None): """Performs classify PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll | 'Classify' >> beam.ParDo( - _BatchClassifyDoFn(inference_spec_type, shared.Shared())) + _BatchClassifyDoFn(inference_spec_type, process_column, shared.Shared())) | 'BuildPredictionLogForClassifications' >> beam.ParDo( _BuildPredictionLogForClassificationsDoFn())) else: @@ -165,12 +165,13 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None): """Performs regress PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll | 'Regress' >> beam.ParDo( - _BatchRegressDoFn(inference_spec_type, shared.Shared())) + _BatchRegressDoFn(inference_spec_type, process_column, shared.Shared())) | 'BuildPredictionLogForRegressions' >> beam.ParDo( _BuildPredictionLogForRegressionsDoFn())) else: @@ -181,13 +182,14 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None): """Performs predict PTransform.""" if _using_in_process_inference(inference_spec_type): predictions = ( pcoll | 'Predict' >> beam.ParDo( - _BatchPredictDoFn(inference_spec_type, shared.Shared()))) + _BatchPredictDoFn(inference_spec_type, process_column, shared.Shared()))) else: predictions = ( pcoll @@ -202,13 +204,14 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: 
disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None): """Performs multi inference PTransform.""" if _using_in_process_inference(inference_spec_type): return ( pcoll | 'MultiInference' >> beam.ParDo( - _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())) + _BatchMultiInferenceDoFn(inference_spec_type, process_column, shared.Shared())) | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) else: raise NotImplementedError @@ -271,9 +274,13 @@ def update(self, elements: List[str], latency_micro_secs: int) -> None: self._inference_request_batch_byte_size.update( sum(element.ByteSize() for element in elements)) - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + # TODO (Maxine): just one col for now, later, will do a list of str + def __init__( + self, inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None): super(_BaseDoFn, self).__init__() self._clock = None + self._process_column = process_column self._metrics_collector = self._MetricsCollector(inference_spec_type) def setup(self): @@ -283,22 +290,39 @@ def process( self, elements: pa.RecordBatch ) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter + # TODO (Maxine): take process as a parameter, should it be part of inference spec? # extract record batch from here, assuming first column - serialized_examples = elements.column(0) + + # what would record batch look like? (flatten or not) + # vs np.asarray(elements.column(0)) + if len(elements.columns) == 1: + serialized_examples = elements.column(0).to_pylist() + else: + if self._process_column is None: + raise ValueError('Must pass in a process column with multi-column RecordBatch') + serialized_examples = None + + for column_name, column_array in zip(elements.schema.names, elements.columns): + column_type = column_array.type + if column_name == self._process_column: + serialized_examples = column_array.to_pylist() + break for element in serialized_examples: if not isinstance(element, str): raise ValueError('Expected a list of serialized examples (string type)') + # TODO (Maxine): Is there a better way? 
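On the "Is there a better way?" question above: protobuf parsing fails with `google.protobuf.message.DecodeError` rather than an arbitrary exception, the values produced by `to_pylist()` on a binary column are `bytes` under Python 3, and the class is spelled `tf.train.SequenceExample`. A helper along the following lines could replace the nested try/except that continues below; the helper name is hypothetical, and since proto parsing is permissive the check is only best-effort:

    import tensorflow as tf
    from google.protobuf import message

    def _is_serialized_example_or_sequence_example(payload: bytes) -> bool:
      """Best-effort check that a payload parses as (Sequence)Example."""
      for proto_cls in (tf.train.Example, tf.train.SequenceExample):
        try:
          proto_cls.FromString(payload)
          return True
        except message.DecodeError:
          continue
      return False

    # Possible call site inside process():
    for element in serialized_examples:
      if not _is_serialized_example_or_sequence_example(element):
        raise ValueError(
            'RecordBatch should contain serialized tf.train.Example or '
            'tf.train.SequenceExample payloads')
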
try: example = tf.train.Example.FromString(element) - sequenceExample = tf.train.sequenceExample.FromString(element) except: - raise ValueError( - 'RecordBatch should contain a serialized example of the type \ - tf.Train.Example or tf.Train.SequenceExample' - ) + try: + sequenceExample = tf.train.sequenceExample.FromString(element) + except: + raise ValueError( + 'RecordBatch should contain a serialized example of the type \ + tf.Train.Example or tf.Train.SequenceExample' + ) outputs = self.run_inference(serialized_examples) result = self._post_process(serialized_examples, outputs) @@ -504,9 +528,9 @@ class _BaseBatchSavedModelDoFn(_BaseDoFn): def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, + shared_model_handle: shared.Shared, process_column: Optional[str] = None, ): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, process_column) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle self._model_path = inference_spec_type.saved_model_spec.model_path @@ -776,7 +800,7 @@ def _post_process( return result -# TODO (Maxine): need to replace train example from this point on + @beam.typehints.with_input_types(Tuple[str, classification_pb2.Classifications]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index ce9ac4d0..30416ae0 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -213,8 +213,7 @@ def _run_inference_with_beam(self, example_path, inference_spec_type, pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | - 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) + | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -450,127 +449,127 @@ def testTelemetry(self): load_model_latency_milli_secs['distributions'][0].result.sum, 0) -class RunRemoteInferenceTest(RunInferenceFixture): - - def setUp(self): - super(RunRemoteInferenceTest, self).setUp() - self.example_path = self._get_output_data_dir('example') - self._prepare_predict_examples(self.example_path) - # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
- self._discovery_testdata_dir = os.path.join( - os.path.join(os.path.dirname(__file__), 'testdata'), - 'ml_discovery.json') - - @staticmethod - def _make_response_body(content, successful): - if successful: - response_dict = {'predictions': content} - else: - response_dict = {'error': content} - return json.dumps(response_dict) - - def _set_up_pipeline(self, inference_spec_type): - self.pipeline = beam.Pipeline() - self.pcoll = ( - self.pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) - - def _run_inference_with_beam(self): - self.pipeline_result = self.pipeline.run() - self.pipeline_result.wait_until_finish() - - def test_model_predict(self): - predictions = [{'output_1': [0.901], 'output_2': [0.997]}] - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(predictions, successful=True)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.predict_log.response.outputs['output_1'].CopyFrom( - tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) - prediction_log.predict_log.response.outputs['output_2'].CopyFrom( - tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) - - self._set_up_pipeline(inference_spec_type) - assert_that(self.pcoll, equal_to([prediction_log])) - self._run_inference_with_beam() - - def test_exception_raised_when_response_body_contains_error_entry(self): - error_msg = 'Base64 decode failed.' - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(error_msg, successful=False)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - try: - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - except ValueError as exc: - actual_error_msg = str(exc) - self.assertTrue(actual_error_msg.startswith(error_msg)) - else: - self.fail('Test was expected to throw ValueError exception') - - def test_exception_raised_when_project_id_is_empty(self): - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec(model_name='test-model',)) - - with self.assertRaises(ValueError): - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - - def test_request_body_with_binary_data(self): - example = text_format.Parse( - """ - features { - feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} - feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} - feature { key: "y" value { int64_list { value: [1, 2] }}} - } - """, tf.train.Example()) - result = list( - run_inference._RemotePredictDoFn._prepare_instances([example])) - self.assertEqual([ - { - 'x_bytes': { - 'b64': 'QVNhOGFzZGY=' - }, - 'x': 'JLK7ljk3', - 'y': [1, 2] - }, - ], result) +# class RunRemoteInferenceTest(RunInferenceFixture): + +# def setUp(self): +# super(RunRemoteInferenceTest, self).setUp() +# self.example_path = self._get_output_data_dir('example') +# self._prepare_predict_examples(self.example_path) +# # This is from https://ml.googleapis.com/$discovery/rest?version=v1. +# self._discovery_testdata_dir = os.path.join( +# os.path.join(os.path.dirname(__file__), 'testdata'), +# 'ml_discovery.json') + +# @staticmethod +# def _make_response_body(content, successful): +# if successful: +# response_dict = {'predictions': content} +# else: +# response_dict = {'error': content} +# return json.dumps(response_dict) + +# def _set_up_pipeline(self, inference_spec_type): +# self.pipeline = beam.Pipeline() +# self.pcoll = ( +# self.pipeline +# | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) +# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) +# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + +# def _run_inference_with_beam(self): +# self.pipeline_result = self.pipeline.run() +# self.pipeline_result.wait_until_finish() + +# def test_model_predict(self): +# predictions = [{'output_1': [0.901], 'output_2': [0.997]}] +# builder = http.RequestMockBuilder({ +# 'ml.projects.predict': +# (None, self._make_response_body(predictions, successful=True)) +# }) +# resource = discovery.build( +# 'ml', +# 'v1', +# http=http.HttpMock(self._discovery_testdata_dir, +# {'status': http_client.OK}), +# requestBuilder=builder) +# with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: +# response_mock.side_effect = lambda service, version: resource +# inference_spec_type = model_spec_pb2.InferenceSpecType( +# ai_platform_prediction_model_spec=model_spec_pb2 +# .AIPlatformPredictionModelSpec( +# project_id='test-project', +# model_name='test-model', +# )) + +# prediction_log = prediction_log_pb2.PredictionLog() +# prediction_log.predict_log.response.outputs['output_1'].CopyFrom( +# tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) +# prediction_log.predict_log.response.outputs['output_2'].CopyFrom( +# tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + +# self._set_up_pipeline(inference_spec_type) +# assert_that(self.pcoll, equal_to([prediction_log])) +# self._run_inference_with_beam() + +# def test_exception_raised_when_response_body_contains_error_entry(self): +# error_msg = 'Base64 decode failed.' +# builder = http.RequestMockBuilder({ +# 'ml.projects.predict': +# (None, self._make_response_body(error_msg, successful=False)) +# }) +# resource = discovery.build( +# 'ml', +# 'v1', +# http=http.HttpMock(self._discovery_testdata_dir, +# {'status': http_client.OK}), +# requestBuilder=builder) +# with mock.patch('googleapiclient.discovery.' 'build') as response_mock: +# response_mock.side_effect = lambda service, version: resource +# inference_spec_type = model_spec_pb2.InferenceSpecType( +# ai_platform_prediction_model_spec=model_spec_pb2 +# .AIPlatformPredictionModelSpec( +# project_id='test-project', +# model_name='test-model', +# )) + +# try: +# self._set_up_pipeline(inference_spec_type) +# self._run_inference_with_beam() +# except ValueError as exc: +# actual_error_msg = str(exc) +# self.assertTrue(actual_error_msg.startswith(error_msg)) +# else: +# self.fail('Test was expected to throw ValueError exception') + +# def test_exception_raised_when_project_id_is_empty(self): +# inference_spec_type = model_spec_pb2.InferenceSpecType( +# ai_platform_prediction_model_spec=model_spec_pb2 +# .AIPlatformPredictionModelSpec(model_name='test-model',)) + +# with self.assertRaises(ValueError): +# self._set_up_pipeline(inference_spec_type) +# self._run_inference_with_beam() + +# def test_request_body_with_binary_data(self): +# example = text_format.Parse( +# """ +# features { +# feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} +# feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} +# feature { key: "y" value { int64_list { value: [1, 2] }}} +# } +# """, tf.train.Example()) +# result = list( +# run_inference._RemotePredictDoFn._prepare_instances([example])) +# self.assertEqual([ +# { +# 'x_bytes': { +# 'b64': 'QVNhOGFzZGY=' +# }, +# 'x': 'JLK7ljk3', +# 'y': [1, 2] +# }, +# ], result) if __name__ == '__main__': From 7562e15350ca388795aef220bbee8f0e3730d4d3 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Fri, 5 Jun 2020 15:02:47 -0400 Subject: [PATCH 08/31] make internal implementation with arrow, add input type, process column opption and tests --- tfx_bsl/beam/run_inference_arrow.py | 242 ++++++------ tfx_bsl/beam/run_inference_arrow_test.py | 478 +++++++++++++++++++++-- 2 files changed, 581 insertions(+), 139 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index ceaa46ef..7d7cecf5 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -79,11 +79,6 @@ _MetaGraphDef = Any _SavedModel = Any -_BulkInferResult = Union[prediction_log_pb2.PredictLog, - Tuple[str, regression_pb2.Regression], - Tuple[str, 
inference_pb2.MultiInferenceResponse], - Tuple[str, classification_pb2.Classifications]] - # TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 class OperationType(object): @@ -92,14 +87,13 @@ class OperationType(object): PREDICTION = 'PREDICTION' MULTIHEAD = 'MULTIHEAD' - -# TODO (Maxine): pTransform from examples/sequence example here -# remember input type? -# if (isinstance(element, tf.train.Example)) -# elif isinstance(element, tf.train.SequenceExample) +class DataType(object): + EXAMPLE = 'EXAMPLE' + SEQUENCEEXAMPLE = 'SEQUENCEEXAMPLE' -# TODO (Maxine): Union[bytes, pa.RecordBatch]? +# This API is private and called with only example or sequence example +# TODO (Maxine): pTransform from examples/sequence example here @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) @@ -122,17 +116,25 @@ def RunInferenceImpl( # pylint: disable=invalid-name """ logging.info('RunInference on model: %s', inference_spec_type) - batched_examples = examples | 'BatchExamples' >> beam.BatchElements() + # TODO (Maxine): uncomment this once we change the api to take input + # Union[tf.train.Example, tf.train.SequenceExample] + # data_type = _get_data_type(examples) + + data_type = DataType.EXAMPLE operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: - return batched_examples | 'Classify' >> _Classify(inference_spec_type, process_column) + return examples | 'Classify' >> _Classify( + inference_spec_type, data_type, process_column) elif operation_type == OperationType.REGRESSION: - return batched_examples | 'Regress' >> _Regress(inference_spec_type, process_column) + return examples | 'Regress' >> _Regress( + inference_spec_type, data_type, process_column) elif operation_type == OperationType.PREDICTION: - return batched_examples | 'Predict' >> _Predict(inference_spec_type, process_column) + return examples | 'Predict' >> _Predict( + inference_spec_type, data_type, process_column) elif operation_type == OperationType.MULTIHEAD: - return (batched_examples - | 'MultiInference' >> _MultiInference(inference_spec_type, process_column)) + return (examples + | 'MultiInference' >> _MultiInference( + inference_spec_type, data_type, process_column)) else: raise ValueError('Unsupported operation_type %s' % operation_type) @@ -149,12 +151,12 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None): + data_type, process_column: Optional[str] = None): """Performs classify PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Classify' >> beam.ParDo( - _BatchClassifyDoFn(inference_spec_type, process_column, shared.Shared())) + | 'Classify' >> beam.ParDo(_BatchClassifyDoFn( + inference_spec_type, shared.Shared(), data_type, process_column)) | 'BuildPredictionLogForClassifications' >> beam.ParDo( _BuildPredictionLogForClassificationsDoFn())) else: @@ -166,12 +168,12 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None): + data_type, 
process_column: Optional[str] = None): """Performs regress PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Regress' >> beam.ParDo( - _BatchRegressDoFn(inference_spec_type, process_column, shared.Shared())) + | 'Regress' >> beam.ParDo(_BatchRegressDoFn( + inference_spec_type, shared.Shared(), data_type, process_column)) | 'BuildPredictionLogForRegressions' >> beam.ParDo( _BuildPredictionLogForRegressionsDoFn())) else: @@ -183,18 +185,18 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None): + data_type, process_column: Optional[str] = None): """Performs predict PTransform.""" if _using_in_process_inference(inference_spec_type): predictions = ( pcoll - | 'Predict' >> beam.ParDo( - _BatchPredictDoFn(inference_spec_type, process_column, shared.Shared()))) + | 'Predict' >> beam.ParDo(_BatchPredictDoFn( + inference_spec_type, shared.Shared(), data_type, process_column))) else: predictions = ( pcoll - | 'RemotePredict' >> beam.ParDo( - _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options))) + | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( + inference_spec_type, pcoll.pipeline.options, data_type))) return (predictions | 'BuildPredictionLogForPredictions' >> beam.ParDo( _BuildPredictionLogForPredictionsDoFn())) @@ -205,13 +207,13 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None): + data_type, process_column: Optional[str] = None): """Performs multi inference PTransform.""" if _using_in_process_inference(inference_spec_type): return ( pcoll - | 'MultiInference' >> beam.ParDo( - _BatchMultiInferenceDoFn(inference_spec_type, process_column, shared.Shared())) + | 'MultiInference' >> beam.ParDo(_BatchMultiInferenceDoFn( + inference_spec_type, shared.Shared(), data_type, process_column)) | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) else: raise NotImplementedError @@ -266,13 +268,14 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None - def update(self, elements: List[str], latency_micro_secs: int) -> None: + def update( + self, elements: List[Union[str, bytes]], latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) self._num_instances.inc(len(elements)) self._inference_counter.inc(len(elements)) self._inference_request_batch_size.update(len(elements)) self._inference_request_batch_byte_size.update( - sum(element.ByteSize() for element in elements)) + sum(len(element) for element in elements)) # TODO (Maxine): just one col for now, later, will do a list of str def __init__( @@ -300,34 +303,25 @@ def process( else: if self._process_column is None: raise ValueError('Must pass in a process column with multi-column RecordBatch') - serialized_examples = None - for column_name, column_array in zip(elements.schema.names, elements.columns): - column_type = column_array.type - if column_name == self._process_column: - serialized_examples = column_array.to_pylist() - break - - for 
element in serialized_examples: - if not isinstance(element, str): - raise ValueError('Expected a list of serialized examples (string type)') - - # TODO (Maxine): Is there a better way? - try: - example = tf.train.Example.FromString(element) - except: - try: - sequenceExample = tf.train.sequenceExample.FromString(element) - except: - raise ValueError( - 'RecordBatch should contain a serialized example of the type \ - tf.Train.Example or tf.Train.SequenceExample' - ) + serialized_examples = None + for column_name, column_array in zip(elements.schema.names, elements.columns): + column_type = column_array.type + if column_name == self._process_column: + serialized_examples = column_array.to_pylist() + break + + for example in serialized_examples: + if not (isinstance(example, bytes) or isinstance(example, str)): + raise ValueError( + f'Expected a list of serialized examples in bytes or as a string, \ + got {type(example)}' + ) outputs = self.run_inference(serialized_examples) result = self._post_process(serialized_examples, outputs) self._metrics_collector.update( - elements, + serialized_examples, self._clock.get_current_time_in_microseconds() - batch_start_time) return result @@ -336,12 +330,13 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, elements: List[str] + self, elements: List[Union[str, bytes]] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: raise NotImplementedError @abc.abstractmethod - def _post_process(self, elements: List[str], outputs: Any) -> Iterable[Any]: + def _post_process( + self, elements: List[Union[str, bytes]], outputs: Any) -> Iterable[Any]: raise NotImplementedError @@ -361,7 +356,7 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): exception.resp.status in (503, 429)) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(pa.RecordBatch) # Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. 
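The process() hunk above is the core of the Arrow change: the DoFn now receives a pa.RecordBatch and pulls the serialized examples out of one of its columns before running inference. A minimal sketch of that extraction step, assuming a RecordBatch whose raw-record column holds serialized tf.train.Example bytes (the '__RAW_RECORD__' column name mirrors the tests; the helper name below is illustrative only, not part of the patch):

import pyarrow as pa
import tensorflow as tf

def _extract_serialized_examples(record_batch, process_column=None):
  # Single-column batches are used as-is; multi-column batches need a
  # process_column naming the column that holds the serialized examples.
  if record_batch.num_columns == 1:
    return record_batch.column(0).to_pylist()
  if process_column is None:
    raise ValueError('Must pass in a process column with multi-column RecordBatch')
  for name, column in zip(record_batch.schema.names, record_batch.columns):
    if name == process_column:
      return column.to_pylist()
  raise ValueError('Column %r not found in RecordBatch' % process_column)

serialized = [tf.train.Example().SerializeToString()]
batch = pa.RecordBatch.from_arrays([pa.array(serialized)], ['__RAW_RECORD__'])
assert _extract_serialized_examples(batch) == serialized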
# TODO(b/151468119): Consider to re-batch with online serving request size @@ -388,7 +383,7 @@ class _RemotePredictDoFn(_BaseDoFn): """ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: PipelineOptions): + pipeline_options: PipelineOptions, data_type): super(_RemotePredictDoFn, self).__init__(inference_spec_type) self._api_client = None @@ -438,18 +433,16 @@ def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: @classmethod def _prepare_instances( - cls, elements: List[str] + cls, elements: List[Union[str, bytes]] ) -> Generator[Mapping[Text, Any], None, None]: for example in elements: # TODO(b/151468119): support tf.train.SequenceExample - try: - example = tf.train.Example.FromString(example) - except: + if data_type != DataType.EXAMPLE: raise ValueError('Remote prediction only supports tf.train.Example') - # TODO (Maxine): Fix this part with serialized example instance = {} - for input_name, feature in example.features.feature.items(): + tfexample = tf.train.Example.FromString(example) + for input_name, feature in tfexample.features.feature.items(): attr_name = feature.WhichOneof('kind') if attr_name is None: continue @@ -486,14 +479,16 @@ def _parse_feature_content(values: Sequence[Any], attr_name: Text, else: return values - def run_inference(self, elements: List[str]) -> Sequence[Mapping[Text, Any]]: + def run_inference( + self, elements: List[Union[str, bytes]]) -> Sequence[Mapping[Text, Any]]: body = {'instances': list(self._prepare_instances(elements))} request = self._make_request(body) response = self._execute_request(request) return response['predictions'] def _post_process( - self, elements: List[str], outputs: Sequence[Mapping[Text, Any]] + self, elements: List[Union[str, bytes]], + outputs: Sequence[Mapping[Text, Any]] ) -> Iterable[prediction_log_pb2.PredictLog]: result = [] for output in outputs: @@ -528,7 +523,8 @@ class _BaseBatchSavedModelDoFn(_BaseDoFn): def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, process_column: Optional[str] = None, + shared_model_handle: shared.Shared, data_type, + process_column: Optional[str] = None, ): super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, process_column) self._inference_spec_type = inference_spec_type @@ -541,6 +537,7 @@ def __init__( _get_tags(inference_spec_type)) self._session = None self._io_tensor_spec = None + self._data_type = data_type def setup(self): """Load the model. 
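For context on the _RemotePredictDoFn._prepare_instances hunk above: each serialized tf.train.Example is converted into the JSON-style instance dict that the Cloud AI Platform prediction API expects. A simplified, illustrative sketch of that conversion (assumption: it collapses the _parse_feature_content details into the '*_bytes' base64 rule exercised by the request-body test earlier in the series; the helper name is not part of the patch):

import base64
import tensorflow as tf

def _example_to_instance(serialized_example):
  # Simplified: bytes features whose names end in '_bytes' are base64-wrapped,
  # other bytes features are decoded to text, and single values are unwrapped.
  example = tf.train.Example.FromString(serialized_example)
  instance = {}
  for name, feature in example.features.feature.items():
    kind = feature.WhichOneof('kind')
    if kind is None:
      continue
    values = list(getattr(feature, kind).value)
    if name.endswith('_bytes'):
      values = [{'b64': base64.b64encode(v).decode()} for v in values]
    elif kind == 'bytes_list':
      values = [v.decode() for v in values]
    instance[name] = values[0] if len(values) == 1 else values
  return instance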
@@ -620,12 +617,14 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def run_inference( + self, elements: List[Union[str, bytes]]) -> Mapping[Text, np.ndarray]: self._check_elements(elements) outputs = self._run_tf_operations(elements) return outputs - def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def _run_tf_operations( + self, elements: List[Union[str, bytes]]) -> Mapping[Text, np.ndarray]: result = self._session.run( self._io_tensor_spec.output_alias_tensor_names, feed_dict={self._io_tensor_spec.input_tensor_name: elements}) @@ -633,15 +632,15 @@ def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: raise RuntimeError('Output length does not match fetches') return result - def _check_elements( - self, elements: List[str]) -> None: + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: """Unimplemented.""" raise NotImplementedError -@beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[str, classification_pb2.Classifications]) +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on classification model.""" @@ -654,45 +653,43 @@ def setup(self): signature_def.method_name) super(_BatchClassifyDoFn, self).setup() - def _check_elements(self, elements: List[str]) -> None: - for element in serialized_examples: - try: - example = tf.train.Example.FromString(element) - except: + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + for element in elements: + if self._data_type != DataType.EXAMPLE: raise ValueError('Classify only supports tf.train.Example') def _post_process( - self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[str, classification_pb2.Classifications]]: + self, elements: Sequence[Union[str, bytes]], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], classification_pb2.Classifications]]: classifications = _post_process_classify( self._io_tensor_spec.output_alias_tensor_names, elements, outputs) return zip(elements, classifications) -@beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[str, regression_pb2.Regression]) +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + regression_pb2.Regression]) class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on regression model.""" def setup(self): super(_BatchRegressDoFn, self).setup() - def _check_elements(self, elements: List[str]) -> None: - for element in serialized_examples: - try: - example = tf.train.Example.FromString(element) - except: + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + for element in elements: + if self._data_type != DataType.EXAMPLE: raise ValueError('Regress only supports tf.train.Example') - def _post_process( - self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[str, regression_pb2.Regression]]: + self, elements: Sequence[Union[str, bytes]], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], regression_pb2.Regression]]: regressions = _post_process_regress(elements, outputs) 
return zip(elements, regressions) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on predict model.""" @@ -706,11 +703,11 @@ def setup(self): signature_def.method_name) super(_BatchPredictDoFn, self).setup() - def _check_elements(self, elements: List[str]) -> None: + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: pass def _post_process( - self, elements: Sequence[str], + self, elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray] ) -> Iterable[prediction_log_pb2.PredictLog]: input_tensor_alias = self._io_tensor_spec.input_tensor_alias @@ -746,22 +743,21 @@ def _post_process( return result -@beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[str, inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on multi-head model.""" - def _check_elements(self, elements: List[str]) -> None: - for element in serialized_examples: - try: - example = tf.train.Example.FromString(element) - except: + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + for element in elements: + if self._data_type != DataType.EXAMPLE: raise ValueError('Multi-inference only supports tf.train.Example') - def _post_process( - self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[str, inference_pb2.MultiInferenceResponse]]: + self, elements: Sequence[Union[str, bytes]], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], inference_pb2.MultiInferenceResponse]]: classifications = None regressions = None for signature in self._signatures: @@ -801,35 +797,38 @@ def _post_process( -@beam.typehints.with_input_types(Tuple[str, classification_pb2.Classifications]) +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + classification_pb2.Classifications]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): """A DoFn that builds prediction log from classifications.""" def process( - self, element: Tuple[str, classification_pb2.Classifications] + self, + element: Tuple[Union[str, bytes], classification_pb2.Classifications] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, classifications) = element result = prediction_log_pb2.PredictionLog() result.classify_log.request.input.example_list.examples.add().CopyFrom( - train_example) + tf.train.Example.FromString(train_example)) result.classify_log.response.result.classifications.add().CopyFrom( classifications) yield result -@beam.typehints.with_input_types(Tuple[str, regression_pb2.Regression]) +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + regression_pb2.Regression]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): """A DoFn that builds prediction log from regressions.""" def process( - self, element: Tuple[str, regression_pb2.Regression] + self, element: Tuple[Union[str, bytes], regression_pb2.Regression] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, regression) = element result = prediction_log_pb2.PredictionLog() 
result.regress_log.request.input.example_list.examples.add().CopyFrom( - train_example) + tf.train.Example.FromString(train_example)) result.regress_log.response.result.regressions.add().CopyFrom(regression) yield result @@ -847,25 +846,27 @@ def process( yield result -@beam.typehints.with_input_types(Tuple[str,inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildMultiInferenceLogDoFn(beam.DoFn): """A DoFn that builds prediction log from multi-head inference result.""" def process( - self, element: Tuple[str, inference_pb2.MultiInferenceResponse] + self, element: Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, multi_inference_response) = element result = prediction_log_pb2.PredictionLog() (result.multi_inference_log.request.input.example_list.examples.add() - .CopyFrom(train_example)) + .CopyFrom(tf.train.Example.FromString(train_example))) result.multi_inference_log.response.CopyFrom(multi_inference_response) yield result def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], - elements: Sequence[str], outputs: Mapping[Text, np.ndarray] + elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray] ) -> Sequence[classification_pb2.Classifications]: """Returns classifications from inference output.""" @@ -926,7 +927,7 @@ def _post_process_classify( def _post_process_regress( - elements: Sequence[str], + elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: """Returns regressions from inference output.""" @@ -1127,6 +1128,15 @@ def _get_operation_type( return OperationType.PREDICTION +def _get_data_type(elements: Sequence[Any]) -> Text: + if all(isinstance(elements, tf.train.Example)): + return DataType.EXAMPLE + elif all(isinstance(element, tf.train.SequenceExample)): + return DataType.SEQUENCEEXAMPLE + else: + raise ValueError(f'Unsupported DataType {type(elements)}') + + def _get_meta_graph_def(saved_model_pb: _SavedModel, tags: Sequence[Text]) -> _MetaGraphDef: """Returns MetaGraphDef from SavedModel.""" diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 30416ae0..056d5674 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -26,6 +26,7 @@ import mock import apache_beam as beam +import pyarrow as pa from apache_beam.metrics.metric import MetricsFilter from apache_beam.testing.util import assert_that from apache_beam.testing.util import equal_to @@ -33,18 +34,18 @@ from googleapiclient import http from six.moves import http_client import tensorflow as tf -from tfx_bsl.beam import run_inference +from tfx_bsl.beam import run_inference_arrow from tfx_bsl.public.proto import model_spec_pb2 +from tfx_bsl.tfxio import raw_tf_record from google.protobuf import text_format - from tensorflow_serving.apis import prediction_log_pb2 -class RunInferenceFixture(tf.test.TestCase): +class RunInferenceArrowFixture(tf.test.TestCase): def setUp(self): - super(RunInferenceFixture, self).setUp() + super(RunInferenceArrowFixture, self).setUp() self._predict_examples = [ text_format.Parse( """ @@ -70,11 +71,10 @@ def _prepare_predict_examples(self, example_path): for example in self._predict_examples: output_file.write(example.SerializeToString()) - -class 
RunOfflineInferenceTest(RunInferenceFixture): +class RunOfflineInferenceArrowTest(RunInferenceArrowFixture): def setUp(self): - super(RunOfflineInferenceTest, self).setUp() + super(RunOfflineInferenceArrowTest, self).setUp() self._predict_examples = [ text_format.Parse( """ @@ -107,6 +107,21 @@ def setUp(self): ] + serialized_example = [] + for example in self._predict_examples: + serialized_example.append(example.SerializeToString()) + self.record_batch = pa.RecordBatch.from_arrays( + [serialized_example, ], ["__RAW_RECORD__", ] + ) + + serialized_example_multi = [] + for example in self._multihead_examples: + serialized_example_multi.append(example.SerializeToString()) + self.record_batch_multihead = pa.RecordBatch.from_arrays( + [serialized_example_multi, ], ["__RAW_RECORD__", ] + ) + + def _prepare_multihead_examples(self, example_path): with tf.io.TFRecordWriter(example_path) as output_file: for example in self._multihead_examples: @@ -206,14 +221,23 @@ def _build_multihead_model(self, model_path): signature_def_map=signature_def_map) builder.save() - def _run_inference_with_beam(self, example_path, inference_spec_type, + def _run_inference_with_beam(self, example_type, inference_spec_type, prediction_log_path): - with beam.Pipeline() as pipeline: - _ = ( + if example_type == 'multi': + with beam.Pipeline() as pipeline: + _ = ( pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) + | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + else: + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | "createRecordBatch" >> beam.Create([self.record_batch]) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -234,7 +258,7 @@ def testModelPathInvalid(self): prediction_log_path = self._get_output_data_dir('predictions') with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): self._run_inference_with_beam( - example_path, + 'predict', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=self._get_output_data_dir())), prediction_log_path) @@ -246,7 +270,7 @@ def testEstimatorModelPredict(self): self._build_predict_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - example_path, + 'predict', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)), prediction_log_path) @@ -255,7 +279,7 @@ def testEstimatorModelPredict(self): self.assertLen(results, 2) self.assertEqual( results[0].predict_log.request.inputs[ - run_inference._DEFAULT_INPUT_KEY].string_val[0], + run_inference_arrow._DEFAULT_INPUT_KEY].string_val[0], self._predict_examples[0].SerializeToString()) self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, tf.float32) @@ -275,7 +299,7 @@ def testClassifyModel(self): self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - example_path, + 'multi', model_spec_pb2.InferenceSpecType( 
saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path, signature_name=['classify_sum'])), @@ -299,7 +323,7 @@ def testRegressModel(self): self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - example_path, + 'multi', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path, signature_name=['regress_diff'])), @@ -322,7 +346,7 @@ def testMultiInferenceModel(self): self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - example_path, + 'multi', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path, @@ -394,7 +418,7 @@ def call(self, serialized_example): self._prepare_predict_examples(example_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - example_path, + 'predict', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)), prediction_log_path) @@ -412,9 +436,9 @@ def testTelemetry(self): model_path=model_path, signature_name=['classify_sum'])) pipeline = beam.Pipeline() _ = ( - pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + pipeline + | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type)) run_result = pipeline.run() run_result.wait_until_finish() @@ -449,6 +473,414 @@ def testTelemetry(self): load_model_latency_milli_secs['distributions'][0].result.sum, 0) +# class RunInferenceFixture(tf.test.TestCase): + +# def setUp(self): +# super(RunInferenceFixture, self).setUp() +# self._predict_examples = [ +# text_format.Parse( +# """ +# features { +# feature { key: "input1" value { float_list { value: 0 }}} +# } +# """, tf.train.Example()), +# ] + +# def _get_output_data_dir(self, sub_dir=None): +# test_dir = self._testMethodName +# path = os.path.join( +# os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), +# test_dir) +# if not tf.io.gfile.exists(path): +# tf.io.gfile.makedirs(path) +# if sub_dir is not None: +# path = os.path.join(path, sub_dir) +# return path + +# def _prepare_predict_examples(self, example_path): +# with tf.io.TFRecordWriter(example_path) as output_file: +# for example in self._predict_examples: +# output_file.write(example.SerializeToString()) + + +# class RunOfflineInferenceTest(RunInferenceFixture): + +# def setUp(self): +# super(RunOfflineInferenceTest, self).setUp() +# self._predict_examples = [ +# text_format.Parse( +# """ +# features { +# feature { key: "input1" value { float_list { value: 0 }}} +# } +# """, tf.train.Example()), +# text_format.Parse( +# """ +# features { +# feature { key: "input1" value { float_list { value: 1 }}} +# } +# """, tf.train.Example()), +# ] +# self._multihead_examples = [ +# text_format.Parse( +# """ +# features { +# feature {key: "x" value { float_list { value: 0.8 }}} +# feature {key: "y" value { float_list { value: 0.2 }}} +# } +# """, tf.train.Example()), +# text_format.Parse( +# """ +# features { +# feature {key: "x" value { float_list { value: 0.6 }}} +# feature {key: "y" value { float_list { value: 0.1 }}} +# } +# """, tf.train.Example()), +# ] + + +# def _prepare_multihead_examples(self, example_path): 
+# with tf.io.TFRecordWriter(example_path) as output_file: +# for example in self._multihead_examples: +# output_file.write(example.SerializeToString()) + +# def _build_predict_model(self, model_path): +# """Exports the dummy sum predict model.""" + +# with tf.compat.v1.Graph().as_default(): +# input_tensors = { +# 'x': tf.compat.v1.io.FixedLenFeature( +# [1], dtype=tf.float32, default_value=0) +# } +# serving_receiver = ( +# tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( +# input_tensors)()) +# output_tensors = {'y': serving_receiver.features['x'] * 2} +# sess = tf.compat.v1.Session() +# sess.run(tf.compat.v1.initializers.global_variables()) +# signature_def = tf.compat.v1.estimator.export.PredictOutput( +# output_tensors).as_signature_def(serving_receiver.receiver_tensors) +# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) +# builder.add_meta_graph_and_variables( +# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], +# signature_def_map={ +# tf.compat.v1.saved_model.signature_constants +# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: +# signature_def, +# }) +# builder.save() + +# def _build_regression_signature(self, input_tensor, output_tensor): +# """Helper function for building a regression SignatureDef.""" +# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( +# input_tensor) +# signature_inputs = { +# tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: +# input_tensor_info +# } +# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( +# output_tensor) +# signature_outputs = { +# tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: +# output_tensor_info +# } +# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( +# signature_inputs, signature_outputs, +# tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) + +# def _build_classification_signature(self, input_tensor, scores_tensor): +# """Helper function for building a classification SignatureDef.""" +# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( +# input_tensor) +# signature_inputs = { +# tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: +# input_tensor_info +# } +# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( +# scores_tensor) +# signature_outputs = { +# tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: +# output_tensor_info +# } +# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( +# signature_inputs, signature_outputs, +# tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + +# def _build_multihead_model(self, model_path): +# with tf.compat.v1.Graph().as_default(): +# input_example = tf.compat.v1.placeholder( +# tf.string, name='input_examples_tensor') +# config = { +# 'x': tf.compat.v1.io.FixedLenFeature( +# [1], dtype=tf.float32, default_value=0), +# 'y': tf.compat.v1.io.FixedLenFeature( +# [1], dtype=tf.float32, default_value=0), +# } +# features = tf.compat.v1.parse_example(input_example, config) +# x = features['x'] +# y = features['y'] +# sum_pred = x + y +# diff_pred = tf.abs(x - y) +# sess = tf.compat.v1.Session() +# sess.run(tf.compat.v1.initializers.global_variables()) +# signature_def_map = { +# 'regress_diff': +# self._build_regression_signature(input_example, diff_pred), +# 'classify_sum': +# self._build_classification_signature(input_example, sum_pred), +# tf.compat.v1.saved_model.signature_constants +# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: +# 
self._build_regression_signature(input_example, sum_pred) +# } +# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) +# builder.add_meta_graph_and_variables( +# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], +# signature_def_map=signature_def_map) +# builder.save() + +# def _run_inference_with_beam(self, example_path, inference_spec_type, +# prediction_log_path): +# with beam.Pipeline() as pipeline: +# _ = ( +# pipeline +# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) +# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) +# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) +# | 'WritePredictions' >> beam.io.WriteToTFRecord( +# prediction_log_path, +# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + +# def _get_results(self, prediction_log_path): +# results = [] +# for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): +# record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) +# for record_string in record_iterator: +# prediction_log = prediction_log_pb2.PredictionLog() +# prediction_log.MergeFromString(record_string) +# results.append(prediction_log) +# return results + +# def testModelPathInvalid(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_predict_examples(example_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=self._get_output_data_dir())), prediction_log_path) + +# def testEstimatorModelPredict(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_predict_examples(example_path) +# model_path = self._get_output_data_dir('model') +# self._build_predict_model(model_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path)), prediction_log_path) + +# results = self._get_results(prediction_log_path) +# self.assertLen(results, 2) +# self.assertEqual( +# results[0].predict_log.request.inputs[ +# run_inference._DEFAULT_INPUT_KEY].string_val[0], +# self._predict_examples[0].SerializeToString()) +# self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, +# tf.float32) +# self.assertLen( +# results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) +# self.assertEqual( +# results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, +# 1) +# self.assertEqual( +# results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, +# 1) + +# def testClassifyModel(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_multihead_examples(example_path) +# model_path = self._get_output_data_dir('model') +# self._build_multihead_model(model_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path, signature_name=['classify_sum'])), +# prediction_log_path) + +# results = self._get_results(prediction_log_path) +# self.assertLen(results, 2) +# classify_log = results[0].classify_log +# self.assertLen(classify_log.request.input.example_list.examples, 1) +# 
self.assertEqual(classify_log.request.input.example_list.examples[0], +# self._multihead_examples[0]) +# self.assertLen(classify_log.response.result.classifications, 1) +# self.assertLen(classify_log.response.result.classifications[0].classes, 1) +# self.assertAlmostEqual( +# classify_log.response.result.classifications[0].classes[0].score, 1.0) + +# def testRegressModel(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_multihead_examples(example_path) +# model_path = self._get_output_data_dir('model') +# self._build_multihead_model(model_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path, signature_name=['regress_diff'])), +# prediction_log_path) + +# results = self._get_results(prediction_log_path) +# self.assertLen(results, 2) +# regress_log = results[0].regress_log +# self.assertLen(regress_log.request.input.example_list.examples, 1) +# self.assertEqual(regress_log.request.input.example_list.examples[0], +# self._multihead_examples[0]) +# self.assertLen(regress_log.response.result.regressions, 1) +# self.assertAlmostEqual(regress_log.response.result.regressions[0].value, +# 0.6) + +# def testMultiInferenceModel(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_multihead_examples(example_path) +# model_path = self._get_output_data_dir('model') +# self._build_multihead_model(model_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path, +# signature_name=['regress_diff', 'classify_sum'])), +# prediction_log_path) +# results = self._get_results(prediction_log_path) +# self.assertLen(results, 2) +# multi_inference_log = results[0].multi_inference_log +# self.assertLen(multi_inference_log.request.input.example_list.examples, 1) +# self.assertEqual(multi_inference_log.request.input.example_list.examples[0], +# self._multihead_examples[0]) +# self.assertLen(multi_inference_log.response.results, 2) +# signature_names = [] +# for result in multi_inference_log.response.results: +# signature_names.append(result.model_spec.signature_name) +# self.assertIn('regress_diff', signature_names) +# self.assertIn('classify_sum', signature_names) +# result = multi_inference_log.response.results[0] +# self.assertEqual(result.model_spec.signature_name, 'regress_diff') +# self.assertLen(result.regression_result.regressions, 1) +# self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) +# result = multi_inference_log.response.results[1] +# self.assertEqual(result.model_spec.signature_name, 'classify_sum') +# self.assertLen(result.classification_result.classifications, 1) +# self.assertLen(result.classification_result.classifications[0].classes, 1) +# self.assertAlmostEqual( +# result.classification_result.classifications[0].classes[0].score, 1.0) + +# def testKerasModelPredict(self): +# inputs = tf.keras.Input(shape=(1,), name='input1') +# output1 = tf.keras.layers.Dense( +# 1, activation=tf.nn.sigmoid, name='output1')( +# inputs) +# output2 = tf.keras.layers.Dense( +# 1, activation=tf.nn.sigmoid, name='output2')( +# inputs) +# inference_model = tf.keras.models.Model(inputs, [output1, output2]) + +# class TestKerasModel(tf.keras.Model): + +# def __init__(self, 
inference_model): +# super(TestKerasModel, self).__init__(name='test_keras_model') +# self.inference_model = inference_model + +# @tf.function(input_signature=[ +# tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') +# ]) +# def call(self, serialized_example): +# features = { +# 'input1': +# tf.compat.v1.io.FixedLenFeature([1], +# dtype=tf.float32, +# default_value=0) +# } +# input_tensor_dict = tf.io.parse_example(serialized_example, features) +# return inference_model(input_tensor_dict['input1']) + +# model = TestKerasModel(inference_model) +# model.compile( +# optimizer=tf.keras.optimizers.Adam(lr=.001), +# loss=tf.keras.losses.binary_crossentropy, +# metrics=['accuracy']) + +# model_path = self._get_output_data_dir('model') +# tf.compat.v1.keras.experimental.export_saved_model( +# model, model_path, serving_only=True) + +# example_path = self._get_output_data_dir('examples') +# self._prepare_predict_examples(example_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path)), prediction_log_path) + +# results = self._get_results(prediction_log_path) +# self.assertLen(results, 2) + +# def testTelemetry(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_multihead_examples(example_path) +# model_path = self._get_output_data_dir('model') +# self._build_multihead_model(model_path) +# inference_spec_type = model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path, signature_name=['classify_sum'])) +# pipeline = beam.Pipeline() +# _ = ( +# pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) +# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) +# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) +# run_result = pipeline.run() +# run_result.wait_until_finish() + +# num_inferences = run_result.metrics().query( +# MetricsFilter().with_name('num_inferences')) +# self.assertTrue(num_inferences['counters']) +# self.assertEqual(num_inferences['counters'][0].result, 2) +# num_instances = run_result.metrics().query( +# MetricsFilter().with_name('num_instances')) +# self.assertTrue(num_instances['counters']) +# self.assertEqual(num_instances['counters'][0].result, 2) +# inference_request_batch_size = run_result.metrics().query( +# MetricsFilter().with_name('inference_request_batch_size')) +# self.assertTrue(inference_request_batch_size['distributions']) +# self.assertEqual( +# inference_request_batch_size['distributions'][0].result.sum, 2) +# inference_request_batch_byte_size = run_result.metrics().query( +# MetricsFilter().with_name('inference_request_batch_byte_size')) +# self.assertTrue(inference_request_batch_byte_size['distributions']) +# self.assertEqual( +# inference_request_batch_byte_size['distributions'][0].result.sum, +# sum(element.ByteSize() for element in self._multihead_examples)) +# inference_batch_latency_micro_secs = run_result.metrics().query( +# MetricsFilter().with_name('inference_batch_latency_micro_secs')) +# self.assertTrue(inference_batch_latency_micro_secs['distributions']) +# self.assertGreaterEqual( +# inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) +# load_model_latency_milli_secs = run_result.metrics().query( +# MetricsFilter().with_name('load_model_latency_milli_secs')) +# self.assertTrue(load_model_latency_milli_secs['distributions']) +# 
self.assertGreaterEqual( +# load_model_latency_milli_secs['distributions'][0].result.sum, 0) + + # class RunRemoteInferenceTest(RunInferenceFixture): # def setUp(self): From 037f3b62bbff87ef726ec85c2a4199179efa7e75 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Mon, 8 Jun 2020 10:16:39 -0400 Subject: [PATCH 09/31] fix spacing --- tfx_bsl/beam/run_inference_arrow_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 056d5674..4104d1f4 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -323,7 +323,7 @@ def testRegressModel(self): self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - 'multi', + 'multi', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path, signature_name=['regress_diff'])), From 44af0587062dd4e6a9bd1acfe2e2c42bbd6c3a31 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 10 Jun 2020 17:57:18 -0400 Subject: [PATCH 10/31] remove unecessary loop --- tfx_bsl/beam/run_inference_arrow.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 7d7cecf5..f5d2800f 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -65,6 +65,7 @@ except ImportError: pass +_RECORDBATCH_COLUMN = '__RAW_RECORD__' _DEFAULT_INPUT_KEY = 'examples' _METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' _METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' @@ -442,6 +443,8 @@ def _prepare_instances( instance = {} tfexample = tf.train.Example.FromString(example) + + # TODO (Maxine): consider leveraging recordbatch columns for input_name, feature in tfexample.features.feature.items(): attr_name = feature.WhichOneof('kind') if attr_name is None: @@ -654,9 +657,8 @@ def setup(self): super(_BatchClassifyDoFn, self).setup() def _check_elements(self, elements: List[Union[str, bytes]]) -> None: - for element in elements: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Classify only supports tf.train.Example') + if self._data_type != DataType.EXAMPLE: + raise ValueError('Classify only supports tf.train.Example') def _post_process( self, elements: Sequence[Union[str, bytes]], @@ -677,9 +679,8 @@ def setup(self): super(_BatchRegressDoFn, self).setup() def _check_elements(self, elements: List[Union[str, bytes]]) -> None: - for element in elements: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Regress only supports tf.train.Example') + if self._data_type != DataType.EXAMPLE: + raise ValueError('Regress only supports tf.train.Example') def _post_process( self, elements: Sequence[Union[str, bytes]], @@ -750,9 +751,8 @@ class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on multi-head model.""" def _check_elements(self, elements: List[Union[str, bytes]]) -> None: - for element in elements: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Multi-inference only supports tf.train.Example') + if self._data_type != DataType.EXAMPLE: + raise ValueError('Multi-inference only supports tf.train.Example') def _post_process( self, elements: Sequence[Union[str, bytes]], @@ -864,6 +864,7 @@ def process( yield result + def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], elements: Sequence[Union[str, bytes]], 
outputs: Mapping[Text, np.ndarray] From 3b64e745b7b64ee6ea483aefc35f32a9c9a169af Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 11 Jun 2020 18:21:40 -0400 Subject: [PATCH 11/31] modify and add tests for remote prediction --- tfx_bsl/beam/run_inference_arrow.py | 11 +- tfx_bsl/beam/run_inference_arrow_test.py | 656 +++++------------------ 2 files changed, 134 insertions(+), 533 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index f5d2800f..c0cc0521 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -387,6 +387,7 @@ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, pipeline_options: PipelineOptions, data_type): super(_RemotePredictDoFn, self).__init__(inference_spec_type) self._api_client = None + self._data_type = data_type project_id = ( inference_spec_type.ai_platform_prediction_model_spec.project_id or @@ -437,10 +438,6 @@ def _prepare_instances( cls, elements: List[Union[str, bytes]] ) -> Generator[Mapping[Text, Any], None, None]: for example in elements: - # TODO(b/151468119): support tf.train.SequenceExample - if data_type != DataType.EXAMPLE: - raise ValueError('Remote prediction only supports tf.train.Example') - instance = {} tfexample = tf.train.Example.FromString(example) @@ -482,8 +479,14 @@ def _parse_feature_content(values: Sequence[Any], attr_name: Text, else: return values + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + # TODO(b/151468119): support tf.train.SequenceExample + if self._data_type != DataType.EXAMPLE: + raise ValueError('Remote prediction only supports tf.train.Example') + def run_inference( self, elements: List[Union[str, bytes]]) -> Sequence[Mapping[Text, Any]]: + self._check_elements(elements) body = {'instances': list(self._prepare_instances(elements))} request = self._make_request(body) response = self._execute_request(request) diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 4104d1f4..c502ad8c 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -55,6 +55,13 @@ def setUp(self): """, tf.train.Example()), ] + serialized_example = [] + for example in self._predict_examples: + serialized_example.append(example.SerializeToString()) + self.record_batch = pa.RecordBatch.from_arrays( + [serialized_example, ], ["__RAW_RECORD__", ] + ) + def _get_output_data_dir(self, sub_dir=None): test_dir = self._testMethodName path = os.path.join( @@ -473,535 +480,126 @@ def testTelemetry(self): load_model_latency_milli_secs['distributions'][0].result.sum, 0) -# class RunInferenceFixture(tf.test.TestCase): - -# def setUp(self): -# super(RunInferenceFixture, self).setUp() -# self._predict_examples = [ -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 0 }}} -# } -# """, tf.train.Example()), -# ] - -# def _get_output_data_dir(self, sub_dir=None): -# test_dir = self._testMethodName -# path = os.path.join( -# os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), -# test_dir) -# if not tf.io.gfile.exists(path): -# tf.io.gfile.makedirs(path) -# if sub_dir is not None: -# path = os.path.join(path, sub_dir) -# return path - -# def _prepare_predict_examples(self, example_path): -# with tf.io.TFRecordWriter(example_path) as output_file: -# for example in self._predict_examples: -# output_file.write(example.SerializeToString()) - - -# class 
RunOfflineInferenceTest(RunInferenceFixture): - -# def setUp(self): -# super(RunOfflineInferenceTest, self).setUp() -# self._predict_examples = [ -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 0 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 1 }}} -# } -# """, tf.train.Example()), -# ] -# self._multihead_examples = [ -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.8 }}} -# feature {key: "y" value { float_list { value: 0.2 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.6 }}} -# feature {key: "y" value { float_list { value: 0.1 }}} -# } -# """, tf.train.Example()), -# ] - - -# def _prepare_multihead_examples(self, example_path): -# with tf.io.TFRecordWriter(example_path) as output_file: -# for example in self._multihead_examples: -# output_file.write(example.SerializeToString()) - -# def _build_predict_model(self, model_path): -# """Exports the dummy sum predict model.""" - -# with tf.compat.v1.Graph().as_default(): -# input_tensors = { -# 'x': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, default_value=0) -# } -# serving_receiver = ( -# tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( -# input_tensors)()) -# output_tensors = {'y': serving_receiver.features['x'] * 2} -# sess = tf.compat.v1.Session() -# sess.run(tf.compat.v1.initializers.global_variables()) -# signature_def = tf.compat.v1.estimator.export.PredictOutput( -# output_tensors).as_signature_def(serving_receiver.receiver_tensors) -# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) -# builder.add_meta_graph_and_variables( -# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], -# signature_def_map={ -# tf.compat.v1.saved_model.signature_constants -# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: -# signature_def, -# }) -# builder.save() - -# def _build_regression_signature(self, input_tensor, output_tensor): -# """Helper function for building a regression SignatureDef.""" -# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# input_tensor) -# signature_inputs = { -# tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: -# input_tensor_info -# } -# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# output_tensor) -# signature_outputs = { -# tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: -# output_tensor_info -# } -# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( -# signature_inputs, signature_outputs, -# tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) - -# def _build_classification_signature(self, input_tensor, scores_tensor): -# """Helper function for building a classification SignatureDef.""" -# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# input_tensor) -# signature_inputs = { -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: -# input_tensor_info -# } -# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# scores_tensor) -# signature_outputs = { -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: -# output_tensor_info -# } -# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( -# signature_inputs, signature_outputs, -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) - -# def 
_build_multihead_model(self, model_path): -# with tf.compat.v1.Graph().as_default(): -# input_example = tf.compat.v1.placeholder( -# tf.string, name='input_examples_tensor') -# config = { -# 'x': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, default_value=0), -# 'y': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, default_value=0), -# } -# features = tf.compat.v1.parse_example(input_example, config) -# x = features['x'] -# y = features['y'] -# sum_pred = x + y -# diff_pred = tf.abs(x - y) -# sess = tf.compat.v1.Session() -# sess.run(tf.compat.v1.initializers.global_variables()) -# signature_def_map = { -# 'regress_diff': -# self._build_regression_signature(input_example, diff_pred), -# 'classify_sum': -# self._build_classification_signature(input_example, sum_pred), -# tf.compat.v1.saved_model.signature_constants -# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: -# self._build_regression_signature(input_example, sum_pred) -# } -# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) -# builder.add_meta_graph_and_variables( -# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], -# signature_def_map=signature_def_map) -# builder.save() - -# def _run_inference_with_beam(self, example_path, inference_spec_type, -# prediction_log_path): -# with beam.Pipeline() as pipeline: -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) -# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) -# | 'WritePredictions' >> beam.io.WriteToTFRecord( -# prediction_log_path, -# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - -# def _get_results(self, prediction_log_path): -# results = [] -# for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): -# record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) -# for record_string in record_iterator: -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.MergeFromString(record_string) -# results.append(prediction_log) -# return results - -# def testModelPathInvalid(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=self._get_output_data_dir())), prediction_log_path) - -# def testEstimatorModelPredict(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_predict_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# self.assertEqual( -# results[0].predict_log.request.inputs[ -# run_inference._DEFAULT_INPUT_KEY].string_val[0], -# self._predict_examples[0].SerializeToString()) -# self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, -# tf.float32) -# self.assertLen( -# results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) -# self.assertEqual( -# 
results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, -# 1) -# self.assertEqual( -# results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, -# 1) - -# def testClassifyModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['classify_sum'])), -# prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# classify_log = results[0].classify_log -# self.assertLen(classify_log.request.input.example_list.examples, 1) -# self.assertEqual(classify_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(classify_log.response.result.classifications, 1) -# self.assertLen(classify_log.response.result.classifications[0].classes, 1) -# self.assertAlmostEqual( -# classify_log.response.result.classifications[0].classes[0].score, 1.0) - -# def testRegressModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['regress_diff'])), -# prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# regress_log = results[0].regress_log -# self.assertLen(regress_log.request.input.example_list.examples, 1) -# self.assertEqual(regress_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(regress_log.response.result.regressions, 1) -# self.assertAlmostEqual(regress_log.response.result.regressions[0].value, -# 0.6) - -# def testMultiInferenceModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, -# signature_name=['regress_diff', 'classify_sum'])), -# prediction_log_path) -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# multi_inference_log = results[0].multi_inference_log -# self.assertLen(multi_inference_log.request.input.example_list.examples, 1) -# self.assertEqual(multi_inference_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(multi_inference_log.response.results, 2) -# signature_names = [] -# for result in multi_inference_log.response.results: -# signature_names.append(result.model_spec.signature_name) -# self.assertIn('regress_diff', signature_names) -# self.assertIn('classify_sum', signature_names) -# result = multi_inference_log.response.results[0] -# self.assertEqual(result.model_spec.signature_name, 'regress_diff') -# 
self.assertLen(result.regression_result.regressions, 1) -# self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) -# result = multi_inference_log.response.results[1] -# self.assertEqual(result.model_spec.signature_name, 'classify_sum') -# self.assertLen(result.classification_result.classifications, 1) -# self.assertLen(result.classification_result.classifications[0].classes, 1) -# self.assertAlmostEqual( -# result.classification_result.classifications[0].classes[0].score, 1.0) - -# def testKerasModelPredict(self): -# inputs = tf.keras.Input(shape=(1,), name='input1') -# output1 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output1')( -# inputs) -# output2 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output2')( -# inputs) -# inference_model = tf.keras.models.Model(inputs, [output1, output2]) - -# class TestKerasModel(tf.keras.Model): - -# def __init__(self, inference_model): -# super(TestKerasModel, self).__init__(name='test_keras_model') -# self.inference_model = inference_model - -# @tf.function(input_signature=[ -# tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') -# ]) -# def call(self, serialized_example): -# features = { -# 'input1': -# tf.compat.v1.io.FixedLenFeature([1], -# dtype=tf.float32, -# default_value=0) -# } -# input_tensor_dict = tf.io.parse_example(serialized_example, features) -# return inference_model(input_tensor_dict['input1']) - -# model = TestKerasModel(inference_model) -# model.compile( -# optimizer=tf.keras.optimizers.Adam(lr=.001), -# loss=tf.keras.losses.binary_crossentropy, -# metrics=['accuracy']) - -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) - -# def testTelemetry(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['classify_sum'])) -# pipeline = beam.Pipeline() -# _ = ( -# pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) -# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) -# run_result = pipeline.run() -# run_result.wait_until_finish() - -# num_inferences = run_result.metrics().query( -# MetricsFilter().with_name('num_inferences')) -# self.assertTrue(num_inferences['counters']) -# self.assertEqual(num_inferences['counters'][0].result, 2) -# num_instances = run_result.metrics().query( -# MetricsFilter().with_name('num_instances')) -# self.assertTrue(num_instances['counters']) -# self.assertEqual(num_instances['counters'][0].result, 2) -# inference_request_batch_size = run_result.metrics().query( -# MetricsFilter().with_name('inference_request_batch_size')) -# self.assertTrue(inference_request_batch_size['distributions']) -# self.assertEqual( -# 
inference_request_batch_size['distributions'][0].result.sum, 2) -# inference_request_batch_byte_size = run_result.metrics().query( -# MetricsFilter().with_name('inference_request_batch_byte_size')) -# self.assertTrue(inference_request_batch_byte_size['distributions']) -# self.assertEqual( -# inference_request_batch_byte_size['distributions'][0].result.sum, -# sum(element.ByteSize() for element in self._multihead_examples)) -# inference_batch_latency_micro_secs = run_result.metrics().query( -# MetricsFilter().with_name('inference_batch_latency_micro_secs')) -# self.assertTrue(inference_batch_latency_micro_secs['distributions']) -# self.assertGreaterEqual( -# inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) -# load_model_latency_milli_secs = run_result.metrics().query( -# MetricsFilter().with_name('load_model_latency_milli_secs')) -# self.assertTrue(load_model_latency_milli_secs['distributions']) -# self.assertGreaterEqual( -# load_model_latency_milli_secs['distributions'][0].result.sum, 0) - - -# class RunRemoteInferenceTest(RunInferenceFixture): - -# def setUp(self): -# super(RunRemoteInferenceTest, self).setUp() -# self.example_path = self._get_output_data_dir('example') -# self._prepare_predict_examples(self.example_path) -# # This is from https://ml.googleapis.com/$discovery/rest?version=v1. -# self._discovery_testdata_dir = os.path.join( -# os.path.join(os.path.dirname(__file__), 'testdata'), -# 'ml_discovery.json') - -# @staticmethod -# def _make_response_body(content, successful): -# if successful: -# response_dict = {'predictions': content} -# else: -# response_dict = {'error': content} -# return json.dumps(response_dict) - -# def _set_up_pipeline(self, inference_spec_type): -# self.pipeline = beam.Pipeline() -# self.pcoll = ( -# self.pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) -# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) -# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) - -# def _run_inference_with_beam(self): -# self.pipeline_result = self.pipeline.run() -# self.pipeline_result.wait_until_finish() - -# def test_model_predict(self): -# predictions = [{'output_1': [0.901], 'output_2': [0.997]}] -# builder = http.RequestMockBuilder({ -# 'ml.projects.predict': -# (None, self._make_response_body(predictions, successful=True)) -# }) -# resource = discovery.build( -# 'ml', -# 'v1', -# http=http.HttpMock(self._discovery_testdata_dir, -# {'status': http_client.OK}), -# requestBuilder=builder) -# with mock.patch('googleapiclient.discovery.' 'build') as response_mock: -# response_mock.side_effect = lambda service, version: resource -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# ai_platform_prediction_model_spec=model_spec_pb2 -# .AIPlatformPredictionModelSpec( -# project_id='test-project', -# model_name='test-model', -# )) - -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.predict_log.response.outputs['output_1'].CopyFrom( -# tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) -# prediction_log.predict_log.response.outputs['output_2'].CopyFrom( -# tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) - -# self._set_up_pipeline(inference_spec_type) -# assert_that(self.pcoll, equal_to([prediction_log])) -# self._run_inference_with_beam() - -# def test_exception_raised_when_response_body_contains_error_entry(self): -# error_msg = 'Base64 decode failed.' 
-# builder = http.RequestMockBuilder({ -# 'ml.projects.predict': -# (None, self._make_response_body(error_msg, successful=False)) -# }) -# resource = discovery.build( -# 'ml', -# 'v1', -# http=http.HttpMock(self._discovery_testdata_dir, -# {'status': http_client.OK}), -# requestBuilder=builder) -# with mock.patch('googleapiclient.discovery.' 'build') as response_mock: -# response_mock.side_effect = lambda service, version: resource -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# ai_platform_prediction_model_spec=model_spec_pb2 -# .AIPlatformPredictionModelSpec( -# project_id='test-project', -# model_name='test-model', -# )) - -# try: -# self._set_up_pipeline(inference_spec_type) -# self._run_inference_with_beam() -# except ValueError as exc: -# actual_error_msg = str(exc) -# self.assertTrue(actual_error_msg.startswith(error_msg)) -# else: -# self.fail('Test was expected to throw ValueError exception') - -# def test_exception_raised_when_project_id_is_empty(self): -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# ai_platform_prediction_model_spec=model_spec_pb2 -# .AIPlatformPredictionModelSpec(model_name='test-model',)) - -# with self.assertRaises(ValueError): -# self._set_up_pipeline(inference_spec_type) -# self._run_inference_with_beam() - -# def test_request_body_with_binary_data(self): -# example = text_format.Parse( -# """ -# features { -# feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} -# feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} -# feature { key: "y" value { int64_list { value: [1, 2] }}} -# } -# """, tf.train.Example()) -# result = list( -# run_inference._RemotePredictDoFn._prepare_instances([example])) -# self.assertEqual([ -# { -# 'x_bytes': { -# 'b64': 'QVNhOGFzZGY=' -# }, -# 'x': 'JLK7ljk3', -# 'y': [1, 2] -# }, -# ], result) +class RunRemoteInferenceArrowTest(RunInferenceArrowFixture): + + def setUp(self): + super(RunRemoteInferenceArrowTest, self).setUp() + self.example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self.example_path) + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. + self._discovery_testdata_dir = os.path.join( + os.path.join(os.path.dirname(__file__), 'testdata'), + 'ml_discovery.json') + + @staticmethod + def _make_response_body(content, successful): + if successful: + response_dict = {'predictions': content} + else: + response_dict = {'error': content} + return json.dumps(response_dict) + + def _set_up_pipeline(self, inference_spec_type): + self.pipeline = beam.Pipeline() + self.pcoll = ( + self.pipeline + | "createRecordBatch" >> beam.Create([self.record_batch]) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type)) + + def _run_inference_with_beam(self): + self.pipeline_result = self.pipeline.run() + self.pipeline_result.wait_until_finish() + + def test_model_predict(self): + predictions = [{'output_1': [0.901], 'output_2': [0.997]}] + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(predictions, successful=True)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.predict_log.response.outputs['output_1'].CopyFrom( + tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) + prediction_log.predict_log.response.outputs['output_2'].CopyFrom( + tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + + self._set_up_pipeline(inference_spec_type) + assert_that(self.pcoll, equal_to([prediction_log])) + self._run_inference_with_beam() + + def test_exception_raised_when_response_body_contains_error_entry(self): + error_msg = 'Base64 decode failed.' + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(error_msg, successful=False)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + try: + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def test_exception_raised_when_project_id_is_empty(self): + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec(model_name='test-model',)) + + with self.assertRaises(ValueError): + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + + def test_request_body_with_binary_data(self): + example = text_format.Parse( + """ + features { + feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} + feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} + feature { key: "y" value { int64_list { value: [1, 2] }}} + } + """, tf.train.Example()) + result = list( + run_inference_arrow._RemotePredictDoFn._prepare_instances([example.SerializeToString()])) + self.assertEqual([ + { + 'x_bytes': { + 'b64': 'QVNhOGFzZGY=' + }, + 'x': 'JLK7ljk3', + 'y': [1, 2] + }, + ], result) if __name__ == '__main__': From f874ce3fa430fdb739fde2f74e80b9d7c94b3bd1 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Mon, 22 Jun 2020 12:25:59 -0400 Subject: [PATCH 12/31] add config param, declared IOspec (foundation for multi-tensor) --- tfx_bsl/beam/run_inference_arrow.py | 109 ++++++++++----------- tfx_bsl/beam/run_inference_arrow_test.py | 117 ++++++++++++++++------- 2 files changed, 134 insertions(+), 92 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index c0cc0521..f73add3c 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -45,6 +45,7 @@ from tfx_bsl.beam import shared from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util +from tfx_bsl.tfxio import tensor_adapter from typing import Any, Generator, Iterable, List, 
Mapping, Sequence, Text, \ Tuple, Union, Optional @@ -93,21 +94,21 @@ class DataType(object): SEQUENCEEXAMPLE = 'SEQUENCEEXAMPLE' -# This API is private and called with only example or sequence example -# TODO (Maxine): pTransform from examples/sequence example here @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None + tensor_adapter_config: tensor_adapter.TensorAdapterConfig ) -> beam.pvalue.PCollection: """Implementation of RunInference API. Args: examples: A PCollection containing RecordBatch of serialized examples. inference_spec_type: Model inference endpoint. + tensor_adapter_config: Tensor adapter config which specifies how to obtain + tensors from the Arrow RecordBatch. Returns: A PCollection containing prediction logs. @@ -117,25 +118,24 @@ def RunInferenceImpl( # pylint: disable=invalid-name """ logging.info('RunInference on model: %s', inference_spec_type) - # TODO (Maxine): uncomment this once we change the api to take input - # Union[tf.train.Example, tf.train.SequenceExample] + # TODO (Maxine): either determine data type or take it as an input # data_type = _get_data_type(examples) data_type = DataType.EXAMPLE operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: return examples | 'Classify' >> _Classify( - inference_spec_type, data_type, process_column) + inference_spec_type, tensor_adapter_config, data_type) elif operation_type == OperationType.REGRESSION: return examples | 'Regress' >> _Regress( - inference_spec_type, data_type, process_column) + inference_spec_type,tensor_adapter_config, data_type) elif operation_type == OperationType.PREDICTION: return examples | 'Predict' >> _Predict( - inference_spec_type, data_type, process_column) + inference_spec_type, tensor_adapter_config, data_type) elif operation_type == OperationType.MULTIHEAD: return (examples | 'MultiInference' >> _MultiInference( - inference_spec_type, data_type, process_column)) + inference_spec_type, tensor_adapter_config, data_type)) else: raise ValueError('Unsupported operation_type %s' % operation_type) @@ -152,12 +152,12 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - data_type, process_column: Optional[str] = None): + tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): """Performs classify PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Classify' >> beam.ParDo(_BatchClassifyDoFn( - inference_spec_type, shared.Shared(), data_type, process_column)) + | 'Classify' >> beam.ParDo( + _BatchClassifyDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) | 'BuildPredictionLogForClassifications' >> beam.ParDo( _BuildPredictionLogForClassificationsDoFn())) else: @@ -169,12 +169,12 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - data_type, process_column: Optional[str] = None): + 
tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): """Performs regress PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Regress' >> beam.ParDo(_BatchRegressDoFn( - inference_spec_type, shared.Shared(), data_type, process_column)) + | 'Regress' >> beam.ParDo( + _BatchRegressDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) | 'BuildPredictionLogForRegressions' >> beam.ParDo( _BuildPredictionLogForRegressionsDoFn())) else: @@ -186,18 +186,18 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - data_type, process_column: Optional[str] = None): + tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): """Performs predict PTransform.""" if _using_in_process_inference(inference_spec_type): predictions = ( pcoll - | 'Predict' >> beam.ParDo(_BatchPredictDoFn( - inference_spec_type, shared.Shared(), data_type, process_column))) + | 'Predict' >> beam.ParDo( + _BatchPredictDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type))) else: predictions = ( pcoll - | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( - inference_spec_type, pcoll.pipeline.options, data_type))) + | 'RemotePredict' >> beam.ParDo( + _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options, tensor_adapter_config, data_type))) return (predictions | 'BuildPredictionLogForPredictions' >> beam.ParDo( _BuildPredictionLogForPredictionsDoFn())) @@ -208,13 +208,13 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - data_type, process_column: Optional[str] = None): + tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): """Performs multi inference PTransform.""" if _using_in_process_inference(inference_spec_type): return ( pcoll - | 'MultiInference' >> beam.ParDo(_BatchMultiInferenceDoFn( - inference_spec_type, shared.Shared(), data_type, process_column)) + | 'MultiInference' >> beam.ParDo( + _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) else: raise NotImplementedError @@ -278,47 +278,44 @@ def update( self._inference_request_batch_byte_size.update( sum(len(element) for element in elements)) - # TODO (Maxine): just one col for now, later, will do a list of str + def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None): + tensor_adapter_config: tensor_adapter.TensorAdapterConfig): super(_BaseDoFn, self).__init__() self._clock = None - self._process_column = process_column self._metrics_collector = self._MetricsCollector(inference_spec_type) + self._tensor_adapter = tensor_adapter.TensorAdapter(tensor_adapter_config) + self._io_tensor_spec = None # This value may be None if the model is remote def setup(self): self._clock = _ClockFactory.make_clock() - def process( - self, elements: pa.RecordBatch - ) -> Iterable[Any]: - batch_start_time = self._clock.get_current_time_in_microseconds() - # TODO (Maxine): take process as a parameter, should it be part of inference 
spec? - # extract record batch from here, assuming first column - - # what would record batch look like? (flatten or not) - # vs np.asarray(elements.column(0)) + def _extract_from_recordBatch(self, elements: pa.RecordBatch): if len(elements.columns) == 1: - serialized_examples = elements.column(0).to_pylist() + serialized_examples = elements.column(0).flatten().to_pylist() else: - if self._process_column is None: - raise ValueError('Must pass in a process column with multi-column RecordBatch') - serialized_examples = None for column_name, column_array in zip(elements.schema.names, elements.columns): column_type = column_array.type - if column_name == self._process_column: - serialized_examples = column_array.to_pylist() + if column_name == _RECORDBATCH_COLUMN: + serialized_examples = column_array.flatten().to_pylist() break + if (serialized_examples is None): + raise ValueError('Raw examples not found.') + for example in serialized_examples: if not (isinstance(example, bytes) or isinstance(example, str)): raise ValueError( - f'Expected a list of serialized examples in bytes or as a string, \ - got {type(example)}' - ) + 'Expected a list of serialized examples in bytes or as a string, got %s' % + type(example)) + + return serialized_examples + def process(self, elements: pa.RecordBatch) -> Iterable[Any]: + batch_start_time = self._clock.get_current_time_in_microseconds() + serialized_examples = self._extract_from_recordBatch(elements) outputs = self.run_inference(serialized_examples) result = self._post_process(serialized_examples, outputs) self._metrics_collector.update( @@ -384,8 +381,9 @@ class _RemotePredictDoFn(_BaseDoFn): """ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: PipelineOptions, data_type): - super(_RemotePredictDoFn, self).__init__(inference_spec_type) + pipeline_options: PipelineOptions, + tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + super(_RemotePredictDoFn, self).__init__(inference_spec_type, tensor_adapter_config) self._api_client = None self._data_type = data_type @@ -527,22 +525,19 @@ class _BaseBatchSavedModelDoFn(_BaseDoFn): """ def __init__( - self, - inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, data_type, - process_column: Optional[str] = None, - ): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, process_column) + self, inference_spec_type: model_spec_pb2.InferenceSpecType, + shared_model_handle: shared.Shared, + tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, tensor_adapter_config) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle self._model_path = inference_spec_type.saved_model_spec.model_path self._tags = None self._signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) self._session = None - self._io_tensor_spec = None self._data_type = data_type def setup(self): @@ -1138,7 +1133,7 @@ def _get_data_type(elements: Sequence[Any]) -> Text: elif all(isinstance(element, tf.train.SequenceExample)): return DataType.SEQUENCEEXAMPLE else: - raise ValueError(f'Unsupported DataType {type(elements)}') + return DataType.EXAMPLE def 
_get_meta_graph_def(saved_model_pb: _SavedModel, diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index c502ad8c..e6554758 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -36,10 +36,12 @@ import tensorflow as tf from tfx_bsl.beam import run_inference_arrow from tfx_bsl.public.proto import model_spec_pb2 -from tfx_bsl.tfxio import raw_tf_record +from tfx_bsl.tfxio import test_util +from tfx_bsl.tfxio import tensor_adapter from google.protobuf import text_format from tensorflow_serving.apis import prediction_log_pb2 +from tensorflow_metadata.proto.v0 import schema_pb2 class RunInferenceArrowFixture(tf.test.TestCase): @@ -57,10 +59,27 @@ def setUp(self): serialized_example = [] for example in self._predict_examples: - serialized_example.append(example.SerializeToString()) + serialized_example.append([example.SerializeToString()]) self.record_batch = pa.RecordBatch.from_arrays( - [serialized_example, ], ["__RAW_RECORD__", ] - ) + [ + pa.array([[0]], type=pa.list_(pa.float32())), + serialized_example + ], + ['input1', '__RAW_RECORD__'] +) + + tfxio = test_util.InMemoryTFExampleRecord( + schema=text_format.Parse( + """ + feature { + name: "input1" + type: FLOAT + } + """, schema_pb2.Schema()), + raw_record_column_name='__RAW_RECORD__') + self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) def _get_output_data_dir(self, sub_dir=None): test_dir = self._testMethodName @@ -96,6 +115,32 @@ def setUp(self): } """, tf.train.Example()), ] + + serialized_example = [] + for example in self._predict_examples: + serialized_example.append([example.SerializeToString()]) + self.record_batch = pa.RecordBatch.from_arrays( + [ + pa.array([[0], [1]], type=pa.list_(pa.float32())), + serialized_example + ], + ['input1', '__RAW_RECORD__'] + ) + + tfxio = test_util.InMemoryTFExampleRecord( + schema=text_format.Parse( + """ + feature { + name: "input1" + type: FLOAT + } + """, schema_pb2.Schema()), + raw_record_column_name='__RAW_RECORD__') + self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + self._multihead_examples = [ text_format.Parse( """ @@ -113,21 +158,35 @@ def setUp(self): """, tf.train.Example()), ] - - serialized_example = [] - for example in self._predict_examples: - serialized_example.append(example.SerializeToString()) - self.record_batch = pa.RecordBatch.from_arrays( - [serialized_example, ], ["__RAW_RECORD__", ] - ) - serialized_example_multi = [] for example in self._multihead_examples: - serialized_example_multi.append(example.SerializeToString()) + serialized_example_multi.append([example.SerializeToString()]) self.record_batch_multihead = pa.RecordBatch.from_arrays( - [serialized_example_multi, ], ["__RAW_RECORD__", ] + [ + pa.array([[0.8], [0.6]], type=pa.list_(pa.float32())), + pa.array([[0.2], [0.1]], type=pa.list_(pa.float32())), + serialized_example_multi + ], + ['x', 'y', '__RAW_RECORD__'] ) + tfxio_multi = test_util.InMemoryTFExampleRecord( + schema=text_format.Parse( + """ + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()), + raw_record_column_name='__RAW_RECORD__') + self.tensor_adapter_config_multihead = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio_multi.ArrowSchema(), + 
tensor_representations=tfxio_multi.TensorRepresentations()) + def _prepare_multihead_examples(self, example_path): with tf.io.TFRecordWriter(example_path) as output_file: @@ -235,7 +294,8 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, _ = ( pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, self.tensor_adapter_config_multihead) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -244,7 +304,8 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, _ = ( pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, self.tensor_adapter_config) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -260,8 +321,6 @@ def _get_results(self, prediction_log_path): return results def testModelPathInvalid(self): - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) prediction_log_path = self._get_output_data_dir('predictions') with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): self._run_inference_with_beam( @@ -271,8 +330,6 @@ def testModelPathInvalid(self): model_path=self._get_output_data_dir())), prediction_log_path) def testEstimatorModelPredict(self): - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) model_path = self._get_output_data_dir('model') self._build_predict_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') @@ -300,8 +357,6 @@ def testEstimatorModelPredict(self): 1) def testClassifyModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) model_path = self._get_output_data_dir('model') self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') @@ -324,8 +379,6 @@ def testClassifyModel(self): classify_log.response.result.classifications[0].classes[0].score, 1.0) def testRegressModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) model_path = self._get_output_data_dir('model') self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') @@ -347,8 +400,6 @@ def testRegressModel(self): 0.6) def testMultiInferenceModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) model_path = self._get_output_data_dir('model') self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') @@ -421,8 +472,6 @@ def call(self, serialized_example): tf.compat.v1.keras.experimental.export_saved_model( model, model_path, serving_only=True) - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( 'predict', @@ -434,8 +483,6 @@ def call(self, serialized_example): self.assertLen(results, 2) def testTelemetry(self): - example_path = 
self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) model_path = self._get_output_data_dir('model') self._build_multihead_model(model_path) inference_spec_type = model_spec_pb2.InferenceSpecType( @@ -445,7 +492,8 @@ def testTelemetry(self): _ = ( pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type)) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, self.tensor_adapter_config_multihead)) run_result = pipeline.run() run_result.wait_until_finish() @@ -484,8 +532,6 @@ class RunRemoteInferenceArrowTest(RunInferenceArrowFixture): def setUp(self): super(RunRemoteInferenceArrowTest, self).setUp() - self.example_path = self._get_output_data_dir('example') - self._prepare_predict_examples(self.example_path) # This is from https://ml.googleapis.com/$discovery/rest?version=v1. self._discovery_testdata_dir = os.path.join( os.path.join(os.path.dirname(__file__), 'testdata'), @@ -504,7 +550,8 @@ def _set_up_pipeline(self, inference_spec_type): self.pcoll = ( self.pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type)) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, self.tensor_adapter_config)) def _run_inference_with_beam(self): self.pipeline_result = self.pipeline.run() @@ -582,7 +629,7 @@ def test_exception_raised_when_project_id_is_empty(self): def test_request_body_with_binary_data(self): example = text_format.Parse( - """ + """ features { feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} From 300d8c9eca87bda83622ffd801a9fa9b99c76343 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 25 Jun 2020 19:57:03 -0400 Subject: [PATCH 13/31] add signature checking with multi-tensor model --- tfx_bsl/beam/run_inference_arrow.py | 240 ++++++++++++----------- tfx_bsl/beam/run_inference_arrow_test.py | 17 +- 2 files changed, 131 insertions(+), 126 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index f73add3c..9fafadd4 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -100,15 +100,16 @@ class DataType(object): def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None ) -> beam.pvalue.PCollection: """Implementation of RunInference API. Args: examples: A PCollection containing RecordBatch of serialized examples. inference_spec_type: Model inference endpoint. - tensor_adapter_config: Tensor adapter config which specifies how to obtain - tensors from the Arrow RecordBatch. + tensor_adapter_config [Optional]: Tensor adapter config which specifies how to + obtain tensors from the Arrow RecordBatch. + - Not required when running inference with remote model or 1 input Returns: A PCollection containing prediction logs. 
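Note (illustrative only, not part of the patch): a minimal sketch of how the updated RunInferenceImpl signature is meant to be called, mirroring the test fixtures in run_inference_arrow_test.py. The model path, output path, the 'input1' feature, and the '__RAW_RECORD__' column name are placeholder assumptions, and test_util.InMemoryTFExampleRecord stands in for whatever TFXIO source produces the RecordBatches in a real pipeline.

import apache_beam as beam
import pyarrow as pa
import tensorflow as tf

from google.protobuf import text_format
from tensorflow_metadata.proto.v0 import schema_pb2
from tensorflow_serving.apis import prediction_log_pb2
from tfx_bsl.beam import run_inference_arrow
from tfx_bsl.public.proto import model_spec_pb2
from tfx_bsl.tfxio import tensor_adapter
from tfx_bsl.tfxio import test_util

# One serialized tf.train.Example plus its decoded 'input1' column, packed
# into a single-row Arrow RecordBatch that carries the raw-record column.
example = text_format.Parse(
    'features { feature { key: "input1" value { float_list { value: 0 } } } }',
    tf.train.Example())
record_batch = pa.RecordBatch.from_arrays(
    [pa.array([[0.0]], type=pa.list_(pa.float32())),
     pa.array([[example.SerializeToString()]])],
    ['input1', '__RAW_RECORD__'])

# TensorAdapterConfig describing how to turn the RecordBatch into tensors.
tfxio = test_util.InMemoryTFExampleRecord(
    schema=text_format.Parse('feature { name: "input1" type: FLOAT }',
                             schema_pb2.Schema()),
    raw_record_column_name='__RAW_RECORD__')
adapter_config = tensor_adapter.TensorAdapterConfig(
    arrow_schema=tfxio.ArrowSchema(),
    tensor_representations=tfxio.TensorRepresentations())

inference_spec = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/tmp/saved_model'))  # placeholder path

with beam.Pipeline() as pipeline:
  _ = (pipeline
       | 'CreateRecordBatch' >> beam.Create([record_batch])
       | 'RunInference' >> run_inference_arrow.RunInferenceImpl(
           inference_spec, adapter_config)
       | 'WritePredictions' >> beam.io.WriteToTFRecord(
           '/tmp/predictions',  # placeholder path
           coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))

As described in the docstring above, the tensor_adapter_config argument can be omitted when running remote inference or a single-input model.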
@@ -125,24 +126,23 @@ def RunInferenceImpl( # pylint: disable=invalid-name operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: return examples | 'Classify' >> _Classify( - inference_spec_type, tensor_adapter_config, data_type) + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.REGRESSION: return examples | 'Regress' >> _Regress( - inference_spec_type,tensor_adapter_config, data_type) + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.PREDICTION: return examples | 'Predict' >> _Predict( - inference_spec_type, tensor_adapter_config, data_type) + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.MULTIHEAD: - return (examples - | 'MultiInference' >> _MultiInference( - inference_spec_type, tensor_adapter_config, data_type)) + return (examples | 'MultiInference' >> _MultiInference( + inference_spec_type, data_type, tensor_adapter_config)) else: raise ValueError('Unsupported operation_type %s' % operation_type) _IOTensorSpec = collections.namedtuple( '_IOTensorSpec', - ['input_tensor_alias', 'input_tensor_name', 'output_alias_tensor_names']) + ['input_tensor_alias', 'input_tensor_names', 'input_tensor_types', 'output_alias_tensor_names']) _Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) @@ -151,13 +151,13 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs classify PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Classify' >> beam.ParDo( - _BatchClassifyDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) + | 'Classify' >> beam.ParDo(_BatchClassifyDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildPredictionLogForClassifications' >> beam.ParDo( _BuildPredictionLogForClassificationsDoFn())) else: @@ -168,13 +168,13 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs regress PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Regress' >> beam.ParDo( - _BatchRegressDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) + | 'Regress' >> beam.ParDo(_BatchRegressDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildPredictionLogForRegressions' >> beam.ParDo( _BuildPredictionLogForRegressionsDoFn())) else: @@ -185,19 +185,19 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name 
@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs predict PTransform.""" if _using_in_process_inference(inference_spec_type): predictions = ( pcoll - | 'Predict' >> beam.ParDo( - _BatchPredictDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type))) + | 'Predict' >> beam.ParDo(_BatchPredictDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config))) else: predictions = ( pcoll - | 'RemotePredict' >> beam.ParDo( - _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options, tensor_adapter_config, data_type))) + | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( + inference_spec_type, pcoll.pipeline.options, data_type, tensor_adapter_config))) return (predictions | 'BuildPredictionLogForPredictions' >> beam.ParDo( _BuildPredictionLogForPredictionsDoFn())) @@ -207,14 +207,14 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs multi inference PTransform.""" if _using_in_process_inference(inference_spec_type): return ( pcoll - | 'MultiInference' >> beam.ParDo( - _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) + | 'MultiInference' >> beam.ParDo(_BatchMultiInferenceDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) else: raise NotImplementedError @@ -281,42 +281,52 @@ def update( def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig): + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): super(_BaseDoFn, self).__init__() self._clock = None self._metrics_collector = self._MetricsCollector(inference_spec_type) - self._tensor_adapter = tensor_adapter.TensorAdapter(tensor_adapter_config) + self._tensor_adapter_config = tensor_adapter_config self._io_tensor_spec = None # This value may be None if the model is remote def setup(self): self._clock = _ClockFactory.make_clock() def _extract_from_recordBatch(self, elements: pa.RecordBatch): - if len(elements.columns) == 1: - serialized_examples = elements.column(0).flatten().to_pylist() - else: - serialized_examples = None - for column_name, column_array in zip(elements.schema.names, elements.columns): - column_type = column_array.type - if column_name == _RECORDBATCH_COLUMN: - serialized_examples = column_array.flatten().to_pylist() - break + """ + Function to extract the compatible input with model signature + """ + serialized_examples = None + for column_name, column_array in zip(elements.schema.names, elements.columns): + if 
column_name == _RECORDBATCH_COLUMN:
+        column_type = column_array.flatten().type
+        if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)):
+          raise ValueError('Expected a list of serialized examples in bytes or as a string, got %s' % column_type)
+        serialized_examples = column_array.flatten().to_pylist()
+        break

     if (serialized_examples is None):
       raise ValueError('Raw examples not found.')

-    for example in serialized_examples:
-      if not (isinstance(example, bytes) or isinstance(example, str)):
-        raise ValueError(
-            'Expected a list of serialized examples in bytes or as a string, got %s' %
-            type(example))
+    model_input = None
+    if self._io_tensor_spec is None:  # Case when we are running remote inference
+      model_input = serialized_examples
+    elif (len(self._io_tensor_spec.input_tensor_names) == 1):
+      model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples}
+    else:
+      if (self._tensor_adapter_config is None):
+        raise ValueError('Tensor adapter config is required with a multi-input model')
+      _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config)
+      dict_of_tensors = _tensor_adapter.ToBatchTensors(elements)
+      if self._io_tensor_spec:
+        model_input = model_util.filter_tensors_by_input_names(
+            dict_of_tensors, self._io_tensor_spec.input_tensor_names)

-    return serialized_examples
+    return serialized_examples, model_input

   def process(self, elements: pa.RecordBatch) -> Iterable[Any]:
     batch_start_time = self._clock.get_current_time_in_microseconds()
-    serialized_examples = self._extract_from_recordBatch(elements)
-    outputs = self.run_inference(serialized_examples)
+    serialized_examples, model_input = self._extract_from_recordBatch(elements)
+    outputs = self.run_inference(model_input)
     result = self._post_process(serialized_examples, outputs)
     self._metrics_collector.update(
         serialized_examples,
@@ -328,7 +338,7 @@ def finish_bundle(self):

   @abc.abstractmethod
   def run_inference(
-      self, elements: List[Union[str, bytes]]
+      self, tensors: Mapping[Any, Any]
   ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]:
     raise NotImplementedError

@@ -381,8 +391,8 @@ class _RemotePredictDoFn(_BaseDoFn):
   """

   def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType,
-               pipeline_options: PipelineOptions,
-               tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type):
+               pipeline_options: PipelineOptions, data_type,
+               tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None):
     super(_RemotePredictDoFn, self).__init__(inference_spec_type, tensor_adapter_config)
     self._api_client = None
     self._data_type = data_type
@@ -439,7 +449,6 @@ def _prepare_instances(
       instance = {}
       tfexample = tf.train.Example.FromString(example)

-      # TODO (Maxine): consider leveraging recordbatch columns
       for input_name, feature in tfexample.features.feature.items():
         attr_name = feature.WhichOneof('kind')
         if attr_name is None:
@@ -477,14 +486,14 @@ def _parse_feature_content(values: Sequence[Any], attr_name: Text,
     else:
       return values

-  def _check_elements(self, elements: List[Union[str, bytes]]) -> None:
+  def _check_elements(self) -> None:
     # TODO(b/151468119): support tf.train.SequenceExample
     if self._data_type != DataType.EXAMPLE:
       raise ValueError('Remote prediction only supports tf.train.Example')

   def run_inference(
       self, elements: List[Union[str, bytes]]) -> Sequence[Mapping[Text, Any]]:
-    self._check_elements(elements)
+    self._check_elements()
     body = {'instances': list(self._prepare_instances(elements))}
     request =
self._make_request(body) response = self._execute_request(request) @@ -526,8 +535,8 @@ class _BaseBatchSavedModelDoFn(_BaseDoFn): def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + shared_model_handle: shared.Shared, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, tensor_adapter_config) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle @@ -584,56 +593,55 @@ def load(): def _pre_process(self) -> _IOTensorSpec: # Pre process functions will validate for each signature. - # TODO (Maxine): having more than 1 input io_tensor_specs = [] for signature in self._signatures: - if len(signature.signature_def.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - if (list(signature.signature_def.inputs.values())[0].dtype != + if (len(signature.signature_def.inputs) == 1 and + list(signature.signature_def.inputs.values())[0].dtype != tf.string.as_datatype_enum): raise ValueError( - 'Input dtype is expected to be %s, got %s' % + 'With 1 input, dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, list(signature.signature_def.inputs.values())[0].dtype) io_tensor_specs.append(_signature_pre_process(signature.signature_def)) - input_tensor_name = '' - input_tensor_alias = '' + input_tensor_names = [] + input_tensor_alias = [] + input_tensor_types = {} output_alias_tensor_names = {} for io_tensor_spec in io_tensor_specs: - if not input_tensor_name: - input_tensor_name = io_tensor_spec.input_tensor_name + if not input_tensor_names: + input_tensor_names = io_tensor_spec.input_tensor_names input_tensor_alias = io_tensor_spec.input_tensor_alias - elif input_tensor_name != io_tensor_spec.input_tensor_name: + elif input_tensor_names != io_tensor_spec.input_tensor_names: raise ValueError('Input tensor must be the same for all Signatures.') - for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items( - ): + for alias, tensor_type in io_tensor_spec.input_tensor_types.items(): + input_tensor_types[alias] = tensor_type + for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items(): output_alias_tensor_names[alias] = tensor_name - if (not output_alias_tensor_names or not input_tensor_name or + if (not output_alias_tensor_names or not input_tensor_names or not input_tensor_alias): raise ValueError('No valid fetch tensors or feed tensors.') - return _IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) + return _IOTensorSpec(input_tensor_alias, input_tensor_names, + input_tensor_types, output_alias_tensor_names) def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) def run_inference( - self, elements: List[Union[str, bytes]]) -> Mapping[Text, np.ndarray]: - self._check_elements(elements) - outputs = self._run_tf_operations(elements) + self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: + self._check_elements() + outputs = self._run_tf_operations(tensors) return outputs def _run_tf_operations( - self, elements: List[Union[str, bytes]]) -> Mapping[Text, np.ndarray]: + self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: result = self._session.run( - self._io_tensor_spec.output_alias_tensor_names, - 
feed_dict={self._io_tensor_spec.input_tensor_name: elements}) + self._io_tensor_spec.output_alias_tensor_names, feed_dict=tensors) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): raise RuntimeError('Output length does not match fetches') return result - def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + def _check_elements(self) -> None: """Unimplemented.""" raise NotImplementedError @@ -654,7 +662,7 @@ def setup(self): signature_def.method_name) super(_BatchClassifyDoFn, self).setup() - def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + def _check_elements(self) -> None: if self._data_type != DataType.EXAMPLE: raise ValueError('Classify only supports tf.train.Example') @@ -676,7 +684,7 @@ class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): def setup(self): super(_BatchRegressDoFn, self).setup() - def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + def _check_elements(self) -> None: if self._data_type != DataType.EXAMPLE: raise ValueError('Regress only supports tf.train.Example') @@ -702,14 +710,16 @@ def setup(self): signature_def.method_name) super(_BatchPredictDoFn, self).setup() - def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + def _check_elements(self) -> None: pass def _post_process( self, elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray] ) -> Iterable[prediction_log_pb2.PredictLog]: - input_tensor_alias = self._io_tensor_spec.input_tensor_alias + if not self._io_tensor_spec.input_tensor_types: + raise ValueError('No valid tensor types.') + input_tensor_types = self._io_tensor_spec.input_tensor_types signature_name = self._signatures[0].name batch_size = len(elements) for output_alias, output in outputs.items(): @@ -722,15 +732,16 @@ def _post_process( predict_log_tmpl = prediction_log_pb2.PredictLog() predict_log_tmpl.request.model_spec.signature_name = signature_name predict_log_tmpl.response.model_spec.signature_name = signature_name - input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias] - input_tensor_proto.dtype = tf.string.as_datatype_enum - input_tensor_proto.tensor_shape.dim.add().size = 1 - + for alias, tensor_type in input_tensor_types.items(): + input_tensor_proto = predict_log_tmpl.request.inputs[alias] + input_tensor_proto.dtype = tf.as_dtype(tensor_type).as_datatype_enum + input_tensor_proto.tensor_shape.dim.add().size = 1 + # TODO (Maxine): Fix here result = [] for i in range(batch_size): predict_log = prediction_log_pb2.PredictLog() predict_log.CopyFrom(predict_log_tmpl) - predict_log.request.inputs[input_tensor_alias].string_val.append(elements[i]) + predict_log.request.inputs[list(input_tensor_types)[0]].string_val.append(elements[i]) for output_alias, output in outputs.items(): # Mimic tensor::Split tensor_proto = tf.make_tensor_proto( @@ -748,7 +759,7 @@ def _post_process( class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on multi-head model.""" - def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + def _check_elements(self) -> None: if self._data_type != DataType.EXAMPLE: raise ValueError('Multi-inference only supports tf.train.Example') @@ -963,28 +974,27 @@ def _post_process_regress( def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: """Returns IOTensorSpec from signature.""" - - if len(signature.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - input_tensor_alias = list(signature.inputs.keys())[0] - if 
list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum: + if (len(signature.inputs) == 1 and + list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum): raise ValueError( - 'Input dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, - list(signature.inputs.values())[0].dtype) + 'With 1 input, dtype is expected to be %s, got %s' % + tf.string.as_datatype_enum, + list(signature.inputs.values())[0].dtype) + input_tensor_alias = [signature.inputs.keys()] if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_classify(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_classify(signature)) elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_predict(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_predict(signature)) elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_regress(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_regress(signature)) else: raise ValueError('Signature method %s is not supported' % - signature.method_name) - return _IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) + signature.method_name) + return _IOTensorSpec(input_tensor_alias, input_tensor_names, + input_tensor_types, output_alias_tensor_names) def _signature_pre_process_classify( @@ -997,13 +1007,14 @@ def _signature_pre_process_classify( Returns: A tuple of input tensor name and output alias tensor names. """ - + if len(signature.inputs) != 1: + raise ValueError('Classify signature should have 1 and only 1 inputs') if len(signature.outputs) != 1 and len(signature.outputs) != 2: raise ValueError('Classify signature should have 1 or 2 outputs') if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: raise ValueError('No classification inputs found in SignatureDef: %s' % signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name + input_tensor_names = [signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name] output_alias_tensor_names = {} if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): @@ -1018,7 +1029,7 @@ def _signature_pre_process_classify( if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, {}, output_alias_tensor_names def _signature_pre_process_predict( @@ -1031,12 +1042,14 @@ def _signature_pre_process_predict( Returns: A tuple of input tensor name and output alias tensor names. 
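For orientation, a small self-contained sketch of the three structures this reworked signature pre-processing is meant to produce for a hypothetical two-input predict signature (the tensor and alias names are invented; the real values come from the SavedModel's SignatureDef):

import tensorflow as tf
from tensorflow.core.protobuf import meta_graph_pb2

# A toy predict SignatureDef with two float inputs and a single output.
signature = meta_graph_pb2.SignatureDef(
    method_name=tf.saved_model.PREDICT_METHOD_NAME)
signature.inputs['x'].name = 'serving_default_x:0'
signature.inputs['x'].dtype = tf.float32.as_datatype_enum
signature.inputs['y'].name = 'serving_default_y:0'
signature.inputs['y'].dtype = tf.float32.as_datatype_enum
signature.outputs['output'].name = 'StatefulPartitionedCall:0'

# The same comprehensions the patched _signature_pre_process_predict uses.
input_tensor_names = [value.name for value in signature.inputs.values()]
input_tensor_types = {key: value.dtype for key, value in signature.inputs.items()}
output_alias_tensor_names = {
    key: output.name for key, output in signature.outputs.items()}
# input_tensor_names        -> ['serving_default_x:0', 'serving_default_y:0']
# input_tensor_types        -> {'x': <DT_FLOAT enum>, 'y': <DT_FLOAT enum>}
# output_alias_tensor_names -> {'output': 'StatefulPartitionedCall:0'}
# (proto map iteration order is not guaranteed, so the list order may vary.)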
""" - - input_tensor_name = list(signature.inputs.values())[0].name + input_tensor_names = [value.name for value in signature.inputs.values()] + input_tensor_types = dict([ + (key, value.dtype) for key, value in signature.inputs.items() + ]) output_alias_tensor_names = dict([ - (key, output.name) for key, output in signature.outputs.items() + (key, output.name) for key, output in signature.outputs.items() ]) - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, input_tensor_types, output_alias_tensor_names def _signature_pre_process_regress( @@ -1049,13 +1062,14 @@ def _signature_pre_process_regress( Returns: A tuple of input tensor name and output alias tensor names. """ - + if len(signature.inputs) != 1: + raise ValueError('Regress signature should have 1 and only 1 inputs') if len(signature.outputs) != 1: raise ValueError('Regress signature should have 1 output') if tf.saved_model.REGRESS_INPUTS not in signature.inputs: raise ValueError('No regression inputs found in SignatureDef: %s' % signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name + input_tensor_names = [signature.inputs[tf.saved_model.REGRESS_INPUTS].name] if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: raise ValueError('No regression outputs found in SignatureDef: %s' % signature.outputs) @@ -1063,7 +1077,7 @@ def _signature_pre_process_regress( tf.saved_model.REGRESS_OUTPUTS: signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name } - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, {}, output_alias_tensor_names def _using_in_process_inference( diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index e6554758..cc4494ee 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -92,10 +92,6 @@ def _get_output_data_dir(self, sub_dir=None): path = os.path.join(path, sub_dir) return path - def _prepare_predict_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._predict_examples: - output_file.write(example.SerializeToString()) class RunOfflineInferenceArrowTest(RunInferenceArrowFixture): @@ -188,11 +184,6 @@ def setUp(self): tensor_representations=tfxio_multi.TensorRepresentations()) - def _prepare_multihead_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._multihead_examples: - output_file.write(example.SerializeToString()) - def _build_predict_model(self, model_path): """Exports the dummy sum predict model.""" @@ -295,7 +286,7 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, self.tensor_adapter_config_multihead) + inference_spec_type) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -305,7 +296,7 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, self.tensor_adapter_config) + inference_spec_type) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -493,7 +484,7 @@ def testTelemetry(self): 
pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, self.tensor_adapter_config_multihead)) + inference_spec_type)) run_result = pipeline.run() run_result.wait_until_finish() @@ -551,7 +542,7 @@ def _set_up_pipeline(self, inference_spec_type): self.pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, self.tensor_adapter_config)) + inference_spec_type)) def _run_inference_with_beam(self): self.pipeline_result = self.pipeline.run() From a45716bdb6de577cfbebae79b6fa29cb8f865587 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Fri, 10 Jul 2020 15:38:47 -0400 Subject: [PATCH 14/31] complete case 2 --- tfx_bsl/beam/run_inference_arrow.py | 96 ++++++++++---- tfx_bsl/beam/run_inference_arrow_test.py | 154 ++++++++++++++++------- tfx_bsl/beam/util.py | 59 +++++++++ 3 files changed, 239 insertions(+), 70 deletions(-) create mode 100644 tfx_bsl/beam/util.py diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 9fafadd4..7d502d2c 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -40,6 +40,7 @@ from googleapiclient import discovery from googleapiclient import http import numpy as np +import json import six import tensorflow as tf from tfx_bsl.beam import shared @@ -55,7 +56,7 @@ from tensorflow_serving.apis import inference_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_serving.apis import regression_pb2 - +from tensorflow_model_analysis import model_util # TODO(b/131873699): Remove once 1.x support is dropped. # pylint: disable=g-import-not-at-top @@ -66,6 +67,7 @@ except ImportError: pass + _RECORDBATCH_COLUMN = '__RAW_RECORD__' _DEFAULT_INPUT_KEY = 'examples' _METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' @@ -269,6 +271,7 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None + # For feature inputs, using serialized example for batch size def update( self, elements: List[Union[str, bytes]], latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) @@ -300,7 +303,9 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): if column_name == _RECORDBATCH_COLUMN: column_type = column_array.flatten().type if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)): - raise ValueError('Expected a list of serialized examples in bytes or as a string, got %s' % type(example)) + raise ValueError( + 'Expected a list of serialized examples in bytes or as a string, got %s' % + type(example)) serialized_examples = column_array.flatten().to_pylist() break @@ -315,19 +320,25 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): else: if (self._tensor_adapter_config is None): raise ValueError('Tensor adaptor config is required with a multi-input model') + + input_tensor_names = self._io_tensor_spec.input_tensor_names + input_tensor_alias = self._io_tensor_spec.input_tensor_alias _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) - dict_of_tensors = self._tensor_adapter.ToBatchTensors(elements) - if self._io_tensor_spec: - model_input = model_util.filter_tensors_by_input_names( - dict_of_tensors, self._io_tensor_spec.input_tensor_names) - + dict_of_tensors = _tensor_adapter.ToBatchTensors( + elements, produce_eager_tensors = False) + 
filtered_tensors = model_util.filter_tensors_by_input_names( + dict_of_tensors, input_tensor_alias) + + model_input = {} + for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): + model_input[tensor_name] = filtered_tensors[feature] return serialized_examples, model_input def process(self, elements: pa.RecordBatch) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() serialized_examples, model_input = self._extract_from_recordBatch(elements) outputs = self.run_inference(model_input) - result = self._post_process(serialized_examples, outputs) + result = self._post_process(model_input, outputs) self._metrics_collector.update( serialized_examples, self._clock.get_current_time_in_microseconds() - batch_start_time) @@ -667,12 +678,14 @@ def _check_elements(self) -> None: raise ValueError('Classify only supports tf.train.Example') def _post_process( - self, elements: Sequence[Union[str, bytes]], + self, elements: Mapping[Any, Any], outputs: Mapping[Text, np.ndarray] ) -> Iterable[Tuple[Union[str, bytes], classification_pb2.Classifications]]: + serialized_examples, = elements.values() classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) - return zip(elements, classifications) + self._io_tensor_spec.output_alias_tensor_names, + serialized_examples, outputs) + return zip(serialized_examples, classifications) @beam.typehints.with_input_types(pa.RecordBatch) @@ -689,11 +702,12 @@ def _check_elements(self) -> None: raise ValueError('Regress only supports tf.train.Example') def _post_process( - self, elements: Sequence[Union[str, bytes]], + self, elements: Mapping[Any, Any], outputs: Mapping[Text, np.ndarray] ) -> Iterable[Tuple[Union[str, bytes], regression_pb2.Regression]]: - regressions = _post_process_regress(elements, outputs) - return zip(elements, regressions) + serialized_examples, = elements.values() + regressions = _post_process_regress(serialized_examples, outputs) + return zip(serialized_examples, regressions) @beam.typehints.with_input_types(pa.RecordBatch) @@ -714,14 +728,37 @@ def _check_elements(self) -> None: pass def _post_process( - self, elements: Sequence[Union[str, bytes]], + self, elements: Mapping[Any, Any], outputs: Mapping[Text, np.ndarray] ) -> Iterable[prediction_log_pb2.PredictLog]: if not self._io_tensor_spec.input_tensor_types: raise ValueError('No valid tensor types.') + input_tensor_names = self._io_tensor_spec.input_tensor_names + input_tensor_alias = self._io_tensor_spec.input_tensor_alias input_tensor_types = self._io_tensor_spec.input_tensor_types signature_name = self._signatures[0].name - batch_size = len(elements) + + if len(input_tensor_alias) != len(input_tensor_names): + raise ValueError('Expected to have one name and one alias per tensor') + + include_request = True + if len(input_tensor_names) == 1: + serialized_examples, = elements.values() + batch_size = len(serialized_examples) + process_elements = serialized_examples + else: + # Only include request in the predictLog when the all tensors are dense + # is there a better way to check this? 
+ for tensor_name, tensor in elements.items(): + if not isinstance(tensor, np.ndarray): + include_request = False + break + + if include_request: + batch_size = len(elements[input_tensor_names[0]]) + else: + batch_size = elements[input_tensor_names[0]].shape[0] + for output_alias, output in outputs.items(): if len(output.shape) < 1 or output.shape[0] != batch_size: raise ValueError( @@ -735,13 +772,22 @@ def _post_process( for alias, tensor_type in input_tensor_types.items(): input_tensor_proto = predict_log_tmpl.request.inputs[alias] input_tensor_proto.dtype = tf.as_dtype(tensor_type).as_datatype_enum + # TODO (Maxine): fix dimension? input_tensor_proto.tensor_shape.dim.add().size = 1 - # TODO (Maxine): Fix here + result = [] for i in range(batch_size): predict_log = prediction_log_pb2.PredictLog() predict_log.CopyFrom(predict_log_tmpl) - predict_log.request.inputs[list(input_tensor_types)[0]].string_val.append(elements[i]) + + if include_request: + if len(input_tensor_alias) == 1: + alias = input_tensor_alias[0] + predict_log.request.inputs[alias].string_val.append(process_elements[i]) + else: + for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + predict_log.request.inputs[alias].float_val.append(elements[tensor_name][i]) + for output_alias, output in outputs.items(): # Mimic tensor::Split tensor_proto = tf.make_tensor_proto( @@ -764,23 +810,25 @@ def _check_elements(self) -> None: raise ValueError('Multi-inference only supports tf.train.Example') def _post_process( - self, elements: Sequence[Union[str, bytes]], + self, elements: Mapping[Any, Any], outputs: Mapping[Text, np.ndarray] ) -> Iterable[Tuple[Union[str, bytes], inference_pb2.MultiInferenceResponse]]: classifications = None regressions = None + serialized_examples, = elements.values() for signature in self._signatures: signature_def = signature.signature_def if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + self._io_tensor_spec.output_alias_tensor_names, + serialized_examples, outputs) elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: - regressions = _post_process_regress(elements, outputs) + regressions = _post_process_regress(serialized_examples, outputs) else: raise ValueError('Signature method %s is not supported for ' 'multi inference' % signature_def.method_name) result = [] - for i in range(len(elements)): + for i in range(len(serialized_examples)): response = inference_pb2.MultiInferenceResponse() for signature in self._signatures: signature_def = signature.signature_def @@ -801,7 +849,7 @@ def _post_process( if len(response.results) != len(self._signatures): raise RuntimeError('Multi inference response result length does not ' 'match the number of signatures') - result.append((elements[i], response)) + result.append((serialized_examples[i], response)) return result @@ -980,7 +1028,7 @@ def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: 'With 1 input, dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, list(signature.inputs.values())[0].dtype) - input_tensor_alias = [signature.inputs.keys()] + input_tensor_alias = [alias for alias in signature.inputs.keys()] if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: input_tensor_names, input_tensor_types, output_alias_tensor_names = ( _signature_pre_process_classify(signature)) diff --git a/tfx_bsl/beam/run_inference_arrow_test.py 
b/tfx_bsl/beam/run_inference_arrow_test.py index cc4494ee..61d50362 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -65,21 +65,8 @@ def setUp(self): pa.array([[0]], type=pa.list_(pa.float32())), serialized_example ], - ['input1', '__RAW_RECORD__'] -) + ['input1', '__RAW_RECORD__']) - tfxio = test_util.InMemoryTFExampleRecord( - schema=text_format.Parse( - """ - feature { - name: "input1" - type: FLOAT - } - """, schema_pb2.Schema()), - raw_record_column_name='__RAW_RECORD__') - self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) def _get_output_data_dir(self, sub_dir=None): test_dir = self._testMethodName @@ -123,19 +110,6 @@ def setUp(self): ['input1', '__RAW_RECORD__'] ) - tfxio = test_util.InMemoryTFExampleRecord( - schema=text_format.Parse( - """ - feature { - name: "input1" - type: FLOAT - } - """, schema_pb2.Schema()), - raw_record_column_name='__RAW_RECORD__') - self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) - self._multihead_examples = [ text_format.Parse( @@ -166,22 +140,75 @@ def setUp(self): ['x', 'y', '__RAW_RECORD__'] ) - tfxio_multi = test_util.InMemoryTFExampleRecord( - schema=text_format.Parse( + + self._multi_input_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + serialized_example_multi_input = [] + for example in self._multi_input_examples: + serialized_example_multi_input.append([example.SerializeToString()]) + self.record_batch_multi_input = pa.RecordBatch.from_arrays( + [ + pa.array([[0.8], [0.6]], type=pa.list_(pa.float32())), + pa.array([[0.2], [0.1]], type=pa.list_(pa.float32())), + serialized_example_multi_input + ], + ['x', 'y', '__RAW_RECORD__'] + ) + + tfxio = test_util.InMemoryTFExampleRecord( + schema = text_format.Parse( """ - feature { - name: "x" - type: FLOAT - } - feature { - name: "y" - type: FLOAT + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } """, schema_pb2.Schema()), raw_record_column_name='__RAW_RECORD__') - self.tensor_adapter_config_multihead = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio_multi.ArrowSchema(), - tensor_representations=tfxio_multi.TensorRepresentations()) + self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) def _build_predict_model(self, model_path): @@ -279,8 +306,18 @@ def _build_multihead_model(self, model_path): builder.save() def _run_inference_with_beam(self, example_type, inference_spec_type, - prediction_log_path): - if example_type == 'multi': + prediction_log_path, include_config = False): + if include_config: + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 
"createRecordBatch" >> beam.Create([self.record_batch_multi_input]) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, self.tensor_adapter_config) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + elif example_type == 'multi': with beam.Pipeline() as pipeline: _ = ( pipeline @@ -435,7 +472,6 @@ def testKerasModelPredict(self): inference_model = tf.keras.models.Model(inputs, [output1, output2]) class TestKerasModel(tf.keras.Model): - def __init__(self, inference_model): super(TestKerasModel, self).__init__(name='test_keras_model') self.inference_model = inference_model @@ -445,10 +481,9 @@ def __init__(self, inference_model): ]) def call(self, serialized_example): features = { - 'input1': - tf.compat.v1.io.FixedLenFeature([1], - dtype=tf.float32, - default_value=0) + 'input1': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, + default_value=0) } input_tensor_dict = tf.io.parse_example(serialized_example, features) return inference_model(input_tensor_dict['input1']) @@ -470,8 +505,35 @@ def call(self, serialized_example): saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)), prediction_log_path) + results = self._get_results(prediction_log_path)git st + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + 'multi', + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_config = True) + results = self._get_results(prediction_log_path) self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertEqual(list(result.predict_log.request.inputs), list(['x','y'])) def testTelemetry(self): model_path = self._get_output_data_dir('model') diff --git a/tfx_bsl/beam/util.py b/tfx_bsl/beam/util.py new file mode 100644 index 00000000..4bdf4ba3 --- /dev/null +++ b/tfx_bsl/beam/util.py @@ -0,0 +1,59 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""TensorAdapter.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + + +import numpy as np +import pyarrow as pa +import pandas as pd +import typing +import json +from typing import Dict + + +_RECORDBATCH_COLUMN = '__RAW_RECORD__' + +class JSONAdapter(object): + """A JSONAdapter converts a RecordBatch to a JSON strings. + + The conversion will take in a recordbatch that contains features from a + tf.train.Example and will return a list of dict like string (JSON) where + each item represent + The conversion is determined by both the Arrow schema and the + TensorRepresentations, which must be provided at the initialization time. + Each TensorRepresentation contains the information needed to translates one + or more columns in a RecordBatch of the given Arrow schema into a TF Tensor + or CompositeTensor. They are contained in a Dict whose keys are + the names of the tensors, which will be the keys of the Dict produced by + ToBatchTensors(). + """ + + + def ToJSON(self, record_batch: pa.RecordBatch) -> Dict[Text, Any]: + """Returns a JSON string translated from `record_batch`. + + Args: + record_batch: input RecordBatch. + """ + + df = record_batch.to_pandas() + if _RECORDBATCH_COLUMN in df.columns: + df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) + + return json.loads(df.to_json(orient='records')) \ No newline at end of file From c42ce34647c17057294973fecbe364f8d02f8a8c Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Mon, 13 Jul 2020 12:27:29 -0400 Subject: [PATCH 15/31] fix typo and renamed util to avoid conflict --- tfx_bsl/beam/{util.py => inference_util.py} | 4 ++-- tfx_bsl/beam/run_inference_arrow.py | 5 ++++- tfx_bsl/beam/run_inference_arrow_test.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) rename tfx_bsl/beam/{util.py => inference_util.py} (95%) diff --git a/tfx_bsl/beam/util.py b/tfx_bsl/beam/inference_util.py similarity index 95% rename from tfx_bsl/beam/util.py rename to tfx_bsl/beam/inference_util.py index 4bdf4ba3..53d3c98c 100644 --- a/tfx_bsl/beam/util.py +++ b/tfx_bsl/beam/inference_util.py @@ -24,7 +24,7 @@ import pandas as pd import typing import json -from typing import Dict +from typing import List, Text _RECORDBATCH_COLUMN = '__RAW_RECORD__' @@ -45,7 +45,7 @@ class JSONAdapter(object): """ - def ToJSON(self, record_batch: pa.RecordBatch) -> Dict[Text, Any]: + def ToJSON(self, record_batch: pa.RecordBatch) -> List[Text]: """Returns a JSON string translated from `record_batch`. 
Args: diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 7d502d2c..4ab52d82 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -44,6 +44,7 @@ import six import tensorflow as tf from tfx_bsl.beam import shared +from tfx_bsl.beam import inference_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util from tfx_bsl.tfxio import tensor_adapter @@ -314,6 +315,8 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): model_input = None if self._io_tensor_spec is None: # Case when we are running remote inference + # _jsonAdaptor = inference_util.JSONAdapter() + # model_input = _jsonAdaptor.ToJSON(elements) model_input = serialized_examples elif (len(self._io_tensor_spec.input_tensor_names) == 1): model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} @@ -355,7 +358,7 @@ def run_inference( @abc.abstractmethod def _post_process( - self, elements: List[Union[str, bytes]], outputs: Any) -> Iterable[Any]: + self, elements: Mapping[Any, Any], outputs: Any) -> Iterable[Any]: raise NotImplementedError diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 61d50362..2bea6066 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -505,7 +505,7 @@ def call(self, serialized_example): saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)), prediction_log_path) - results = self._get_results(prediction_log_path)git st + results = self._get_results(prediction_log_path) self.assertLen(results, 2) def testKerasModelPredictMultiTensor(self): From 4e8651c618bcbf30b7955b460b6d069815e025c5 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 15 Jul 2020 12:29:44 -0400 Subject: [PATCH 16/31] add APIs and use recordbatch to json module --- tfx_bsl/beam/inference_util.py | 5 +- tfx_bsl/beam/run_inference_arrow.py | 46 +------------- tfx_bsl/beam/run_inference_arrow_test.py | 18 +++++- tfx_bsl/public/beam/run_inference.py | 80 +++++++++++++++++++++++- 4 files changed, 101 insertions(+), 48 deletions(-) diff --git a/tfx_bsl/beam/inference_util.py b/tfx_bsl/beam/inference_util.py index 53d3c98c..57cfd321 100644 --- a/tfx_bsl/beam/inference_util.py +++ b/tfx_bsl/beam/inference_util.py @@ -22,8 +22,9 @@ import numpy as np import pyarrow as pa import pandas as pd -import typing +import base64 import json +import typing from typing import List, Text @@ -53,6 +54,8 @@ def ToJSON(self, record_batch: pa.RecordBatch) -> List[Text]: """ df = record_batch.to_pandas() + as_binary = df.columns.str.endswith("_bytes") + df.loc[:, as_binary] = df.loc[:, as_binary].applymap(lambda x: {'b64': base64.b64encode(x).decode()}) if _RECORDBATCH_COLUMN in df.columns: df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 4ab52d82..cf667d94 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -19,7 +19,6 @@ from __future__ import print_function import abc -import base64 import collections import os import platform @@ -315,9 +314,8 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): model_input = None if self._io_tensor_spec is None: # Case when we are running remote inference - # _jsonAdaptor = inference_util.JSONAdapter() - # model_input = _jsonAdaptor.ToJSON(elements) - model_input = serialized_examples + _jsonAdaptor = 
inference_util.JSONAdapter() + model_input = _jsonAdaptor.ToJSON(elements) elif (len(self._io_tensor_spec.input_tensor_names) == 1): model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} else: @@ -459,47 +457,9 @@ def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: def _prepare_instances( cls, elements: List[Union[str, bytes]] ) -> Generator[Mapping[Text, Any], None, None]: - for example in elements: - instance = {} - tfexample = tf.train.Example.FromString(example) - - for input_name, feature in tfexample.features.feature.items(): - attr_name = feature.WhichOneof('kind') - if attr_name is None: - continue - attr = getattr(feature, attr_name) - values = cls._parse_feature_content(attr.value, attr_name, - cls._sending_as_binary(input_name)) - # Flatten a sequence if its length is 1 - values = (values[0] if len(values) == 1 else values) - instance[input_name] = values + for instance in elements: yield instance - @staticmethod - def _sending_as_binary(input_name: Text) -> bool: - """Whether data should be sent as binary.""" - return input_name.endswith('_bytes') - - @staticmethod - def _parse_feature_content(values: Sequence[Any], attr_name: Text, - as_binary: bool) -> Sequence[Any]: - """Parse the content of tf.train.Feature object. - - If bytes_list, parse a list of bytes-like objects to a list of strings so - that it would be JSON serializable. - - If float_list or int64_list, do nothing. - - If data should be sent as binary, mark it as binary by replacing it with - a single attribute named 'b64'. - """ - if as_binary: - return [{'b64': base64.b64encode(x).decode()} for x in values] - elif attr_name == 'bytes_list': - return [x.decode() for x in values] - else: - return values - def _check_elements(self) -> None: # TODO(b/151468119): support tf.train.SequenceExample if self._data_type != DataType.EXAMPLE: diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 2bea6066..452b4dab 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -35,6 +35,7 @@ from six.moves import http_client import tensorflow as tf from tfx_bsl.beam import run_inference_arrow +from tfx_bsl.beam import inference_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter @@ -533,7 +534,7 @@ def testKerasModelPredictMultiTensor(self): self.assertLen(results, 2) for result in results: self.assertLen(result.predict_log.request.inputs, 2) - self.assertEqual(list(result.predict_log.request.inputs), list(['x','y'])) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) def testTelemetry(self): model_path = self._get_output_data_dir('model') @@ -689,8 +690,19 @@ def test_request_body_with_binary_data(self): feature { key: "y" value { int64_list { value: [1, 2] }}} } """, tf.train.Example()) - result = list( - run_inference_arrow._RemotePredictDoFn._prepare_instances([example.SerializeToString()])) + + serialized_example_remote = [example.SerializeToString()] + record_batch_remote = pa.RecordBatch.from_arrays( + [ + pa.array(["ASa8asdf"], type=pa.binary()), + pa.array(["JLK7ljk3"], type=pa.utf8()), + pa.array([[1, 2]], type=pa.list_(pa.float32())), + ], + ['x_bytes', 'x', 'y'] + ) + + _jsonAdaptor = inference_util.JSONAdapter() + result = list(_jsonAdaptor.ToJSON(record_batch_remote)) self.assertEqual([ { 'x_bytes': { diff --git a/tfx_bsl/public/beam/run_inference.py 
b/tfx_bsl/public/beam/run_inference.py index d27ab453..93941273 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -21,12 +21,20 @@ import apache_beam as beam import tensorflow as tf +import pyarrow as pa +from typing import Union, Optional +from tfx_bsl.tfxio import test_util +from tfx_bsl.tfxio import tensor_adapter +from tfx_bsl.tfxio import raw_tf_record from tfx_bsl.beam import run_inference +from tfx_bsl.beam import run_inference_arrow from tfx_bsl.public.proto import model_spec_pb2 -from typing import Union from tensorflow_serving.apis import prediction_log_pb2 +from tensorflow_metadata.proto.v0 import schema_pb2 +_RECORDBATCH_COLUMN = '__RAW_RECORD__' + @beam.ptransform_fn @beam.typehints.with_input_types(Union[tf.train.Example, tf.train.SequenceExample]) @@ -60,3 +68,73 @@ def RunInference( # pylint: disable=invalid-name return ( examples | 'RunInferenceImpl' >> run_inference.RunInferenceImpl(inference_spec_type)) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceArrow( # pylint: disable=invalid-name + file_path, + inference_spec_type: model_spec_pb2.InferenceSpecType, + schema: Optional[schema_pb2.Schema] = None +) -> beam.pvalue.PCollection: + """Run inference with a model. + + There are two types of inference you can perform using this PTransform: + 1. In-process inference from a SavedModel instance. Used when + `saved_model_spec` field is set in `inference_spec_type`. + 2. Remote inference by using a service endpoint. Used when + `ai_platform_prediction_model_spec` field is set in + `inference_spec_type`. + + Args: + file_path: File Path for which the examples are stored. + inference_spec_type: Model inference endpoint. + + Returns: + A PCollection containing prediction logs. + """ + with beam.Pipeline(options=PipelineOptions()) as pipeline: + tfxio = test_util.InMemoryTFExampleRecord( + schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + converter = raw_tf_record.RawTfRecordTFXIO( + file_path, raw_record_column_name=_RECORDBATCH_COLUMN) + + return (pipeline + | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() + | "RunInferenceImpl" >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, tensor_adapter_config=tensor_adapter_config)) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceRecord( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None +) -> beam.pvalue.PCollection: + """Run inference with a model. + + There are two types of inference you can perform using this PTransform: + 1. In-process inference from a SavedModel instance. Used when + `saved_model_spec` field is set in `inference_spec_type`. + 2. Remote inference by using a service endpoint. Used when + `ai_platform_prediction_model_spec` field is set in + `inference_spec_type`. + + Args: + examples: A PCollection containing RecordBatch. + inference_spec_type: Model inference endpoint. + + Returns: + A PCollection containing prediction logs. 
+ """ + + return ( + examples | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, tensor_adapter_config)) \ No newline at end of file From 89878b3af8a59b855799b5552ee70b99cf185649 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 16 Jul 2020 13:02:14 -0400 Subject: [PATCH 17/31] fix docstring --- tfx_bsl/beam/inference_util.py | 11 +++-------- tfx_bsl/public/beam/run_inference.py | 4 ++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/tfx_bsl/beam/inference_util.py b/tfx_bsl/beam/inference_util.py index 57cfd321..af3b555c 100644 --- a/tfx_bsl/beam/inference_util.py +++ b/tfx_bsl/beam/inference_util.py @@ -35,14 +35,9 @@ class JSONAdapter(object): The conversion will take in a recordbatch that contains features from a tf.train.Example and will return a list of dict like string (JSON) where - each item represent - The conversion is determined by both the Arrow schema and the - TensorRepresentations, which must be provided at the initialization time. - Each TensorRepresentation contains the information needed to translates one - or more columns in a RecordBatch of the given Arrow schema into a TF Tensor - or CompositeTensor. They are contained in a Dict whose keys are - the names of the tensors, which will be the keys of the Dict produced by - ToBatchTensors(). + each item is a JSON representation of an example. + + - return format: [{ feature1: value1, ... }, ...] """ diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 93941273..80953a27 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -22,7 +22,7 @@ import apache_beam as beam import tensorflow as tf import pyarrow as pa -from typing import Union, Optional +from typing import Union, Text, Optional from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter from tfx_bsl.tfxio import raw_tf_record @@ -75,7 +75,7 @@ def RunInference( # pylint: disable=invalid-name tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceArrow( # pylint: disable=invalid-name - file_path, + file_path: Text, inference_spec_type: model_spec_pb2.InferenceSpecType, schema: Optional[schema_pb2.Schema] = None ) -> beam.pvalue.PCollection: From 0cdf874a9f89bf134b336e44a5320fe23260f67c Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 16 Jul 2020 14:07:55 -0400 Subject: [PATCH 18/31] add missing case --- tfx_bsl/public/beam/run_inference.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 80953a27..672ebf59 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -91,19 +91,22 @@ def RunInferenceArrow( # pylint: disable=invalid-name Args: file_path: File Path for which the examples are stored. inference_spec_type: Model inference endpoint. + Schema [optional]: required for models that requires + multi-tensor inputs. Returns: A PCollection containing prediction logs. 
""" - with beam.Pipeline(options=PipelineOptions()) as pipeline: + converter = raw_tf_record.RawTfRecordTFXIO( + file_path, raw_record_column_name=_RECORDBATCH_COLUMN) + if schema: tfxio = test_util.InMemoryTFExampleRecord( schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = tensor_adapter.TensorAdapterConfig( arrow_schema=tfxio.ArrowSchema(), tensor_representations=tfxio.TensorRepresentations()) - converter = raw_tf_record.RawTfRecordTFXIO( - file_path, raw_record_column_name=_RECORDBATCH_COLUMN) + with beam.Pipeline() as pipeline: return (pipeline | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() | "RunInferenceImpl" >> run_inference_arrow.RunInferenceImpl( @@ -137,4 +140,4 @@ def RunInferenceRecord( # pylint: disable=invalid-name return ( examples | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, tensor_adapter_config)) \ No newline at end of file + inference_spec_type, tensor_adapter_config)) From c7e2237bc9da7158172b4b19e0312c33e2859f93 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Fri, 17 Jul 2020 15:41:59 -0400 Subject: [PATCH 19/31] add model analysis function to util --- tfx_bsl/beam/bsl_util.py | 97 ++++++++++++++++++++++++ tfx_bsl/beam/inference_util.py | 57 -------------- tfx_bsl/beam/run_inference_arrow.py | 8 +- tfx_bsl/beam/run_inference_arrow_test.py | 5 +- tfx_bsl/public/beam/run_inference.py | 2 +- 5 files changed, 103 insertions(+), 66 deletions(-) create mode 100644 tfx_bsl/beam/bsl_util.py delete mode 100644 tfx_bsl/beam/inference_util.py diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py new file mode 100644 index 00000000..25633365 --- /dev/null +++ b/tfx_bsl/beam/bsl_util.py @@ -0,0 +1,97 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""TensorAdapter.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + + +import numpy as np +import pyarrow as pa +import pandas as pd +import base64 +import json +import typing +from typing import Dict, List, Text, Any, Set, Optional + +_RECORDBATCH_COLUMN = '__RAW_RECORD__' +KERAS_INPUT_SUFFIX = '_input' + + +def RecordToJSON(record_batch: pa.RecordBatch) -> List[Text]: + """Returns a JSON string translated from `record_batch`. + + The conversion will take in a recordbatch that contains features from a + tf.train.Example and will return a list of dict like string (JSON) where + each item is a JSON representation of an example. + - return format: [{ feature1: value1, ... }, ...] + + Args: + record_batch: input RecordBatch. 
+ """ + df = record_batch.to_pandas() + as_binary = df.columns.str.endswith("_bytes") + df.loc[:, as_binary] = df.loc[:, as_binary].applymap(lambda x: {'b64': base64.b64encode(x).decode()}) + if _RECORDBATCH_COLUMN in df.columns: + df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) + + return json.loads(df.to_json(orient='records')) + +def find_input_name_in_features(features: Set[Text], + input_name: Text) -> Optional[Text]: + """Maps input name to an entry in features. Returns None if not found.""" + if input_name in features: + return input_name + # Some keras models prepend '_input' to the names of the inputs + # so try under '_input' as well. + elif (input_name.endswith(KERAS_INPUT_SUFFIX) and + input_name[:-len(KERAS_INPUT_SUFFIX)] in features): + return input_name[:-len(KERAS_INPUT_SUFFIX)] + return None + + +def filter_tensors_by_input_names( + tensors: Dict[Text, Any], + input_names: List[Text]) -> Optional[Dict[Text, Any]]: + """Filter tensors by input names. + In case we don't find the specified input name in the tensors and there + exists only one input name, we assume we are feeding serialized examples to + the model and return None. + Args: + tensors: Dict of tensors. + input_names: List of input names. + Returns: + Filtered tensors. + Raises: + RuntimeError: When the specified input tensor cannot be found. + """ + + if not input_names: + return None + result = {} + tensor_keys = set(tensors.keys()) + for name in input_names: + tensor_name = find_input_name_in_features(tensor_keys, name) + if tensor_name is None: + # This should happen only in the case where the model takes serialized + # examples as input. Else raise an exception. + if len(input_names) == 1: + return None + raise RuntimeError( + 'Input tensor not found: {}. Existing keys: {}.'.format( + name, ','.join(tensors.keys()))) + result[name] = tensors[tensor_name] + return result diff --git a/tfx_bsl/beam/inference_util.py b/tfx_bsl/beam/inference_util.py deleted file mode 100644 index af3b555c..00000000 --- a/tfx_bsl/beam/inference_util.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""TensorAdapter.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - - -import numpy as np -import pyarrow as pa -import pandas as pd -import base64 -import json -import typing -from typing import List, Text - - -_RECORDBATCH_COLUMN = '__RAW_RECORD__' - -class JSONAdapter(object): - """A JSONAdapter converts a RecordBatch to a JSON strings. - - The conversion will take in a recordbatch that contains features from a - tf.train.Example and will return a list of dict like string (JSON) where - each item is a JSON representation of an example. - - - return format: [{ feature1: value1, ... }, ...] - """ - - - def ToJSON(self, record_batch: pa.RecordBatch) -> List[Text]: - """Returns a JSON string translated from `record_batch`. - - Args: - record_batch: input RecordBatch. 
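To make the `[{ feature1: value1, ... }]` return format above concrete, here is a self-contained sketch of the conversion RecordToJSON performs, using a toy RecordBatch modelled on the remote-inference test fixture (column names are illustrative; only `*_bytes` columns are base64-wrapped, and the raw-record column is dropped if present):

import base64
import json

import pyarrow as pa

# A toy batch: one binary feature, one string feature, one float-list feature.
record_batch = pa.RecordBatch.from_arrays(
    [
        pa.array([b'ASa8asdf'], type=pa.binary()),
        pa.array(['JLK7ljk3'], type=pa.utf8()),
        pa.array([[1.0, 2.0]], type=pa.list_(pa.float32())),
    ],
    ['x_bytes', 'x', 'y'])

# The same steps RecordToJSON applies.
df = record_batch.to_pandas()
as_binary = df.columns.str.endswith('_bytes')
df.loc[:, as_binary] = df.loc[:, as_binary].applymap(
    lambda value: {'b64': base64.b64encode(value).decode()})
df = df.drop(labels='__RAW_RECORD__', axis=1, errors='ignore')
instances = json.loads(df.to_json(orient='records'))
# instances == [{'x_bytes': {'b64': 'QVNhOGFzZGY='},
#                'x': 'JLK7ljk3',
#                'y': [1.0, 2.0]}]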
- """ - - df = record_batch.to_pandas() - as_binary = df.columns.str.endswith("_bytes") - df.loc[:, as_binary] = df.loc[:, as_binary].applymap(lambda x: {'b64': base64.b64encode(x).decode()}) - if _RECORDBATCH_COLUMN in df.columns: - df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) - - return json.loads(df.to_json(orient='records')) \ No newline at end of file diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index cf667d94..a701db1e 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -43,7 +43,7 @@ import six import tensorflow as tf from tfx_bsl.beam import shared -from tfx_bsl.beam import inference_util +from tfx_bsl.beam import bsl_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util from tfx_bsl.tfxio import tensor_adapter @@ -56,7 +56,6 @@ from tensorflow_serving.apis import inference_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_serving.apis import regression_pb2 -from tensorflow_model_analysis import model_util # TODO(b/131873699): Remove once 1.x support is dropped. # pylint: disable=g-import-not-at-top @@ -314,8 +313,7 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): model_input = None if self._io_tensor_spec is None: # Case when we are running remote inference - _jsonAdaptor = inference_util.JSONAdapter() - model_input = _jsonAdaptor.ToJSON(elements) + model_input = bsl_util.RecordToJSON(elements) elif (len(self._io_tensor_spec.input_tensor_names) == 1): model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} else: @@ -327,7 +325,7 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) dict_of_tensors = _tensor_adapter.ToBatchTensors( elements, produce_eager_tensors = False) - filtered_tensors = model_util.filter_tensors_by_input_names( + filtered_tensors = bsl_util.filter_tensors_by_input_names( dict_of_tensors, input_tensor_alias) model_input = {} diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 452b4dab..c0d9b5c3 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -35,7 +35,7 @@ from six.moves import http_client import tensorflow as tf from tfx_bsl.beam import run_inference_arrow -from tfx_bsl.beam import inference_util +from tfx_bsl.beam import bsl_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter @@ -701,8 +701,7 @@ def test_request_body_with_binary_data(self): ['x_bytes', 'x', 'y'] ) - _jsonAdaptor = inference_util.JSONAdapter() - result = list(_jsonAdaptor.ToJSON(record_batch_remote)) + result = list(bsl_util.RecordToJSON(record_batch_remote)) self.assertEqual([ { 'x_bytes': { diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 672ebf59..6cf85bc2 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -91,7 +91,7 @@ def RunInferenceArrow( # pylint: disable=invalid-name Args: file_path: File Path for which the examples are stored. inference_spec_type: Model inference endpoint. - Schema [optional]: required for models that requires + Schema [optional]: required for models that requires multi-tensor inputs. 
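Relatedly, `bsl_util.filter_tensors_by_input_names` (added above) is what lets the DoFn line up TensorAdapter output with the model's input names, including the `_input` suffix that Keras sometimes appends. A rough sketch of its intended behaviour, assuming the patched tfx_bsl is importable and using plain lists in place of real tensors:

from tfx_bsl.beam import bsl_util

tensors = {'x': [[0.8], [0.6]], 'y': [[0.2], [0.1]]}

# Exact-name match: every requested input is found as-is.
assert bsl_util.filter_tensors_by_input_names(tensors, ['x', 'y']) == tensors

# Keras-style names: 'x_input' falls back to the 'x' feature.
assert bsl_util.filter_tensors_by_input_names(tensors, ['x_input']) == {
    'x_input': [[0.8], [0.6]]}

# A single unknown input name signals a model that takes serialized examples.
assert bsl_util.filter_tensors_by_input_names(tensors, ['examples']) is None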
Returns: From 42fab7e67d6f3dfa69ff9d96384cf3486151afaa Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 22 Jul 2020 12:17:48 -0400 Subject: [PATCH 20/31] update API and create constate file --- tfx_bsl/beam/bsl_constants.py | 6 ++ tfx_bsl/beam/bsl_util.py | 8 +-- tfx_bsl/beam/run_inference_arrow.py | 22 ++----- tfx_bsl/beam/run_inference_arrow_test.py | 15 ++--- tfx_bsl/beam/run_inference_test.py | 1 - tfx_bsl/public/beam/run_inference.py | 74 +++++++++++++++--------- 6 files changed, 70 insertions(+), 56 deletions(-) create mode 100644 tfx_bsl/beam/bsl_constants.py diff --git a/tfx_bsl/beam/bsl_constants.py b/tfx_bsl/beam/bsl_constants.py new file mode 100644 index 00000000..caaba5aa --- /dev/null +++ b/tfx_bsl/beam/bsl_constants.py @@ -0,0 +1,6 @@ +_RECORDBATCH_COLUMN = '__RAW_RECORD__' +KERAS_INPUT_SUFFIX = '_input' + +class DataType(object): + EXAMPLE = 'EXAMPLE' + SEQUENCEEXAMPLE = 'SEQUENCEEXAMPLE' diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index 25633365..ee06384d 100644 --- a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""TensorAdapter.""" +"""TFX-BSL util""" from __future__ import absolute_import from __future__ import division @@ -26,9 +26,8 @@ import json import typing from typing import Dict, List, Text, Any, Set, Optional - -_RECORDBATCH_COLUMN = '__RAW_RECORD__' -KERAS_INPUT_SUFFIX = '_input' +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN +from tfx_bsl.beam.bsl_constants import KERAS_INPUT_SUFFIX def RecordToJSON(record_batch: pa.RecordBatch) -> List[Text]: @@ -50,6 +49,7 @@ def RecordToJSON(record_batch: pa.RecordBatch) -> List[Text]: return json.loads(df.to_json(orient='records')) + def find_input_name_in_features(features: Set[Text], input_name: Text) -> Optional[Text]: """Maps input name to an entry in features. Returns None if not found.""" diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index a701db1e..0ffbc7b5 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -50,6 +50,9 @@ from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ Tuple, Union, Optional +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN +from tfx_bsl.beam.bsl_constants import DataType + # TODO(b/140306674): stop using the internal TF API. from tensorflow.python.saved_model import loader_impl from tensorflow_serving.apis import classification_pb2 @@ -67,7 +70,6 @@ pass -_RECORDBATCH_COLUMN = '__RAW_RECORD__' _DEFAULT_INPUT_KEY = 'examples' _METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' _METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' @@ -90,10 +92,6 @@ class OperationType(object): PREDICTION = 'PREDICTION' MULTIHEAD = 'MULTIHEAD' -class DataType(object): - EXAMPLE = 'EXAMPLE' - SEQUENCEEXAMPLE = 'SEQUENCEEXAMPLE' - @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @@ -101,6 +99,7 @@ class DataType(object): def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, + data_type: DataType, tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None ) -> beam.pvalue.PCollection: """Implementation of RunInference API. 
@@ -120,10 +119,6 @@ def RunInferenceImpl( # pylint: disable=invalid-name """ logging.info('RunInference on model: %s', inference_spec_type) - # TODO (Maxine): either determine data type or take it as an input - # data_type = _get_data_type(examples) - - data_type = DataType.EXAMPLE operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: return examples | 'Classify' >> _Classify( @@ -1150,15 +1145,6 @@ def _get_operation_type( return OperationType.PREDICTION -def _get_data_type(elements: Sequence[Any]) -> Text: - if all(isinstance(elements, tf.train.Example)): - return DataType.EXAMPLE - elif all(isinstance(element, tf.train.SequenceExample)): - return DataType.SEQUENCEEXAMPLE - else: - return DataType.EXAMPLE - - def _get_meta_graph_def(saved_model_pb: _SavedModel, tags: Sequence[Text]) -> _MetaGraphDef: """Returns MetaGraphDef from SavedModel.""" diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index c0d9b5c3..441060e0 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for tfx_bsl.run_inference.""" +"""Tests for tfx_bsl.run_inference_arrow.""" from __future__ import absolute_import from __future__ import division @@ -34,8 +34,9 @@ from googleapiclient import http from six.moves import http_client import tensorflow as tf -from tfx_bsl.beam import run_inference_arrow from tfx_bsl.beam import bsl_util +from tfx_bsl.beam import run_inference_arrow +from tfx_bsl.beam.bsl_constants import DataType from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter @@ -314,7 +315,7 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multi_input]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, self.tensor_adapter_config) + inference_spec_type, DataType.EXAMPLE, self.tensor_adapter_config) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -324,7 +325,7 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type) + inference_spec_type, DataType.EXAMPLE) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -334,7 +335,7 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type) + inference_spec_type, DataType.EXAMPLE) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -547,7 +548,7 @@ def testTelemetry(self): pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type)) + inference_spec_type, DataType.EXAMPLE)) run_result = pipeline.run() run_result.wait_until_finish() @@ -605,7 +606,7 @@ def 
_set_up_pipeline(self, inference_spec_type): self.pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type)) + inference_spec_type, DataType.EXAMPLE)) def _run_inference_with_beam(self): self.pipeline_result = self.pipeline.run() diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 73251603..8601dc30 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -37,7 +37,6 @@ from tfx_bsl.public.proto import model_spec_pb2 from google.protobuf import text_format - from tensorflow_serving.apis import prediction_log_pb2 diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 6cf85bc2..788235e0 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -25,23 +25,25 @@ from typing import Union, Text, Optional from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter -from tfx_bsl.tfxio import raw_tf_record +from tfx_bsl.tfxio import tf_example_record +from tfx_bsl.tfxio import tf_sequence_example_record from tfx_bsl.beam import run_inference from tfx_bsl.beam import run_inference_arrow from tfx_bsl.public.proto import model_spec_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_metadata.proto.v0 import schema_pb2 +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN +from tfx_bsl.beam.bsl_constants import DataType -_RECORDBATCH_COLUMN = '__RAW_RECORD__' @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(tf.train.Example) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInference( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType + inference_spec_type: model_spec_pb2.InferenceSpecType, + schema: Optional[schema_pb2.Schema] = None ) -> beam.pvalue.PCollection: """Run inference with a model. @@ -52,30 +54,43 @@ def RunInference( # pylint: disable=invalid-name `ai_platform_prediction_model_spec` field is set in `inference_spec_type`. - TODO(b/131873699): Add support for the following features: - 1. Bytes as Input. - 2. PTable Input. - 3. Models as SideInput. - Args: examples: A PCollection containing examples. inference_spec_type: Model inference endpoint. + Schema [optional]: required for models that requires + multi-tensor inputs. Returns: A PCollection containing prediction logs. 
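For illustration, the reworked public transform above could be applied roughly like this (a sketch only: the model path and output prefix are placeholders, and a schema would be passed for multi-tensor models):

import apache_beam as beam
import tensorflow as tf
from tensorflow_serving.apis import prediction_log_pb2

from tfx_bsl.public.beam import run_inference
from tfx_bsl.public.proto import model_spec_pb2

# A hypothetical serving SavedModel that accepts serialized tf.Examples.
inference_spec_type = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/tmp/my_saved_model'))

example = tf.train.Example()
example.features.feature['input1'].float_list.value.append(0.5)

with beam.Pipeline() as pipeline:
  _ = (
      pipeline
      | 'CreateExamples' >> beam.Create([example])
      # schema=... would be supplied here for a multi-tensor model.
      | 'RunInference' >> run_inference.RunInference(inference_spec_type)
      | 'WritePredictions' >> beam.io.WriteToTFRecord(
          '/tmp/predictions',
          coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))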
""" - return ( - examples | - 'RunInferenceImpl' >> run_inference.RunInferenceImpl(inference_spec_type)) + data_type = DataType.EXAMPLE + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + + if schema: + tfxio = test_util.InMemoryTFExampleRecord( + schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + return (examples + | 'ParseExamples' >> beam.Map(tf.train.Example.SerializeToString) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, data_type, + tensor_adapter_config=tensor_adapter_config)) @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(tf.train.SequenceExample) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceArrow( # pylint: disable=invalid-name - file_path: Text, +def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, schema: Optional[schema_pb2.Schema] = None ) -> beam.pvalue.PCollection: @@ -89,7 +104,7 @@ def RunInferenceArrow( # pylint: disable=invalid-name `inference_spec_type`. Args: - file_path: File Path for which the examples are stored. + examples: A PCollection containing sequence examples. inference_spec_type: Model inference endpoint. Schema [optional]: required for models that requires multi-tensor inputs. @@ -97,8 +112,14 @@ def RunInferenceArrow( # pylint: disable=invalid-name Returns: A PCollection containing prediction logs. 
""" - converter = raw_tf_record.RawTfRecordTFXIO( - file_path, raw_record_column_name=_RECORDBATCH_COLUMN) + + data_type = DataType.SEQUENCEEXAMPLE + converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + if schema: tfxio = test_util.InMemoryTFExampleRecord( schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) @@ -106,17 +127,18 @@ def RunInferenceArrow( # pylint: disable=invalid-name arrow_schema=tfxio.ArrowSchema(), tensor_representations=tfxio.TensorRepresentations()) - with beam.Pipeline() as pipeline: - return (pipeline - | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() - | "RunInferenceImpl" >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, tensor_adapter_config=tensor_adapter_config)) + return (examples + | 'ParseExamples' >> beam.Map(tf.train.Example.SerializeToString) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, data_type, + tensor_adapter_config=tensor_adapter_config)) @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceRecord( # pylint: disable=invalid-name +def RunInferenceOnRecordBatch( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None From 353604a2c365737b11472034da498e7ed1a871c4 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 23 Jul 2020 18:02:09 -0400 Subject: [PATCH 21/31] include TFXIO module in tests and create and tested APIS --- tfx_bsl/beam/bsl_util.py | 9 +- tfx_bsl/beam/run_inference.py | 522 +++++---- tfx_bsl/beam/run_inference_arrow.py | 1216 -------------------- tfx_bsl/beam/run_inference_arrow_test.py | 718 ------------ tfx_bsl/beam/run_inference_record_batch.py | 57 + tfx_bsl/beam/run_inference_test.py | 217 +++- tfx_bsl/public/beam/run_inference.py | 43 +- 7 files changed, 534 insertions(+), 2248 deletions(-) delete mode 100644 tfx_bsl/beam/run_inference_arrow.py delete mode 100644 tfx_bsl/beam/run_inference_arrow_test.py create mode 100644 tfx_bsl/beam/run_inference_record_batch.py diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index ee06384d..b7d46576 100644 --- a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -41,12 +41,19 @@ def RecordToJSON(record_batch: pa.RecordBatch) -> List[Text]: Args: record_batch: input RecordBatch. 
""" + def flatten(element: List[Any]): + if len(element) == 1: + return element[0] + return element + df = record_batch.to_pandas() as_binary = df.columns.str.endswith("_bytes") - df.loc[:, as_binary] = df.loc[:, as_binary].applymap(lambda x: {'b64': base64.b64encode(x).decode()}) + df.loc[:, as_binary] = df.loc[:, as_binary].applymap( + lambda values: [{'b64': base64.b64encode(x).decode()} for x in values]) if _RECORDBATCH_COLUMN in df.columns: df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) + df = df.applymap(lambda x: flatten(x)) return json.loads(df.to_json(orient='records')) diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index 6987a15d..320ac1da 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -19,7 +19,6 @@ from __future__ import print_function import abc -import base64 import collections import os import platform @@ -32,6 +31,7 @@ from absl import logging import apache_beam as beam +import pyarrow as pa from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.utils import retry @@ -39,13 +39,19 @@ from googleapiclient import discovery from googleapiclient import http import numpy as np +import json import six import tensorflow as tf from tfx_bsl.beam import shared +from tfx_bsl.beam import bsl_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util +from tfx_bsl.tfxio import tensor_adapter from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ - Tuple, Union + Tuple, Union, Optional + +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN +from tfx_bsl.beam.bsl_constants import DataType # TODO(b/140306674): stop using the internal TF API. from tensorflow.python.saved_model import loader_impl @@ -54,7 +60,6 @@ from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_serving.apis import regression_pb2 - # TODO(b/131873699): Remove once 1.x support is dropped. # pylint: disable=g-import-not-at-top try: @@ -64,6 +69,7 @@ except ImportError: pass + _DEFAULT_INPUT_KEY = 'examples' _METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' _METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' @@ -73,19 +79,11 @@ _SECOND_TO_MICROSECOND = 1000000 _REMOTE_INFERENCE_NUM_RETRIES = 5 -# We define the following aliases of Any because the actual types are not -# public. +# We define the following aliases of Any because the actual types are not public. _SignatureDef = Any _MetaGraphDef = Any _SavedModel = Any -_BulkInferResult = Union[prediction_log_pb2.PredictLog, - Tuple[tf.train.Example, regression_pb2.Regression], - Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse], - Tuple[tf.train.Example, - classification_pb2.Classifications]] - # TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 class OperationType(object): @@ -96,18 +94,21 @@ class OperationType(object): @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type: Text, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None ) -> beam.pvalue.PCollection: """Implementation of RunInference API. 
Args: - examples: A PCollection containing examples. + examples: A PCollection containing RecordBatch of serialized examples. inference_spec_type: Model inference endpoint. + tensor_adapter_config [Optional]: Tensor adapter config which specifies how to + obtain tensors from the Arrow RecordBatch. + - Not required when running inference with remote model or 1 input Returns: A PCollection containing prediction logs. @@ -117,39 +118,41 @@ def RunInferenceImpl( # pylint: disable=invalid-name """ logging.info('RunInference on model: %s', inference_spec_type) - batched_examples = examples | 'BatchExamples' >> beam.BatchElements() operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: - return batched_examples | 'Classify' >> _Classify(inference_spec_type) + return examples | 'Classify' >> _Classify( + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.REGRESSION: - return batched_examples | 'Regress' >> _Regress(inference_spec_type) + return examples | 'Regress' >> _Regress( + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.PREDICTION: - return batched_examples | 'Predict' >> _Predict(inference_spec_type) + return examples | 'Predict' >> _Predict( + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.MULTIHEAD: - return (batched_examples - | 'MultiInference' >> _MultiInference(inference_spec_type)) + return (examples | 'MultiInference' >> _MultiInference( + inference_spec_type, data_type, tensor_adapter_config)) else: raise ValueError('Unsupported operation_type %s' % operation_type) _IOTensorSpec = collections.namedtuple( '_IOTensorSpec', - ['input_tensor_alias', 'input_tensor_name', 'output_alias_tensor_names']) + ['input_tensor_alias', 'input_tensor_names', 'input_tensor_types', 'output_alias_tensor_names']) _Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs classify PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Classify' >> beam.ParDo( - _BatchClassifyDoFn(inference_spec_type, shared.Shared())) + | 'Classify' >> beam.ParDo(_BatchClassifyDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildPredictionLogForClassifications' >> beam.ParDo( _BuildPredictionLogForClassificationsDoFn())) else: @@ -157,16 +160,16 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs regress 
PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Regress' >> beam.ParDo( - _BatchRegressDoFn(inference_spec_type, shared.Shared())) + | 'Regress' >> beam.ParDo(_BatchRegressDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildPredictionLogForRegressions' >> beam.ParDo( _BuildPredictionLogForRegressionsDoFn())) else: @@ -174,39 +177,39 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs predict PTransform.""" if _using_in_process_inference(inference_spec_type): predictions = ( pcoll - | 'Predict' >> beam.ParDo( - _BatchPredictDoFn(inference_spec_type, shared.Shared()))) + | 'Predict' >> beam.ParDo(_BatchPredictDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config))) else: predictions = ( pcoll - | 'RemotePredict' >> beam.ParDo( - _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options))) + | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( + inference_spec_type, pcoll.pipeline.options, data_type, tensor_adapter_config))) return (predictions | 'BuildPredictionLogForPredictions' >> beam.ParDo( _BuildPredictionLogForPredictionsDoFn())) @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs multi inference PTransform.""" if _using_in_process_inference(inference_spec_type): return ( pcoll - | 'MultiInference' >> beam.ParDo( - _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())) + | 'MultiInference' >> beam.ParDo(_BatchMultiInferenceDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) else: raise NotImplementedError @@ -261,32 +264,76 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None - def update(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - latency_micro_secs: int) -> None: + # For feature inputs, using serialized example for batch size + def update( + self, elements: List[Union[str, bytes]], latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) self._num_instances.inc(len(elements)) self._inference_counter.inc(len(elements)) self._inference_request_batch_size.update(len(elements)) self._inference_request_batch_byte_size.update( - sum(element.ByteSize() for element in elements)) + sum(len(element) for element in elements)) + - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + def __init__( + self, 
inference_spec_type: model_spec_pb2.InferenceSpecType, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): super(_BaseDoFn, self).__init__() self._clock = None self._metrics_collector = self._MetricsCollector(inference_spec_type) + self._tensor_adapter_config = tensor_adapter_config + self._io_tensor_spec = None # This value may be None if the model is remote def setup(self): self._clock = _ClockFactory.make_clock() - def process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Iterable[Any]: + def _extract_from_recordBatch(self, elements: pa.RecordBatch): + """ + Function to extract the compatible input with model signature + """ + serialized_examples = None + for column_name, column_array in zip(elements.schema.names, elements.columns): + if column_name == _RECORDBATCH_COLUMN: + column_type = column_array.flatten().type + if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)): + raise ValueError( + 'Expected a list of serialized examples in bytes or as a string, got %s' % + type(example)) + serialized_examples = column_array.flatten().to_pylist() + break + + if (serialized_examples is None): + raise ValueError('Raw examples not found.') + + model_input = None + if self._io_tensor_spec is None: # Case when we are running remote inference + model_input = bsl_util.RecordToJSON(elements) + elif (len(self._io_tensor_spec.input_tensor_names) == 1): + model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} + else: + if (self._tensor_adapter_config is None): + raise ValueError('Tensor adaptor config is required with a multi-input model') + + input_tensor_names = self._io_tensor_spec.input_tensor_names + input_tensor_alias = self._io_tensor_spec.input_tensor_alias + _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) + dict_of_tensors = _tensor_adapter.ToBatchTensors( + elements, produce_eager_tensors = False) + filtered_tensors = bsl_util.filter_tensors_by_input_names( + dict_of_tensors, input_tensor_alias) + + model_input = {} + for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): + model_input[tensor_name] = filtered_tensors[feature] + return serialized_examples, model_input + + def process(self, elements: pa.RecordBatch) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - outputs = self.run_inference(elements) - result = self._post_process(elements, outputs) + serialized_examples, model_input = self._extract_from_recordBatch(elements) + outputs = self.run_inference(model_input) + result = self._post_process(model_input, outputs) self._metrics_collector.update( - elements, + serialized_examples, self._clock.get_current_time_in_microseconds() - batch_start_time) return result @@ -295,14 +342,13 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + self, tensors: Mapping[Any, Any] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: raise NotImplementedError @abc.abstractmethod - def _post_process(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - outputs: Any) -> Iterable[Any]: + def _post_process( + self, elements: Mapping[Any, Any], outputs: Any) -> Iterable[Any]: raise NotImplementedError @@ -322,8 +368,7 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): exception.resp.status in (503, 429)) -@beam.typehints.with_input_types(List[Union[tf.train.Example, - 
tf.train.SequenceExample]]) +@beam.typehints.with_input_types(pa.RecordBatch) # Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. # TODO(b/151468119): Consider to re-batch with online serving request size @@ -350,9 +395,11 @@ class _RemotePredictDoFn(_BaseDoFn): """ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: PipelineOptions): - super(_RemotePredictDoFn, self).__init__(inference_spec_type) + pipeline_options: PipelineOptions, data_type: Text, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): + super(_RemotePredictDoFn, self).__init__(inference_spec_type, tensor_adapter_config) self._api_client = None + self._data_type = data_type project_id = ( inference_spec_type.ai_platform_prediction_model_spec.project_id or @@ -387,8 +434,7 @@ def setup(self): num_retries=_REMOTE_INFERENCE_NUM_RETRIES, retry_filter=_retry_on_unavailable_and_resource_error_filter) def _execute_request( - self, - request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: + self, request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: result = request.execute() if 'error' in result: raise ValueError(result['error']) @@ -400,63 +446,26 @@ def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: @classmethod def _prepare_instances( - cls, elements: List[tf.train.Example] + cls, elements: List[Union[str, bytes]] ) -> Generator[Mapping[Text, Any], None, None]: - for example in elements: - # TODO(b/151468119): support tf.train.SequenceExample - if not isinstance(example, tf.train.Example): - raise ValueError('Remote prediction only supports tf.train.Example') - - instance = {} - for input_name, feature in example.features.feature.items(): - attr_name = feature.WhichOneof('kind') - if attr_name is None: - continue - attr = getattr(feature, attr_name) - values = cls._parse_feature_content(attr.value, attr_name, - cls._sending_as_binary(input_name)) - # Flatten a sequence if its length is 1 - values = (values[0] if len(values) == 1 else values) - instance[input_name] = values + for instance in elements: yield instance - @staticmethod - def _sending_as_binary(input_name: Text) -> bool: - """Whether data should be sent as binary.""" - return input_name.endswith('_bytes') - - @staticmethod - def _parse_feature_content(values: Sequence[Any], attr_name: Text, - as_binary: bool) -> List[Any]: - """Parse the content of tf.train.Feature object. - - If bytes_list, parse a list of bytes-like objects to a list of strings so - that it would be JSON serializable. - - If float_list or int64_list, do nothing. - - If data should be sent as binary, mark it as binary by replacing it with - a single attribute named 'b64'. 
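With the per-feature parsing removed here (it now lives in bsl_util.RecordToJSON), _prepare_instances simply yields prebuilt JSON instances; a hedged sketch of the request body handed to the prediction service, with hypothetical feature names and values:

# Illustrative body built by _RemotePredictDoFn.run_inference.
body = {
    'instances': [
        {'age': 42.0, 'image_bytes': {'b64': 'AAE='}},
        {'age': 7.0, 'image_bytes': {'b64': 'AgM='}},
    ]
}
# request = api_client.projects().predict(name=full_model_name, body=body)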
- """ - if as_binary: - return [{'b64': base64.b64encode(x).decode()} for x in values] - elif attr_name == 'bytes_list': - return [x.decode() for x in values] - else: - # Converts proto RepeatedScalarContainer to list so it is - # JSON-serializable - return list(values) + def _check_elements(self) -> None: + # TODO(b/151468119): support tf.train.SequenceExample + if self._data_type != DataType.EXAMPLE: + raise ValueError('Remote prediction only supports tf.train.Example') def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Sequence[Mapping[Text, Any]]: + self, elements: List[Union[str, bytes]]) -> Sequence[Mapping[Text, Any]]: + self._check_elements() body = {'instances': list(self._prepare_instances(elements))} request = self._make_request(body) response = self._execute_request(request) return response['predictions'] def _post_process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]], + self, elements: List[Union[str, bytes]], outputs: Sequence[Mapping[Text, Any]] ) -> Iterable[prediction_log_pb2.PredictLog]: result = [] @@ -478,6 +487,7 @@ def _post_process( # is fixed. # TODO(b/143484017): Add batch_size back off in the case there are functional # reasons large batch sizes cannot be handled. + class _BaseBatchSavedModelDoFn(_BaseDoFn): """A DoFn that runs in-process batch inference with a model. @@ -489,21 +499,20 @@ class _BaseBatchSavedModelDoFn(_BaseDoFn): """ def __init__( - self, - inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, - ): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) + self, inference_spec_type: model_spec_pb2.InferenceSpecType, + shared_model_handle: shared.Shared, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, tensor_adapter_config) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle self._model_path = inference_spec_type.saved_model_spec.model_path self._tags = None self._signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) self._session = None - self._io_tensor_spec = None + self._data_type = data_type def setup(self): """Load the model. @@ -551,69 +560,61 @@ def _pre_process(self) -> _IOTensorSpec: # Pre process functions will validate for each signature. 
io_tensor_specs = [] for signature in self._signatures: - if len(signature.signature_def.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - if (list(signature.signature_def.inputs.values())[0].dtype != + if (len(signature.signature_def.inputs) == 1 and + list(signature.signature_def.inputs.values())[0].dtype != tf.string.as_datatype_enum): raise ValueError( - 'Input dtype is expected to be %s, got %s' % + 'With 1 input, dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, list(signature.signature_def.inputs.values())[0].dtype) io_tensor_specs.append(_signature_pre_process(signature.signature_def)) - input_tensor_name = '' - input_tensor_alias = '' + input_tensor_names = [] + input_tensor_alias = [] + input_tensor_types = {} output_alias_tensor_names = {} for io_tensor_spec in io_tensor_specs: - if not input_tensor_name: - input_tensor_name = io_tensor_spec.input_tensor_name + if not input_tensor_names: + input_tensor_names = io_tensor_spec.input_tensor_names input_tensor_alias = io_tensor_spec.input_tensor_alias - elif input_tensor_name != io_tensor_spec.input_tensor_name: + elif input_tensor_names != io_tensor_spec.input_tensor_names: raise ValueError('Input tensor must be the same for all Signatures.') - for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items( - ): + for alias, tensor_type in io_tensor_spec.input_tensor_types.items(): + input_tensor_types[alias] = tensor_type + for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items(): output_alias_tensor_names[alias] = tensor_name - if (not output_alias_tensor_names or not input_tensor_name or + if (not output_alias_tensor_names or not input_tensor_names or not input_tensor_alias): raise ValueError('No valid fetch tensors or feed tensors.') - return _IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) + return _IOTensorSpec(input_tensor_alias, input_tensor_names, + input_tensor_types, output_alias_tensor_names) def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: - self._check_elements(elements) - outputs = self._run_tf_operations(elements) + self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: + self._check_elements() + outputs = self._run_tf_operations(tensors) return outputs def _run_tf_operations( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: - input_values = [] - for element in elements: - input_values.append(element.SerializeToString()) + self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: result = self._session.run( - self._io_tensor_spec.output_alias_tensor_names, - feed_dict={self._io_tensor_spec.input_tensor_name: input_values}) + self._io_tensor_spec.output_alias_tensor_names, feed_dict=tensors) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): raise RuntimeError('Output length does not match fetches') return result - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: + def _check_elements(self) -> None: """Unimplemented.""" raise NotImplementedError -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) 
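run_inference now receives a ready feed_dict instead of a list of protos; a sketch of the two shapes it may be given (single serialized-string input vs. dense multi-tensor input), with hypothetical tensor names:

import numpy as np

# Single string input: the serialized examples are fed directly.
feed_dict_single = {
    'input_example_tensor:0': [b'<serialized Example>', b'<serialized Example>']}

# Multi-tensor input: dense values produced by the TensorAdapter, keyed by the
# tensor names resolved from the signature.
feed_dict_multi = {
    'serving_default_age:0': np.array([[42.0], [7.0]]),
    'serving_default_weight:0': np.array([[61.5], [80.2]])}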
+@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on classification model.""" @@ -626,47 +627,44 @@ def setup(self): signature_def.method_name) super(_BatchClassifyDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): + def _check_elements(self) -> None: + if self._data_type != DataType.EXAMPLE: raise ValueError('Classify only supports tf.train.Example') def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, classification_pb2.Classifications]]: + self, elements: Mapping[Any, Any], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], classification_pb2.Classifications]]: + serialized_examples, = elements.values() classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) - return zip(elements, classifications) + self._io_tensor_spec.output_alias_tensor_names, + serialized_examples, outputs) + return zip(serialized_examples, classifications) -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - regression_pb2.Regression]) +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + regression_pb2.Regression]) class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on regression model.""" def setup(self): super(_BatchRegressDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): + def _check_elements(self) -> None: + if self._data_type != DataType.EXAMPLE: raise ValueError('Regress only supports tf.train.Example') def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, regression_pb2.Regression]]: - regressions = _post_process_regress(elements, outputs) - return zip(elements, regressions) + self, elements: Mapping[Any, Any], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], regression_pb2.Regression]]: + serialized_examples, = elements.values() + regressions = _post_process_regress(serialized_examples, outputs) + return zip(serialized_examples, regressions) -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on predict model.""" @@ -680,19 +678,39 @@ def setup(self): signature_def.method_name) super(_BatchPredictDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: + def _check_elements(self) -> None: pass def _post_process( - self, elements: Union[Sequence[tf.train.Example], - Sequence[tf.train.SequenceExample]], + self, elements: Mapping[Any, Any], outputs: Mapping[Text, np.ndarray] ) -> Iterable[prediction_log_pb2.PredictLog]: + if not 
self._io_tensor_spec.input_tensor_types: + raise ValueError('No valid tensor types.') + input_tensor_names = self._io_tensor_spec.input_tensor_names input_tensor_alias = self._io_tensor_spec.input_tensor_alias + input_tensor_types = self._io_tensor_spec.input_tensor_types signature_name = self._signatures[0].name - batch_size = len(elements) + + if len(input_tensor_alias) != len(input_tensor_names): + raise ValueError('Expected to have one name and one alias per tensor') + + include_request = True + if len(input_tensor_names) == 1: + serialized_examples, = elements.values() + batch_size = len(serialized_examples) + process_elements = serialized_examples + else: + for tensor_name, tensor in elements.items(): + if not isinstance(tensor, np.ndarray): + include_request = False + break + + if include_request: + batch_size = len(elements[input_tensor_names[0]]) + else: + batch_size = elements[input_tensor_names[0]].shape[0] + for output_alias, output in outputs.items(): if len(output.shape) < 1 or output.shape[0] != batch_size: raise ValueError( @@ -703,16 +721,25 @@ def _post_process( predict_log_tmpl = prediction_log_pb2.PredictLog() predict_log_tmpl.request.model_spec.signature_name = signature_name predict_log_tmpl.response.model_spec.signature_name = signature_name - input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias] - input_tensor_proto.dtype = tf.string.as_datatype_enum - input_tensor_proto.tensor_shape.dim.add().size = 1 + for alias, tensor_type in input_tensor_types.items(): + input_tensor_proto = predict_log_tmpl.request.inputs[alias] + input_tensor_proto.dtype = tf.as_dtype(tensor_type).as_datatype_enum + # TODO (Maxine): fix dimension? + input_tensor_proto.tensor_shape.dim.add().size = 1 result = [] for i in range(batch_size): predict_log = prediction_log_pb2.PredictLog() predict_log.CopyFrom(predict_log_tmpl) - predict_log.request.inputs[input_tensor_alias].string_val.append( - elements[i].SerializeToString()) + + if include_request: + if len(input_tensor_alias) == 1: + alias = input_tensor_alias[0] + predict_log.request.inputs[alias].string_val.append(process_elements[i]) + else: + for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + predict_log.request.inputs[alias].float_val.append(elements[tensor_name][i]) + for output_alias, output in outputs.items(): # Mimic tensor::Split tensor_proto = tf.make_tensor_proto( @@ -724,37 +751,36 @@ def _post_process( return result -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on multi-head model.""" - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Multi inference only supports tf.train.Example') + def _check_elements(self) -> None: + if self._data_type != DataType.EXAMPLE: + raise ValueError('Multi-inference only supports tf.train.Example') def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]]: + self, elements: Mapping[Any, Any], + outputs: 
Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], inference_pb2.MultiInferenceResponse]]: classifications = None regressions = None + serialized_examples, = elements.values() for signature in self._signatures: signature_def = signature.signature_def if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + self._io_tensor_spec.output_alias_tensor_names, + serialized_examples, outputs) elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: - regressions = _post_process_regress(elements, outputs) + regressions = _post_process_regress(serialized_examples, outputs) else: raise ValueError('Signature method %s is not supported for ' 'multi inference' % signature_def.method_name) result = [] - for i in range(len(elements)): + for i in range(len(serialized_examples)): response = inference_pb2.MultiInferenceResponse() for signature in self._signatures: signature_def = signature.signature_def @@ -775,41 +801,42 @@ def _post_process( if len(response.results) != len(self._signatures): raise RuntimeError('Multi inference response result length does not ' 'match the number of signatures') - result.append((elements[i], response)) + result.append((serialized_examples[i], response)) return result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) + +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + classification_pb2.Classifications]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): """A DoFn that builds prediction log from classifications.""" def process( - self, element: Tuple[tf.train.Example, classification_pb2.Classifications] + self, element: Tuple[Union[str, bytes], classification_pb2.Classifications] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, classifications) = element result = prediction_log_pb2.PredictionLog() result.classify_log.request.input.example_list.examples.add().CopyFrom( - train_example) + tf.train.Example.FromString(train_example)) result.classify_log.response.result.classifications.add().CopyFrom( classifications) yield result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - regression_pb2.Regression]) +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + regression_pb2.Regression]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): """A DoFn that builds prediction log from regressions.""" def process( - self, element: Tuple[tf.train.Example, regression_pb2.Regression] + self, element: Tuple[Union[str, bytes], regression_pb2.Regression] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, regression) = element result = prediction_log_pb2.PredictionLog() result.regress_log.request.input.example_list.examples.add().CopyFrom( - train_example) + tf.train.Example.FromString(train_example)) result.regress_log.response.result.regressions.add().CopyFrom(regression) yield result @@ -827,28 +854,28 @@ def process( yield result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildMultiInferenceLogDoFn(beam.DoFn): """A DoFn that builds prediction log from multi-head 
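The log builders above now receive serialized example bytes and restore the proto with tf.train.Example.FromString; a small round-trip sketch:

import tensorflow as tf

example = tf.train.Example()
example.features.feature['age'].float_list.value.append(42.0)
serialized = example.SerializeToString()
assert tf.train.Example.FromString(serialized) == example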
inference result.""" def process( - self, element: Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse] + self, element: Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, multi_inference_response) = element result = prediction_log_pb2.PredictionLog() (result.multi_inference_log.request.input.example_list.examples.add() - .CopyFrom(train_example)) + .CopyFrom(tf.train.Example.FromString(train_example))) result.multi_inference_log.response.CopyFrom(multi_inference_response) yield result def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], - elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] -) -> Sequence[classification_pb2.Classifications]: + elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray] + ) -> Sequence[classification_pb2.Classifications]: """Returns classifications from inference output.""" # This is to avoid error "The truth value of an array with @@ -908,7 +935,7 @@ def _post_process_classify( def _post_process_regress( - elements: Sequence[tf.train.Example], + elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: """Returns regressions from inference output.""" @@ -945,28 +972,27 @@ def _post_process_regress( def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: """Returns IOTensorSpec from signature.""" - - if len(signature.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - input_tensor_alias = list(signature.inputs.keys())[0] - if list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum: + if (len(signature.inputs) == 1 and + list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum): raise ValueError( - 'Input dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, - list(signature.inputs.values())[0].dtype) + 'With 1 input, dtype is expected to be %s, got %s' % + tf.string.as_datatype_enum, + list(signature.inputs.values())[0].dtype) + input_tensor_alias = [alias for alias in signature.inputs.keys()] if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_classify(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_classify(signature)) elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_predict(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_predict(signature)) elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_regress(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_regress(signature)) else: raise ValueError('Signature method %s is not supported' % - signature.method_name) - return _IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) + signature.method_name) + return _IOTensorSpec(input_tensor_alias, input_tensor_names, + input_tensor_types, output_alias_tensor_names) def _signature_pre_process_classify( @@ -979,13 +1005,14 @@ def _signature_pre_process_classify( Returns: A tuple of input tensor name and output alias tensor names. 
""" - + if len(signature.inputs) != 1: + raise ValueError('Classify signature should have 1 and only 1 inputs') if len(signature.outputs) != 1 and len(signature.outputs) != 2: raise ValueError('Classify signature should have 1 or 2 outputs') if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: raise ValueError('No classification inputs found in SignatureDef: %s' % signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name + input_tensor_names = [signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name] output_alias_tensor_names = {} if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): @@ -1000,7 +1027,7 @@ def _signature_pre_process_classify( if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, {}, output_alias_tensor_names def _signature_pre_process_predict( @@ -1013,12 +1040,14 @@ def _signature_pre_process_predict( Returns: A tuple of input tensor name and output alias tensor names. """ - - input_tensor_name = list(signature.inputs.values())[0].name + input_tensor_names = [value.name for value in signature.inputs.values()] + input_tensor_types = dict([ + (key, value.dtype) for key, value in signature.inputs.items() + ]) output_alias_tensor_names = dict([ - (key, output.name) for key, output in signature.outputs.items() + (key, output.name) for key, output in signature.outputs.items() ]) - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, input_tensor_types, output_alias_tensor_names def _signature_pre_process_regress( @@ -1031,13 +1060,14 @@ def _signature_pre_process_regress( Returns: A tuple of input tensor name and output alias tensor names. """ - + if len(signature.inputs) != 1: + raise ValueError('Regress signature should have 1 and only 1 inputs') if len(signature.outputs) != 1: raise ValueError('Regress signature should have 1 output') if tf.saved_model.REGRESS_INPUTS not in signature.inputs: raise ValueError('No regression inputs found in SignatureDef: %s' % signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name + input_tensor_names = [signature.inputs[tf.saved_model.REGRESS_INPUTS].name] if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: raise ValueError('No regression outputs found in SignatureDef: %s' % signature.outputs) @@ -1045,7 +1075,7 @@ def _signature_pre_process_regress( tf.saved_model.REGRESS_OUTPUTS: signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name } - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, {}, output_alias_tensor_names def _using_in_process_inference( diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py deleted file mode 100644 index 0ffbc7b5..00000000 --- a/tfx_bsl/beam/run_inference_arrow.py +++ /dev/null @@ -1,1216 +0,0 @@ -# Copyright 2019 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Run batch inference on saved model.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import abc -import collections -import os -import platform -import sys -import time -try: - import resource -except ImportError: - resource = None - -from absl import logging -import apache_beam as beam -import pyarrow as pa -from apache_beam.options.pipeline_options import GoogleCloudOptions -from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.utils import retry -import googleapiclient -from googleapiclient import discovery -from googleapiclient import http -import numpy as np -import json -import six -import tensorflow as tf -from tfx_bsl.beam import shared -from tfx_bsl.beam import bsl_util -from tfx_bsl.public.proto import model_spec_pb2 -from tfx_bsl.telemetry import util -from tfx_bsl.tfxio import tensor_adapter -from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ - Tuple, Union, Optional - -from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN -from tfx_bsl.beam.bsl_constants import DataType - -# TODO(b/140306674): stop using the internal TF API. -from tensorflow.python.saved_model import loader_impl -from tensorflow_serving.apis import classification_pb2 -from tensorflow_serving.apis import inference_pb2 -from tensorflow_serving.apis import prediction_log_pb2 -from tensorflow_serving.apis import regression_pb2 - -# TODO(b/131873699): Remove once 1.x support is dropped. -# pylint: disable=g-import-not-at-top -try: - # We need to import this in order to register all quantiles ops, even though - # it's not directly used. - from tensorflow.contrib.boosted_trees.python.ops import quantile_ops as _ # pylint: disable=unused-import -except ImportError: - pass - - -_DEFAULT_INPUT_KEY = 'examples' -_METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' -_METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' -_METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION = 'CloudAIPlatformPrediction' -_MILLISECOND_TO_MICROSECOND = 1000 -_MICROSECOND_TO_NANOSECOND = 1000 -_SECOND_TO_MICROSECOND = 1000000 -_REMOTE_INFERENCE_NUM_RETRIES = 5 - -# We define the following aliases of Any because the actual types are not public. -_SignatureDef = Any -_MetaGraphDef = Any -_SavedModel = Any - - -# TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 -class OperationType(object): - CLASSIFICATION = 'CLASSIFICATION' - REGRESSION = 'REGRESSION' - PREDICTION = 'PREDICTION' - MULTIHEAD = 'MULTIHEAD' - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceImpl( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType, - data_type: DataType, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None -) -> beam.pvalue.PCollection: - """Implementation of RunInference API. - - Args: - examples: A PCollection containing RecordBatch of serialized examples. 
- inference_spec_type: Model inference endpoint. - tensor_adapter_config [Optional]: Tensor adapter config which specifies how to - obtain tensors from the Arrow RecordBatch. - - Not required when running inference with remote model or 1 input - - Returns: - A PCollection containing prediction logs. - - Raises: - ValueError; when operation is not supported. - """ - logging.info('RunInference on model: %s', inference_spec_type) - - operation_type = _get_operation_type(inference_spec_type) - if operation_type == OperationType.CLASSIFICATION: - return examples | 'Classify' >> _Classify( - inference_spec_type, data_type, tensor_adapter_config) - elif operation_type == OperationType.REGRESSION: - return examples | 'Regress' >> _Regress( - inference_spec_type, data_type, tensor_adapter_config) - elif operation_type == OperationType.PREDICTION: - return examples | 'Predict' >> _Predict( - inference_spec_type, data_type, tensor_adapter_config) - elif operation_type == OperationType.MULTIHEAD: - return (examples | 'MultiInference' >> _MultiInference( - inference_spec_type, data_type, tensor_adapter_config)) - else: - raise ValueError('Unsupported operation_type %s' % operation_type) - - -_IOTensorSpec = collections.namedtuple( - '_IOTensorSpec', - ['input_tensor_alias', 'input_tensor_names', 'input_tensor_types', 'output_alias_tensor_names']) - -_Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - """Performs classify PTransform.""" - if _using_in_process_inference(inference_spec_type): - return (pcoll - | 'Classify' >> beam.ParDo(_BatchClassifyDoFn( - inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) - | 'BuildPredictionLogForClassifications' >> beam.ParDo( - _BuildPredictionLogForClassificationsDoFn())) - else: - raise NotImplementedError - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - """Performs regress PTransform.""" - if _using_in_process_inference(inference_spec_type): - return (pcoll - | 'Regress' >> beam.ParDo(_BatchRegressDoFn( - inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) - | 'BuildPredictionLogForRegressions' >> beam.ParDo( - _BuildPredictionLogForRegressionsDoFn())) - else: - raise NotImplementedError - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - """Performs predict PTransform.""" - if _using_in_process_inference(inference_spec_type): - predictions = ( - pcoll - | 'Predict' >> beam.ParDo(_BatchPredictDoFn( - inference_spec_type, shared.Shared(), data_type, tensor_adapter_config))) - else: - predictions = ( - pcoll 
- | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( - inference_spec_type, pcoll.pipeline.options, data_type, tensor_adapter_config))) - return (predictions - | 'BuildPredictionLogForPredictions' >> beam.ParDo( - _BuildPredictionLogForPredictionsDoFn())) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - """Performs multi inference PTransform.""" - if _using_in_process_inference(inference_spec_type): - return ( - pcoll - | 'MultiInference' >> beam.ParDo(_BatchMultiInferenceDoFn( - inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) - | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) - else: - raise NotImplementedError - - -@six.add_metaclass(abc.ABCMeta) -class _BaseDoFn(beam.DoFn): - """Base DoFn that performs bulk inference.""" - - class _MetricsCollector(object): - """A collector for beam metrics.""" - - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): - operation_type = _get_operation_type(inference_spec_type) - proximity_descriptor = ( - _METRICS_DESCRIPTOR_IN_PROCESS - if _using_in_process_inference(inference_spec_type) else - _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) - namespace = util.MakeTfxNamespace( - [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor]) - - # Metrics - self._inference_counter = beam.metrics.Metrics.counter( - namespace, 'num_inferences') - self._num_instances = beam.metrics.Metrics.counter( - namespace, 'num_instances') - self._inference_request_batch_size = beam.metrics.Metrics.distribution( - namespace, 'inference_request_batch_size') - self._inference_request_batch_byte_size = ( - beam.metrics.Metrics.distribution( - namespace, 'inference_request_batch_byte_size')) - # Batch inference latency in microseconds. - self._inference_batch_latency_micro_secs = ( - beam.metrics.Metrics.distribution( - namespace, 'inference_batch_latency_micro_secs')) - self._model_byte_size = beam.metrics.Metrics.distribution( - namespace, 'model_byte_size') - # Model load latency in milliseconds. 
- self._load_model_latency_milli_secs = beam.metrics.Metrics.distribution( - namespace, 'load_model_latency_milli_secs') - - # Metrics cache - self.load_model_latency_milli_secs_cache = None - self.model_byte_size_cache = None - - def update_metrics_with_cache(self): - if self.load_model_latency_milli_secs_cache is not None: - self._load_model_latency_milli_secs.update( - self.load_model_latency_milli_secs_cache) - self.load_model_latency_milli_secs_cache = None - if self.model_byte_size_cache is not None: - self._model_byte_size.update(self.model_byte_size_cache) - self.model_byte_size_cache = None - - # For feature inputs, using serialized example for batch size - def update( - self, elements: List[Union[str, bytes]], latency_micro_secs: int) -> None: - self._inference_batch_latency_micro_secs.update(latency_micro_secs) - self._num_instances.inc(len(elements)) - self._inference_counter.inc(len(elements)) - self._inference_request_batch_size.update(len(elements)) - self._inference_request_batch_byte_size.update( - sum(len(element) for element in elements)) - - - def __init__( - self, inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - super(_BaseDoFn, self).__init__() - self._clock = None - self._metrics_collector = self._MetricsCollector(inference_spec_type) - self._tensor_adapter_config = tensor_adapter_config - self._io_tensor_spec = None # This value may be None if the model is remote - - def setup(self): - self._clock = _ClockFactory.make_clock() - - def _extract_from_recordBatch(self, elements: pa.RecordBatch): - """ - Function to extract the compatible input with model signature - """ - serialized_examples = None - for column_name, column_array in zip(elements.schema.names, elements.columns): - if column_name == _RECORDBATCH_COLUMN: - column_type = column_array.flatten().type - if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)): - raise ValueError( - 'Expected a list of serialized examples in bytes or as a string, got %s' % - type(example)) - serialized_examples = column_array.flatten().to_pylist() - break - - if (serialized_examples is None): - raise ValueError('Raw examples not found.') - - model_input = None - if self._io_tensor_spec is None: # Case when we are running remote inference - model_input = bsl_util.RecordToJSON(elements) - elif (len(self._io_tensor_spec.input_tensor_names) == 1): - model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} - else: - if (self._tensor_adapter_config is None): - raise ValueError('Tensor adaptor config is required with a multi-input model') - - input_tensor_names = self._io_tensor_spec.input_tensor_names - input_tensor_alias = self._io_tensor_spec.input_tensor_alias - _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) - dict_of_tensors = _tensor_adapter.ToBatchTensors( - elements, produce_eager_tensors = False) - filtered_tensors = bsl_util.filter_tensors_by_input_names( - dict_of_tensors, input_tensor_alias) - - model_input = {} - for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): - model_input[tensor_name] = filtered_tensors[feature] - return serialized_examples, model_input - - def process(self, elements: pa.RecordBatch) -> Iterable[Any]: - batch_start_time = self._clock.get_current_time_in_microseconds() - serialized_examples, model_input = self._extract_from_recordBatch(elements) - outputs = self.run_inference(model_input) - result = 
self._post_process(model_input, outputs) - self._metrics_collector.update( - serialized_examples, - self._clock.get_current_time_in_microseconds() - batch_start_time) - return result - - def finish_bundle(self): - self._metrics_collector.update_metrics_with_cache() - - @abc.abstractmethod - def run_inference( - self, tensors: Mapping[Any, Any] - ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: - raise NotImplementedError - - @abc.abstractmethod - def _post_process( - self, elements: Mapping[Any, Any], outputs: Any) -> Iterable[Any]: - raise NotImplementedError - - -def _retry_on_unavailable_and_resource_error_filter(exception: Exception): - """Retries for HttpError. - - Retries if error is unavailable (503) or resource exhausted (429). - Resource exhausted may happen when qps or bandwidth exceeds quota. - - Args: - exception: Exception from inference http request execution. - Returns: - A boolean of whether retry. - """ - - return (isinstance(exception, googleapiclient.errors.HttpError) and - exception.resp.status in (503, 429)) - - -@beam.typehints.with_input_types(pa.RecordBatch) -# Using output typehints triggers NotImplementedError('BEAM-2717)' on -# streaming mode on Dataflow runner. -# TODO(b/151468119): Consider to re-batch with online serving request size -# limit, and re-batch with RPC failures(InvalidArgument) regarding request size. -# @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) -class _RemotePredictDoFn(_BaseDoFn): - """A DoFn that performs predictions from a cloud-hosted TensorFlow model. - - Supports both batch and streaming processing modes. - NOTE: Does not work on DirectRunner for streaming jobs [BEAM-7885]. - - In order to request predictions, you must deploy your trained model to AI - Platform Prediction in the TensorFlow SavedModel format. See - [Exporting a SavedModel for prediction] - (https://cloud.google.com/ai-platform/prediction/docs/exporting-savedmodel-for-prediction) - for more details. - - To send binary data, you have to make sure that the name of an input ends in - `_bytes`. - - NOTE: The returned `PredictLog` instances do not have `PredictRequest` part - filled. The reason is that it is difficult to determine the input tensor name - without having access to cloud-hosted model's signatures. - """ - - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: PipelineOptions, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - super(_RemotePredictDoFn, self).__init__(inference_spec_type, tensor_adapter_config) - self._api_client = None - self._data_type = data_type - - project_id = ( - inference_spec_type.ai_platform_prediction_model_spec.project_id or - pipeline_options.view_as(GoogleCloudOptions).project) - if not project_id: - raise ValueError('Either a non-empty project id or project flag in ' - ' beam pipeline options needs be provided.') - - model_name = ( - inference_spec_type.ai_platform_prediction_model_spec.model_name) - if not model_name: - raise ValueError('A non-empty model name must be provided.') - - version_name = ( - inference_spec_type.ai_platform_prediction_model_spec.version_name) - name_spec = 'projects/{}/models/{}' - # If version is not specified, the default version for a model is used. 
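# Illustration only (hypothetical project/model/version names): the fully
# qualified resource name that _RemotePredictDoFn builds for the AI Platform
# predict call, with and without an explicit version.
name_spec = 'projects/{}/models/{}'
print(name_spec.format('my-project', 'my-model'))
# -> projects/my-project/models/my-model  (the model's default version is used)
print((name_spec + '/versions/{}').format('my-project', 'my-model', 'v1'))
# -> projects/my-project/models/my-model/versions/v1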
- if version_name: - name_spec += '/versions/{}' - self._full_model_name = name_spec.format(project_id, model_name, - version_name) - - def setup(self): - super(_RemotePredictDoFn, self).setup() - # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to - # user agent once custom header is supported in googleapiclient. - self._api_client = discovery.build('ml', 'v1') - - # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. - @retry.with_exponential_backoff( - initial_delay_secs=1.0, - num_retries=_REMOTE_INFERENCE_NUM_RETRIES, - retry_filter=_retry_on_unavailable_and_resource_error_filter) - def _execute_request( - self, - request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: - result = request.execute() - if 'error' in result: - raise ValueError(result['error']) - return result - - def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: - return self._api_client.projects().predict( - name=self._full_model_name, body=body) - - @classmethod - def _prepare_instances( - cls, elements: List[Union[str, bytes]] - ) -> Generator[Mapping[Text, Any], None, None]: - for instance in elements: - yield instance - - def _check_elements(self) -> None: - # TODO(b/151468119): support tf.train.SequenceExample - if self._data_type != DataType.EXAMPLE: - raise ValueError('Remote prediction only supports tf.train.Example') - - def run_inference( - self, elements: List[Union[str, bytes]]) -> Sequence[Mapping[Text, Any]]: - self._check_elements() - body = {'instances': list(self._prepare_instances(elements))} - request = self._make_request(body) - response = self._execute_request(request) - return response['predictions'] - - def _post_process( - self, elements: List[Union[str, bytes]], - outputs: Sequence[Mapping[Text, Any]] - ) -> Iterable[prediction_log_pb2.PredictLog]: - result = [] - for output in outputs: - predict_log = prediction_log_pb2.PredictLog() - for output_alias, values in output.items(): - values = np.array(values) - tensor_proto = tf.make_tensor_proto( - values=values, - dtype=tf.as_dtype(values.dtype).as_datatype_enum, - shape=np.expand_dims(values, axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) - return result - - -# TODO(b/131873699): Add typehints once -# [BEAM-8381](https://issues.apache.org/jira/browse/BEAM-8381) -# is fixed. -# TODO(b/143484017): Add batch_size back off in the case there are functional -# reasons large batch sizes cannot be handled. - -class _BaseBatchSavedModelDoFn(_BaseDoFn): - """A DoFn that runs in-process batch inference with a model. - - Models need to have the required serving signature as mentioned in - [Tensorflow Serving](https://www.tensorflow.org/tfx/serving/signature_defs) - - This function will check model signatures first. Then it will load and run - model inference in batch. 
- """ - - def __init__( - self, inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, tensor_adapter_config) - self._inference_spec_type = inference_spec_type - self._shared_model_handle = shared_model_handle - self._model_path = inference_spec_type.saved_model_spec.model_path - self._tags = None - self._signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) - self._session = None - self._data_type = data_type - - def setup(self): - """Load the model. - - Note that worker may crash if exception is thrown in setup due - to b/139207285. - """ - - super(_BaseBatchSavedModelDoFn, self).setup() - self._tags = _get_tags(self._inference_spec_type) - self._io_tensor_spec = self._pre_process() - - if self._has_tpu_tag(): - # TODO(b/131873699): Support TPU inference. - raise ValueError('TPU inference is not supported yet.') - self._session = self._load_model() - - def _load_model(self): - """Load a saved model into memory. - - Returns: - Session instance. - """ - - def load(): - """Function for constructing shared LoadedModel.""" - # TODO(b/143484017): Do warmup and other heavy model construction here. - result = tf.compat.v1.Session(graph=tf.compat.v1.Graph()) - memory_before = _get_current_process_memory_in_bytes() - start_time = self._clock.get_current_time_in_microseconds() - tf.compat.v1.saved_model.loader.load(result, self._tags, self._model_path) - end_time = self._clock.get_current_time_in_microseconds() - memory_after = _get_current_process_memory_in_bytes() - self._metrics_collector.load_model_latency_milli_secs_cache = ( - (end_time - start_time) / _MILLISECOND_TO_MICROSECOND) - self._metrics_collector.model_byte_size_cache = ( - memory_after - memory_before) - return result - - if not self._model_path: - raise ValueError('Model path is not valid.') - return self._shared_model_handle.acquire(load) - - def _pre_process(self) -> _IOTensorSpec: - # Pre process functions will validate for each signature. 
- io_tensor_specs = [] - for signature in self._signatures: - if (len(signature.signature_def.inputs) == 1 and - list(signature.signature_def.inputs.values())[0].dtype != - tf.string.as_datatype_enum): - raise ValueError( - 'With 1 input, dtype is expected to be %s, got %s' % - tf.string.as_datatype_enum, - list(signature.signature_def.inputs.values())[0].dtype) - io_tensor_specs.append(_signature_pre_process(signature.signature_def)) - input_tensor_names = [] - input_tensor_alias = [] - input_tensor_types = {} - output_alias_tensor_names = {} - for io_tensor_spec in io_tensor_specs: - if not input_tensor_names: - input_tensor_names = io_tensor_spec.input_tensor_names - input_tensor_alias = io_tensor_spec.input_tensor_alias - elif input_tensor_names != io_tensor_spec.input_tensor_names: - raise ValueError('Input tensor must be the same for all Signatures.') - for alias, tensor_type in io_tensor_spec.input_tensor_types.items(): - input_tensor_types[alias] = tensor_type - for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items(): - output_alias_tensor_names[alias] = tensor_name - if (not output_alias_tensor_names or not input_tensor_names or - not input_tensor_alias): - raise ValueError('No valid fetch tensors or feed tensors.') - return _IOTensorSpec(input_tensor_alias, input_tensor_names, - input_tensor_types, output_alias_tensor_names) - - def _has_tpu_tag(self) -> bool: - return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and - tf.saved_model.TPU in self._tags) - - def run_inference( - self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: - self._check_elements() - outputs = self._run_tf_operations(tensors) - return outputs - - def _run_tf_operations( - self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: - result = self._session.run( - self._io_tensor_spec.output_alias_tensor_names, feed_dict=tensors) - if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): - raise RuntimeError('Output length does not match fetches') - return result - - def _check_elements(self) -> None: - """Unimplemented.""" - - raise NotImplementedError - - -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(Tuple[Union[str, bytes], - classification_pb2.Classifications]) -class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that run inference on classification model.""" - - def setup(self): - signature_def = self._signatures[0].signature_def - if signature_def.method_name != tf.saved_model.CLASSIFY_METHOD_NAME: - raise ValueError( - 'BulkInferrerClassifyDoFn requires signature method ' - 'name %s, got: %s' % tf.saved_model.CLASSIFY_METHOD_NAME, - signature_def.method_name) - super(_BatchClassifyDoFn, self).setup() - - def _check_elements(self) -> None: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Classify only supports tf.train.Example') - - def _post_process( - self, elements: Mapping[Any, Any], - outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[Union[str, bytes], classification_pb2.Classifications]]: - serialized_examples, = elements.values() - classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, - serialized_examples, outputs) - return zip(serialized_examples, classifications) - - -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(Tuple[Union[str, bytes], - regression_pb2.Regression]) -class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that run inference on regression model.""" - - def 
setup(self): - super(_BatchRegressDoFn, self).setup() - - def _check_elements(self) -> None: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Regress only supports tf.train.Example') - - def _post_process( - self, elements: Mapping[Any, Any], - outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[Union[str, bytes], regression_pb2.Regression]]: - serialized_examples, = elements.values() - regressions = _post_process_regress(serialized_examples, outputs) - return zip(serialized_examples, regressions) - - -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictLog) -class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that runs inference on predict model.""" - - def setup(self): - signature_def = self._signatures[0].signature_def - if signature_def.method_name != tf.saved_model.PREDICT_METHOD_NAME: - raise ValueError( - 'BulkInferrerPredictDoFn requires signature method ' - 'name %s, got: %s' % tf.saved_model.PREDICT_METHOD_NAME, - signature_def.method_name) - super(_BatchPredictDoFn, self).setup() - - def _check_elements(self) -> None: - pass - - def _post_process( - self, elements: Mapping[Any, Any], - outputs: Mapping[Text, np.ndarray] - ) -> Iterable[prediction_log_pb2.PredictLog]: - if not self._io_tensor_spec.input_tensor_types: - raise ValueError('No valid tensor types.') - input_tensor_names = self._io_tensor_spec.input_tensor_names - input_tensor_alias = self._io_tensor_spec.input_tensor_alias - input_tensor_types = self._io_tensor_spec.input_tensor_types - signature_name = self._signatures[0].name - - if len(input_tensor_alias) != len(input_tensor_names): - raise ValueError('Expected to have one name and one alias per tensor') - - include_request = True - if len(input_tensor_names) == 1: - serialized_examples, = elements.values() - batch_size = len(serialized_examples) - process_elements = serialized_examples - else: - # Only include request in the predictLog when the all tensors are dense - # is there a better way to check this? - for tensor_name, tensor in elements.items(): - if not isinstance(tensor, np.ndarray): - include_request = False - break - - if include_request: - batch_size = len(elements[input_tensor_names[0]]) - else: - batch_size = elements[input_tensor_names[0]].shape[0] - - for output_alias, output in outputs.items(): - if len(output.shape) < 1 or output.shape[0] != batch_size: - raise ValueError( - 'Expected output tensor %s to have at least one ' - 'dimension, with the first having a size equal to the input batch ' - 'size %s. Instead found %s' % - (output_alias, batch_size, output.shape)) - predict_log_tmpl = prediction_log_pb2.PredictLog() - predict_log_tmpl.request.model_spec.signature_name = signature_name - predict_log_tmpl.response.model_spec.signature_name = signature_name - for alias, tensor_type in input_tensor_types.items(): - input_tensor_proto = predict_log_tmpl.request.inputs[alias] - input_tensor_proto.dtype = tf.as_dtype(tensor_type).as_datatype_enum - # TODO (Maxine): fix dimension? 
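# Illustration only (hypothetical alias 'x'): how a request input can be
# attached to a PredictLog as a TensorProto, mirroring the template built
# above.
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis import prediction_log_pb2

predict_log = prediction_log_pb2.PredictLog()
values = np.array([0.5], dtype=np.float32)
predict_log.request.inputs['x'].CopyFrom(
    tf.make_tensor_proto(
        values=values,
        dtype=tf.as_dtype(values.dtype).as_datatype_enum,
        shape=values.shape))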
- input_tensor_proto.tensor_shape.dim.add().size = 1 - - result = [] - for i in range(batch_size): - predict_log = prediction_log_pb2.PredictLog() - predict_log.CopyFrom(predict_log_tmpl) - - if include_request: - if len(input_tensor_alias) == 1: - alias = input_tensor_alias[0] - predict_log.request.inputs[alias].string_val.append(process_elements[i]) - else: - for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): - predict_log.request.inputs[alias].float_val.append(elements[tensor_name][i]) - - for output_alias, output in outputs.items(): - # Mimic tensor::Split - tensor_proto = tf.make_tensor_proto( - values=output[i], - dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, - shape=np.expand_dims(output[i], axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) - return result - - -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(Tuple[Union[str, bytes], - inference_pb2.MultiInferenceResponse]) -class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that runs inference on multi-head model.""" - - def _check_elements(self) -> None: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Multi-inference only supports tf.train.Example') - - def _post_process( - self, elements: Mapping[Any, Any], - outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[Union[str, bytes], inference_pb2.MultiInferenceResponse]]: - classifications = None - regressions = None - serialized_examples, = elements.values() - for signature in self._signatures: - signature_def = signature.signature_def - if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, - serialized_examples, outputs) - elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: - regressions = _post_process_regress(serialized_examples, outputs) - else: - raise ValueError('Signature method %s is not supported for ' - 'multi inference' % signature_def.method_name) - result = [] - for i in range(len(serialized_examples)): - response = inference_pb2.MultiInferenceResponse() - for signature in self._signatures: - signature_def = signature.signature_def - inference_result = response.results.add() - if (signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME and - classifications): - inference_result.classification_result.classifications.add().CopyFrom( - classifications[i]) - elif ( - signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME and - regressions): - inference_result.regression_result.regressions.add().CopyFrom( - regressions[i]) - else: - raise ValueError('Signature method %s is not supported for ' - 'multi inference' % signature_def.method_name) - inference_result.model_spec.signature_name = signature.name - if len(response.results) != len(self._signatures): - raise RuntimeError('Multi inference response result length does not ' - 'match the number of signatures') - result.append((serialized_examples[i], response)) - return result - - - -@beam.typehints.with_input_types(Tuple[Union[str, bytes], - classification_pb2.Classifications]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): - """A DoFn that builds prediction log from classifications.""" - - def process( - self, - element: Tuple[Union[str, bytes], classification_pb2.Classifications] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - 
(train_example, classifications) = element - result = prediction_log_pb2.PredictionLog() - result.classify_log.request.input.example_list.examples.add().CopyFrom( - tf.train.Example.FromString(train_example)) - result.classify_log.response.result.classifications.add().CopyFrom( - classifications) - yield result - - -@beam.typehints.with_input_types(Tuple[Union[str, bytes], - regression_pb2.Regression]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): - """A DoFn that builds prediction log from regressions.""" - - def process( - self, element: Tuple[Union[str, bytes], regression_pb2.Regression] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - (train_example, regression) = element - result = prediction_log_pb2.PredictionLog() - result.regress_log.request.input.example_list.examples.add().CopyFrom( - tf.train.Example.FromString(train_example)) - result.regress_log.response.result.regressions.add().CopyFrom(regression) - yield result - - -@beam.typehints.with_input_types(prediction_log_pb2.PredictLog) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForPredictionsDoFn(beam.DoFn): - """A DoFn that builds prediction log from predictions.""" - - def process( - self, element: prediction_log_pb2.PredictLog - ) -> Iterable[prediction_log_pb2.PredictionLog]: - result = prediction_log_pb2.PredictionLog() - result.predict_log.CopyFrom(element) - yield result - - -@beam.typehints.with_input_types(Tuple[Union[str, bytes], - inference_pb2.MultiInferenceResponse]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildMultiInferenceLogDoFn(beam.DoFn): - """A DoFn that builds prediction log from multi-head inference result.""" - - def process( - self, element: Tuple[Union[str, bytes], - inference_pb2.MultiInferenceResponse] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - (train_example, multi_inference_response) = element - result = prediction_log_pb2.PredictionLog() - (result.multi_inference_log.request.input.example_list.examples.add() - .CopyFrom(tf.train.Example.FromString(train_example))) - result.multi_inference_log.response.CopyFrom(multi_inference_response) - yield result - - - -def _post_process_classify( - output_alias_tensor_names: Mapping[Text, Text], - elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray] - ) -> Sequence[classification_pb2.Classifications]: - """Returns classifications from inference output.""" - - # This is to avoid error "The truth value of an array with - # more than one element is ambiguous." - has_classes = False - has_scores = False - if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in output_alias_tensor_names: - classes = outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] - has_classes = True - if tf.saved_model.CLASSIFY_OUTPUT_SCORES in output_alias_tensor_names: - scores = outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES] - has_scores = True - if has_classes: - if classes.ndim != 2: - raise ValueError('Expected Tensor shape: [batch_size num_classes] but ' - 'got %s' % classes.shape) - if classes.dtype != tf.string.as_numpy_dtype: - raise ValueError('Expected classes Tensor of %s. 
Got: %s' % - (tf.string.as_numpy_dtype, classes.dtype)) - if classes.shape[0] != len(elements): - raise ValueError('Expected classes output batch size of %s, got %s' % - (len(elements), classes.shape[0])) - if has_scores: - if scores.ndim != 2: - raise ValueError("""Expected Tensor shape: [batch_size num_classes] but - got %s""" % scores.shape) - if scores.dtype != tf.float32.as_numpy_dtype: - raise ValueError('Expected classes Tensor of %s. Got: %s' % - (tf.float32.as_numpy_dtype, scores.dtype)) - if scores.shape[0] != len(elements): - raise ValueError('Expected classes output batch size of %s, got %s' % - (len(elements), scores.shape[0])) - num_classes = 0 - if has_classes and has_scores: - if scores.shape[1] != classes.shape[1]: - raise ValueError('Tensors class and score should match in shape[1]. ' - 'Got %s vs %s' % (classes.shape[1], scores.shape[1])) - num_classes = classes.shape[1] - elif has_classes: - num_classes = classes.shape[1] - elif has_scores: - num_classes = scores.shape[1] - - result = [] - for i in range(len(elements)): - a_classification = classification_pb2.Classifications() - for c in range(num_classes): - a_class = a_classification.classes.add() - if has_classes: - a_class.label = classes[i][c] - if has_scores: - a_class.score = scores[i][c] - result.append(a_classification) - if len(result) != len(elements): - raise RuntimeError('Classifications length does not match elements') - return result - - -def _post_process_regress( - elements: Sequence[Union[str, bytes]], - outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: - """Returns regressions from inference output.""" - - if tf.saved_model.REGRESS_OUTPUTS not in outputs: - raise ValueError('No regression outputs found in outputs: %s' % - outputs.keys()) - output = outputs[tf.saved_model.REGRESS_OUTPUTS] - batch_size = len(elements) - if not (output.ndim == 1 or (output.ndim == 2 and output.shape[1] == 1)): - raise ValueError("""Expected output Tensor shape to be either [batch_size] - or [batch_size, 1] but got %s""" % output.shape) - if batch_size != output.shape[0]: - raise ValueError( - 'Input batch size did not match output batch size: %s vs %s' % - (batch_size, output.shape[0])) - if output.dtype != tf.float32.as_numpy_dtype: - raise ValueError('Expected output Tensor of %s. Got: %s' % - (tf.float32.as_numpy_dtype, output.dtype)) - if output.size != batch_size: - raise ValueError('Expected output batch size to be %s. Got: %s' % - (batch_size, output.size)) - flatten_output = output.flatten() - result = [] - for regression_result in flatten_output: - regression = regression_pb2.Regression() - regression.value = regression_result - result.append(regression) - - # Add additional check to save downstream consumer checks. 
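# Toy walkthrough (illustration only) of the regression handling above: a
# [batch_size, 1] float32 output is flattened into one Regression per row.
import numpy as np
from tensorflow_serving.apis import regression_pb2

output = np.array([[0.6], [1.4]], dtype=np.float32)
regressions = []
for value in output.flatten():
  regression = regression_pb2.Regression()
  regression.value = float(value)
  regressions.append(regression)
assert len(regressions) == output.shape[0]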
- if len(result) != len(elements): - raise RuntimeError('Regression length does not match elements') - return result - - -def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: - """Returns IOTensorSpec from signature.""" - if (len(signature.inputs) == 1 and - list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum): - raise ValueError( - 'With 1 input, dtype is expected to be %s, got %s' % - tf.string.as_datatype_enum, - list(signature.inputs.values())[0].dtype) - input_tensor_alias = [alias for alias in signature.inputs.keys()] - if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - input_tensor_names, input_tensor_types, output_alias_tensor_names = ( - _signature_pre_process_classify(signature)) - elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: - input_tensor_names, input_tensor_types, output_alias_tensor_names = ( - _signature_pre_process_predict(signature)) - elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: - input_tensor_names, input_tensor_types, output_alias_tensor_names = ( - _signature_pre_process_regress(signature)) - else: - raise ValueError('Signature method %s is not supported' % - signature.method_name) - return _IOTensorSpec(input_tensor_alias, input_tensor_names, - input_tensor_types, output_alias_tensor_names) - - -def _signature_pre_process_classify( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. - """ - if len(signature.inputs) != 1: - raise ValueError('Classify signature should have 1 and only 1 inputs') - if len(signature.outputs) != 1 and len(signature.outputs) != 2: - raise ValueError('Classify signature should have 1 or 2 outputs') - if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: - raise ValueError('No classification inputs found in SignatureDef: %s' % - signature.inputs) - input_tensor_names = [signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name] - output_alias_tensor_names = {} - if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and - tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): - raise ValueError( - """Expected classification signature outputs to contain at - least one of %s or %s. Signature was: %s""" % - tf.saved_model.CLASSIFY_OUTPUT_CLASSES, - tf.saved_model.CLASSIFY_OUTPUT_SCORES, signature) - if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in signature.outputs: - output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] = ( - signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES].name) - if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: - output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( - signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) - return input_tensor_names, {}, output_alias_tensor_names - - -def _signature_pre_process_predict( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. 
- """ - input_tensor_names = [value.name for value in signature.inputs.values()] - input_tensor_types = dict([ - (key, value.dtype) for key, value in signature.inputs.items() - ]) - output_alias_tensor_names = dict([ - (key, output.name) for key, output in signature.outputs.items() - ]) - return input_tensor_names, input_tensor_types, output_alias_tensor_names - - -def _signature_pre_process_regress( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. - """ - if len(signature.inputs) != 1: - raise ValueError('Regress signature should have 1 and only 1 inputs') - if len(signature.outputs) != 1: - raise ValueError('Regress signature should have 1 output') - if tf.saved_model.REGRESS_INPUTS not in signature.inputs: - raise ValueError('No regression inputs found in SignatureDef: %s' % - signature.inputs) - input_tensor_names = [signature.inputs[tf.saved_model.REGRESS_INPUTS].name] - if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: - raise ValueError('No regression outputs found in SignatureDef: %s' % - signature.outputs) - output_alias_tensor_names = { - tf.saved_model.REGRESS_OUTPUTS: - signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name - } - return input_tensor_names, {}, output_alias_tensor_names - - -def _using_in_process_inference( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> bool: - return inference_spec_type.WhichOneof('type') == 'saved_model_spec' - - -def _get_signatures(model_path: Text, signatures: Sequence[Text], - tags: Sequence[Text]) -> Sequence[_Signature]: - """Returns a sequence of {model_signature_name: signature}.""" - - if signatures: - signature_names = signatures - else: - signature_names = [tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - - saved_model_pb = loader_impl.parse_saved_model(model_path) - meta_graph_def = _get_meta_graph_def(saved_model_pb, tags) - result = [] - for signature_name in signature_names: - if signature_name in meta_graph_def.signature_def: - result.append( - _Signature(signature_name, - meta_graph_def.signature_def[signature_name])) - else: - raise RuntimeError('Signature %s could not be found in SavedModel' % - signature_name) - return result - - -def _get_operation_type( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: - if _using_in_process_inference(inference_spec_type): - signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) - if not signatures: - raise ValueError('Model does not have valid signature to use') - - if len(signatures) == 1: - method_name = signatures[0].signature_def.method_name - if method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - return OperationType.CLASSIFICATION - elif method_name == tf.saved_model.REGRESS_METHOD_NAME: - return OperationType.REGRESSION - elif method_name == tf.saved_model.PREDICT_METHOD_NAME: - return OperationType.PREDICTION - else: - raise ValueError('Unsupported signature method_name %s' % method_name) - else: - for signature in signatures: - method_name = signature.signature_def.method_name - if (method_name != tf.saved_model.CLASSIFY_METHOD_NAME and - method_name != tf.saved_model.REGRESS_METHOD_NAME): - raise ValueError('Unsupported signature method_name for multi-head ' - 'model inference: %s' % method_name) - return 
OperationType.MULTIHEAD - else: - # Remote inference supports predictions only. - return OperationType.PREDICTION - - -def _get_meta_graph_def(saved_model_pb: _SavedModel, - tags: Sequence[Text]) -> _MetaGraphDef: - """Returns MetaGraphDef from SavedModel.""" - - for meta_graph_def in saved_model_pb.meta_graphs: - if set(meta_graph_def.meta_info_def.tags) == set(tags): - return meta_graph_def - raise RuntimeError('MetaGraphDef associated with tags %s could not be ' - 'found in SavedModel' % tags) - - -def _get_current_process_memory_in_bytes(): - """Returns memory usage in bytes.""" - - if resource is not None: - usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - if _is_darwin(): - return usage - return usage * 1024 - else: - logging.warning('Resource module is not available for current platform, ' - 'memory usage cannot be fetched.') - return 0 - - -def _get_tags( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> Sequence[Text]: - """Returns tags from ModelSpec.""" - - if inference_spec_type.saved_model_spec.tag: - return list(inference_spec_type.saved_model_spec.tag) - else: - return [tf.saved_model.SERVING] - - -def _is_darwin() -> bool: - return sys.platform == 'darwin' - - -def _is_windows() -> bool: - return platform.system() == 'Windows' or os.name == 'nt' - - -def _is_cygwin() -> bool: - return platform.system().startswith('CYGWIN_NT') - - -class _Clock(object): - - def get_current_time_in_microseconds(self) -> int: - return int(time.time() * _SECOND_TO_MICROSECOND) - - -class _FineGrainedClock(_Clock): - - def get_current_time_in_microseconds(self) -> int: - return int( - time.clock_gettime_ns(time.CLOCK_REALTIME) / # pytype: disable=module-attr - _MICROSECOND_TO_NANOSECOND) - - -class _ClockFactory(object): - - @staticmethod - def make_clock() -> _Clock: - if (hasattr(time, 'clock_gettime_ns') and not _is_windows() - and not _is_cygwin()): - return _FineGrainedClock() - return _Clock() diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py deleted file mode 100644 index 441060e0..00000000 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ /dev/null @@ -1,718 +0,0 @@ -# Copyright 2019 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
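# Sketch (illustration only) of the clock selection in _ClockFactory above:
# prefer time.clock_gettime_ns for finer resolution where the platform
# provides it (the factory also excludes Windows and Cygwin), otherwise fall
# back to time.time().
import time

_SECOND_TO_MICROSECOND = 1000000
_MICROSECOND_TO_NANOSECOND = 1000


def current_time_micros() -> int:
  if hasattr(time, 'clock_gettime_ns'):
    return int(time.clock_gettime_ns(time.CLOCK_REALTIME) //
               _MICROSECOND_TO_NANOSECOND)
  return int(time.time() * _SECOND_TO_MICROSECOND)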
-"""Tests for tfx_bsl.run_inference_arrow.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import json -import os -try: - import unittest.mock as mock -except ImportError: - import mock - -import apache_beam as beam -import pyarrow as pa -from apache_beam.metrics.metric import MetricsFilter -from apache_beam.testing.util import assert_that -from apache_beam.testing.util import equal_to -from googleapiclient import discovery -from googleapiclient import http -from six.moves import http_client -import tensorflow as tf -from tfx_bsl.beam import bsl_util -from tfx_bsl.beam import run_inference_arrow -from tfx_bsl.beam.bsl_constants import DataType -from tfx_bsl.public.proto import model_spec_pb2 -from tfx_bsl.tfxio import test_util -from tfx_bsl.tfxio import tensor_adapter - -from google.protobuf import text_format -from tensorflow_serving.apis import prediction_log_pb2 -from tensorflow_metadata.proto.v0 import schema_pb2 - - -class RunInferenceArrowFixture(tf.test.TestCase): - - def setUp(self): - super(RunInferenceArrowFixture, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - ] - - serialized_example = [] - for example in self._predict_examples: - serialized_example.append([example.SerializeToString()]) - self.record_batch = pa.RecordBatch.from_arrays( - [ - pa.array([[0]], type=pa.list_(pa.float32())), - serialized_example - ], - ['input1', '__RAW_RECORD__']) - - - def _get_output_data_dir(self, sub_dir=None): - test_dir = self._testMethodName - path = os.path.join( - os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), - test_dir) - if not tf.io.gfile.exists(path): - tf.io.gfile.makedirs(path) - if sub_dir is not None: - path = os.path.join(path, sub_dir) - return path - - -class RunOfflineInferenceArrowTest(RunInferenceArrowFixture): - - def setUp(self): - super(RunOfflineInferenceArrowTest, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, tf.train.Example()), - ] - - serialized_example = [] - for example in self._predict_examples: - serialized_example.append([example.SerializeToString()]) - self.record_batch = pa.RecordBatch.from_arrays( - [ - pa.array([[0], [1]], type=pa.list_(pa.float32())), - serialized_example - ], - ['input1', '__RAW_RECORD__'] - ) - - - self._multihead_examples = [ - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.8 }}} - feature {key: "y" value { float_list { value: 0.2 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.6 }}} - feature {key: "y" value { float_list { value: 0.1 }}} - } - """, tf.train.Example()), - ] - - serialized_example_multi = [] - for example in self._multihead_examples: - serialized_example_multi.append([example.SerializeToString()]) - self.record_batch_multihead = pa.RecordBatch.from_arrays( - [ - pa.array([[0.8], [0.6]], type=pa.list_(pa.float32())), - pa.array([[0.2], [0.1]], type=pa.list_(pa.float32())), - serialized_example_multi - ], - ['x', 'y', '__RAW_RECORD__'] - ) - - - self._multi_input_examples = [ - text_format.Parse( - """ - features { - 
feature {key: "x" value { float_list { value: 0.8 }}} - feature {key: "y" value { float_list { value: 0.2 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.6 }}} - feature {key: "y" value { float_list { value: 0.1 }}} - } - """, tf.train.Example()), - ] - - serialized_example_multi_input = [] - for example in self._multi_input_examples: - serialized_example_multi_input.append([example.SerializeToString()]) - self.record_batch_multi_input = pa.RecordBatch.from_arrays( - [ - pa.array([[0.8], [0.6]], type=pa.list_(pa.float32())), - pa.array([[0.2], [0.1]], type=pa.list_(pa.float32())), - serialized_example_multi_input - ], - ['x', 'y', '__RAW_RECORD__'] - ) - - tfxio = test_util.InMemoryTFExampleRecord( - schema = text_format.Parse( - """ - tensor_representation_group { - key: "" - value { - tensor_representation { - key: "x" - value { - dense_tensor { - column_name: "x" - shape { dim { size: 1 } } - } - } - } - tensor_representation { - key: "y" - value { - dense_tensor { - column_name: "y" - shape { dim { size: 1 } } - } - } - } - } - } - feature { - name: "x" - type: FLOAT - } - feature { - name: "y" - type: FLOAT - } - """, schema_pb2.Schema()), - raw_record_column_name='__RAW_RECORD__') - self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) - - - def _build_predict_model(self, model_path): - """Exports the dummy sum predict model.""" - - with tf.compat.v1.Graph().as_default(): - input_tensors = { - 'x': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0) - } - serving_receiver = ( - tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( - input_tensors)()) - output_tensors = {'y': serving_receiver.features['x'] * 2} - sess = tf.compat.v1.Session() - sess.run(tf.compat.v1.initializers.global_variables()) - signature_def = tf.compat.v1.estimator.export.PredictOutput( - output_tensors).as_signature_def(serving_receiver.receiver_tensors) - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) - builder.add_meta_graph_and_variables( - sess, [tf.compat.v1.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.compat.v1.saved_model.signature_constants - .DEFAULT_SERVING_SIGNATURE_DEF_KEY: - signature_def, - }) - builder.save() - - def _build_regression_signature(self, input_tensor, output_tensor): - """Helper function for building a regression SignatureDef.""" - input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - input_tensor) - signature_inputs = { - tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: - input_tensor_info - } - output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - output_tensor) - signature_outputs = { - tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: - output_tensor_info - } - return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( - signature_inputs, signature_outputs, - tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) - - def _build_classification_signature(self, input_tensor, scores_tensor): - """Helper function for building a classification SignatureDef.""" - input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - input_tensor) - signature_inputs = { - tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: - input_tensor_info - } - output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - 
scores_tensor) - signature_outputs = { - tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: - output_tensor_info - } - return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( - signature_inputs, signature_outputs, - tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) - - def _build_multihead_model(self, model_path): - with tf.compat.v1.Graph().as_default(): - input_example = tf.compat.v1.placeholder( - tf.string, name='input_examples_tensor') - config = { - 'x': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0), - 'y': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0), - } - features = tf.compat.v1.parse_example(input_example, config) - x = features['x'] - y = features['y'] - sum_pred = x + y - diff_pred = tf.abs(x - y) - sess = tf.compat.v1.Session() - sess.run(tf.compat.v1.initializers.global_variables()) - signature_def_map = { - 'regress_diff': - self._build_regression_signature(input_example, diff_pred), - 'classify_sum': - self._build_classification_signature(input_example, sum_pred), - tf.compat.v1.saved_model.signature_constants - .DEFAULT_SERVING_SIGNATURE_DEF_KEY: - self._build_regression_signature(input_example, sum_pred) - } - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) - builder.add_meta_graph_and_variables( - sess, [tf.compat.v1.saved_model.tag_constants.SERVING], - signature_def_map=signature_def_map) - builder.save() - - def _run_inference_with_beam(self, example_type, inference_spec_type, - prediction_log_path, include_config = False): - if include_config: - with beam.Pipeline() as pipeline: - _ = ( - pipeline - | "createRecordBatch" >> beam.Create([self.record_batch_multi_input]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE, self.tensor_adapter_config) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - elif example_type == 'multi': - with beam.Pipeline() as pipeline: - _ = ( - pipeline - | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - else: - with beam.Pipeline() as pipeline: - _ = ( - pipeline - | "createRecordBatch" >> beam.Create([self.record_batch]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - - def _get_results(self, prediction_log_path): - results = [] - for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): - record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) - for record_string in record_iterator: - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.MergeFromString(record_string) - results.append(prediction_log) - return results - - def testModelPathInvalid(self): - prediction_log_path = self._get_output_data_dir('predictions') - with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): - self._run_inference_with_beam( - 'predict', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=self._get_output_data_dir())), prediction_log_path) - 
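# Illustration only (hypothetical model path): the in-process spec these tests
# build; remote inference instead sets ai_platform_prediction_model_spec.
from tfx_bsl.public.proto import model_spec_pb2

spec = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/tmp/saved_model',
        signature_name=['classify_sum']))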
- def testEstimatorModelPredict(self): - model_path = self._get_output_data_dir('model') - self._build_predict_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'predict', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - self.assertEqual( - results[0].predict_log.request.inputs[ - run_inference_arrow._DEFAULT_INPUT_KEY].string_val[0], - self._predict_examples[0].SerializeToString()) - self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, - tf.float32) - self.assertLen( - results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) - self.assertEqual( - results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, - 1) - self.assertEqual( - results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, - 1) - - def testClassifyModel(self): - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'multi', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['classify_sum'])), - prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - classify_log = results[0].classify_log - self.assertLen(classify_log.request.input.example_list.examples, 1) - self.assertEqual(classify_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - self.assertLen(classify_log.response.result.classifications, 1) - self.assertLen(classify_log.response.result.classifications[0].classes, 1) - self.assertAlmostEqual( - classify_log.response.result.classifications[0].classes[0].score, 1.0) - - def testRegressModel(self): - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'multi', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['regress_diff'])), - prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - regress_log = results[0].regress_log - self.assertLen(regress_log.request.input.example_list.examples, 1) - self.assertEqual(regress_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - self.assertLen(regress_log.response.result.regressions, 1) - self.assertAlmostEqual(regress_log.response.result.regressions[0].value, - 0.6) - - def testMultiInferenceModel(self): - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'multi', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, - signature_name=['regress_diff', 'classify_sum'])), - prediction_log_path) - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - multi_inference_log = results[0].multi_inference_log - self.assertLen(multi_inference_log.request.input.example_list.examples, 1) - self.assertEqual(multi_inference_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - 
self.assertLen(multi_inference_log.response.results, 2) - signature_names = [] - for result in multi_inference_log.response.results: - signature_names.append(result.model_spec.signature_name) - self.assertIn('regress_diff', signature_names) - self.assertIn('classify_sum', signature_names) - result = multi_inference_log.response.results[0] - self.assertEqual(result.model_spec.signature_name, 'regress_diff') - self.assertLen(result.regression_result.regressions, 1) - self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) - result = multi_inference_log.response.results[1] - self.assertEqual(result.model_spec.signature_name, 'classify_sum') - self.assertLen(result.classification_result.classifications, 1) - self.assertLen(result.classification_result.classifications[0].classes, 1) - self.assertAlmostEqual( - result.classification_result.classifications[0].classes[0].score, 1.0) - - def testKerasModelPredict(self): - inputs = tf.keras.Input(shape=(1,), name='input1') - output1 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output1')( - inputs) - output2 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output2')( - inputs) - inference_model = tf.keras.models.Model(inputs, [output1, output2]) - - class TestKerasModel(tf.keras.Model): - def __init__(self, inference_model): - super(TestKerasModel, self).__init__(name='test_keras_model') - self.inference_model = inference_model - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') - ]) - def call(self, serialized_example): - features = { - 'input1': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, - default_value=0) - } - input_tensor_dict = tf.io.parse_example(serialized_example, features) - return inference_model(input_tensor_dict['input1']) - - model = TestKerasModel(inference_model) - model.compile( - optimizer=tf.keras.optimizers.Adam(lr=.001), - loss=tf.keras.losses.binary_crossentropy, - metrics=['accuracy']) - - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'predict', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - - def testKerasModelPredictMultiTensor(self): - input1 = tf.keras.layers.Input((1,), name='x') - input2 = tf.keras.layers.Input((1,), name='y') - - x1 = tf.keras.layers.Dense(10)(input1) - x2 = tf.keras.layers.Dense(10)(input2) - output = tf.keras.layers.Dense(5, name='output')(x2) - - model = tf.keras.models.Model([input1, input2], output) - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'multi', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), - prediction_log_path, include_config = True) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - for result in results: - self.assertLen(result.predict_log.request.inputs, 2) - self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) - - def testTelemetry(self): - model_path = self._get_output_data_dir('model') - 
self._build_multihead_model(model_path) - inference_spec_type = model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['classify_sum'])) - pipeline = beam.Pipeline() - _ = ( - pipeline - | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE)) - run_result = pipeline.run() - run_result.wait_until_finish() - - num_inferences = run_result.metrics().query( - MetricsFilter().with_name('num_inferences')) - self.assertTrue(num_inferences['counters']) - self.assertEqual(num_inferences['counters'][0].result, 2) - num_instances = run_result.metrics().query( - MetricsFilter().with_name('num_instances')) - self.assertTrue(num_instances['counters']) - self.assertEqual(num_instances['counters'][0].result, 2) - inference_request_batch_size = run_result.metrics().query( - MetricsFilter().with_name('inference_request_batch_size')) - self.assertTrue(inference_request_batch_size['distributions']) - self.assertEqual( - inference_request_batch_size['distributions'][0].result.sum, 2) - inference_request_batch_byte_size = run_result.metrics().query( - MetricsFilter().with_name('inference_request_batch_byte_size')) - self.assertTrue(inference_request_batch_byte_size['distributions']) - self.assertEqual( - inference_request_batch_byte_size['distributions'][0].result.sum, - sum(element.ByteSize() for element in self._multihead_examples)) - inference_batch_latency_micro_secs = run_result.metrics().query( - MetricsFilter().with_name('inference_batch_latency_micro_secs')) - self.assertTrue(inference_batch_latency_micro_secs['distributions']) - self.assertGreaterEqual( - inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) - load_model_latency_milli_secs = run_result.metrics().query( - MetricsFilter().with_name('load_model_latency_milli_secs')) - self.assertTrue(load_model_latency_milli_secs['distributions']) - self.assertGreaterEqual( - load_model_latency_milli_secs['distributions'][0].result.sum, 0) - - -class RunRemoteInferenceArrowTest(RunInferenceArrowFixture): - - def setUp(self): - super(RunRemoteInferenceArrowTest, self).setUp() - # This is from https://ml.googleapis.com/$discovery/rest?version=v1. - self._discovery_testdata_dir = os.path.join( - os.path.join(os.path.dirname(__file__), 'testdata'), - 'ml_discovery.json') - - @staticmethod - def _make_response_body(content, successful): - if successful: - response_dict = {'predictions': content} - else: - response_dict = {'error': content} - return json.dumps(response_dict) - - def _set_up_pipeline(self, inference_spec_type): - self.pipeline = beam.Pipeline() - self.pcoll = ( - self.pipeline - | "createRecordBatch" >> beam.Create([self.record_batch]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE)) - - def _run_inference_with_beam(self): - self.pipeline_result = self.pipeline.run() - self.pipeline_result.wait_until_finish() - - def test_model_predict(self): - predictions = [{'output_1': [0.901], 'output_2': [0.997]}] - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(predictions, successful=True)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.predict_log.response.outputs['output_1'].CopyFrom( - tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) - prediction_log.predict_log.response.outputs['output_2'].CopyFrom( - tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) - - self._set_up_pipeline(inference_spec_type) - assert_that(self.pcoll, equal_to([prediction_log])) - self._run_inference_with_beam() - - def test_exception_raised_when_response_body_contains_error_entry(self): - error_msg = 'Base64 decode failed.' - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(error_msg, successful=False)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - try: - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - except ValueError as exc: - actual_error_msg = str(exc) - self.assertTrue(actual_error_msg.startswith(error_msg)) - else: - self.fail('Test was expected to throw ValueError exception') - - def test_exception_raised_when_project_id_is_empty(self): - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec(model_name='test-model',)) - - with self.assertRaises(ValueError): - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - - def test_request_body_with_binary_data(self): - example = text_format.Parse( - """ - features { - feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} - feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} - feature { key: "y" value { int64_list { value: [1, 2] }}} - } - """, tf.train.Example()) - - serialized_example_remote = [example.SerializeToString()] - record_batch_remote = pa.RecordBatch.from_arrays( - [ - pa.array(["ASa8asdf"], type=pa.binary()), - pa.array(["JLK7ljk3"], type=pa.utf8()), - pa.array([[1, 2]], type=pa.list_(pa.float32())), - ], - ['x_bytes', 'x', 'y'] - ) - - result = list(bsl_util.RecordToJSON(record_batch_remote)) - self.assertEqual([ - { - 'x_bytes': { - 'b64': 'QVNhOGFzZGY=' - }, - 'x': 'JLK7ljk3', - 'y': [1, 2] - }, - ], result) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tfx_bsl/beam/run_inference_record_batch.py b/tfx_bsl/beam/run_inference_record_batch.py new file mode 100644 index 00000000..ca4543ff --- /dev/null +++ b/tfx_bsl/beam/run_inference_record_batch.py @@ -0,0 +1,57 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Lint as: python3 +"""Private API of inference.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import apache_beam as beam +import tensorflow as tf +import pyarrow as pa +from typing import Text, Optional +from tfx_bsl.beam import run_inference +from tfx_bsl.public.proto import model_spec_pb2 +from tensorflow_serving.apis import prediction_log_pb2 + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceOnRecordBatch( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type: Text + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None +) -> beam.pvalue.PCollection: + """Run inference with a model. + + There are two types of inference you can perform using this PTransform: + 1. In-process inference from a SavedModel instance. Used when + `saved_model_spec` field is set in `inference_spec_type`. + 2. Remote inference by using a service endpoint. Used when + `ai_platform_prediction_model_spec` field is set in + `inference_spec_type`. + + Args: + examples: A PCollection containing RecordBatch. + inference_spec_type: Model inference endpoint. + + Returns: + A PCollection containing prediction logs. 
+ """ + + return ( + examples | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( + inference_spec_type, data_type, tensor_adapter_config)) diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 8601dc30..d3580788 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -26,6 +26,7 @@ import mock import apache_beam as beam +import pyarrow as pa from apache_beam.metrics.metric import MetricsFilter from apache_beam.testing.util import assert_that from apache_beam.testing.util import equal_to @@ -33,17 +34,24 @@ from googleapiclient import http from six.moves import http_client import tensorflow as tf +from tfx_bsl.beam import bsl_util from tfx_bsl.beam import run_inference +from tfx_bsl.beam.bsl_constants import DataType +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN from tfx_bsl.public.proto import model_spec_pb2 +from tfx_bsl.tfxio import test_util +from tfx_bsl.tfxio import tensor_adapter +from tfx_bsl.tfxio import tf_example_record from google.protobuf import text_format from tensorflow_serving.apis import prediction_log_pb2 +from tensorflow_metadata.proto.v0 import schema_pb2 -class RunInferenceFixture(tf.test.TestCase): +class RunInferenceArrowFixture(tf.test.TestCase): def setUp(self): - super(RunInferenceFixture, self).setUp() + super(RunInferenceArrowFixture, self).setUp() self._predict_examples = [ text_format.Parse( """ @@ -70,10 +78,10 @@ def _prepare_predict_examples(self, example_path): output_file.write(example.SerializeToString()) -class RunOfflineInferenceTest(RunInferenceFixture): +class RunOfflineInferenceArrowTest(RunInferenceArrowFixture): def setUp(self): - super(RunOfflineInferenceTest, self).setUp() + super(RunOfflineInferenceArrowTest, self).setUp() self._predict_examples = [ text_format.Parse( """ @@ -88,6 +96,7 @@ def setUp(self): } """, tf.train.Example()), ] + self._multihead_examples = [ text_format.Parse( """ @@ -105,12 +114,47 @@ def setUp(self): """, tf.train.Example()), ] + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) def _prepare_multihead_examples(self, example_path): with tf.io.TFRecordWriter(example_path) as output_file: for example in self._multihead_examples: output_file.write(example.SerializeToString()) + def _build_predict_model(self, model_path): """Exports the dummy sum predict model.""" @@ -206,14 +250,37 @@ def _build_multihead_model(self, model_path): builder.save() def _run_inference_with_beam(self, example_path, inference_spec_type, - prediction_log_path): - with beam.Pipeline() as pipeline: - _ = ( + prediction_log_path, include_config = False): + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) + + if include_config: + tfxio = test_util.InMemoryTFExampleRecord( + schema=self.schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + with beam.Pipeline() as pipeline: + _ = ( pipeline | 'ReadExamples' 
>> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | - 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceImpl( + inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + else: + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceImpl( + inference_spec_type, DataType.EXAMPLE) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -362,7 +429,6 @@ def testKerasModelPredict(self): inference_model = tf.keras.models.Model(inputs, [output1, output2]) class TestKerasModel(tf.keras.Model): - def __init__(self, inference_model): super(TestKerasModel, self).__init__(name='test_keras_model') self.inference_model = inference_model @@ -372,10 +438,9 @@ def __init__(self, inference_model): ]) def call(self, serialized_example): features = { - 'input1': - tf.compat.v1.io.FixedLenFeature([1], - dtype=tf.float32, - default_value=0) + 'input1': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, + default_value=0) } input_tensor_dict = tf.io.parse_example(serialized_example, features) return inference_model(input_tensor_dict['input1']) @@ -386,12 +451,12 @@ def call(self, serialized_example): loss=tf.keras.losses.binary_crossentropy, metrics=['accuracy']) + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) model_path = self._get_output_data_dir('model') tf.compat.v1.keras.experimental.export_saved_model( model, model_path, serving_only=True) - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( example_path, @@ -402,6 +467,66 @@ def call(self, serialized_example): results = self._get_results(prediction_log_path) self.assertLen(results, 2) + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_config = True) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + def testMultiTensorError(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), 
name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + + error_msg = 'Tensor adaptor config is required with a multi-input model' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_config = False) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + def testTelemetry(self): example_path = self._get_output_data_dir('examples') self._prepare_multihead_examples(example_path) @@ -410,11 +535,18 @@ def testTelemetry(self): inference_spec_type = model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path, signature_name=['classify_sum'])) + pipeline = beam.Pipeline() + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) _ = ( - pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceImpl( + inference_spec_type, DataType.EXAMPLE)) run_result = pipeline.run() run_result.wait_until_finish() @@ -449,13 +581,13 @@ def testTelemetry(self): load_model_latency_milli_secs['distributions'][0].result.sum, 0) -class RunRemoteInferenceTest(RunInferenceFixture): +class RunRemoteInferenceArrowTest(RunInferenceArrowFixture): def setUp(self): - super(RunRemoteInferenceTest, self).setUp() + super(RunRemoteInferenceArrowTest, self).setUp() + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. self.example_path = self._get_output_data_dir('example') self._prepare_predict_examples(self.example_path) - # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
self._discovery_testdata_dir = os.path.join( os.path.join(os.path.dirname(__file__), 'testdata'), 'ml_discovery.json') @@ -470,11 +602,16 @@ def _make_response_body(content, successful): def _set_up_pipeline(self, inference_spec_type): self.pipeline = beam.Pipeline() + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) self.pcoll = ( self.pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceImpl( + inference_spec_type, DataType.EXAMPLE)) def _run_inference_with_beam(self): self.pipeline_result = self.pipeline.run() @@ -582,18 +719,25 @@ def test_can_format_requests(self): } """, tf.train.Example()) + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) + self.pipeline = beam.Pipeline() self.pcoll = ( self.pipeline - | 'ReadExamples' >> beam.Create([example]) - | - 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + | 'CreateExamples' >> beam.Create([example]) + | 'ParseExamples' >> beam.Map(lambda x: x.SerializeToString()) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceImpl( + inference_spec_type, DataType.EXAMPLE)) self._run_inference_with_beam() def test_request_body_with_binary_data(self): example = text_format.Parse( - """ + """ features { feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} @@ -601,8 +745,19 @@ def test_request_body_with_binary_data(self): feature { key: "z" value { float_list { value: [4.5, 5, 5.5] }}} } """, tf.train.Example()) - result = list( - run_inference._RemotePredictDoFn._prepare_instances([example])) + + serialized_example_remote = [example.SerializeToString()] + record_batch_remote = pa.RecordBatch.from_arrays( + [ + pa.array([["ASa8asdf"]], type=pa.list_(pa.binary())), + pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), + pa.array([[1, 2]], type=pa.list_(pa.int32())), + pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())) + ], + ['x_bytes', 'x', 'y', 'z'] + ) + + result = list(bsl_util.RecordToJSON(record_batch_remote)) self.assertEqual([ { 'x_bytes': { diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 788235e0..58633f95 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# Lint as: python3 -"""Publich API of batch inference.""" +"""Public API of batch inference.""" from __future__ import absolute_import from __future__ import division @@ -28,7 +28,6 @@ from tfx_bsl.tfxio import tf_example_record from tfx_bsl.tfxio import tf_sequence_example_record from tfx_bsl.beam import run_inference -from tfx_bsl.beam import run_inference_arrow from tfx_bsl.public.proto import model_spec_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_metadata.proto.v0 import schema_pb2 @@ -71,6 +70,7 @@ def RunInference( # pylint: disable=invalid-name schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = None if schema: tfxio = test_util.InMemoryTFExampleRecord( schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) @@ -79,9 +79,9 @@ def RunInference( # pylint: disable=invalid-name tensor_representations=tfxio.TensorRepresentations()) return (examples - | 'ParseExamples' >> beam.Map(tf.train.Example.SerializeToString) + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( inference_spec_type, data_type, tensor_adapter_config=tensor_adapter_config)) @@ -120,6 +120,7 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = None if schema: tfxio = test_util.InMemoryTFExampleRecord( schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) @@ -128,38 +129,8 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name tensor_representations=tfxio.TensorRepresentations()) return (examples - | 'ParseExamples' >> beam.Map(tf.train.Example.SerializeToString) + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( inference_spec_type, data_type, tensor_adapter_config=tensor_adapter_config)) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceOnRecordBatch( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None -) -> beam.pvalue.PCollection: - """Run inference with a model. - - There are two types of inference you can perform using this PTransform: - 1. In-process inference from a SavedModel instance. Used when - `saved_model_spec` field is set in `inference_spec_type`. - 2. Remote inference by using a service endpoint. Used when - `ai_platform_prediction_model_spec` field is set in - `inference_spec_type`. - - Args: - examples: A PCollection containing RecordBatch. - inference_spec_type: Model inference endpoint. - - Returns: - A PCollection containing prediction logs. 
- """ - - return ( - examples | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, tensor_adapter_config)) From f172fe379fb9ff7d6b3e9034d5842b347cb21373 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Fri, 24 Jul 2020 18:27:39 -0400 Subject: [PATCH 22/31] move private APIs and test them --- tfx_bsl/beam/run_inference.py | 115 +++++- tfx_bsl/beam/run_inference_record_batch.py | 57 --- tfx_bsl/beam/run_inference_test.py | 454 ++++++++++++++++++++- tfx_bsl/public/beam/run_inference.py | 59 +-- 4 files changed, 565 insertions(+), 120 deletions(-) delete mode 100644 tfx_bsl/beam/run_inference_record_batch.py diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index 320ac1da..075b0d07 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Run batch inference on saved model.""" +"""Run batch inference on saved model and private APIs of inference.""" from __future__ import absolute_import from __future__ import division @@ -46,7 +46,10 @@ from tfx_bsl.beam import bsl_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util +from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter +from tfx_bsl.tfxio import tf_example_record +from tfx_bsl.tfxio import tf_sequence_example_record from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ Tuple, Union, Optional @@ -59,6 +62,7 @@ from tensorflow_serving.apis import inference_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_serving.apis import regression_pb2 +from tensorflow_metadata.proto.v0 import schema_pb2 # TODO(b/131873699): Remove once 1.x support is dropped. # pylint: disable=g-import-not-at-top @@ -93,10 +97,115 @@ class OperationType(object): MULTIHEAD = 'MULTIHEAD' +@beam.ptransform_fn +@beam.typehints.with_input_types(tf.train.Example) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceOnExamples( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType, + schema: Optional[schema_pb2.Schema] = None +) -> beam.pvalue.PCollection: + """Run inference with a model. + + There are two types of inference you can perform using this PTransform: + 1. In-process inference from a SavedModel instance. Used when + `saved_model_spec` field is set in `inference_spec_type`. + 2. Remote inference by using a service endpoint. Used when + `ai_platform_prediction_model_spec` field is set in + `inference_spec_type`. + + TODO(b/131873699): Add support for the following features: + 1. Bytes as Input. + 2. PTable Input. + 3. Models as SideInput. + + Args: + examples: A PCollection containing examples. + inference_spec_type: Model inference endpoint. + Schema [optional]: required for models that requires + multi-tensor inputs. + + Returns: + A PCollection containing prediction logs. 
+ """ + + data_type = DataType.EXAMPLE + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + + tensor_adapter_config = None + if schema: + tfxio = test_util.InMemoryTFExampleRecord( + schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + return (examples + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> RunInferenceOnRecordBatch( + inference_spec_type, data_type, + tensor_adapter_config=tensor_adapter_config)) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(tf.train.SequenceExample) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType, + schema: Optional[schema_pb2.Schema] = None +) -> beam.pvalue.PCollection: + """Run inference with a model. + + There are two types of inference you can perform using this PTransform: + 1. In-process inference from a SavedModel instance. Used when + `saved_model_spec` field is set in `inference_spec_type`. + 2. Remote inference by using a service endpoint. Used when + `ai_platform_prediction_model_spec` field is set in + `inference_spec_type`. + + Args: + examples: A PCollection containing sequence examples. + inference_spec_type: Model inference endpoint. + Schema [optional]: required for models that requires + multi-tensor inputs. + + Returns: + A PCollection containing prediction logs. + """ + + data_type = DataType.SEQUENCEEXAMPLE + converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + + tensor_adapter_config = None + if schema: + tfxio = test_util.InMemoryTFExampleRecord( + schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + return (examples + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> RunInferenceOnRecordBatch( + inference_spec_type, data_type, + tensor_adapter_config=tensor_adapter_config)) + + @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceImpl( # pylint: disable=invalid-name +def RunInferenceOnRecordBatch( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, data_type: Text, tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None @@ -104,7 +213,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name """Implementation of RunInference API. Args: - examples: A PCollection containing RecordBatch of serialized examples. + examples: A PCollection containing RecordBatch of serialized examples and features. inference_spec_type: Model inference endpoint. tensor_adapter_config [Optional]: Tensor adapter config which specifies how to obtain tensors from the Arrow RecordBatch. 
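
For reference, a minimal sketch of how the relocated private API might be wired into a pipeline, following the test code later in this patch series. This is not part of the patch itself: the model path and example path are placeholders, `schema` stands in for a schema_pb2.Schema like the ones built in the tests, and the imports assume the bsl_constants and tfxio modules shown in the tests are available as named there. The tensor adapter config is only needed for multi-tensor models.

import apache_beam as beam

from tfx_bsl.beam import run_inference
from tfx_bsl.beam.bsl_constants import DataType
from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN
from tfx_bsl.public.proto import model_spec_pb2
from tfx_bsl.tfxio import tensor_adapter
from tfx_bsl.tfxio import test_util
from tfx_bsl.tfxio import tf_example_record

inference_spec_type = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/path/to/model'))  # placeholder path

# Converter that turns serialized tf.Examples into Arrow RecordBatches,
# keeping the raw record bytes in a dedicated column.
converter = tf_example_record.TFExampleBeamRecord(
    physical_format='inmem',
    telemetry_descriptors=[],
    raw_record_column_name=_RECORDBATCH_COLUMN)

# Only required for multi-tensor models: describes how to obtain dense
# tensors from RecordBatch columns. `schema` is a placeholder
# schema_pb2.Schema, e.g. the x/y schema built in the tests above.
tfxio = test_util.InMemoryTFExampleRecord(
    schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN)
tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
    arrow_schema=tfxio.ArrowSchema(),
    tensor_representations=tfxio.TensorRepresentations())

with beam.Pipeline() as pipeline:
  _ = (
      pipeline
      | 'ReadExamples' >> beam.io.ReadFromTFRecord('/path/to/examples')
      | 'ConvertToRecordBatch' >> converter.BeamSource()
      | 'RunInference' >> run_inference.RunInferenceOnRecordBatch(
          inference_spec_type, DataType.EXAMPLE,
          tensor_adapter_config=tensor_adapter_config))

The public RunInference wrapper (see the changes to tfx_bsl/public/beam/run_inference.py below) performs the same serialize-and-convert steps internally, so callers who start from tf.train.Example PCollections only pass the inference spec and an optional schema.
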
diff --git a/tfx_bsl/beam/run_inference_record_batch.py b/tfx_bsl/beam/run_inference_record_batch.py deleted file mode 100644 index ca4543ff..00000000 --- a/tfx_bsl/beam/run_inference_record_batch.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2019 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Lint as: python3 -"""Private API of inference.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import apache_beam as beam -import tensorflow as tf -import pyarrow as pa -from typing import Text, Optional -from tfx_bsl.beam import run_inference -from tfx_bsl.public.proto import model_spec_pb2 -from tensorflow_serving.apis import prediction_log_pb2 - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceOnRecordBatch( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType, data_type: Text - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None -) -> beam.pvalue.PCollection: - """Run inference with a model. - - There are two types of inference you can perform using this PTransform: - 1. In-process inference from a SavedModel instance. Used when - `saved_model_spec` field is set in `inference_spec_type`. - 2. Remote inference by using a service endpoint. Used when - `ai_platform_prediction_model_spec` field is set in - `inference_spec_type`. - - Args: - examples: A PCollection containing RecordBatch. - inference_spec_type: Model inference endpoint. - - Returns: - A PCollection containing prediction logs. 
- """ - - return ( - examples | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( - inference_spec_type, data_type, tensor_adapter_config)) diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index d3580788..94e218de 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -48,10 +48,10 @@ from tensorflow_metadata.proto.v0 import schema_pb2 -class RunInferenceArrowFixture(tf.test.TestCase): +class RunInferenceFixture(tf.test.TestCase): def setUp(self): - super(RunInferenceArrowFixture, self).setUp() + super(RunInferenceFixture, self).setUp() self._predict_examples = [ text_format.Parse( """ @@ -78,7 +78,438 @@ def _prepare_predict_examples(self, example_path): output_file.write(example.SerializeToString()) -class RunOfflineInferenceArrowTest(RunInferenceArrowFixture): +class RunOfflineInferenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceExamplesTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path, include_schema = False): + schema = None + if include_schema: + schema = self.schema + + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceOnExamples( + inference_spec_type, schema=schema) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = 
tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_schema = True) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + +class RunRemoteInferenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunRemoteInferenceExamplesTest, self).setUp() + self.example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self.example_path) + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
+ self._discovery_testdata_dir = os.path.join( + os.path.join(os.path.dirname(__file__), 'testdata'), + 'ml_discovery.json') + + @staticmethod + def _make_response_body(content, successful): + if successful: + response_dict = {'predictions': content} + else: + response_dict = {'error': content} + return json.dumps(response_dict) + + def _set_up_pipeline(self, inference_spec_type): + self.pipeline = beam.Pipeline() + self.pcoll = ( + self.pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceOnExamples(inference_spec_type)) + + def _run_inference_with_beam(self): + self.pipeline_result = self.pipeline.run() + self.pipeline_result.wait_until_finish() + + def test_model_predict(self): + predictions = [{'output_1': [0.901], 'output_2': [0.997]}] + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(predictions, successful=True)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.predict_log.response.outputs['output_1'].CopyFrom( + tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) + prediction_log.predict_log.response.outputs['output_2'].CopyFrom( + tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + + self._set_up_pipeline(inference_spec_type) + assert_that(self.pcoll, equal_to([prediction_log])) + self._run_inference_with_beam() + + +class RunOfflineInferenceSequenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceSequenceExamplesTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _run_inference_with_beam(self, 
example_path, inference_spec_type, + prediction_log_path, include_schema = False): + schema = None + if include_schema: + schema = self.schema + + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) + | 'RunInference' >> run_inference.RunInferenceOnSequenceExamples( + inference_spec_type, schema=schema) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_schema = True) + + results = self._get_results(prediction_log_path) 
+ self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + +class RunOfflineInferenceArrowTest(RunInferenceFixture): def setUp(self): super(RunOfflineInferenceArrowTest, self).setUp() @@ -251,6 +682,7 @@ def _build_multihead_model(self, model_path): def _run_inference_with_beam(self, example_path, inference_spec_type, prediction_log_path, include_config = False): + # test RunInferenceOnRecordBatch converter = tf_example_record.TFExampleBeamRecord( physical_format="inmem", telemetry_descriptors=[], @@ -268,8 +700,8 @@ def _run_inference_with_beam(self, example_path, inference_spec_type, pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -279,7 +711,7 @@ def _run_inference_with_beam(self, example_path, inference_spec_type, pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceImpl( + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, @@ -488,7 +920,7 @@ def testKerasModelPredictMultiTensor(self): model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)), - prediction_log_path, include_config = True) + prediction_log_path, include_config = True) results = self._get_results(prediction_log_path) self.assertLen(results, 2) @@ -545,7 +977,7 @@ def testTelemetry(self): pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceImpl( + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) run_result = pipeline.run() run_result.wait_until_finish() @@ -581,7 +1013,7 @@ def testTelemetry(self): load_model_latency_milli_secs['distributions'][0].result.sum, 0) -class RunRemoteInferenceArrowTest(RunInferenceArrowFixture): +class RunRemoteInferenceArrowTest(RunInferenceFixture): def setUp(self): super(RunRemoteInferenceArrowTest, self).setUp() @@ -610,7 +1042,7 @@ def _set_up_pipeline(self, inference_spec_type): self.pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceImpl( + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) def _run_inference_with_beam(self): @@ -730,7 +1162,7 @@ def test_can_format_requests(self): | 'CreateExamples' >> beam.Create([example]) | 'ParseExamples' >> beam.Map(lambda x: x.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceImpl( + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) self._run_inference_with_beam() diff --git a/tfx_bsl/public/beam/run_inference.py 
b/tfx_bsl/public/beam/run_inference.py index 58633f95..8e173d5d 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -22,20 +22,12 @@ import apache_beam as beam import tensorflow as tf import pyarrow as pa -from typing import Union, Text, Optional -from tfx_bsl.tfxio import test_util -from tfx_bsl.tfxio import tensor_adapter -from tfx_bsl.tfxio import tf_example_record -from tfx_bsl.tfxio import tf_sequence_example_record +from typing import Text, Optional from tfx_bsl.beam import run_inference from tfx_bsl.public.proto import model_spec_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_metadata.proto.v0 import schema_pb2 -from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN -from tfx_bsl.beam.bsl_constants import DataType - - @beam.ptransform_fn @beam.typehints.with_input_types(tf.train.Example) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) @@ -53,6 +45,11 @@ def RunInference( # pylint: disable=invalid-name `ai_platform_prediction_model_spec` field is set in `inference_spec_type`. + TODO(b/131873699): Add support for the following features: + 1. Bytes as Input. + 2. PTable Input. + 3. Models as SideInput. + Args: examples: A PCollection containing examples. inference_spec_type: Model inference endpoint. @@ -63,27 +60,9 @@ def RunInference( # pylint: disable=invalid-name A PCollection containing prediction logs. """ - data_type = DataType.EXAMPLE - converter = tf_example_record.TFExampleBeamRecord( - physical_format="inmem", - telemetry_descriptors=[], - schema=schema, - raw_record_column_name=_RECORDBATCH_COLUMN) - - tensor_adapter_config = None - if schema: - tfxio = test_util.InMemoryTFExampleRecord( - schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) - tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) - return (examples - | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) - | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( - inference_spec_type, data_type, - tensor_adapter_config=tensor_adapter_config)) + | 'RunInferenceOnExamples' >> run_inference.RunInferenceOnExamples( + inference_spec_type, schema=schema)) @beam.ptransform_fn @@ -113,24 +92,6 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name A PCollection containing prediction logs. 
""" - data_type = DataType.SEQUENCEEXAMPLE - converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( - physical_format="inmem", - telemetry_descriptors=[], - schema=schema, - raw_record_column_name=_RECORDBATCH_COLUMN) - - tensor_adapter_config = None - if schema: - tfxio = test_util.InMemoryTFExampleRecord( - schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) - tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) - return (examples - | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) - | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( - inference_spec_type, data_type, - tensor_adapter_config=tensor_adapter_config)) + | 'RunInferenceOnSequenceExamples' >> run_inference.RunInferenceOnSequenceExamples( + inference_spec_type, schema=schema)) From daf394e0faaf189711ca1aa69f01711cb4ee712e Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 30 Jul 2020 13:47:42 -0400 Subject: [PATCH 23/31] fix test --- tfx_bsl/beam/bsl_util.py | 9 +- tfx_bsl/beam/run_inference_test.py | 1864 ++++++++++++++-------------- 2 files changed, 934 insertions(+), 939 deletions(-) diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index 496f9eb4..3bc8c624 100644 --- a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -51,13 +51,8 @@ def flatten(element: List[Any]): return [{'b64': base64.b64encode(value).decode()} for value in df[_RECORDBATCH_COLUMN]] else: as_binary = df.columns.str.endswith("_bytes") - # Handles the case where there is only one entry - if len(df) == 1: - df.loc[:, as_binary] = df.loc[:, as_binary].applymap( - lambda feature: [{'b64': base64.b64encode(feature).decode()}]) - else: - df.loc[:, as_binary] = df.loc[:, as_binary].applymap( - lambda feature: [{'b64': base64.b64encode(value).decode()} for value in feature]) + df.loc[:, as_binary] = df.loc[:, as_binary].applymap( + lambda feature: [{'b64': base64.b64encode(value).decode()} for value in feature]) if _RECORDBATCH_COLUMN in df.columns: df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 7dee9811..990e243e 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -79,939 +79,939 @@ def _prepare_predict_examples(self, example_path): output_file.write(example.SerializeToString()) -# class RunOfflineInferenceExamplesTest(RunInferenceFixture): - -# def setUp(self): -# super(RunOfflineInferenceExamplesTest, self).setUp() -# self._predict_examples = [ -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 0 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 1 }}} -# } -# """, tf.train.Example()), -# ] -# self._multihead_examples = [ -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.8 }}} -# feature {key: "y" value { float_list { value: 0.2 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.6 }}} -# feature {key: "y" value { float_list { value: 0.1 }}} -# } -# """, tf.train.Example()), -# ] - -# self.schema = text_format.Parse( -# """ -# tensor_representation_group { -# key: "" -# value { -# tensor_representation { -# key: "x" -# value { -# dense_tensor { -# 
column_name: "x" -# shape { dim { size: 1 } } -# } -# } -# } -# tensor_representation { -# key: "y" -# value { -# dense_tensor { -# column_name: "y" -# shape { dim { size: 1 } } -# } -# } -# } -# } -# } -# feature { -# name: "x" -# type: FLOAT -# } -# feature { -# name: "y" -# type: FLOAT -# } -# """, schema_pb2.Schema()) - -# def _prepare_multihead_examples(self, example_path): -# with tf.io.TFRecordWriter(example_path) as output_file: -# for example in self._multihead_examples: -# output_file.write(example.SerializeToString()) - -# def _run_inference_with_beam(self, example_path, inference_spec_type, -# prediction_log_path, include_schema = False): -# schema = None -# if include_schema: -# schema = self.schema - -# with beam.Pipeline() as pipeline: -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) -# | 'RunInference' >> run_inference.RunInferenceOnExamples( -# inference_spec_type, schema=schema) -# | 'WritePredictions' >> beam.io.WriteToTFRecord( -# prediction_log_path, -# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - -# def _get_results(self, prediction_log_path): -# results = [] -# for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): -# record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) -# for record_string in record_iterator: -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.MergeFromString(record_string) -# results.append(prediction_log) -# return results - - -# def testKerasModelPredict(self): -# inputs = tf.keras.Input(shape=(1,), name='input1') -# output1 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output1')( -# inputs) -# output2 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output2')( -# inputs) -# inference_model = tf.keras.models.Model(inputs, [output1, output2]) - -# class TestKerasModel(tf.keras.Model): - -# def __init__(self, inference_model): -# super(TestKerasModel, self).__init__(name='test_keras_model') -# self.inference_model = inference_model - -# @tf.function(input_signature=[ -# tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') -# ]) -# def call(self, serialized_example): -# features = { -# 'input1': -# tf.compat.v1.io.FixedLenFeature([1], -# dtype=tf.float32, -# default_value=0) -# } -# input_tensor_dict = tf.io.parse_example(serialized_example, features) -# return inference_model(input_tensor_dict['input1']) - -# model = TestKerasModel(inference_model) -# model.compile( -# optimizer=tf.keras.optimizers.Adam(lr=.001), -# loss=tf.keras.losses.binary_crossentropy, -# metrics=['accuracy']) - -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) - -# def testKerasModelPredictMultiTensor(self): -# input1 = tf.keras.layers.Input((1,), name='x') -# input2 = tf.keras.layers.Input((1,), name='y') - -# x1 = tf.keras.layers.Dense(10)(input1) -# x2 = tf.keras.layers.Dense(10)(input2) -# output = tf.keras.layers.Dense(5, name='output')(x2) - -# 
model = tf.keras.models.Model([input1, input2], output) -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), -# prediction_log_path, include_schema = True) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# for result in results: -# self.assertLen(result.predict_log.request.inputs, 2) -# self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) - - -# class RunRemoteInferenceExamplesTest(RunInferenceFixture): - -# def setUp(self): -# super(RunRemoteInferenceExamplesTest, self).setUp() -# self.example_path = self._get_output_data_dir('example') -# self._prepare_predict_examples(self.example_path) -# # This is from https://ml.googleapis.com/$discovery/rest?version=v1. -# self._discovery_testdata_dir = os.path.join( -# os.path.join(os.path.dirname(__file__), 'testdata'), -# 'ml_discovery.json') - -# @staticmethod -# def _make_response_body(content, successful): -# if successful: -# response_dict = {'predictions': content} -# else: -# response_dict = {'error': content} -# return json.dumps(response_dict) - -# def _set_up_pipeline(self, inference_spec_type): -# self.pipeline = beam.Pipeline() -# self.pcoll = ( -# self.pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) -# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) -# | 'RunInference' >> run_inference.RunInferenceOnExamples(inference_spec_type)) - -# def _run_inference_with_beam(self): -# self.pipeline_result = self.pipeline.run() -# self.pipeline_result.wait_until_finish() - -# def test_model_predict(self): -# predictions = [{'output_1': [0.901], 'output_2': [0.997]}] -# builder = http.RequestMockBuilder({ -# 'ml.projects.predict': -# (None, self._make_response_body(predictions, successful=True)) -# }) -# resource = discovery.build( -# 'ml', -# 'v1', -# http=http.HttpMock(self._discovery_testdata_dir, -# {'status': http_client.OK}), -# requestBuilder=builder) -# with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: -# response_mock.side_effect = lambda service, version: resource -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# ai_platform_prediction_model_spec=model_spec_pb2 -# .AIPlatformPredictionModelSpec( -# project_id='test-project', -# model_name='test-model', -# )) - -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.predict_log.response.outputs['output_1'].CopyFrom( -# tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) -# prediction_log.predict_log.response.outputs['output_2'].CopyFrom( -# tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) - -# self._set_up_pipeline(inference_spec_type) -# assert_that(self.pcoll, equal_to([prediction_log])) -# self._run_inference_with_beam() - - -# class RunOfflineInferenceSequenceExamplesTest(RunInferenceFixture): - -# def setUp(self): -# super(RunOfflineInferenceSequenceExamplesTest, self).setUp() -# self._predict_examples = [ -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 0 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 1 }}} -# } -# """, tf.train.Example()), -# ] -# self._multihead_examples = [ -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.8 }}} -# feature {key: "y" value { float_list { value: 0.2 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.6 }}} -# feature {key: "y" value { float_list { value: 0.1 }}} -# } -# """, tf.train.Example()), -# ] - -# self.schema = text_format.Parse( -# """ -# tensor_representation_group { -# key: "" -# value { -# tensor_representation { -# key: "x" -# value { -# dense_tensor { -# column_name: "x" -# shape { dim { size: 1 } } -# } -# } -# } -# tensor_representation { -# key: "y" -# value { -# dense_tensor { -# column_name: "y" -# shape { dim { size: 1 } } -# } -# } -# } -# } -# } -# feature { -# name: "x" -# type: FLOAT -# } -# feature { -# name: "y" -# type: FLOAT -# } -# """, schema_pb2.Schema()) - -# def _prepare_multihead_examples(self, example_path): -# with tf.io.TFRecordWriter(example_path) as output_file: -# for example in self._multihead_examples: -# output_file.write(example.SerializeToString()) - -# def _run_inference_with_beam(self, example_path, inference_spec_type, -# prediction_log_path, include_schema = False): -# schema = None -# if include_schema: -# schema = self.schema - -# with beam.Pipeline() as pipeline: -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) -# | 'RunInference' >> run_inference.RunInferenceOnSequenceExamples( -# inference_spec_type, schema=schema) -# | 'WritePredictions' >> beam.io.WriteToTFRecord( -# prediction_log_path, -# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - -# def _get_results(self, prediction_log_path): -# results = [] -# for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): -# record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) -# for record_string in record_iterator: -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.MergeFromString(record_string) -# results.append(prediction_log) -# return results - - -# def testKerasModelPredict(self): -# inputs = tf.keras.Input(shape=(1,), name='input1') -# output1 = tf.keras.layers.Dense( -# 1, 
activation=tf.nn.sigmoid, name='output1')( -# inputs) -# output2 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output2')( -# inputs) -# inference_model = tf.keras.models.Model(inputs, [output1, output2]) - -# class TestKerasModel(tf.keras.Model): - -# def __init__(self, inference_model): -# super(TestKerasModel, self).__init__(name='test_keras_model') -# self.inference_model = inference_model - -# @tf.function(input_signature=[ -# tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') -# ]) -# def call(self, serialized_example): -# features = { -# 'input1': -# tf.compat.v1.io.FixedLenFeature([1], -# dtype=tf.float32, -# default_value=0) -# } -# input_tensor_dict = tf.io.parse_example(serialized_example, features) -# return inference_model(input_tensor_dict['input1']) - -# model = TestKerasModel(inference_model) -# model.compile( -# optimizer=tf.keras.optimizers.Adam(lr=.001), -# loss=tf.keras.losses.binary_crossentropy, -# metrics=['accuracy']) - -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) - -# def testKerasModelPredictMultiTensor(self): -# input1 = tf.keras.layers.Input((1,), name='x') -# input2 = tf.keras.layers.Input((1,), name='y') - -# x1 = tf.keras.layers.Dense(10)(input1) -# x2 = tf.keras.layers.Dense(10)(input2) -# output = tf.keras.layers.Dense(5, name='output')(x2) - -# model = tf.keras.models.Model([input1, input2], output) -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), -# prediction_log_path, include_schema = True) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# for result in results: -# self.assertLen(result.predict_log.request.inputs, 2) -# self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) - - -# class RunOfflineInferenceArrowTest(RunInferenceFixture): - -# def setUp(self): -# super(RunOfflineInferenceArrowTest, self).setUp() -# self._predict_examples = [ -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 0 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 1 }}} -# } -# """, tf.train.Example()), -# ] - -# self._multihead_examples = [ -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.8 }}} -# feature {key: "y" value { float_list { value: 0.2 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.6 }}} -# feature {key: "y" value { float_list { value: 0.1 }}} -# } -# """, 
tf.train.Example()), -# ] - -# self.schema = text_format.Parse( -# """ -# tensor_representation_group { -# key: "" -# value { -# tensor_representation { -# key: "x" -# value { -# dense_tensor { -# column_name: "x" -# shape { dim { size: 1 } } -# } -# } -# } -# tensor_representation { -# key: "y" -# value { -# dense_tensor { -# column_name: "y" -# shape { dim { size: 1 } } -# } -# } -# } -# } -# } -# feature { -# name: "x" -# type: FLOAT -# } -# feature { -# name: "y" -# type: FLOAT -# } -# """, schema_pb2.Schema()) - -# def _prepare_multihead_examples(self, example_path): -# with tf.io.TFRecordWriter(example_path) as output_file: -# for example in self._multihead_examples: -# output_file.write(example.SerializeToString()) - - -# def _build_predict_model(self, model_path): -# """Exports the dummy sum predict model.""" - -# with tf.compat.v1.Graph().as_default(): -# input_tensors = { -# 'x': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, default_value=0) -# } -# serving_receiver = ( -# tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( -# input_tensors)()) -# output_tensors = {'y': serving_receiver.features['x'] * 2} -# sess = tf.compat.v1.Session() -# sess.run(tf.compat.v1.initializers.global_variables()) -# signature_def = tf.compat.v1.estimator.export.PredictOutput( -# output_tensors).as_signature_def(serving_receiver.receiver_tensors) -# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) -# builder.add_meta_graph_and_variables( -# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], -# signature_def_map={ -# tf.compat.v1.saved_model.signature_constants -# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: -# signature_def, -# }) -# builder.save() - -# def _build_regression_signature(self, input_tensor, output_tensor): -# """Helper function for building a regression SignatureDef.""" -# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# input_tensor) -# signature_inputs = { -# tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: -# input_tensor_info -# } -# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# output_tensor) -# signature_outputs = { -# tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: -# output_tensor_info -# } -# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( -# signature_inputs, signature_outputs, -# tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) - -# def _build_classification_signature(self, input_tensor, scores_tensor): -# """Helper function for building a classification SignatureDef.""" -# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# input_tensor) -# signature_inputs = { -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: -# input_tensor_info -# } -# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# scores_tensor) -# signature_outputs = { -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: -# output_tensor_info -# } -# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( -# signature_inputs, signature_outputs, -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) - -# def _build_multihead_model(self, model_path): -# with tf.compat.v1.Graph().as_default(): -# input_example = tf.compat.v1.placeholder( -# tf.string, name='input_examples_tensor') -# config = { -# 'x': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, default_value=0), -# 'y': tf.compat.v1.io.FixedLenFeature( -# [1], 
dtype=tf.float32, default_value=0), -# } -# features = tf.compat.v1.parse_example(input_example, config) -# x = features['x'] -# y = features['y'] -# sum_pred = x + y -# diff_pred = tf.abs(x - y) -# sess = tf.compat.v1.Session() -# sess.run(tf.compat.v1.initializers.global_variables()) -# signature_def_map = { -# 'regress_diff': -# self._build_regression_signature(input_example, diff_pred), -# 'classify_sum': -# self._build_classification_signature(input_example, sum_pred), -# tf.compat.v1.saved_model.signature_constants -# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: -# self._build_regression_signature(input_example, sum_pred) -# } -# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) -# builder.add_meta_graph_and_variables( -# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], -# signature_def_map=signature_def_map) -# builder.save() - -# def _run_inference_with_beam(self, example_path, inference_spec_type, -# prediction_log_path, include_config = False): -# # test RunInferenceOnRecordBatch -# converter = tf_example_record.TFExampleBeamRecord( -# physical_format="inmem", -# telemetry_descriptors=[], -# raw_record_column_name=_RECORDBATCH_COLUMN) - -# if include_config: -# tfxio = test_util.InMemoryTFExampleRecord( -# schema=self.schema, raw_record_column_name=_RECORDBATCH_COLUMN) -# tensor_adapter_config = tensor_adapter.TensorAdapterConfig( -# arrow_schema=tfxio.ArrowSchema(), -# tensor_representations=tfxio.TensorRepresentations()) - -# with beam.Pipeline() as pipeline: -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ConvertToRecordBatch' >> converter.BeamSource() -# | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( -# inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) -# | 'WritePredictions' >> beam.io.WriteToTFRecord( -# prediction_log_path, -# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) -# else: -# with beam.Pipeline() as pipeline: -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ConvertToRecordBatch' >> converter.BeamSource() -# | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( -# inference_spec_type, DataType.EXAMPLE) -# | 'WritePredictions' >> beam.io.WriteToTFRecord( -# prediction_log_path, -# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - -# def _get_results(self, prediction_log_path): -# results = [] -# for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): -# record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) -# for record_string in record_iterator: -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.MergeFromString(record_string) -# results.append(prediction_log) -# return results - -# def testModelPathInvalid(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=self._get_output_data_dir())), prediction_log_path) - -# def testEstimatorModelPredict(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_predict_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# 
self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# self.assertEqual( -# results[0].predict_log.request.inputs[ -# run_inference._DEFAULT_INPUT_KEY].string_val[0], -# self._predict_examples[0].SerializeToString()) -# self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, -# tf.float32) -# self.assertLen( -# results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) -# self.assertEqual( -# results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, -# 1) -# self.assertEqual( -# results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, -# 1) - -# def testClassifyModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['classify_sum'])), -# prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# classify_log = results[0].classify_log -# self.assertLen(classify_log.request.input.example_list.examples, 1) -# self.assertEqual(classify_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(classify_log.response.result.classifications, 1) -# self.assertLen(classify_log.response.result.classifications[0].classes, 1) -# self.assertAlmostEqual( -# classify_log.response.result.classifications[0].classes[0].score, 1.0) - -# def testRegressModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['regress_diff'])), -# prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# regress_log = results[0].regress_log -# self.assertLen(regress_log.request.input.example_list.examples, 1) -# self.assertEqual(regress_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(regress_log.response.result.regressions, 1) -# self.assertAlmostEqual(regress_log.response.result.regressions[0].value, -# 0.6) - -# def testMultiInferenceModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, -# signature_name=['regress_diff', 'classify_sum'])), -# prediction_log_path) -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# multi_inference_log = results[0].multi_inference_log -# 
self.assertLen(multi_inference_log.request.input.example_list.examples, 1) -# self.assertEqual(multi_inference_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(multi_inference_log.response.results, 2) -# signature_names = [] -# for result in multi_inference_log.response.results: -# signature_names.append(result.model_spec.signature_name) -# self.assertIn('regress_diff', signature_names) -# self.assertIn('classify_sum', signature_names) -# result = multi_inference_log.response.results[0] -# self.assertEqual(result.model_spec.signature_name, 'regress_diff') -# self.assertLen(result.regression_result.regressions, 1) -# self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) -# result = multi_inference_log.response.results[1] -# self.assertEqual(result.model_spec.signature_name, 'classify_sum') -# self.assertLen(result.classification_result.classifications, 1) -# self.assertLen(result.classification_result.classifications[0].classes, 1) -# self.assertAlmostEqual( -# result.classification_result.classifications[0].classes[0].score, 1.0) - -# def testKerasModelPredict(self): -# inputs = tf.keras.Input(shape=(1,), name='input1') -# output1 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output1')( -# inputs) -# output2 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output2')( -# inputs) -# inference_model = tf.keras.models.Model(inputs, [output1, output2]) - -# class TestKerasModel(tf.keras.Model): -# def __init__(self, inference_model): -# super(TestKerasModel, self).__init__(name='test_keras_model') -# self.inference_model = inference_model - -# @tf.function(input_signature=[ -# tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') -# ]) -# def call(self, serialized_example): -# features = { -# 'input1': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, -# default_value=0) -# } -# input_tensor_dict = tf.io.parse_example(serialized_example, features) -# return inference_model(input_tensor_dict['input1']) - -# model = TestKerasModel(inference_model) -# model.compile( -# optimizer=tf.keras.optimizers.Adam(lr=.001), -# loss=tf.keras.losses.binary_crossentropy, -# metrics=['accuracy']) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) - -# def testKerasModelPredictMultiTensor(self): -# input1 = tf.keras.layers.Input((1,), name='x') -# input2 = tf.keras.layers.Input((1,), name='y') - -# x1 = tf.keras.layers.Dense(10)(input1) -# x2 = tf.keras.layers.Dense(10)(input2) -# output = tf.keras.layers.Dense(5, name='output')(x2) - -# model = tf.keras.models.Model([input1, input2], output) -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# 
model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), -# prediction_log_path, include_config = True) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# for result in results: -# self.assertLen(result.predict_log.request.inputs, 2) -# self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) - -# def testMultiTensorError(self): -# input1 = tf.keras.layers.Input((1,), name='x') -# input2 = tf.keras.layers.Input((1,), name='y') - -# x1 = tf.keras.layers.Dense(10)(input1) -# x2 = tf.keras.layers.Dense(10)(input2) -# output = tf.keras.layers.Dense(5, name='output')(x2) - -# model = tf.keras.models.Model([input1, input2], output) -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') - -# error_msg = 'Tensor adaptor config is required with a multi-input model' -# try: -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), -# prediction_log_path, include_config = False) -# except ValueError as exc: -# actual_error_msg = str(exc) -# self.assertTrue(actual_error_msg.startswith(error_msg)) -# else: -# self.fail('Test was expected to throw ValueError exception') - -# def testTelemetry(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['classify_sum'])) +class RunOfflineInferenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceExamplesTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _run_inference_with_beam(self, example_path, inference_spec_type, + 
prediction_log_path, include_schema = False): + schema = None + if include_schema: + schema = self.schema + + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceOnExamples( + inference_spec_type, schema=schema) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_schema = True) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + for result in results: 
+ self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + +class RunRemoteInferenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunRemoteInferenceExamplesTest, self).setUp() + self.example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self.example_path) + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. + self._discovery_testdata_dir = os.path.join( + os.path.join(os.path.dirname(__file__), 'testdata'), + 'ml_discovery.json') + + @staticmethod + def _make_response_body(content, successful): + if successful: + response_dict = {'predictions': content} + else: + response_dict = {'error': content} + return json.dumps(response_dict) + + def _set_up_pipeline(self, inference_spec_type): + self.pipeline = beam.Pipeline() + self.pcoll = ( + self.pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceOnExamples(inference_spec_type)) + + def _run_inference_with_beam(self): + self.pipeline_result = self.pipeline.run() + self.pipeline_result.wait_until_finish() + + def test_model_predict(self): + predictions = [{'output_1': [0.901], 'output_2': [0.997]}] + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(predictions, successful=True)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.predict_log.response.outputs['output_1'].CopyFrom( + tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) + prediction_log.predict_log.response.outputs['output_2'].CopyFrom( + tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + + self._set_up_pipeline(inference_spec_type) + assert_that(self.pcoll, equal_to([prediction_log])) + self._run_inference_with_beam() + + +class RunOfflineInferenceSequenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceSequenceExamplesTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + 
value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path, include_schema = False): + schema = None + if include_schema: + schema = self.schema + + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) + | 'RunInference' >> run_inference.RunInferenceOnSequenceExamples( + inference_spec_type, schema=schema) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, 
serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_schema = True) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + +class RunOfflineInferenceArrowTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceArrowTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + + def _build_predict_model(self, model_path): + """Exports the dummy sum predict model.""" + + with tf.compat.v1.Graph().as_default(): + input_tensors = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0) + } + serving_receiver = ( + tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( + input_tensors)()) + output_tensors = {'y': serving_receiver.features['x'] * 2} + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def = tf.compat.v1.estimator.export.PredictOutput( + output_tensors).as_signature_def(serving_receiver.receiver_tensors) + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature_def, + }) + builder.save() + + def _build_regression_signature(self, input_tensor, output_tensor): + """Helper function for building a regression SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + output_tensor) + signature_outputs = 
{ + tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) + + def _build_classification_signature(self, input_tensor, scores_tensor): + """Helper function for building a classification SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + scores_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + + def _build_multihead_model(self, model_path): + with tf.compat.v1.Graph().as_default(): + input_example = tf.compat.v1.placeholder( + tf.string, name='input_examples_tensor') + config = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + 'y': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + } + features = tf.compat.v1.parse_example(input_example, config) + x = features['x'] + y = features['y'] + sum_pred = x + y + diff_pred = tf.abs(x - y) + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def_map = { + 'regress_diff': + self._build_regression_signature(input_example, diff_pred), + 'classify_sum': + self._build_classification_signature(input_example, sum_pred), + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + self._build_regression_signature(input_example, sum_pred) + } + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map) + builder.save() + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path, include_config = False): + # test RunInferenceOnRecordBatch + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) + + if include_config: + tfxio = test_util.InMemoryTFExampleRecord( + schema=self.schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + else: + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + 
prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + def testModelPathInvalid(self): + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=self._get_output_data_dir())), prediction_log_path) + + def testEstimatorModelPredict(self): + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_predict_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + self.assertEqual( + results[0].predict_log.request.inputs[ + run_inference._DEFAULT_INPUT_KEY].string_val[0], + self._predict_examples[0].SerializeToString()) + self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, + tf.float32) + self.assertLen( + results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, + 1) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, + 1) + + def testClassifyModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + classify_log = results[0].classify_log + self.assertLen(classify_log.request.input.example_list.examples, 1) + self.assertEqual(classify_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(classify_log.response.result.classifications, 1) + self.assertLen(classify_log.response.result.classifications[0].classes, 1) + self.assertAlmostEqual( + classify_log.response.result.classifications[0].classes[0].score, 1.0) + + def testRegressModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, 
signature_name=['regress_diff'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + regress_log = results[0].regress_log + self.assertLen(regress_log.request.input.example_list.examples, 1) + self.assertEqual(regress_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(regress_log.response.result.regressions, 1) + self.assertAlmostEqual(regress_log.response.result.regressions[0].value, + 0.6) + + def testMultiInferenceModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, + signature_name=['regress_diff', 'classify_sum'])), + prediction_log_path) + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + multi_inference_log = results[0].multi_inference_log + self.assertLen(multi_inference_log.request.input.example_list.examples, 1) + self.assertEqual(multi_inference_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(multi_inference_log.response.results, 2) + signature_names = [] + for result in multi_inference_log.response.results: + signature_names.append(result.model_spec.signature_name) + self.assertIn('regress_diff', signature_names) + self.assertIn('classify_sum', signature_names) + result = multi_inference_log.response.results[0] + self.assertEqual(result.model_spec.signature_name, 'regress_diff') + self.assertLen(result.regression_result.regressions, 1) + self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) + result = multi_inference_log.response.results[1] + self.assertEqual(result.model_spec.signature_name, 'classify_sum') + self.assertLen(result.classification_result.classifications, 1) + self.assertLen(result.classification_result.classifications[0].classes, 1) + self.assertAlmostEqual( + result.classification_result.classifications[0].classes[0].score, 1.0) + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + 
model, model_path, serving_only=True) + + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_config = True) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + def testMultiTensorError(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + + error_msg = 'Tensor adaptor config is required with a multi-input model' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_config = False) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def testTelemetry(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + inference_spec_type = model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])) -# pipeline = beam.Pipeline() -# converter = tf_example_record.TFExampleBeamRecord( -# physical_format="inmem", -# telemetry_descriptors=[], -# raw_record_column_name=_RECORDBATCH_COLUMN) -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ConvertToRecordBatch' >> converter.BeamSource() -# | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( -# inference_spec_type, DataType.EXAMPLE)) -# run_result = pipeline.run() -# run_result.wait_until_finish() - -# num_inferences = run_result.metrics().query( -# 
MetricsFilter().with_name('num_inferences')) -# self.assertTrue(num_inferences['counters']) -# self.assertEqual(num_inferences['counters'][0].result, 2) -# num_instances = run_result.metrics().query( -# MetricsFilter().with_name('num_instances')) -# self.assertTrue(num_instances['counters']) -# self.assertEqual(num_instances['counters'][0].result, 2) -# inference_request_batch_size = run_result.metrics().query( -# MetricsFilter().with_name('inference_request_batch_size')) -# self.assertTrue(inference_request_batch_size['distributions']) -# self.assertEqual( -# inference_request_batch_size['distributions'][0].result.sum, 2) -# inference_request_batch_byte_size = run_result.metrics().query( -# MetricsFilter().with_name('inference_request_batch_byte_size')) -# self.assertTrue(inference_request_batch_byte_size['distributions']) -# self.assertEqual( -# inference_request_batch_byte_size['distributions'][0].result.sum, -# sum(element.ByteSize() for element in self._multihead_examples)) -# inference_batch_latency_micro_secs = run_result.metrics().query( -# MetricsFilter().with_name('inference_batch_latency_micro_secs')) -# self.assertTrue(inference_batch_latency_micro_secs['distributions']) -# self.assertGreaterEqual( -# inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) -# load_model_latency_milli_secs = run_result.metrics().query( -# MetricsFilter().with_name('load_model_latency_milli_secs')) -# self.assertTrue(load_model_latency_milli_secs['distributions']) -# self.assertGreaterEqual( -# load_model_latency_milli_secs['distributions'][0].result.sum, 0) + pipeline = beam.Pipeline() + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE)) + run_result = pipeline.run() + run_result.wait_until_finish() + + num_inferences = run_result.metrics().query( + MetricsFilter().with_name('num_inferences')) + self.assertTrue(num_inferences['counters']) + self.assertEqual(num_inferences['counters'][0].result, 2) + num_instances = run_result.metrics().query( + MetricsFilter().with_name('num_instances')) + self.assertTrue(num_instances['counters']) + self.assertEqual(num_instances['counters'][0].result, 2) + inference_request_batch_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_size')) + self.assertTrue(inference_request_batch_size['distributions']) + self.assertEqual( + inference_request_batch_size['distributions'][0].result.sum, 2) + inference_request_batch_byte_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_byte_size')) + self.assertTrue(inference_request_batch_byte_size['distributions']) + self.assertEqual( + inference_request_batch_byte_size['distributions'][0].result.sum, + sum(element.ByteSize() for element in self._multihead_examples)) + inference_batch_latency_micro_secs = run_result.metrics().query( + MetricsFilter().with_name('inference_batch_latency_micro_secs')) + self.assertTrue(inference_batch_latency_micro_secs['distributions']) + self.assertGreaterEqual( + inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) + load_model_latency_milli_secs = run_result.metrics().query( + MetricsFilter().with_name('load_model_latency_milli_secs')) + 
self.assertTrue(load_model_latency_milli_secs['distributions']) + self.assertGreaterEqual( + load_model_latency_milli_secs['distributions'][0].result.sum, 0) class RunRemoteInferenceArrowTest(RunInferenceFixture): From 9bc26b443b351017267ad1bdaab8792ede0cc5e9 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 30 Jul 2020 13:49:09 -0400 Subject: [PATCH 24/31] Delete temp test --- tfx_bsl/public/beam/test_api.py | 251 -------------------------------- 1 file changed, 251 deletions(-) delete mode 100644 tfx_bsl/public/beam/test_api.py diff --git a/tfx_bsl/public/beam/test_api.py b/tfx_bsl/public/beam/test_api.py deleted file mode 100644 index 89646148..00000000 --- a/tfx_bsl/public/beam/test_api.py +++ /dev/null @@ -1,251 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import json -import os -try: - import unittest.mock as mock -except ImportError: - import mock - -import apache_beam as beam -import pyarrow as pa -from apache_beam.metrics.metric import MetricsFilter -from apache_beam.testing.util import assert_that -from apache_beam.testing.util import equal_to -from googleapiclient import discovery -from googleapiclient import http -from six.moves import http_client -import tensorflow as tf -from tfx_bsl.beam import bsl_util -from tfx_bsl.public.beam import run_inference -from tfx_bsl.beam.bsl_constants import DataType -from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN -from tfx_bsl.public.proto import model_spec_pb2 -from tfx_bsl.tfxio import test_util -from tfx_bsl.tfxio import tensor_adapter -from tfx_bsl.tfxio import tf_example_record - -from google.protobuf import text_format -from tensorflow_serving.apis import prediction_log_pb2 -from tensorflow_metadata.proto.v0 import schema_pb2 - - -class RunInferenceFixture(tf.test.TestCase): - - def setUp(self): - super(RunInferenceFixture, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - ] - - def _get_output_data_dir(self, sub_dir=None): - test_dir = self._testMethodName - path = os.path.join( - os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), - test_dir) - if not tf.io.gfile.exists(path): - tf.io.gfile.makedirs(path) - if sub_dir is not None: - path = os.path.join(path, sub_dir) - return path - - def _prepare_predict_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._predict_examples: - output_file.write(example.SerializeToString()) - - -class RunOfflineInferenceExamplesTest(RunInferenceFixture): - - def setUp(self): - super(RunOfflineInferenceExamplesTest, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, tf.train.Example()), - ] - self._multihead_examples = [ - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.8 }}} - feature {key: "y" value { float_list { value: 0.2 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.6 }}} - feature {key: "y" value { float_list { value: 0.1 }}} - } - """, tf.train.Example()), - ] - - self.schema = text_format.Parse( - """ - 
tensor_representation_group { - key: "" - value { - tensor_representation { - key: "x" - value { - dense_tensor { - column_name: "x" - shape { dim { size: 1 } } - } - } - } - tensor_representation { - key: "y" - value { - dense_tensor { - column_name: "y" - shape { dim { size: 1 } } - } - } - } - } - } - feature { - name: "x" - type: FLOAT - } - feature { - name: "y" - type: FLOAT - } - """, schema_pb2.Schema()) - - def _prepare_multihead_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._multihead_examples: - output_file.write(example.SerializeToString()) - - def _run_inference_with_beam(self, example_path, inference_spec_type, - prediction_log_path, include_schema = False): - schema = None - if include_schema: - schema = self.schema - - with beam.Pipeline() as pipeline: - _ = ( - pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInference( - inference_spec_type, schema=schema) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - - def _get_results(self, prediction_log_path): - results = [] - for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): - record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) - for record_string in record_iterator: - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.MergeFromString(record_string) - results.append(prediction_log) - return results - - - def testKerasModelPredict(self): - inputs = tf.keras.Input(shape=(1,), name='input1') - output1 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output1')( - inputs) - output2 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output2')( - inputs) - inference_model = tf.keras.models.Model(inputs, [output1, output2]) - - class TestKerasModel(tf.keras.Model): - - def __init__(self, inference_model): - super(TestKerasModel, self).__init__(name='test_keras_model') - self.inference_model = inference_model - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') - ]) - def call(self, serialized_example): - features = { - 'input1': - tf.compat.v1.io.FixedLenFeature([1], - dtype=tf.float32, - default_value=0) - } - input_tensor_dict = tf.io.parse_example(serialized_example, features) - return inference_model(input_tensor_dict['input1']) - - model = TestKerasModel(inference_model) - model.compile( - optimizer=tf.keras.optimizers.Adam(lr=.001), - loss=tf.keras.losses.binary_crossentropy, - metrics=['accuracy']) - - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - - def testKerasModelPredictMultiTensor(self): - input1 = tf.keras.layers.Input((1,), name='x') - input2 = tf.keras.layers.Input((1,), name='y') - - x1 = tf.keras.layers.Dense(10)(input1) - x2 = tf.keras.layers.Dense(10)(input2) - output = tf.keras.layers.Dense(5, name='output')(x2) 
- - model = tf.keras.models.Model([input1, input2], output) - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), - prediction_log_path, include_schema = True) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - for result in results: - self.assertLen(result.predict_log.request.inputs, 2) - self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) - - -if __name__ == '__main__': - tf.test.main() From 25b8631e12e1b0e7762934258f25132372f68210 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 30 Jul 2020 18:17:40 -0400 Subject: [PATCH 25/31] add test for serialized example --- tfx_bsl/beam/run_inference_test.py | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 990e243e..192bd58f 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -1199,6 +1199,40 @@ def test_request_body_with_binary_data(self): }, ], result) + def test_request_serialized_example(self): + example = text_format.Parse( + """ + features { + feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} + feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} + feature { key: "y" value { int64_list { value: [1, 2] }}} + } + """, tf.train.Example()) + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test_project', + model_name='test_model', + version_name='test_version', + use_serialization_config=True)) + + serialized_example_remote = [example.SerializeToString()] + record_batch_remote = pa.RecordBatch.from_arrays( + [ + pa.array([["ASa8asdf"]], type=pa.list_(pa.binary())), + pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), + pa.array([[1, 2]], type=pa.list_(pa.int32())), + pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())), + serialized_example_remote + ], + ['x_bytes', 'x', 'y', 'z', _RECORDBATCH_COLUMN] + ) + + result = list(bsl_util.RecordToJSON(record_batch_remote, True)) + self.assertEqual(result, [{ + 'b64': base64.b64encode(example.SerializeToString()).decode() + }]) + if __name__ == '__main__': tf.test.main() From b2e66895af7a9c31dc336107426dbb98c39ccbe2 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Tue, 4 Aug 2020 17:00:42 -0400 Subject: [PATCH 26/31] address comments and fix post-process --- tfx_bsl/beam/bsl_constants.py | 2 +- tfx_bsl/beam/bsl_util.py | 44 +++++++--- tfx_bsl/beam/run_inference.py | 133 ++++++++++++++--------------- tfx_bsl/beam/run_inference_test.py | 31 +++---- 4 files changed, 110 insertions(+), 100 deletions(-) diff --git a/tfx_bsl/beam/bsl_constants.py b/tfx_bsl/beam/bsl_constants.py index caaba5aa..4f797b15 100644 --- a/tfx_bsl/beam/bsl_constants.py +++ b/tfx_bsl/beam/bsl_constants.py @@ -1,5 +1,5 @@ _RECORDBATCH_COLUMN = '__RAW_RECORD__' -KERAS_INPUT_SUFFIX = '_input' +_KERAS_INPUT_SUFFIX = '_input' class DataType(object): EXAMPLE = 'EXAMPLE' diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index 3bc8c624..3d2a7929 100644 --- 
a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -27,9 +27,27 @@ import typing from typing import Dict, List, Text, Any, Set, Optional from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN -from tfx_bsl.beam.bsl_constants import KERAS_INPUT_SUFFIX +from tfx_bsl.beam.bsl_constants import _KERAS_INPUT_SUFFIX +def ExtractSerializedExampleFromRecordBatch(elements: pa.RecordBatch) -> List[Text]: + serialized_examples = None + for column_name, column_array in zip(elements.schema.names, elements.columns): + if column_name == _RECORDBATCH_COLUMN: + column_type = column_array.flatten().type + if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)): + raise ValueError( + 'Expected a list of serialized examples in bytes or as a string, got %s' % + type(example)) + serialized_examples = column_array.flatten().to_pylist() + break + + if (serialized_examples is None): + raise ValueError('Raw examples not found.') + + return serialized_examples + + def RecordToJSON(record_batch: pa.RecordBatch, prepare_instances_serialized) -> List[Text]: """Returns a JSON string translated from `record_batch`. @@ -41,6 +59,9 @@ def RecordToJSON(record_batch: pa.RecordBatch, prepare_instances_serialized) -> Args: record_batch: input RecordBatch. """ + + # TODO (b/155912552): Handle this for sequence example. + def flatten(element: List[Any]): if len(element) == 1: return element[0] @@ -60,16 +81,16 @@ def flatten(element: List[Any]): return json.loads(df.to_json(orient='records')) -def find_input_name_in_features(features: Set[Text], - input_name: Text) -> Optional[Text]: +def _find_input_name_in_features(features: Set[Text], + input_name: Text) -> Optional[Text]: """Maps input name to an entry in features. Returns None if not found.""" if input_name in features: return input_name # Some keras models prepend '_input' to the names of the inputs # so try under '_input' as well. - elif (input_name.endswith(KERAS_INPUT_SUFFIX) and - input_name[:-len(KERAS_INPUT_SUFFIX)] in features): - return input_name[:-len(KERAS_INPUT_SUFFIX)] + elif (input_name.endswith(_KERAS_INPUT_SUFFIX) and + input_name[:-len(_KERAS_INPUT_SUFFIX)] in features): + return input_name[:-len(_KERAS_INPUT_SUFFIX)] return None @@ -93,13 +114,14 @@ def filter_tensors_by_input_names( return None result = {} tensor_keys = set(tensors.keys()) + + # The case where the model takes serialized examples as input. + if len(input_names) == 1 and _find_input_name_in_features(tensor_keys, input_names[0]): + return None + for name in input_names: - tensor_name = find_input_name_in_features(tensor_keys, name) + tensor_name = _find_input_name_in_features(tensor_keys, name) if tensor_name is None: - # This should happen only in the case where the model takes serialized - # examples as input. Else raise an exception. - if len(input_names) == 1: - return None raise RuntimeError( 'Input tensor not found: {}. 
Existing keys: {}.'.format( name, ','.join(tensors.keys()))) diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index 9011dd54..f3bdb5f4 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -301,7 +301,7 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name predictions = ( pcoll | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( - inference_spec_type, pcoll.pipeline.options, data_type, tensor_adapter_config))) + inference_spec_type, pcoll.pipeline.options, data_type))) return (predictions | 'BuildPredictionLogForPredictions' >> beam.ParDo( _BuildPredictionLogForPredictionsDoFn())) @@ -385,59 +385,24 @@ def update( def __init__( - self, inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): + self, inference_spec_type: model_spec_pb2.InferenceSpecType): super(_BaseDoFn, self).__init__() self._clock = None self.inference_spec_type = inference_spec_type self._metrics_collector = self._MetricsCollector(inference_spec_type) - self._tensor_adapter_config = tensor_adapter_config - self._io_tensor_spec = None # This value may be None if the model is remote def setup(self): self._clock = _ClockFactory.make_clock() + @abc.abstractmethod def _extract_from_recordBatch(self, elements: pa.RecordBatch): """ Function to extract the compatible input with model signature + return: + - serialized examples for metrics + - model input for processing and post processing """ - serialized_examples = None - for column_name, column_array in zip(elements.schema.names, elements.columns): - if column_name == _RECORDBATCH_COLUMN: - column_type = column_array.flatten().type - if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)): - raise ValueError( - 'Expected a list of serialized examples in bytes or as a string, got %s' % - type(example)) - serialized_examples = column_array.flatten().to_pylist() - break - - if (serialized_examples is None): - raise ValueError('Raw examples not found.') - - model_input = None - if self._io_tensor_spec is None: # Case when we are running remote inference - prepare_instances_serialized = ( - self.inference_spec_type.ai_platform_prediction_model_spec.use_serialization_config) - model_input = bsl_util.RecordToJSON(elements, prepare_instances_serialized) - elif (len(self._io_tensor_spec.input_tensor_names) == 1): - model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} - else: - if (self._tensor_adapter_config is None): - raise ValueError('Tensor adaptor config is required with a multi-input model') - - input_tensor_names = self._io_tensor_spec.input_tensor_names - input_tensor_alias = self._io_tensor_spec.input_tensor_alias - _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) - dict_of_tensors = _tensor_adapter.ToBatchTensors( - elements, produce_eager_tensors = False) - filtered_tensors = bsl_util.filter_tensors_by_input_names( - dict_of_tensors, input_tensor_alias) - - model_input = {} - for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): - model_input[tensor_name] = filtered_tensors[feature] - return serialized_examples, model_input + raise NotImplementedError def process(self, elements: pa.RecordBatch) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() @@ -507,9 +472,8 @@ class _RemotePredictDoFn(_BaseDoFn): """ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: 
PipelineOptions, data_type: Text, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - super(_RemotePredictDoFn, self).__init__(inference_spec_type, tensor_adapter_config) + pipeline_options: PipelineOptions, data_type: Text): + super(_RemotePredictDoFn, self).__init__(inference_spec_type) self._api_client = None self._data_type = data_type @@ -540,6 +504,13 @@ def setup(self): # user agent once custom header is supported in googleapiclient. self._api_client = discovery.build('ml', 'v1') + def _extract_from_recordBatch(self, elements: pa.RecordBatch): + serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) + prepare_instances_serialized = ( + self.inference_spec_type.ai_platform_prediction_model_spec.use_serialization_config) + model_input = bsl_util.RecordToJSON(elements, prepare_instances_serialized) + return serialized_examples, model_input + # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. @retry.with_exponential_backoff( initial_delay_secs=1.0, @@ -614,7 +585,7 @@ def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, shared_model_handle: shared.Shared, data_type, tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, tensor_adapter_config) + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle self._model_path = inference_spec_type.saved_model_spec.model_path @@ -625,6 +596,7 @@ def __init__( _get_tags(inference_spec_type)) self._session = None self._data_type = data_type + self._tensor_adapter_config = tensor_adapter_config def setup(self): """Load the model. @@ -704,6 +676,29 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) + def _extract_from_recordBatch(self, elements: pa.RecordBatch): + serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) + + model_input = None + if (len(self._io_tensor_spec.input_tensor_names) == 1): + model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} + else: + if (self._tensor_adapter_config is None): + raise ValueError('Tensor adaptor config is required with a multi-input model') + + input_tensor_names = self._io_tensor_spec.input_tensor_names + input_tensor_alias = self._io_tensor_spec.input_tensor_alias + _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) + dict_of_tensors = _tensor_adapter.ToBatchTensors( + elements, produce_eager_tensors = False) + filtered_tensors = bsl_util.filter_tensors_by_input_names( + dict_of_tensors, input_tensor_alias) + + model_input = {} + for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): + model_input[tensor_name] = filtered_tensors[feature] + return serialized_examples, model_input + def run_inference( self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: self._check_elements() @@ -830,21 +825,23 @@ def _post_process( 'dimension, with the first having a size equal to the input batch ' 'size %s. 
Instead found %s' % (output_alias, batch_size, output.shape)) - predict_log_tmpl = prediction_log_pb2.PredictLog() - predict_log_tmpl.request.model_spec.signature_name = signature_name - predict_log_tmpl.response.model_spec.signature_name = signature_name - for alias, tensor_type in input_tensor_types.items(): - input_tensor_proto = predict_log_tmpl.request.inputs[alias] - input_tensor_proto.dtype = tf.as_dtype(tensor_type).as_datatype_enum - # TODO (Maxine): fix dimension? - input_tensor_proto.tensor_shape.dim.add().size = 1 - - result = [] - for i in range(batch_size): - predict_log = prediction_log_pb2.PredictLog() - predict_log.CopyFrom(predict_log_tmpl) + + if include_request: + predict_log_tmpl = prediction_log_pb2.PredictLog() + predict_log_tmpl.request.model_spec.signature_name = signature_name + predict_log_tmpl.response.model_spec.signature_name = signature_name + for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + input_tensor_proto = predict_log_tmpl.request.inputs[alias] + input_tensor_proto.dtype = tf.as_dtype(input_tensor_types[alias]).as_datatype_enum + if len(input_tensor_alias) == 1: + input_tensor_proto.tensor_shape.dim.add().size = 1 + else: + input_tensor_proto.tensor_shape.dim.add().size = len(elements[tensor_name][0]) - if include_request: + result = [] + for i in range(batch_size): + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) if len(input_tensor_alias) == 1: alias = input_tensor_alias[0] predict_log.request.inputs[alias].string_val.append(process_elements[i]) @@ -852,14 +849,14 @@ def _post_process( for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): predict_log.request.inputs[alias].float_val.append(elements[tensor_name][i]) - for output_alias, output in outputs.items(): - # Mimic tensor::Split - tensor_proto = tf.make_tensor_proto( - values=output[i], - dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, - shape=np.expand_dims(output[i], axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) + for output_alias, output in outputs.items(): + # Mimic tensor::Split + tensor_proto = tf.make_tensor_proto( + values=output[i], + dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, + shape=np.expand_dims(output[i], axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) return result diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 192bd58f..f28ff1bb 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -267,8 +267,8 @@ class RunRemoteInferenceExamplesTest(RunInferenceFixture): def setUp(self): super(RunRemoteInferenceExamplesTest, self).setUp() - self.example_path = self._get_output_data_dir('example') - self._prepare_predict_examples(self.example_path) + self._example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self._example_path) # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
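The response handling above splits each batched output tensor row by row into per-example PredictLogs. A condensed, self-contained sketch of that split, assuming a single dense output named 'output1' (an illustration, not the exact implementation):

    # Illustrative only: split one batched output into per-example PredictLogs.
    import numpy as np
    import tensorflow as tf
    from tensorflow_serving.apis import prediction_log_pb2

    outputs = {'output1': np.array([[0.3], [0.7]], dtype=np.float32)}
    batch_size = 2

    logs = []
    for i in range(batch_size):
      log = prediction_log_pb2.PredictLog()
      for alias, output in outputs.items():
        # Mimic tensor::Split: one row per example, keeping a leading dim of 1.
        log.response.outputs[alias].CopyFrom(
            tf.make_tensor_proto(
                values=output[i],
                dtype=tf.as_dtype(output[i].dtype).as_datatype_enum,
                shape=np.expand_dims(output[i], axis=0).shape))
      logs.append(log)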
self._discovery_testdata_dir = os.path.join( os.path.join(os.path.dirname(__file__), 'testdata'), @@ -286,7 +286,7 @@ def _set_up_pipeline(self, inference_spec_type): self.pipeline = beam.Pipeline() self.pcoll = ( self.pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self._example_path) | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) | 'RunInference' >> run_inference.RunInferenceOnExamples(inference_spec_type)) @@ -1018,9 +1018,9 @@ class RunRemoteInferenceArrowTest(RunInferenceFixture): def setUp(self): super(RunRemoteInferenceArrowTest, self).setUp() + self._example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self._example_path) # This is from https://ml.googleapis.com/$discovery/rest?version=v1. - self.example_path = self._get_output_data_dir('example') - self._prepare_predict_examples(self.example_path) self._discovery_testdata_dir = os.path.join( os.path.join(os.path.dirname(__file__), 'testdata'), 'ml_discovery.json') @@ -1041,7 +1041,7 @@ def _set_up_pipeline(self, inference_spec_type): raw_record_column_name=_RECORDBATCH_COLUMN) self.pcoll = ( self.pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self._example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) @@ -1167,19 +1167,9 @@ def test_can_format_requests(self): self._run_inference_with_beam() def test_request_body_with_binary_data(self): - example = text_format.Parse( - """ - features { - feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} - feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} - feature { key: "y" value { int64_list { value: [1, 2] }}} - feature { key: "z" value { float_list { value: [4.5, 5, 5.5] }}} - } - """, tf.train.Example()) - serialized_example_remote = [example.SerializeToString()] record_batch_remote = pa.RecordBatch.from_arrays( [ - pa.array([["ASa8asdf"]], type=pa.list_(pa.binary())), + pa.array([["ASa8asdf", "ASa8asdf"]], type=pa.list_(pa.binary())), pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), pa.array([[1, 2]], type=pa.list_(pa.int32())), pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())) @@ -1190,9 +1180,10 @@ def test_request_body_with_binary_data(self): result = list(bsl_util.RecordToJSON(record_batch_remote, False)) self.assertEqual([ { - 'x_bytes': { - 'b64': 'QVNhOGFzZGY=' - }, + 'x_bytes': [ + {'b64': 'QVNhOGFzZGY='}, + {'b64': 'QVNhOGFzZGY='} + ], 'x': 'JLK7ljk3', 'y': [1, 2], 'z': [4.5, 5, 5.5] From dc9c513811d15c49241bff98638acb0993894169 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 6 Aug 2020 15:48:23 -0400 Subject: [PATCH 27/31] add pytypes for returns and complete comments --- tfx_bsl/beam/bsl_constants.py | 1 - tfx_bsl/beam/bsl_util.py | 19 ++++++---- tfx_bsl/beam/run_inference.py | 60 ++++++++++++++++++++---------- tfx_bsl/beam/run_inference_test.py | 23 +++++------- 4 files changed, 62 insertions(+), 41 deletions(-) diff --git a/tfx_bsl/beam/bsl_constants.py b/tfx_bsl/beam/bsl_constants.py index 4f797b15..4b7473b2 100644 --- a/tfx_bsl/beam/bsl_constants.py +++ b/tfx_bsl/beam/bsl_constants.py @@ -1,5 +1,4 @@ _RECORDBATCH_COLUMN = '__RAW_RECORD__' -_KERAS_INPUT_SUFFIX = '_input' class DataType(object): EXAMPLE = 'EXAMPLE' diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index 3d2a7929..2a735487 100644 --- 
a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -18,17 +18,16 @@ # Standard __future__ imports from __future__ import print_function - import numpy as np import pyarrow as pa import pandas as pd import base64 import json import typing -from typing import Dict, List, Text, Any, Set, Optional +from typing import Dict, List, Text, Any, Set, Mapping, Optional from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN -from tfx_bsl.beam.bsl_constants import _KERAS_INPUT_SUFFIX - + +_KERAS_INPUT_SUFFIX = '_input' def ExtractSerializedExampleFromRecordBatch(elements: pa.RecordBatch) -> List[Text]: serialized_examples = None @@ -42,19 +41,23 @@ def ExtractSerializedExampleFromRecordBatch(elements: pa.RecordBatch) -> List[Te serialized_examples = column_array.flatten().to_pylist() break - if (serialized_examples is None): + if not serialized_examples: raise ValueError('Raw examples not found.') return serialized_examples -def RecordToJSON(record_batch: pa.RecordBatch, prepare_instances_serialized) -> List[Text]: - """Returns a JSON string translated from `record_batch`. +def RecordToJSON( + record_batch: pa.RecordBatch, prepare_instances_serialized) -> List[Mapping[Text, Any]]: + """Returns a list of JSON dictionaries translated from `record_batch`. The conversion will take in a recordbatch that contains features from a tf.train.Example and will return a list of dict like string (JSON) where each item is a JSON representation of an example. - - return format: [{ feature1: value1, ... }, ...] + + Return: + List of JSON dictionaries + - format: [{ feature1: value1, feature2: [value2_1, value2_2]... }, ...] Args: record_batch: input RecordBatch. diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index f3bdb5f4..87f44bea 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
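For reference, the RecordToJSON contract documented above behaves as in the following sketch (feature names and values are illustrative; the binary-column and use_serialization_config cases are exercised by the tests in this series):

    # Illustrative values only.
    import pyarrow as pa
    from tfx_bsl.beam import bsl_util

    record_batch = pa.RecordBatch.from_arrays(
        [pa.array([[1.0]], type=pa.list_(pa.float32())),
         pa.array([[2, 3]], type=pa.list_(pa.int64()))],
        ['x', 'y'])

    # Single-element lists are unwrapped and multi-element lists are kept,
    # giving [{'x': 1.0, 'y': [2, 3]}].
    instances = bsl_util.RecordToJSON(record_batch, False)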
-"""Run batch inference on saved model and private APIs of inference.""" +"""Run batch inference on saved model with private APIs of inference.""" from __future__ import absolute_import from __future__ import division @@ -130,24 +130,29 @@ def RunInferenceOnExamples( # pylint: disable=invalid-name """ data_type = DataType.EXAMPLE + operation_type = _get_operation_type(inference_spec_type) + proximity_descriptor = ( + _METRICS_DESCRIPTOR_IN_PROCESS + if _using_in_process_inference(inference_spec_type) + else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) converter = tf_example_record.TFExampleBeamRecord( physical_format="inmem", - telemetry_descriptors=[], + telemetry_descriptors=[ + _METRICS_DESCRIPTOR_INFERENCE, + operation_type, proximity_descriptor], schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = None if schema: - tfxio = test_util.InMemoryTFExampleRecord( - schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) + arrow_schema=converter.ArrowSchema(), + tensor_representations=converter.TensorRepresentations()) return (examples | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> RunInferenceOnRecordBatch( + | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( inference_spec_type, data_type, tensor_adapter_config=tensor_adapter_config)) @@ -180,24 +185,29 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name """ data_type = DataType.SEQUENCEEXAMPLE + operation_type = _get_operation_type(inference_spec_type) + proximity_descriptor = ( + _METRICS_DESCRIPTOR_IN_PROCESS + if _using_in_process_inference(inference_spec_type) + else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( physical_format="inmem", - telemetry_descriptors=[], + telemetry_descriptors=[ + _METRICS_DESCRIPTOR_INFERENCE, + operation_type, proximity_descriptor], schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = None if schema: - tfxio = test_util.InMemoryTFExampleRecord( - schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) + arrow_schema=converter.ArrowSchema(), + tensor_representations=converter.TensorRepresentations()) return (examples | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> RunInferenceOnRecordBatch( + | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( inference_spec_type, data_type, tensor_adapter_config=tensor_adapter_config)) @@ -205,7 +215,7 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceOnRecordBatch( # pylint: disable=invalid-name +def _RunInferenceOnRecordBatch( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, data_type: Text, tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None @@ -217,7 +227,8 @@ def RunInferenceOnRecordBatch( # pylint: disable=invalid-name inference_spec_type: Model inference 
endpoint. tensor_adapter_config [Optional]: Tensor adapter config which specifies how to obtain tensors from the Arrow RecordBatch. - - Not required when running inference with remote model or 1 input + - Not required when running inference with remote model or + serialized example as the single input tensor Returns: A PCollection containing prediction logs. @@ -421,6 +432,15 @@ def finish_bundle(self): def run_inference( self, tensors: Mapping[Any, Any] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: + """ + Run inference with extracted model input. + + Parameters: + tensors: a dictionary consists of tensor names and tensors + in the form of ndArray, SparceTensorValues, etc. + - ex: { 'x': SparseTensorValue } + { 'y': [[1, 2, 3], [3, 4, 5] ...] } + """ raise NotImplementedError @abc.abstractmethod @@ -504,7 +524,8 @@ def setup(self): # user agent once custom header is supported in googleapiclient. self._api_client = discovery.build('ml', 'v1') - def _extract_from_recordBatch(self, elements: pa.RecordBatch): + def _extract_from_recordBatch( + self, elements: pa.RecordBatch) -> Tuple[List[Text], List[Mapping[Any, Any]]]: serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) prepare_instances_serialized = ( self.inference_spec_type.ai_platform_prediction_model_spec.use_serialization_config) @@ -648,7 +669,7 @@ def _pre_process(self) -> _IOTensorSpec: list(signature.signature_def.inputs.values())[0].dtype != tf.string.as_datatype_enum): raise ValueError( - 'With 1 input, dtype is expected to be %s, got %s' % + 'With 1 input, dtype is expected to be %s for serialized examples, got %s' % tf.string.as_datatype_enum, list(signature.signature_def.inputs.values())[0].dtype) io_tensor_specs.append(_signature_pre_process(signature.signature_def)) @@ -676,14 +697,15 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def _extract_from_recordBatch(self, elements: pa.RecordBatch): + def _extract_from_recordBatch( + self, elements: pa.RecordBatch) -> Tuple[List[Text], Mapping[Any, Any]]: serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) model_input = None if (len(self._io_tensor_spec.input_tensor_names) == 1): model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} else: - if (self._tensor_adapter_config is None): + if not self._tensor_adapter_config: raise ValueError('Tensor adaptor config is required with a multi-input model') input_tensor_names = self._io_tensor_spec.input_tensor_names diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index f28ff1bb..61f0d83c 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -683,25 +683,22 @@ def _build_multihead_model(self, model_path): def _run_inference_with_beam(self, example_path, inference_spec_type, prediction_log_path, include_config = False): - # test RunInferenceOnRecordBatch + # test _RunInferenceOnRecordBatch converter = tf_example_record.TFExampleBeamRecord( - physical_format="inmem", - telemetry_descriptors=[], - raw_record_column_name=_RECORDBATCH_COLUMN) + physical_format="inmem", telemetry_descriptors=[], + schema=self.schema, raw_record_column_name=_RECORDBATCH_COLUMN) if include_config: - tfxio = test_util.InMemoryTFExampleRecord( - schema=self.schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - 
arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) + arrow_schema=converter.ArrowSchema(), + tensor_representations=converter.TensorRepresentations()) with beam.Pipeline() as pipeline: _ = ( pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + | 'RunInference' >> run_inference._RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, @@ -712,7 +709,7 @@ def _run_inference_with_beam(self, example_path, inference_spec_type, pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + | 'RunInference' >> run_inference._RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, @@ -978,7 +975,7 @@ def testTelemetry(self): pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + | 'RunInference' >> run_inference._RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) run_result = pipeline.run() run_result.wait_until_finish() @@ -1043,7 +1040,7 @@ def _set_up_pipeline(self, inference_spec_type): self.pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(self._example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + | 'RunInference' >> run_inference._RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) def _run_inference_with_beam(self): @@ -1161,7 +1158,7 @@ def test_can_format_requests(self): | 'CreateExamples' >> beam.Create([example]) | 'ParseExamples' >> beam.Map(lambda x: x.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + | 'RunInference' >> run_inference._RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) self._run_inference_with_beam() From 1a12c5c8ea505e29aafa474c7dbe724fe92366a9 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 12 Aug 2020 11:56:44 -0400 Subject: [PATCH 28/31] separate test for bsl-util --- tfx_bsl/beam/bsl_util.py | 11 +--- tfx_bsl/beam/bsl_util_test.py | 91 ++++++++++++++++++++++++++++ tfx_bsl/public/beam/run_inference.py | 4 +- 3 files changed, 96 insertions(+), 10 deletions(-) create mode 100644 tfx_bsl/beam/bsl_util_test.py diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index 2a735487..4c86c745 100644 --- a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -29,7 +29,7 @@ _KERAS_INPUT_SUFFIX = '_input' -def ExtractSerializedExampleFromRecordBatch(elements: pa.RecordBatch) -> List[Text]: +def ExtractSerializedExamplesFromRecordBatch(elements: pa.RecordBatch) -> List[Text]: serialized_examples = None for column_name, column_array in zip(elements.schema.names, elements.columns): if column_name == _RECORDBATCH_COLUMN: @@ -64,12 +64,6 @@ def RecordToJSON( """ # TODO (b/155912552): Handle this for sequence example. 
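The renamed extractor above simply pulls the raw-record column out of the Arrow batch. A small usage sketch with placeholder column contents, assuming the RecordBatch was produced with raw_record_column_name=_RECORDBATCH_COLUMN as elsewhere in this series:

    # Placeholder data: a RecordBatch carrying the raw serialized records.
    import pyarrow as pa
    from tfx_bsl.beam import bsl_util
    from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN

    record_batch = pa.RecordBatch.from_arrays(
        [pa.array([[1.0], [2.0]], type=pa.list_(pa.float32())),
         pa.array([[b'serialized-example-1'], [b'serialized-example-2']],
                  type=pa.list_(pa.binary()))],
        ['x', _RECORDBATCH_COLUMN])

    # Returns the flattened raw records:
    # [b'serialized-example-1', b'serialized-example-2']
    raw_records = bsl_util.ExtractSerializedExamplesFromRecordBatch(record_batch)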
- - def flatten(element: List[Any]): - if len(element) == 1: - return element[0] - return element - df = record_batch.to_pandas() if prepare_instances_serialized: return [{'b64': base64.b64encode(value).decode()} for value in df[_RECORDBATCH_COLUMN]] @@ -80,10 +74,11 @@ def flatten(element: List[Any]): if _RECORDBATCH_COLUMN in df.columns: df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) - df = df.applymap(lambda x: flatten(x)) + df = df.applymap(lambda values: values[0] if len(values) == 1 else values) return json.loads(df.to_json(orient='records')) +# TODO: Reuse these functions in TFMA. def _find_input_name_in_features(features: Set[Text], input_name: Text) -> Optional[Text]: """Maps input name to an entry in features. Returns None if not found.""" diff --git a/tfx_bsl/beam/bsl_util_test.py b/tfx_bsl/beam/bsl_util_test.py new file mode 100644 index 00000000..25f84687 --- /dev/null +++ b/tfx_bsl/beam/bsl_util_test.py @@ -0,0 +1,91 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for tfx_bsl.bsl_util.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import base64 +import json +import os +try: + import unittest.mock as mock +except ImportError: + import mock + +import apache_beam as beam +import pyarrow as pa +import tensorflow as tf +from google.protobuf import text_format +from tfx_bsl.beam import bsl_util +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN + + +class TestBslUtil(tf.test.TestCase): + def test_request_body_with_binary_data(self): + record_batch_remote = pa.RecordBatch.from_arrays( + [ + pa.array([["ASa8asdf", "ASa8asdf"]], type=pa.list_(pa.binary())), + pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), + pa.array([[1, 2]], type=pa.list_(pa.int32())), + pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())) + ], + ['x_bytes', 'x', 'y', 'z'] + ) + + result = list(bsl_util.RecordToJSON(record_batch_remote, False)) + self.assertEqual([ + { + 'x_bytes': [ + {'b64': 'QVNhOGFzZGY='}, + {'b64': 'QVNhOGFzZGY='} + ], + 'x': 'JLK7ljk3', + 'y': [1, 2], + 'z': [4.5, 5, 5.5] + }, + ], result) + + def test_request_serialized_example(self): + example = text_format.Parse( + """ + features { + feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} + feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} + feature { key: "y" value { int64_list { value: [1, 2] }}} + } + """, tf.train.Example()) + + serialized_example_remote = [example.SerializeToString()] + record_batch_remote = pa.RecordBatch.from_arrays( + [ + pa.array([["ASa8asdf"]], type=pa.list_(pa.binary())), + pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), + pa.array([[1, 2]], type=pa.list_(pa.int32())), + pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())), + serialized_example_remote + ], + ['x_bytes', 'x', 'y', 'z', _RECORDBATCH_COLUMN] + ) + + result = list(bsl_util.RecordToJSON(record_batch_remote, True)) + self.assertEqual(result, 
[{ + 'b64': base64.b64encode(example.SerializeToString()).decode() + }]) + + +if __name__ == '__main__': + tf.test.main() \ No newline at end of file diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 8e173d5d..f8461b05 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -53,7 +53,7 @@ def RunInference( # pylint: disable=invalid-name Args: examples: A PCollection containing examples. inference_spec_type: Model inference endpoint. - Schema [optional]: required for models that requires + schema [optional]: required for predict models that requires multi-tensor inputs. Returns: @@ -85,7 +85,7 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name Args: examples: A PCollection containing sequence examples. inference_spec_type: Model inference endpoint. - Schema [optional]: required for models that requires + schema [optional]: required for predict models that requires multi-tensor inputs. Returns: From 2fa6720edffda6eb58bcbaa8338c91fd6db8dd6e Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Sat, 15 Aug 2020 12:07:23 -0400 Subject: [PATCH 29/31] checkpoint: address comments on post-process, and modified public api (WIP) --- tfx_bsl/beam/bsl_util_test.py | 2 +- tfx_bsl/beam/run_inference.py | 280 ++++++++++++++------------- tfx_bsl/beam/run_inference_test.py | 172 +++++++++------- tfx_bsl/public/beam/run_inference.py | 38 +--- 4 files changed, 255 insertions(+), 237 deletions(-) diff --git a/tfx_bsl/beam/bsl_util_test.py b/tfx_bsl/beam/bsl_util_test.py index 25f84687..c1a63b0d 100644 --- a/tfx_bsl/beam/bsl_util_test.py +++ b/tfx_bsl/beam/bsl_util_test.py @@ -88,4 +88,4 @@ def test_request_serialized_example(self): if __name__ == '__main__': - tf.test.main() \ No newline at end of file + tf.test.main() diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index 87f44bea..f93e8212 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -50,8 +50,8 @@ from tfx_bsl.tfxio import tensor_adapter from tfx_bsl.tfxio import tf_example_record from tfx_bsl.tfxio import tf_sequence_example_record -from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ - Tuple, Union, Optional +from typing import Any, Generator, Iterable, List, Mapping, Optional, \ + Sequence, Text, TypeVar, Tuple, Union from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN from tfx_bsl.beam.bsl_constants import DataType @@ -88,6 +88,7 @@ _MetaGraphDef = Any _SavedModel = Any +MixedExample = TypeVar('MixedExample', tf.train.Example, tf.train.SequenceExample) # TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 class OperationType(object): @@ -98,7 +99,7 @@ class OperationType(object): @beam.ptransform_fn -@beam.typehints.with_input_types(tf.train.Example) +@beam.typehints.with_input_types(MixedExample) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceOnExamples( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, @@ -129,74 +130,50 @@ def RunInferenceOnExamples( # pylint: disable=invalid-name A PCollection containing prediction logs. 
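The schema argument on the public RunInference documented above is only needed when the serving signature takes multiple dense tensors; with a single serialized-example input it can be omitted. A rough sketch of the two call patterns (model paths and the two-feature schema are placeholders, not from this change):

    # Placeholders throughout; shows when `schema` is and is not required.
    import apache_beam as beam
    import tensorflow as tf
    from google.protobuf import text_format
    from tensorflow_metadata.proto.v0 import schema_pb2
    from tfx_bsl.public.beam.run_inference import RunInference
    from tfx_bsl.public.proto import model_spec_pb2

    schema = text_format.Parse(
        'feature { name: "x" type: FLOAT } feature { name: "y" type: FLOAT }',
        schema_pb2.Schema())

    with beam.Pipeline() as p:
      examples = p | beam.Create([tf.train.Example()])

      # Signature with a single serialized-example string input: no schema.
      _ = examples | 'Serialized' >> RunInference(
          model_spec_pb2.InferenceSpecType(
              saved_model_spec=model_spec_pb2.SavedModelSpec(
                  model_path='/tmp/serialized_input_model')))

      # Predict signature with several dense inputs: pass the schema so the
      # transform can build a TensorAdapterConfig internally.
      _ = examples | 'MultiTensor' >> RunInference(
          model_spec_pb2.InferenceSpecType(
              saved_model_spec=model_spec_pb2.SavedModelSpec(
                  model_path='/tmp/multi_tensor_model')),
          schema=schema)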
""" - data_type = DataType.EXAMPLE operation_type = _get_operation_type(inference_spec_type) proximity_descriptor = ( _METRICS_DESCRIPTOR_IN_PROCESS if _using_in_process_inference(inference_spec_type) else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) - converter = tf_example_record.TFExampleBeamRecord( - physical_format="inmem", - telemetry_descriptors=[ - _METRICS_DESCRIPTOR_INFERENCE, - operation_type, proximity_descriptor], - schema=schema, - raw_record_column_name=_RECORDBATCH_COLUMN) - tensor_adapter_config = None - if schema: - tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=converter.ArrowSchema(), - tensor_representations=converter.TensorRepresentations()) - - return (examples - | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) - | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( - inference_spec_type, data_type, - tensor_adapter_config=tensor_adapter_config)) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(tf.train.SequenceExample) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType, - schema: Optional[schema_pb2.Schema] = None -) -> beam.pvalue.PCollection: - """Run inference with a model. - - There are two types of inference you can perform using this PTransform: - 1. In-process inference from a SavedModel instance. Used when - `saved_model_spec` field is set in `inference_spec_type`. - 2. Remote inference by using a service endpoint. Used when - `ai_platform_prediction_model_spec` field is set in - `inference_spec_type`. - - Args: - examples: A PCollection containing sequence examples. - inference_spec_type: Model inference endpoint. - Schema [optional]: required for models that requires - multi-tensor inputs. - - Returns: - A PCollection containing prediction logs. 
- """ - - data_type = DataType.SEQUENCEEXAMPLE - operation_type = _get_operation_type(inference_spec_type) - proximity_descriptor = ( - _METRICS_DESCRIPTOR_IN_PROCESS - if _using_in_process_inference(inference_spec_type) - else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) - converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( - physical_format="inmem", - telemetry_descriptors=[ - _METRICS_DESCRIPTOR_INFERENCE, - operation_type, proximity_descriptor], - schema=schema, - raw_record_column_name=_RECORDBATCH_COLUMN) + # determine input dataType + beam_type = examples.element_type + if beam_type == tf.train.Example or beam_type == tf.train.SequenceExample: + data_type = _get_data_type(beam_type) + else: + tagged = (examples | "SortInput" >> beam.Map( + lambda example: beam.pvalue.TaggedOutput( + 'example' if isinstance(example, tf.train.Example) + else 'sequence', example)).with_outputs('example', 'sequence')) + + import ipdb; ipdb.set_trace() + + if tagged.example and tagged.sequence: + raise ValueError('A PCollection containing both tf.Example and ' + 'tf.SequenceExample is not supported') + if not tagged.example: + data_type = DataType.SEQUENCEEXAMPLE + else: + data_type = DataType.EXAMPLE + + if data_type == DataType.EXAMPLE: + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[ + _METRICS_DESCRIPTOR_INFERENCE, + operation_type, proximity_descriptor], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + elif data_type == DataType.SEQUENCEEXAMPLE: + converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[ + _METRICS_DESCRIPTOR_INFERENCE, + operation_type, proximity_descriptor], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + else: + raise ValueError('Unsupported data_type %s' % data_type) tensor_adapter_config = None if schema: @@ -205,11 +182,11 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name tensor_representations=converter.TensorRepresentations()) return (examples - | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) - | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( - inference_spec_type, data_type, - tensor_adapter_config=tensor_adapter_config)) + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( + inference_spec_type, data_type, + tensor_adapter_config=tensor_adapter_config)) @beam.ptransform_fn @@ -405,19 +382,26 @@ def __init__( def setup(self): self._clock = _ClockFactory.make_clock() + def _extract_serialized_from_recordBatch( + self, elements: pa.RecordBatch) -> List[Union[str, bytes]]: + """Function to extract serialized examples from the recordbatch""" + serialized_examples = bsl_util.ExtractSerializedExamplesFromRecordBatch(elements) + return serialized_examples + @abc.abstractmethod - def _extract_from_recordBatch(self, elements: pa.RecordBatch): - """ - Function to extract the compatible input with model signature + def _extract_inference_input_from_recordBatch( + self, elements: pa.RecordBatch) -> Union[Mapping[Any, Any], List[Mapping[Any, Any]]]: + """Function to extract the compatible input with model signature + return: - - serialized examples for metrics - model input for processing and post processing """ raise NotImplementedError def process(self, elements: pa.RecordBatch) -> 
Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - serialized_examples, model_input = self._extract_from_recordBatch(elements) + serialized_examples = self._extract_serialized_from_recordBatch(elements) + model_input = self._extract_inference_input_from_recordBatch(elements) outputs = self.run_inference(model_input) result = self._post_process(model_input, outputs) self._metrics_collector.update( @@ -430,10 +414,9 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, tensors: Mapping[Any, Any] + self, tensors: Mapping[Text, Any] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: - """ - Run inference with extracted model input. + """Run inference with extracted model input. Parameters: tensors: a dictionary consists of tensor names and tensors @@ -524,13 +507,12 @@ def setup(self): # user agent once custom header is supported in googleapiclient. self._api_client = discovery.build('ml', 'v1') - def _extract_from_recordBatch( - self, elements: pa.RecordBatch) -> Tuple[List[Text], List[Mapping[Any, Any]]]: - serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) + def _extract_inference_input_from_recordBatch( + self, elements: pa.RecordBatch) -> List[Mapping[Any, Any]]: prepare_instances_serialized = ( self.inference_spec_type.ai_platform_prediction_model_spec.use_serialization_config) model_input = bsl_util.RecordToJSON(elements, prepare_instances_serialized) - return serialized_examples, model_input + return model_input # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. @retry.with_exponential_backoff( @@ -550,7 +532,7 @@ def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: @classmethod def _prepare_instances( - cls, elements: List[Union[str, bytes]] + cls, elements: List[Mapping[Any, Any]] ) -> Generator[Mapping[Text, Any], None, None]: for instance in elements: yield instance @@ -697,12 +679,11 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def _extract_from_recordBatch( - self, elements: pa.RecordBatch) -> Tuple[List[Text], Mapping[Any, Any]]: - serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) - + def _extract_inference_input_from_recordBatch( + self, elements: pa.RecordBatch) -> Mapping[Any, Any]: model_input = None if (len(self._io_tensor_spec.input_tensor_names) == 1): + serialized_examples = bsl_util.ExtractSerializedExamplesFromRecordBatch(elements) model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} else: if not self._tensor_adapter_config: @@ -711,24 +692,26 @@ def _extract_from_recordBatch( input_tensor_names = self._io_tensor_spec.input_tensor_names input_tensor_alias = self._io_tensor_spec.input_tensor_alias _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) + # dict_of_tensors is a map from input_tensor_alias to tensor dict_of_tensors = _tensor_adapter.ToBatchTensors( elements, produce_eager_tensors = False) filtered_tensors = bsl_util.filter_tensors_by_input_names( dict_of_tensors, input_tensor_alias) model_input = {} - for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): - model_input[tensor_name] = filtered_tensors[feature] - return serialized_examples, model_input + for tensor_alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + model_input[tensor_name] = filtered_tensors[tensor_alias] + return model_input def 
run_inference( - self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: + self, tensors: Mapping[Text, Any]) -> Mapping[Text, np.ndarray]: + # tensors: a dictionary consists of tensor alias and tensors self._check_elements() outputs = self._run_tf_operations(tensors) return outputs def _run_tf_operations( - self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: + self, tensors: Mapping[Text, Any]) -> Mapping[Text, np.ndarray]: result = self._session.run( self._io_tensor_spec.output_alias_tensor_names, feed_dict=tensors) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): @@ -824,61 +807,87 @@ def _post_process( if len(input_tensor_alias) != len(input_tensor_names): raise ValueError('Expected to have one name and one alias per tensor') - include_request = True + result = [] + # Single tensor input if len(input_tensor_names) == 1: serialized_examples, = elements.values() batch_size = len(serialized_examples) - process_elements = serialized_examples + + predict_log_tmpl = prediction_log_pb2.PredictLog() + predict_log_tmpl.request.model_spec.signature_name = signature_name + predict_log_tmpl.response.model_spec.signature_name = signature_name + input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias[0]] + input_tensor_proto.dtype = tf.string.as_datatype_enum + input_tensor_proto.tensor_shape.dim.add().size = 1 + + for output_alias, output in outputs.items(): + if len(output.shape) < 1 or output.shape[0] != batch_size: + raise ValueError( + 'Expected output tensor %s to have at least one ' + 'dimension, with the first having a size equal to the input batch ' + 'size %s. Instead found %s' % + (output_alias, batch_size, output.shape)) + + for i in range(batch_size): + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) + predict_log.request.inputs[input_tensor_alias[0]].string_val.append( + serialized_examples[i]) + for output_alias, output in outputs.items(): + # Mimic tensor::Split + tensor_proto = tf.make_tensor_proto( + values=output[i], + dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, + shape=np.expand_dims(output[i], axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) else: + predict_log_tmpl = prediction_log_pb2.PredictLog() + predict_log_tmpl.request.model_spec.signature_name = signature_name + predict_log_tmpl.response.model_spec.signature_name = signature_name + + # we will only include tensor_proto in requests when all input tensors are dense + include_request = True for tensor_name, tensor in elements.items(): if not isinstance(tensor, np.ndarray): include_request = False break if include_request: + for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + input_tensor_proto = predict_log_tmpl.request.inputs[alias] + input_tensor_proto.dtype = tf.as_dtype(input_tensor_types[alias]).as_datatype_enum + input_tensor_proto.tensor_shape.dim.add().size = len(elements[tensor_name][0]) + batch_size = len(elements[input_tensor_names[0]]) + for i in range(batch_size): + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) + for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + predict_log.request.inputs[alias].float_val.append( + elements[tensor_name][i]) else: batch_size = elements[input_tensor_names[0]].shape[0] + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) - for output_alias, output in outputs.items(): - if len(output.shape) < 1 or 
output.shape[0] != batch_size: - raise ValueError( - 'Expected output tensor %s to have at least one ' - 'dimension, with the first having a size equal to the input batch ' - 'size %s. Instead found %s' % - (output_alias, batch_size, output.shape)) - - if include_request: - predict_log_tmpl = prediction_log_pb2.PredictLog() - predict_log_tmpl.request.model_spec.signature_name = signature_name - predict_log_tmpl.response.model_spec.signature_name = signature_name - for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): - input_tensor_proto = predict_log_tmpl.request.inputs[alias] - input_tensor_proto.dtype = tf.as_dtype(input_tensor_types[alias]).as_datatype_enum - if len(input_tensor_alias) == 1: - input_tensor_proto.tensor_shape.dim.add().size = 1 - else: - input_tensor_proto.tensor_shape.dim.add().size = len(elements[tensor_name][0]) + for output_alias, output in outputs.items(): + if len(output.shape) < 1 or output.shape[0] != batch_size: + raise ValueError( + 'Expected output tensor %s to have at least one ' + 'dimension, with the first having a size equal to the input batch ' + 'size %s. Instead found %s' % + (output_alias, batch_size, output.shape)) - result = [] for i in range(batch_size): - predict_log = prediction_log_pb2.PredictLog() - predict_log.CopyFrom(predict_log_tmpl) - if len(input_tensor_alias) == 1: - alias = input_tensor_alias[0] - predict_log.request.inputs[alias].string_val.append(process_elements[i]) - else: - for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): - predict_log.request.inputs[alias].float_val.append(elements[tensor_name][i]) - - for output_alias, output in outputs.items(): - # Mimic tensor::Split - tensor_proto = tf.make_tensor_proto( - values=output[i], - dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, - shape=np.expand_dims(output[i], axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) + for output_alias, output in outputs.items(): + # Mimic tensor::Split + tensor_proto = tf.make_tensor_proto( + values=output[i], + dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, + shape=np.expand_dims(output[i], axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) return result @@ -1237,6 +1246,15 @@ def _get_signatures(model_path: Text, signatures: Sequence[Text], return result +def _get_data_type( + data_type: Union[tf.train.Example, tf.train.SequenceExample]) -> Text: + if (data_type == tf.train.Example): + return DataType.EXAMPLE + elif (data_type == tf.train.SequenceExample): + return DataType.SequenceExample + else: + raise ValueError('Expected tf.Example or tf.SequenceExample, got %s' % data_type) + def _get_operation_type( inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: if _using_in_process_inference(inference_spec_type): diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 61f0d83c..a32f5991 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -333,32 +333,32 @@ def setUp(self): self._predict_examples = [ text_format.Parse( """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), + context { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.SequenceExample()), text_format.Parse( """ - features { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, tf.train.Example()), + context { + feature { key: "input1" 
value { float_list { value: 1 }}} + } + """, tf.train.SequenceExample()), ] self._multihead_examples = [ text_format.Parse( """ - features { + context { feature {key: "x" value { float_list { value: 0.8 }}} feature {key: "y" value { float_list { value: 0.2 }}} } - """, tf.train.Example()), + """, tf.train.SequenceExample()), text_format.Parse( """ - features { + context { feature {key: "x" value { float_list { value: 0.6 }}} feature {key: "y" value { float_list { value: 0.1 }}} } - """, tf.train.Example()), + """, tf.train.SequenceExample()), ] self.schema = text_format.Parse( @@ -412,7 +412,7 @@ def _run_inference_with_beam(self, example_path, inference_spec_type, pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) - | 'RunInference' >> run_inference.RunInferenceOnSequenceExamples( + | 'RunInference' >> run_inference.RunInferenceOnExamples( inference_spec_type, schema=schema) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, @@ -510,6 +510,94 @@ def testKerasModelPredictMultiTensor(self): self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) +class RunOfflineInferenceMixedExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceMixedExamplesTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + context { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.SequenceExample()), + ] + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path, include_schema = False): + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) + | 'RunInference' >> run_inference.RunInferenceOnExamples( + inference_spec_type) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def testMixedExamples(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = 
self._get_output_data_dir('predictions') + error_msg = 'Expected element of type' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + class RunOfflineInferenceArrowTest(RunInferenceFixture): def setUp(self): @@ -1163,64 +1251,6 @@ def test_can_format_requests(self): self._run_inference_with_beam() - def test_request_body_with_binary_data(self): - record_batch_remote = pa.RecordBatch.from_arrays( - [ - pa.array([["ASa8asdf", "ASa8asdf"]], type=pa.list_(pa.binary())), - pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), - pa.array([[1, 2]], type=pa.list_(pa.int32())), - pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())) - ], - ['x_bytes', 'x', 'y', 'z'] - ) - - result = list(bsl_util.RecordToJSON(record_batch_remote, False)) - self.assertEqual([ - { - 'x_bytes': [ - {'b64': 'QVNhOGFzZGY='}, - {'b64': 'QVNhOGFzZGY='} - ], - 'x': 'JLK7ljk3', - 'y': [1, 2], - 'z': [4.5, 5, 5.5] - }, - ], result) - - def test_request_serialized_example(self): - example = text_format.Parse( - """ - features { - feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} - feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} - feature { key: "y" value { int64_list { value: [1, 2] }}} - } - """, tf.train.Example()) - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test_project', - model_name='test_model', - version_name='test_version', - use_serialization_config=True)) - - serialized_example_remote = [example.SerializeToString()] - record_batch_remote = pa.RecordBatch.from_arrays( - [ - pa.array([["ASa8asdf"]], type=pa.list_(pa.binary())), - pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), - pa.array([[1, 2]], type=pa.list_(pa.int32())), - pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())), - serialized_example_remote - ], - ['x_bytes', 'x', 'y', 'z', _RECORDBATCH_COLUMN] - ) - - result = list(bsl_util.RecordToJSON(record_batch_remote, True)) - self.assertEqual(result, [{ - 'b64': base64.b64encode(example.SerializeToString()).decode() - }]) - if __name__ == '__main__': tf.test.main() diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index f8461b05..9a8eb738 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -22,14 +22,16 @@ import apache_beam as beam import tensorflow as tf import pyarrow as pa -from typing import Text, Optional +from typing import Text, Optional, TypeVar from tfx_bsl.beam import run_inference from tfx_bsl.public.proto import model_spec_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_metadata.proto.v0 import schema_pb2 +MixedExample = TypeVar('MixedExample', tf.train.Example, tf.train.SequenceExample) + @beam.ptransform_fn -@beam.typehints.with_input_types(tf.train.Example) +@beam.typehints.with_input_types(MixedExample) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInference( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, @@ -63,35 +65,3 @@ def RunInference( # pylint: disable=invalid-name return (examples | 'RunInferenceOnExamples' >> run_inference.RunInferenceOnExamples( 
inference_spec_type, schema=schema)) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(tf.train.SequenceExample) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType, - schema: Optional[schema_pb2.Schema] = None -) -> beam.pvalue.PCollection: - """Run inference with a model. - - There are two types of inference you can perform using this PTransform: - 1. In-process inference from a SavedModel instance. Used when - `saved_model_spec` field is set in `inference_spec_type`. - 2. Remote inference by using a service endpoint. Used when - `ai_platform_prediction_model_spec` field is set in - `inference_spec_type`. - - Args: - examples: A PCollection containing sequence examples. - inference_spec_type: Model inference endpoint. - schema [optional]: required for predict models that requires - multi-tensor inputs. - - Returns: - A PCollection containing prediction logs. - """ - - return (examples - | 'RunInferenceOnSequenceExamples' >> run_inference.RunInferenceOnSequenceExamples( - inference_spec_type, schema=schema)) From 8c279ceb4faea1d863488aa6004eb59c16077f6c Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Sat, 15 Aug 2020 16:54:33 -0400 Subject: [PATCH 30/31] identify if example is empty --- tfx_bsl/beam/run_inference.py | 15 +++++++++++---- tfx_bsl/beam/run_inference_test.py | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index f93e8212..ef7476fb 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -146,13 +146,20 @@ def RunInferenceOnExamples( # pylint: disable=invalid-name 'example' if isinstance(example, tf.train.Example) else 'sequence', example)).with_outputs('example', 'sequence')) - import ipdb; ipdb.set_trace() + def check_empty(elements: beam.pvalue.PCollection) -> bool: + is_empty_beam = (elements + | "CountElement" >> beam.combiners.Count.Globally() + | "CheckEmpty" >> beam.Map(lambda n: n == 0)) + return is_empty_beam[0] - if tagged.example and tagged.sequence: + example_is_empty = tagged.example | "CheckExample" >> beam.CombineGlobally(check_empty) + sequence_is_empty = tagged.sequence | "CheckSequence" >> beam.CombineGlobally(check_empty) + + if not example_is_empty and not sequence_is_empty: raise ValueError('A PCollection containing both tf.Example and ' 'tf.SequenceExample is not supported') - if not tagged.example: - data_type = DataType.SEQUENCEEXAMPLE + if example_is_empty: + data_type = DataType.SEQUENCEEXAMPLE else: data_type = DataType.EXAMPLE diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index a32f5991..ee4191ac 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -584,7 +584,7 @@ def call(self, serialized_example): example_path = self._get_output_data_dir('examples') self._prepare_predict_examples(example_path) prediction_log_path = self._get_output_data_dir('predictions') - error_msg = 'Expected element of type' + error_msg = 'A PCollection containing both tf.Example' try: self._run_inference_with_beam( example_path, From ff40846050827175b419b7b12c16a3f5b276a20d Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 20 Aug 2020 18:20:01 -0400 Subject: [PATCH 31/31] assert data type and add tests for sequence examples on classify regress and multihead --- tfx_bsl/beam/run_inference.py | 122 
++++++++------- tfx_bsl/beam/run_inference_test.py | 244 +++++++++++++++++------------ 2 files changed, 212 insertions(+), 154 deletions(-) diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index ef7476fb..87bc2a66 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -129,41 +129,17 @@ def RunInferenceOnExamples( # pylint: disable=invalid-name Returns: A PCollection containing prediction logs. """ - + tensor_adapter_config = None operation_type = _get_operation_type(inference_spec_type) proximity_descriptor = ( _METRICS_DESCRIPTOR_IN_PROCESS if _using_in_process_inference(inference_spec_type) else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) - # determine input dataType - beam_type = examples.element_type - if beam_type == tf.train.Example or beam_type == tf.train.SequenceExample: - data_type = _get_data_type(beam_type) - else: - tagged = (examples | "SortInput" >> beam.Map( - lambda example: beam.pvalue.TaggedOutput( - 'example' if isinstance(example, tf.train.Example) - else 'sequence', example)).with_outputs('example', 'sequence')) - - def check_empty(elements: beam.pvalue.PCollection) -> bool: - is_empty_beam = (elements - | "CountElement" >> beam.combiners.Count.Globally() - | "CheckEmpty" >> beam.Map(lambda n: n == 0)) - return is_empty_beam[0] - - example_is_empty = tagged.example | "CheckExample" >> beam.CombineGlobally(check_empty) - sequence_is_empty = tagged.sequence | "CheckSequence" >> beam.CombineGlobally(check_empty) - - if not example_is_empty and not sequence_is_empty: - raise ValueError('A PCollection containing both tf.Example and ' - 'tf.SequenceExample is not supported') - if example_is_empty: - data_type = DataType.SEQUENCEEXAMPLE - else: - data_type = DataType.EXAMPLE - - if data_type == DataType.EXAMPLE: + if (operation_type == OperationType.CLASSIFICATION or + operation_type == OperationType.REGRESSION or + operation_type == OperationType.MULTIHEAD): + typed_examples = examples | AssertType(tf.train.Example, operation_type) converter = tf_example_record.TFExampleBeamRecord( physical_format="inmem", telemetry_descriptors=[ @@ -171,29 +147,54 @@ def check_empty(elements: beam.pvalue.PCollection) -> bool: operation_type, proximity_descriptor], schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) - elif data_type == DataType.SEQUENCEEXAMPLE: - converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( + + return (examples + | 'ParseExamples' >> beam.Map(lambda element: element.SerializeToString()) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE, + tensor_adapter_config=tensor_adapter_config)) + else: + # TODO: check if there are two types of input data in PREDICT Operation + ExampleConverter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[ + _METRICS_DESCRIPTOR_INFERENCE, + operation_type, proximity_descriptor], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + SequenceConverter = tf_sequence_example_record.TFSequenceExampleBeamRecord( physical_format="inmem", telemetry_descriptors=[ _METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor], schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) - else: - raise ValueError('Unsupported data_type %s' % data_type) - tensor_adapter_config = None - if schema: - tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=converter.ArrowSchema(), - 
tensor_representations=converter.TensorRepresentations()) + tagged = (examples | "SortInput" >> beam.Map( + lambda example: beam.pvalue.TaggedOutput( + 'example' if isinstance(example, tf.train.Example) + else 'sequence', example)).with_outputs('example', 'sequence')) - return (examples - | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) - | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( - inference_spec_type, data_type, - tensor_adapter_config=tensor_adapter_config)) + if schema: + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=ExampleConverter.ArrowSchema(), + tensor_representations=ExampleConverter.TensorRepresentations()) + + return ([ + (tagged.example + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) + | 'ConvertExampleToRecordBatch' >> ExampleConverter.BeamSource() + | 'RunInferenceImplExample' >> _RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE, + tensor_adapter_config=tensor_adapter_config)), + (tagged.sequence + | 'ParseSequenceExamples' >> beam.Map(lambda example: example.SerializeToString()) + | 'ConvertSequenceToRecordBatch' >> SequenceConverter.BeamSource() + | 'RunInferenceImplSequence' >> _RunInferenceOnRecordBatch( + inference_spec_type, DataType.SEQUENCEEXAMPLE, + tensor_adapter_config=tensor_adapter_config)) + ] | 'FlattenResult' >> beam.Flatten()) @beam.ptransform_fn @@ -1253,15 +1254,6 @@ def _get_signatures(model_path: Text, signatures: Sequence[Text], return result -def _get_data_type( - data_type: Union[tf.train.Example, tf.train.SequenceExample]) -> Text: - if (data_type == tf.train.Example): - return DataType.EXAMPLE - elif (data_type == tf.train.SequenceExample): - return DataType.SequenceExample - else: - raise ValueError('Expected tf.Example or tf.SequenceExample, got %s' % data_type) - def _get_operation_type( inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: if _using_in_process_inference(inference_spec_type): @@ -1342,6 +1334,30 @@ def _is_cygwin() -> bool: return platform.system().startswith('CYGWIN_NT') +class AssertType(beam.PTransform): + """Check and cast a PCollection's elements to a given type.""" + def __init__(self, data_type: Any, operation_type: Text, label=None): + super().__init__(label) + self.data_type = data_type + self.operation_type = operation_type + self.first_data = False + + def expand(self, pcoll: beam.pvalue.PCollection): + @beam.typehints.with_output_types(Iterable[self.data_type]) + def _assert_fn(element: Any): + if not isinstance(element, self.data_type): + raise ValueError( + 'Operation type %s expected element of type %s, got: %s' % + (self.operation_type, self.data_type, type(element))) + yield element + + # Skip run-time type checking if the type already matches. 
+ if pcoll.element_type == self.data_type: + return pcoll + else: + return pcoll | beam.ParDo(_assert_fn) + + class _Clock(object): def get_current_time_in_microseconds(self) -> int: diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index ee4191ac..6d8b21ae 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -56,10 +56,10 @@ def setUp(self): self._predict_examples = [ text_format.Parse( """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), ] def _get_output_data_dir(self, sub_dir=None): @@ -86,16 +86,16 @@ def setUp(self): self._predict_examples = [ text_format.Parse( """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), text_format.Parse( """ - features { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, tf.train.Example()), + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), ] self._multihead_examples = [ text_format.Parse( @@ -429,6 +429,137 @@ def _get_results(self, prediction_log_path): return results + def _build_regression_signature(self, input_tensor, output_tensor): + """Helper function for building a regression SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + output_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) + + def _build_classification_signature(self, input_tensor, scores_tensor): + """Helper function for building a classification SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + scores_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + + def _build_multihead_model(self, model_path): + with tf.compat.v1.Graph().as_default(): + input_example = tf.compat.v1.placeholder( + tf.string, name='input_examples_tensor') + config = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + 'y': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + } + features = tf.compat.v1.parse_example(input_example, config) + x = features['x'] + y = features['y'] + sum_pred = x + y + diff_pred = tf.abs(x - y) + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def_map = { + 'regress_diff': + self._build_regression_signature(input_example, diff_pred), + 
'classify_sum': + self._build_classification_signature(input_example, sum_pred), + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + self._build_regression_signature(input_example, sum_pred) + } + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map) + builder.save() + + + def testClassifyModelError(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + error_msg = 'Operation type' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])), + prediction_log_path) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def testRegressModelError(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + error_msg = 'Operation type' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['regress_diff'])), + prediction_log_path) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def testMultiInferenceModelError(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + error_msg = 'Operation type' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, + signature_name=['regress_diff', 'classify_sum'])), + prediction_log_path) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def testKerasModelPredict(self): inputs = tf.keras.Input(shape=(1,), name='input1') output1 = tf.keras.layers.Dense( @@ -510,94 +641,6 @@ def testKerasModelPredictMultiTensor(self): self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) -class RunOfflineInferenceMixedExamplesTest(RunInferenceFixture): - - def setUp(self): - super(RunOfflineInferenceMixedExamplesTest, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - context { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, tf.train.SequenceExample()), - ] - - def _run_inference_with_beam(self, example_path, inference_spec_type, - prediction_log_path, include_schema = False): - with 
beam.Pipeline() as pipeline: - _ = ( - pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) - | 'RunInference' >> run_inference.RunInferenceOnExamples( - inference_spec_type) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - - def testMixedExamples(self): - inputs = tf.keras.Input(shape=(1,), name='input1') - output1 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output1')( - inputs) - output2 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output2')( - inputs) - inference_model = tf.keras.models.Model(inputs, [output1, output2]) - - class TestKerasModel(tf.keras.Model): - - def __init__(self, inference_model): - super(TestKerasModel, self).__init__(name='test_keras_model') - self.inference_model = inference_model - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') - ]) - def call(self, serialized_example): - features = { - 'input1': - tf.compat.v1.io.FixedLenFeature([1], - dtype=tf.float32, - default_value=0) - } - input_tensor_dict = tf.io.parse_example(serialized_example, features) - return inference_model(input_tensor_dict['input1']) - - model = TestKerasModel(inference_model) - model.compile( - optimizer=tf.keras.optimizers.Adam(lr=.001), - loss=tf.keras.losses.binary_crossentropy, - metrics=['accuracy']) - - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) - prediction_log_path = self._get_output_data_dir('predictions') - error_msg = 'A PCollection containing both tf.Example' - try: - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - except ValueError as exc: - actual_error_msg = str(exc) - self.assertTrue(actual_error_msg.startswith(error_msg)) - else: - self.fail('Test was expected to throw ValueError exception') - - class RunOfflineInferenceArrowTest(RunInferenceFixture): def setUp(self): @@ -674,7 +717,6 @@ def _prepare_multihead_examples(self, example_path): for example in self._multihead_examples: output_file.write(example.SerializeToString()) - def _build_predict_model(self, model_path): """Exports the dummy sum predict model."""
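Editor's note: the PREDICT branch added in PATCH 31 splits a mixed PCollection into tagged 'example' and 'sequence' outputs, runs each branch through its own TFXIO converter and _RunInferenceOnRecordBatch, and flattens the resulting PredictionLogs. The following standalone sketch (not part of the patch) illustrates that routing pattern; the _tag helper, the Create source, and the print sink are illustrative stand-ins, and plain serialization takes the place of the converter + inference stages.

import apache_beam as beam
import tensorflow as tf


def _tag(element):
  # Route each proto to the 'example' or 'sequence' tagged output,
  # mirroring the SortInput step in RunInferenceOnExamples.
  tag = 'example' if isinstance(element, tf.train.Example) else 'sequence'
  return beam.pvalue.TaggedOutput(tag, element)


def run():
  example = tf.train.Example()
  example.features.feature['x'].float_list.value.append(1.0)
  sequence = tf.train.SequenceExample()
  sequence.context.feature['x'].float_list.value.append(2.0)

  with beam.Pipeline() as p:
    mixed = p | 'Create' >> beam.Create([example, sequence])
    tagged = mixed | 'SortInput' >> beam.Map(_tag).with_outputs(
        'example', 'sequence')
    # Each branch would normally go through its own converter
    # (TFExampleBeamRecord vs. TFSequenceExampleBeamRecord) and inference;
    # here both branches just serialize their protos.
    examples_out = (
        tagged.example
        | 'SerializeExamples' >> beam.Map(lambda e: e.SerializeToString()))
    sequences_out = (
        tagged.sequence
        | 'SerializeSequences' >> beam.Map(lambda e: e.SerializeToString()))
    _ = ((examples_out, sequences_out)
         | 'FlattenResult' >> beam.Flatten()
         | 'Print' >> beam.Map(print))


if __name__ == '__main__':
  run()

The two-branch shape follows from the patch itself: the Example and SequenceExample converters produce different Arrow schemas, so each branch needs its own converter and DataType before the PredictionLogs are merged back into a single PCollection with Flatten.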