From 1d03b5ae85a20aa6746af0c30756bc1c69a9de2f Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 10:45:08 -0400 Subject: [PATCH 01/31] created new file with arrow and modified base function --- tfx_bsl/beam/run_inference_arrow.py | 1166 ++++++++++++++++++++++ tfx_bsl/beam/run_inference_arrow_test.py | 581 +++++++++++ 2 files changed, 1747 insertions(+) create mode 100644 tfx_bsl/beam/run_inference_arrow.py create mode 100644 tfx_bsl/beam/run_inference_arrow_test.py diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py new file mode 100644 index 00000000..316b65a5 --- /dev/null +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -0,0 +1,1166 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Run batch inference on saved model.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import abc +import base64 +import collections +import os +import platform +import sys +import time +try: + import resource +except ImportError: + resource = None + +from absl import logging +import apache_beam as beam +import pyarrow as pa +from apache_beam.options.pipeline_options import GoogleCloudOptions +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.utils import retry +import googleapiclient +from googleapiclient import discovery +from googleapiclient import http +import numpy as np +import six +import tensorflow as tf +from tfx_bsl.beam import shared +from tfx_bsl.public.proto import model_spec_pb2 +from tfx_bsl.telemetry import util +from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ + Tuple, Union + +# TODO(b/140306674): stop using the internal TF API. +from tensorflow.python.saved_model import loader_impl +from tensorflow_serving.apis import classification_pb2 +from tensorflow_serving.apis import inference_pb2 +from tensorflow_serving.apis import prediction_log_pb2 +from tensorflow_serving.apis import regression_pb2 + + +# TODO(b/131873699): Remove once 1.x support is dropped. +# pylint: disable=g-import-not-at-top +try: + # We need to import this in order to register all quantiles ops, even though + # it's not directly used. + from tensorflow.contrib.boosted_trees.python.ops import quantile_ops as _ # pylint: disable=unused-import +except ImportError: + pass + +_DEFAULT_INPUT_KEY = 'examples' +_METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' +_METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' +_METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION = 'CloudAIPlatformPrediction' +_MILLISECOND_TO_MICROSECOND = 1000 +_MICROSECOND_TO_NANOSECOND = 1000 +_SECOND_TO_MICROSECOND = 1000000 +_REMOTE_INFERENCE_NUM_RETRIES = 5 + +# We define the following aliases of Any because the actual types are not +# public. +_SignatureDef = Any +_MetaGraphDef = Any +_SavedModel = Any + +# TODO (Maxine): what is this? 
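[Editor's note] A minimal usage sketch of the Arrow-based RunInference API introduced in this file, assuming serialized tf.train.Examples are packed into a one-column RecordBatch; the column name, model path, and the _make_record_batch helper below are illustrative, not part of this change:

import apache_beam as beam
import pyarrow as pa
import tensorflow as tf
from tfx_bsl.beam import run_inference_arrow
from tfx_bsl.public.proto import model_spec_pb2

def _make_record_batch(serialized_examples):
  # Pack serialized tf.train.Examples into a single binary column; the DoFns
  # in this module read the serialized records from column 0.
  return pa.RecordBatch.from_arrays(
      [pa.array(serialized_examples, type=pa.binary())], ['__raw_record__'])

spec = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/path/to/saved_model'))

with beam.Pipeline() as pipeline:
  _ = (pipeline
       | 'CreateBatches' >> beam.Create(
           [_make_record_batch([tf.train.Example().SerializeToString()])])
       | 'RunInference' >> run_inference_arrow.RunInferenceImpl(spec))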
+_BulkInferResult = Union[prediction_log_pb2.PredictLog, + Tuple[tf.train.Example, regression_pb2.Regression], + Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse], + Tuple[tf.train.Example, + classification_pb2.Classifications]] + + +# TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 +class OperationType(object): + CLASSIFICATION = 'CLASSIFICATION' + REGRESSION = 'REGRESSION' + PREDICTION = 'PREDICTION' + MULTIHEAD = 'MULTIHEAD' + + +# TODO (Me): pTransform from examples/sequence example here + +# TODO (Me): Union[bytes, pa.RecordBatch]? +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceImpl( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType +) -> beam.pvalue.PCollection: + """Implementation of RunInference API. + + Args: + examples: A PCollection containing RecordBatch. + inference_spec_type: Model inference endpoint. + + Returns: + A PCollection containing prediction logs. + + Raises: + ValueError; when operation is not supported. + """ + logging.info('RunInference on model: %s', inference_spec_type) + + batched_examples = examples | 'BatchExamples' >> beam.BatchElements() + operation_type = _get_operation_type(inference_spec_type) + if operation_type == OperationType.CLASSIFICATION: + return batched_examples | 'Classify' >> _Classify(inference_spec_type) + elif operation_type == OperationType.REGRESSION: + return batched_examples | 'Regress' >> _Regress(inference_spec_type) + elif operation_type == OperationType.PREDICTION: + return batched_examples | 'Predict' >> _Predict(inference_spec_type) + elif operation_type == OperationType.MULTIHEAD: + return (batched_examples + | 'MultiInference' >> _MultiInference(inference_spec_type)) + else: + raise ValueError('Unsupported operation_type %s' % operation_type) + + +_IOTensorSpec = collections.namedtuple( + '_IOTensorSpec', + ['input_tensor_alias', 'input_tensor_name', 'output_alias_tensor_names']) + +_Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs classify PTransform.""" + if _using_in_process_inference(inference_spec_type): + return (pcoll + | 'Classify' >> beam.ParDo( + _BatchClassifyDoFn(inference_spec_type, shared.Shared())) + | 'BuildPredictionLogForClassifications' >> beam.ParDo( + _BuildPredictionLogForClassificationsDoFn())) + else: + raise NotImplementedError + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs regress PTransform.""" + if _using_in_process_inference(inference_spec_type): + return (pcoll + | 'Regress' >> beam.ParDo( + _BatchRegressDoFn(inference_spec_type, shared.Shared())) + | 'BuildPredictionLogForRegressions' >> beam.ParDo( + _BuildPredictionLogForRegressionsDoFn())) + else: + raise NotImplementedError + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) 
+@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs predict PTransform.""" + if _using_in_process_inference(inference_spec_type): + predictions = ( + pcoll + | 'Predict' >> beam.ParDo( + _BatchPredictDoFn(inference_spec_type, shared.Shared()))) + else: + predictions = ( + pcoll + | 'RemotePredict' >> beam.ParDo( + _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options))) + return (predictions + | 'BuildPredictionLogForPredictions' >> beam.ParDo( + _BuildPredictionLogForPredictionsDoFn())) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name + inference_spec_type: model_spec_pb2.InferenceSpecType): + """Performs multi inference PTransform.""" + if _using_in_process_inference(inference_spec_type): + return ( + pcoll + | 'MultiInference' >> beam.ParDo( + _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())) + | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) + else: + raise NotImplementedError + + +@six.add_metaclass(abc.ABCMeta) +class _BaseDoFn(beam.DoFn): + """Base DoFn that performs bulk inference.""" + + class _MetricsCollector(object): + """A collector for beam metrics.""" + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + operation_type = _get_operation_type(inference_spec_type) + proximity_descriptor = ( + _METRICS_DESCRIPTOR_IN_PROCESS + if _using_in_process_inference(inference_spec_type) else + _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) + namespace = util.MakeTfxNamespace( + [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor]) + + # Metrics + self._inference_counter = beam.metrics.Metrics.counter( + namespace, 'num_inferences') + self._num_instances = beam.metrics.Metrics.counter( + namespace, 'num_instances') + self._inference_request_batch_size = beam.metrics.Metrics.distribution( + namespace, 'inference_request_batch_size') + self._inference_request_batch_byte_size = ( + beam.metrics.Metrics.distribution( + namespace, 'inference_request_batch_byte_size')) + # Batch inference latency in microseconds. + self._inference_batch_latency_micro_secs = ( + beam.metrics.Metrics.distribution( + namespace, 'inference_batch_latency_micro_secs')) + self._model_byte_size = beam.metrics.Metrics.distribution( + namespace, 'model_byte_size') + # Model load latency in milliseconds. 
+ self._load_model_latency_milli_secs = beam.metrics.Metrics.distribution( + namespace, 'load_model_latency_milli_secs') + + # Metrics cache + self.load_model_latency_milli_secs_cache = None + self.model_byte_size_cache = None + + def update_metrics_with_cache(self): + if self.load_model_latency_milli_secs_cache is not None: + self._load_model_latency_milli_secs.update( + self.load_model_latency_milli_secs_cache) + self.load_model_latency_milli_secs_cache = None + if self.model_byte_size_cache is not None: + self._model_byte_size.update(self.model_byte_size_cache) + self.model_byte_size_cache = None + + def update(self, elements: List[str], latency_micro_secs: int) -> None: + self._inference_batch_latency_micro_secs.update(latency_micro_secs) + self._num_instances.inc(len(elements)) + self._inference_counter.inc(len(elements)) + self._inference_request_batch_size.update(len(elements)) + self._inference_request_batch_byte_size.update( + sum(element.ByteSize() for element in elements)) + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + super(_BaseDoFn, self).__init__() + self._clock = None + self._metrics_collector = self._MetricsCollector(inference_spec_type) + + def setup(self): + self._clock = _ClockFactory.make_clock() + + def process( + self, elements: pa.RecordBatch + ) -> Iterable[Any]: + batch_start_time = self._clock.get_current_time_in_microseconds() + # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter + # extract record batch from here, assuming first column + serialized_examples = elements.column(0) + outputs = self.run_inference(serialized_examples) + result = self._post_process(serialized_examples, outputs) + self._metrics_collector.update( + elements, + self._clock.get_current_time_in_microseconds() - batch_start_time) + return result + + def finish_bundle(self): + self._metrics_collector.update_metrics_with_cache() + + @abc.abstractmethod + def run_inference( + self, elements: List[str] + ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: + raise NotImplementedError + + @abc.abstractmethod + def _post_process(self, elements: List[str], outputs: Any) -> Iterable[Any]: + raise NotImplementedError + + +def _retry_on_unavailable_and_resource_error_filter(exception: Exception): + """Retries for HttpError. + + Retries if error is unavailable (503) or resource exhausted (429). + Resource exhausted may happen when qps or bandwidth exceeds quota. + + Args: + exception: Exception from inference http request execution. + Returns: + A boolean of whether retry. + """ + + return (isinstance(exception, googleapiclient.errors.HttpError) and + exception.resp.status in (503, 429)) + +# TODO (Maxine): change all example to serialized +@beam.typehints.with_input_types(List[str]) +# Using output typehints triggers NotImplementedError('BEAM-2717)' on +# streaming mode on Dataflow runner. +# TODO(b/151468119): Consider to re-batch with online serving request size +# limit, and re-batch with RPC failures(InvalidArgument) regarding request size. +# @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) +class _RemotePredictDoFn(_BaseDoFn): + """A DoFn that performs predictions from a cloud-hosted TensorFlow model. + + Supports both batch and streaming processing modes. + NOTE: Does not work on DirectRunner for streaming jobs [BEAM-7885]. + + In order to request predictions, you must deploy your trained model to AI + Platform Prediction in the TensorFlow SavedModel format. 
See + [Exporting a SavedModel for prediction] + (https://cloud.google.com/ai-platform/prediction/docs/exporting-savedmodel-for-prediction) + for more details. + + To send binary data, you have to make sure that the name of an input ends in + `_bytes`. + + NOTE: The returned `PredictLog` instances do not have `PredictRequest` part + filled. The reason is that it is difficult to determine the input tensor name + without having access to cloud-hosted model's signatures. + """ + + def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, + pipeline_options: PipelineOptions): + super(_RemotePredictDoFn, self).__init__(inference_spec_type) + self._api_client = None + + project_id = ( + inference_spec_type.ai_platform_prediction_model_spec.project_id or + pipeline_options.view_as(GoogleCloudOptions).project) + if not project_id: + raise ValueError('Either a non-empty project id or project flag in ' + ' beam pipeline options needs be provided.') + + model_name = ( + inference_spec_type.ai_platform_prediction_model_spec.model_name) + if not model_name: + raise ValueError('A non-empty model name must be provided.') + + version_name = ( + inference_spec_type.ai_platform_prediction_model_spec.version_name) + name_spec = 'projects/{}/models/{}' + # If version is not specified, the default version for a model is used. + if version_name: + name_spec += '/versions/{}' + self._full_model_name = name_spec.format(project_id, model_name, + version_name) + + def setup(self): + super(_RemotePredictDoFn, self).setup() + # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to + # user agent once custom header is supported in googleapiclient. + self._api_client = discovery.build('ml', 'v1') + + # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. + @retry.with_exponential_backoff( + initial_delay_secs=1.0, + num_retries=_REMOTE_INFERENCE_NUM_RETRIES, + retry_filter=_retry_on_unavailable_and_resource_error_filter) + def _execute_request( + self, + request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: + result = request.execute() + if 'error' in result: + raise ValueError(result['error']) + return result + + def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: + return self._api_client.projects().predict( + name=self._full_model_name, body=body) + + @classmethod + def _prepare_instances( + cls, elements: List[tf.train.Example] + ) -> Generator[Mapping[Text, Any], None, None]: + for example in elements: + # TODO(b/151468119): support tf.train.SequenceExample + if not isinstance(example, tf.train.Example): + raise ValueError('Remote prediction only supports tf.train.Example') + + instance = {} + for input_name, feature in example.features.feature.items(): + attr_name = feature.WhichOneof('kind') + if attr_name is None: + continue + attr = getattr(feature, attr_name) + values = cls._parse_feature_content(attr.value, attr_name, + cls._sending_as_binary(input_name)) + # Flatten a sequence if its length is 1 + values = (values[0] if len(values) == 1 else values) + instance[input_name] = values + yield instance + + @staticmethod + def _sending_as_binary(input_name: Text) -> bool: + """Whether data should be sent as binary.""" + return input_name.endswith('_bytes') + + @staticmethod + def _parse_feature_content(values: Sequence[Any], attr_name: Text, + as_binary: bool) -> Sequence[Any]: + """Parse the content of tf.train.Feature object. 
+ + If bytes_list, parse a list of bytes-like objects to a list of strings so + that it would be JSON serializable. + + If float_list or int64_list, do nothing. + + If data should be sent as binary, mark it as binary by replacing it with + a single attribute named 'b64'. + """ + if as_binary: + return [{'b64': base64.b64encode(x).decode()} for x in values] + elif attr_name == 'bytes_list': + return [x.decode() for x in values] + else: + return values + + def run_inference( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Sequence[Mapping[Text, Any]]: + body = {'instances': list(self._prepare_instances(elements))} + request = self._make_request(body) + response = self._execute_request(request) + return response['predictions'] + + def _post_process( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]], + outputs: Sequence[Mapping[Text, Any]] + ) -> Iterable[prediction_log_pb2.PredictLog]: + result = [] + for output in outputs: + predict_log = prediction_log_pb2.PredictLog() + for output_alias, values in output.items(): + values = np.array(values) + tensor_proto = tf.make_tensor_proto( + values=values, + dtype=tf.as_dtype(values.dtype).as_datatype_enum, + shape=np.expand_dims(values, axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) + return result + + +# TODO(b/131873699): Add typehints once +# [BEAM-8381](https://issues.apache.org/jira/browse/BEAM-8381) +# is fixed. +# TODO(b/143484017): Add batch_size back off in the case there are functional +# reasons large batch sizes cannot be handled. +class _BaseBatchSavedModelDoFn(_BaseDoFn): + """A DoFn that runs in-process batch inference with a model. + + Models need to have the required serving signature as mentioned in + [Tensorflow Serving](https://www.tensorflow.org/tfx/serving/signature_defs) + + This function will check model signatures first. Then it will load and run + model inference in batch. + """ + + def __init__( + self, + inference_spec_type: model_spec_pb2.InferenceSpecType, + shared_model_handle: shared.Shared, + ): + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) + self._inference_spec_type = inference_spec_type + self._shared_model_handle = shared_model_handle + self._model_path = inference_spec_type.saved_model_spec.model_path + self._tags = None + self._signatures = _get_signatures( + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) + self._session = None + self._io_tensor_spec = None + + def setup(self): + """Load the model. + + Note that worker may crash if exception is thrown in setup due + to b/139207285. + """ + + super(_BaseBatchSavedModelDoFn, self).setup() + self._tags = _get_tags(self._inference_spec_type) + self._io_tensor_spec = self._pre_process() + + if self._has_tpu_tag(): + # TODO(b/131873699): Support TPU inference. + raise ValueError('TPU inference is not supported yet.') + self._session = self._load_model() + + def _load_model(self): + """Load a saved model into memory. + + Returns: + Session instance. + """ + + def load(): + """Function for constructing shared LoadedModel.""" + # TODO(b/143484017): Do warmup and other heavy model construction here. 
+ result = tf.compat.v1.Session(graph=tf.compat.v1.Graph()) + memory_before = _get_current_process_memory_in_bytes() + start_time = self._clock.get_current_time_in_microseconds() + tf.compat.v1.saved_model.loader.load(result, self._tags, self._model_path) + end_time = self._clock.get_current_time_in_microseconds() + memory_after = _get_current_process_memory_in_bytes() + self._metrics_collector.load_model_latency_milli_secs_cache = ( + (end_time - start_time) / _MILLISECOND_TO_MICROSECOND) + self._metrics_collector.model_byte_size_cache = ( + memory_after - memory_before) + return result + + if not self._model_path: + raise ValueError('Model path is not valid.') + return self._shared_model_handle.acquire(load) + + def _pre_process(self) -> _IOTensorSpec: + # Pre process functions will validate for each signature. + io_tensor_specs = [] + for signature in self._signatures: + if len(signature.signature_def.inputs) != 1: + raise ValueError('Signature should have 1 and only 1 inputs') + if (list(signature.signature_def.inputs.values())[0].dtype != + tf.string.as_datatype_enum): + raise ValueError( + 'Input dtype is expected to be %s, got %s' % + tf.string.as_datatype_enum, + list(signature.signature_def.inputs.values())[0].dtype) + io_tensor_specs.append(_signature_pre_process(signature.signature_def)) + input_tensor_name = '' + input_tensor_alias = '' + output_alias_tensor_names = {} + for io_tensor_spec in io_tensor_specs: + if not input_tensor_name: + input_tensor_name = io_tensor_spec.input_tensor_name + input_tensor_alias = io_tensor_spec.input_tensor_alias + elif input_tensor_name != io_tensor_spec.input_tensor_name: + raise ValueError('Input tensor must be the same for all Signatures.') + for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items( + ): + output_alias_tensor_names[alias] = tensor_name + if (not output_alias_tensor_names or not input_tensor_name or + not input_tensor_alias): + raise ValueError('No valid fetch tensors or feed tensors.') + return _IOTensorSpec(input_tensor_alias, input_tensor_name, + output_alias_tensor_names) + + def _has_tpu_tag(self) -> bool: + return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and + tf.saved_model.TPU in self._tags) + + def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + self._check_elements(elements) + outputs = self._run_tf_operations(elements) + return outputs + + def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + result = self._session.run( + self._io_tensor_spec.output_alias_tensor_names, + feed_dict={self._io_tensor_spec.input_tensor_name: elements}) + if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): + raise RuntimeError('Output length does not match fetches') + return result + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + """Unimplemented.""" + + raise NotImplementedError + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + classification_pb2.Classifications]) +class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that run inference on classification model.""" + + def setup(self): + signature_def = self._signatures[0].signature_def + if signature_def.method_name != tf.saved_model.CLASSIFY_METHOD_NAME: + raise ValueError( + 'BulkInferrerClassifyDoFn requires signature method ' + 'name %s, got: %s' % tf.saved_model.CLASSIFY_METHOD_NAME, + signature_def.method_name) + super(_BatchClassifyDoFn, 
self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Classify only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> Iterable[Tuple[tf.train.Example, classification_pb2.Classifications]]: + classifications = _post_process_classify( + self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + return zip(elements, classifications) + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + regression_pb2.Regression]) +class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that run inference on regression model.""" + + def setup(self): + super(_BatchRegressDoFn, self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Regress only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> Iterable[Tuple[tf.train.Example, regression_pb2.Regression]]: + regressions = _post_process_regress(elements, outputs) + return zip(elements, regressions) + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictLog) +class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that runs inference on predict model.""" + + def setup(self): + signature_def = self._signatures[0].signature_def + if signature_def.method_name != tf.saved_model.PREDICT_METHOD_NAME: + raise ValueError( + 'BulkInferrerPredictDoFn requires signature method ' + 'name %s, got: %s' % tf.saved_model.PREDICT_METHOD_NAME, + signature_def.method_name) + super(_BatchPredictDoFn, self).setup() + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + pass + + def _post_process( + self, elements: Union[Sequence[tf.train.Example], + Sequence[tf.train.SequenceExample]], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[prediction_log_pb2.PredictLog]: + input_tensor_alias = self._io_tensor_spec.input_tensor_alias + signature_name = self._signatures[0].name + batch_size = len(elements) + for output_alias, output in outputs.items(): + if len(output.shape) < 1 or output.shape[0] != batch_size: + raise ValueError( + 'Expected output tensor %s to have at least one ' + 'dimension, with the first having a size equal to the input batch ' + 'size %s. 
Instead found %s' % + (output_alias, batch_size, output.shape)) + predict_log_tmpl = prediction_log_pb2.PredictLog() + predict_log_tmpl.request.model_spec.signature_name = signature_name + predict_log_tmpl.response.model_spec.signature_name = signature_name + input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias] + input_tensor_proto.dtype = tf.string.as_datatype_enum + input_tensor_proto.tensor_shape.dim.add().size = 1 + + result = [] + for i in range(batch_size): + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) + predict_log.request.inputs[input_tensor_alias].string_val.append( + elements[i].SerializeToString()) + for output_alias, output in outputs.items(): + # Mimic tensor::Split + tensor_proto = tf.make_tensor_proto( + values=output[i], + dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, + shape=np.expand_dims(output[i], axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) + return result + + +@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_output_types(Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse]) +class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): + """A DoFn that runs inference on multi-head model.""" + + def _check_elements( + self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]]) -> None: + if not all(isinstance(element, tf.train.Example) for element in elements): + raise ValueError('Multi inference only supports tf.train.Example') + + def _post_process( + self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, + np.ndarray] + ) -> Iterable[Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]]: + classifications = None + regressions = None + for signature in self._signatures: + signature_def = signature.signature_def + if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + classifications = _post_process_classify( + self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: + regressions = _post_process_regress(elements, outputs) + else: + raise ValueError('Signature method %s is not supported for ' + 'multi inference' % signature_def.method_name) + result = [] + for i in range(len(elements)): + response = inference_pb2.MultiInferenceResponse() + for signature in self._signatures: + signature_def = signature.signature_def + inference_result = response.results.add() + if (signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME and + classifications): + inference_result.classification_result.classifications.add().CopyFrom( + classifications[i]) + elif ( + signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME and + regressions): + inference_result.regression_result.regressions.add().CopyFrom( + regressions[i]) + else: + raise ValueError('Signature method %s is not supported for ' + 'multi inference' % signature_def.method_name) + inference_result.model_spec.signature_name = signature.name + if len(response.results) != len(self._signatures): + raise RuntimeError('Multi inference response result length does not ' + 'match the number of signatures') + result.append((elements[i], response)) + return result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + classification_pb2.Classifications]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): + """A DoFn that builds prediction log 
from classifications.""" + + def process( + self, element: Tuple[tf.train.Example, classification_pb2.Classifications] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, classifications) = element + result = prediction_log_pb2.PredictionLog() + result.classify_log.request.input.example_list.examples.add().CopyFrom( + train_example) + result.classify_log.response.result.classifications.add().CopyFrom( + classifications) + yield result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + regression_pb2.Regression]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): + """A DoFn that builds prediction log from regressions.""" + + def process( + self, element: Tuple[tf.train.Example, regression_pb2.Regression] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, regression) = element + result = prediction_log_pb2.PredictionLog() + result.regress_log.request.input.example_list.examples.add().CopyFrom( + train_example) + result.regress_log.response.result.regressions.add().CopyFrom(regression) + yield result + + +@beam.typehints.with_input_types(prediction_log_pb2.PredictLog) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildPredictionLogForPredictionsDoFn(beam.DoFn): + """A DoFn that builds prediction log from predictions.""" + + def process( + self, element: prediction_log_pb2.PredictLog + ) -> Iterable[prediction_log_pb2.PredictionLog]: + result = prediction_log_pb2.PredictionLog() + result.predict_log.CopyFrom(element) + yield result + + +@beam.typehints.with_input_types(Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +class _BuildMultiInferenceLogDoFn(beam.DoFn): + """A DoFn that builds prediction log from multi-head inference result.""" + + def process( + self, element: Tuple[tf.train.Example, + inference_pb2.MultiInferenceResponse] + ) -> Iterable[prediction_log_pb2.PredictionLog]: + (train_example, multi_inference_response) = element + result = prediction_log_pb2.PredictionLog() + (result.multi_inference_log.request.input.example_list.examples.add() + .CopyFrom(train_example)) + result.multi_inference_log.response.CopyFrom(multi_inference_response) + yield result + + +# TODO (Maxine): moving these into class? +def _post_process_classify( + output_alias_tensor_names: Mapping[Text, Text], + elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] +) -> Sequence[classification_pb2.Classifications]: + """Returns classifications from inference output.""" + + # This is to avoid error "The truth value of an array with + # more than one element is ambiguous." + has_classes = False + has_scores = False + if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in output_alias_tensor_names: + classes = outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] + has_classes = True + if tf.saved_model.CLASSIFY_OUTPUT_SCORES in output_alias_tensor_names: + scores = outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES] + has_scores = True + if has_classes: + if classes.ndim != 2: + raise ValueError('Expected Tensor shape: [batch_size num_classes] but ' + 'got %s' % classes.shape) + if classes.dtype != tf.string.as_numpy_dtype: + raise ValueError('Expected classes Tensor of %s. 
Got: %s' % + (tf.string.as_numpy_dtype, classes.dtype)) + if classes.shape[0] != len(elements): + raise ValueError('Expected classes output batch size of %s, got %s' % + (len(elements), classes.shape[0])) + if has_scores: + if scores.ndim != 2: + raise ValueError("""Expected Tensor shape: [batch_size num_classes] but + got %s""" % scores.shape) + if scores.dtype != tf.float32.as_numpy_dtype: + raise ValueError('Expected classes Tensor of %s. Got: %s' % + (tf.float32.as_numpy_dtype, scores.dtype)) + if scores.shape[0] != len(elements): + raise ValueError('Expected classes output batch size of %s, got %s' % + (len(elements), scores.shape[0])) + num_classes = 0 + if has_classes and has_scores: + if scores.shape[1] != classes.shape[1]: + raise ValueError('Tensors class and score should match in shape[1]. ' + 'Got %s vs %s' % (classes.shape[1], scores.shape[1])) + num_classes = classes.shape[1] + elif has_classes: + num_classes = classes.shape[1] + elif has_scores: + num_classes = scores.shape[1] + + result = [] + for i in range(len(elements)): + a_classification = classification_pb2.Classifications() + for c in range(num_classes): + a_class = a_classification.classes.add() + if has_classes: + a_class.label = classes[i][c] + if has_scores: + a_class.score = scores[i][c] + result.append(a_classification) + if len(result) != len(elements): + raise RuntimeError('Classifications length does not match elements') + return result + + +def _post_process_regress( + elements: Sequence[tf.train.Example], + outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: + """Returns regressions from inference output.""" + + if tf.saved_model.REGRESS_OUTPUTS not in outputs: + raise ValueError('No regression outputs found in outputs: %s' % + outputs.keys()) + output = outputs[tf.saved_model.REGRESS_OUTPUTS] + batch_size = len(elements) + if not (output.ndim == 1 or (output.ndim == 2 and output.shape[1] == 1)): + raise ValueError("""Expected output Tensor shape to be either [batch_size] + or [batch_size, 1] but got %s""" % output.shape) + if batch_size != output.shape[0]: + raise ValueError( + 'Input batch size did not match output batch size: %s vs %s' % + (batch_size, output.shape[0])) + if output.dtype != tf.float32.as_numpy_dtype: + raise ValueError('Expected output Tensor of %s. Got: %s' % + (tf.float32.as_numpy_dtype, output.dtype)) + if output.size != batch_size: + raise ValueError('Expected output batch size to be %s. Got: %s' % + (batch_size, output.size)) + flatten_output = output.flatten() + result = [] + for regression_result in flatten_output: + regression = regression_pb2.Regression() + regression.value = regression_result + result.append(regression) + + # Add additional check to save downstream consumer checks. 
+ if len(result) != len(elements): + raise RuntimeError('Regression length does not match elements') + return result + + +def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: + """Returns IOTensorSpec from signature.""" + + if len(signature.inputs) != 1: + raise ValueError('Signature should have 1 and only 1 inputs') + input_tensor_alias = list(signature.inputs.keys())[0] + if list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum: + raise ValueError( + 'Input dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, + list(signature.inputs.values())[0].dtype) + if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_classify(signature)) + elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_predict(signature)) + elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: + input_tensor_name, output_alias_tensor_names = ( + _signature_pre_process_regress(signature)) + else: + raise ValueError('Signature method %s is not supported' % + signature.method_name) + return _IOTensorSpec(input_tensor_alias, input_tensor_name, + output_alias_tensor_names) + + +def _signature_pre_process_classify( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. + + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. + """ + + if len(signature.outputs) != 1 and len(signature.outputs) != 2: + raise ValueError('Classify signature should have 1 or 2 outputs') + if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: + raise ValueError('No classification inputs found in SignatureDef: %s' % + signature.inputs) + input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name + output_alias_tensor_names = {} + if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and + tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): + raise ValueError( + """Expected classification signature outputs to contain at + least one of %s or %s. Signature was: %s""" % + tf.saved_model.CLASSIFY_OUTPUT_CLASSES, + tf.saved_model.CLASSIFY_OUTPUT_SCORES, signature) + if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in signature.outputs: + output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] = ( + signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES].name) + if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: + output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( + signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) + return input_tensor_name, output_alias_tensor_names + + +def _signature_pre_process_predict( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. + + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. + """ + + input_tensor_name = list(signature.inputs.values())[0].name + output_alias_tensor_names = dict([ + (key, output.name) for key, output in signature.outputs.items() + ]) + return input_tensor_name, output_alias_tensor_names + + +def _signature_pre_process_regress( + signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: + """Returns input tensor name and output alias tensor names from signature. 
+ + Args: + signature: SignatureDef + + Returns: + A tuple of input tensor name and output alias tensor names. + """ + + if len(signature.outputs) != 1: + raise ValueError('Regress signature should have 1 output') + if tf.saved_model.REGRESS_INPUTS not in signature.inputs: + raise ValueError('No regression inputs found in SignatureDef: %s' % + signature.inputs) + input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name + if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: + raise ValueError('No regression outputs found in SignatureDef: %s' % + signature.outputs) + output_alias_tensor_names = { + tf.saved_model.REGRESS_OUTPUTS: + signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name + } + return input_tensor_name, output_alias_tensor_names + + +def _using_in_process_inference( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> bool: + return inference_spec_type.WhichOneof('type') == 'saved_model_spec' + + +def _get_signatures(model_path: Text, signatures: Sequence[Text], + tags: Sequence[Text]) -> Sequence[_Signature]: + """Returns a sequence of {model_signature_name: signature}.""" + + if signatures: + signature_names = signatures + else: + signature_names = [tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + + saved_model_pb = loader_impl.parse_saved_model(model_path) + meta_graph_def = _get_meta_graph_def(saved_model_pb, tags) + result = [] + for signature_name in signature_names: + if signature_name in meta_graph_def.signature_def: + result.append( + _Signature(signature_name, + meta_graph_def.signature_def[signature_name])) + else: + raise RuntimeError('Signature %s could not be found in SavedModel' % + signature_name) + return result + + +def _get_operation_type( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: + if _using_in_process_inference(inference_spec_type): + signatures = _get_signatures( + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) + if not signatures: + raise ValueError('Model does not have valid signature to use') + + if len(signatures) == 1: + method_name = signatures[0].signature_def.method_name + if method_name == tf.saved_model.CLASSIFY_METHOD_NAME: + return OperationType.CLASSIFICATION + elif method_name == tf.saved_model.REGRESS_METHOD_NAME: + return OperationType.REGRESSION + elif method_name == tf.saved_model.PREDICT_METHOD_NAME: + return OperationType.PREDICTION + else: + raise ValueError('Unsupported signature method_name %s' % method_name) + else: + for signature in signatures: + method_name = signature.signature_def.method_name + if (method_name != tf.saved_model.CLASSIFY_METHOD_NAME and + method_name != tf.saved_model.REGRESS_METHOD_NAME): + raise ValueError('Unsupported signature method_name for multi-head ' + 'model inference: %s' % method_name) + return OperationType.MULTIHEAD + else: + # Remote inference supports predictions only. 
+ return OperationType.PREDICTION + + +def _get_meta_graph_def(saved_model_pb: _SavedModel, + tags: Sequence[Text]) -> _MetaGraphDef: + """Returns MetaGraphDef from SavedModel.""" + + for meta_graph_def in saved_model_pb.meta_graphs: + if set(meta_graph_def.meta_info_def.tags) == set(tags): + return meta_graph_def + raise RuntimeError('MetaGraphDef associated with tags %s could not be ' + 'found in SavedModel' % tags) + + +def _get_current_process_memory_in_bytes(): + """Returns memory usage in bytes.""" + + if resource is not None: + usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if _is_darwin(): + return usage + return usage * 1024 + else: + logging.warning('Resource module is not available for current platform, ' + 'memory usage cannot be fetched.') + return 0 + + +def _get_tags( + inference_spec_type: model_spec_pb2.InferenceSpecType) -> Sequence[Text]: + """Returns tags from ModelSpec.""" + + if inference_spec_type.saved_model_spec.tag: + return list(inference_spec_type.saved_model_spec.tag) + else: + return [tf.saved_model.SERVING] + + +def _is_darwin() -> bool: + return sys.platform == 'darwin' + + +def _is_windows() -> bool: + return platform.system() == 'Windows' or os.name == 'nt' + + +def _is_cygwin() -> bool: + return platform.system().startswith('CYGWIN_NT') + + +class _Clock(object): + + def get_current_time_in_microseconds(self) -> int: + return int(time.time() * _SECOND_TO_MICROSECOND) + + +class _FineGrainedClock(_Clock): + + def get_current_time_in_microseconds(self) -> int: + return int( + time.clock_gettime_ns(time.CLOCK_REALTIME) / # pytype: disable=module-attr + _MICROSECOND_TO_NANOSECOND) + + +class _ClockFactory(object): + + @staticmethod + def make_clock() -> _Clock: + if (hasattr(time, 'clock_gettime_ns') and not _is_windows() + and not _is_cygwin()): + return _FineGrainedClock() + return _Clock() diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py new file mode 100644 index 00000000..a4eed521 --- /dev/null +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -0,0 +1,581 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
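[Editor's note] A minimal sketch, for the tests below, of turning a TFRecord file of serialized tf.train.Examples into the RecordBatch input that RunInferenceImpl now expects; the helper name and batch sizes are illustrative, and the column name mirrors the ARROW_INPUT_COLUMN constant defined later in this file:

import apache_beam as beam
import pyarrow as pa

def _read_examples_as_record_batches(pipeline, example_path):
  # Read raw serialized records, group them, and pack each group into a
  # one-column RecordBatch keyed by the raw-record column.
  return (pipeline
          | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
          | 'BatchExamples' >> beam.BatchElements(
              min_batch_size=2, max_batch_size=2)
          | 'ToRecordBatch' >> beam.Map(
              lambda batch: pa.RecordBatch.from_arrays(
                  [pa.array(batch, type=pa.binary())], ['__raw_record__'])))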
+"""Tests for tfx_bsl.run_inference.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import json +import os +try: + import unittest.mock as mock +except ImportError: + import mock + +import apache_beam as beam +from apache_beam.metrics.metric import MetricsFilter +from apache_beam.testing.util import assert_that +from apache_beam.testing.util import equal_to +from googleapiclient import discovery +from googleapiclient import http +from six.moves import http_client +import tensorflow as tf +from tfx_bsl.beam import run_inference +from tfx_bsl.public.proto import model_spec_pb2 + +from google.protobuf import text_format + +from tensorflow_serving.apis import prediction_log_pb2 + + +class RunInferenceFixture(tf.test.TestCase): + + def setUp(self): + super(RunInferenceFixture, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + ] + + def _get_output_data_dir(self, sub_dir=None): + test_dir = self._testMethodName + path = os.path.join( + os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), + test_dir) + if not tf.io.gfile.exists(path): + tf.io.gfile.makedirs(path) + if sub_dir is not None: + path = os.path.join(path, sub_dir) + return path + + def _prepare_predict_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._predict_examples: + output_file.write(example.SerializeToString()) + + +ARROW_INPUT_COLUMN = '__raw_record__' +class RunOfflineInferenceTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceTest, self).setUp() + + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + # TODO: Ask if these example can directly transform to recordBatch + + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _build_predict_model(self, model_path): + """Exports the dummy sum predict model.""" + + with tf.compat.v1.Graph().as_default(): + input_tensors = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0) + } + serving_receiver = ( + tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( + input_tensors)()) + output_tensors = {'y': serving_receiver.features['x'] * 2} + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def = tf.compat.v1.estimator.export.PredictOutput( + output_tensors).as_signature_def(serving_receiver.receiver_tensors) + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, 
[tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature_def, + }) + builder.save() + + def _build_regression_signature(self, input_tensor, output_tensor): + """Helper function for building a regression SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + output_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) + + def _build_classification_signature(self, input_tensor, scores_tensor): + """Helper function for building a classification SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + scores_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + + def _build_multihead_model(self, model_path): + with tf.compat.v1.Graph().as_default(): + input_example = tf.compat.v1.placeholder( + tf.string, name='input_examples_tensor') + config = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + 'y': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + } + features = tf.compat.v1.parse_example(input_example, config) + x = features['x'] + y = features['y'] + sum_pred = x + y + diff_pred = tf.abs(x - y) + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def_map = { + 'regress_diff': + self._build_regression_signature(input_example, diff_pred), + 'classify_sum': + self._build_classification_signature(input_example, sum_pred), + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + self._build_regression_signature(input_example, sum_pred) + } + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map) + builder.save() + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path): + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | + 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + 
prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + def testModelPathInvalid(self): + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=self._get_output_data_dir())), prediction_log_path) + + def testEstimatorModelPredict(self): + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_predict_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + self.assertEqual( + results[0].predict_log.request.inputs[ + run_inference._DEFAULT_INPUT_KEY].string_val[0], + self._predict_examples[0].SerializeToString()) + self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, + tf.float32) + self.assertLen( + results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, + 1) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, + 1) + + def testClassifyModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + classify_log = results[0].classify_log + self.assertLen(classify_log.request.input.example_list.examples, 1) + self.assertEqual(classify_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(classify_log.response.result.classifications, 1) + self.assertLen(classify_log.response.result.classifications[0].classes, 1) + self.assertAlmostEqual( + classify_log.response.result.classifications[0].classes[0].score, 1.0) + + def testRegressModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['regress_diff'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + regress_log = results[0].regress_log + self.assertLen(regress_log.request.input.example_list.examples, 1) + self.assertEqual(regress_log.request.input.example_list.examples[0], + 
self._multihead_examples[0]) + self.assertLen(regress_log.response.result.regressions, 1) + self.assertAlmostEqual(regress_log.response.result.regressions[0].value, + 0.6) + + def testMultiInferenceModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, + signature_name=['regress_diff', 'classify_sum'])), + prediction_log_path) + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + multi_inference_log = results[0].multi_inference_log + self.assertLen(multi_inference_log.request.input.example_list.examples, 1) + self.assertEqual(multi_inference_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(multi_inference_log.response.results, 2) + signature_names = [] + for result in multi_inference_log.response.results: + signature_names.append(result.model_spec.signature_name) + self.assertIn('regress_diff', signature_names) + self.assertIn('classify_sum', signature_names) + result = multi_inference_log.response.results[0] + self.assertEqual(result.model_spec.signature_name, 'regress_diff') + self.assertLen(result.regression_result.regressions, 1) + self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) + result = multi_inference_log.response.results[1] + self.assertEqual(result.model_spec.signature_name, 'classify_sum') + self.assertLen(result.classification_result.classifications, 1) + self.assertLen(result.classification_result.classifications[0].classes, 1) + self.assertAlmostEqual( + result.classification_result.classifications[0].classes[0].score, 1.0) + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = 
self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testTelemetry(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + inference_spec_type = model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])) + pipeline = beam.Pipeline() + _ = ( + pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + run_result = pipeline.run() + run_result.wait_until_finish() + + num_inferences = run_result.metrics().query( + MetricsFilter().with_name('num_inferences')) + self.assertTrue(num_inferences['counters']) + self.assertEqual(num_inferences['counters'][0].result, 2) + num_instances = run_result.metrics().query( + MetricsFilter().with_name('num_instances')) + self.assertTrue(num_instances['counters']) + self.assertEqual(num_instances['counters'][0].result, 2) + inference_request_batch_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_size')) + self.assertTrue(inference_request_batch_size['distributions']) + self.assertEqual( + inference_request_batch_size['distributions'][0].result.sum, 2) + inference_request_batch_byte_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_byte_size')) + self.assertTrue(inference_request_batch_byte_size['distributions']) + self.assertEqual( + inference_request_batch_byte_size['distributions'][0].result.sum, + sum(element.ByteSize() for element in self._multihead_examples)) + inference_batch_latency_micro_secs = run_result.metrics().query( + MetricsFilter().with_name('inference_batch_latency_micro_secs')) + self.assertTrue(inference_batch_latency_micro_secs['distributions']) + self.assertGreaterEqual( + inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) + load_model_latency_milli_secs = run_result.metrics().query( + MetricsFilter().with_name('load_model_latency_milli_secs')) + self.assertTrue(load_model_latency_milli_secs['distributions']) + self.assertGreaterEqual( + load_model_latency_milli_secs['distributions'][0].result.sum, 0) + + +class RunRemoteInferenceTest(RunInferenceFixture): + + def setUp(self): + super(RunRemoteInferenceTest, self).setUp() + self.example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self.example_path) + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
+ self._discovery_testdata_dir = os.path.join( + os.path.join(os.path.dirname(__file__), 'testdata'), + 'ml_discovery.json') + + @staticmethod + def _make_response_body(content, successful): + if successful: + response_dict = {'predictions': content} + else: + response_dict = {'error': content} + return json.dumps(response_dict) + + def _set_up_pipeline(self, inference_spec_type): + self.pipeline = beam.Pipeline() + self.pcoll = ( + self.pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + + def _run_inference_with_beam(self): + self.pipeline_result = self.pipeline.run() + self.pipeline_result.wait_until_finish() + + def test_model_predict(self): + predictions = [{'output_1': [0.901], 'output_2': [0.997]}] + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(predictions, successful=True)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.predict_log.response.outputs['output_1'].CopyFrom( + tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) + prediction_log.predict_log.response.outputs['output_2'].CopyFrom( + tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + + self._set_up_pipeline(inference_spec_type) + assert_that(self.pcoll, equal_to([prediction_log])) + self._run_inference_with_beam() + + def test_exception_raised_when_response_body_contains_error_entry(self): + error_msg = 'Base64 decode failed.' + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(error_msg, successful=False)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + try: + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def test_exception_raised_when_project_id_is_empty(self): + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec(model_name='test-model',)) + + with self.assertRaises(ValueError): + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + + def test_request_body_with_binary_data(self): + example = text_format.Parse( + """ + features { + feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} + feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} + feature { key: "y" value { int64_list { value: [1, 2] }}} + } + """, tf.train.Example()) + result = list( + run_inference._RemotePredictDoFn._prepare_instances([example])) + self.assertEqual([ + { + 'x_bytes': { + 'b64': 'QVNhOGFzZGY=' + }, + 'x': 'JLK7ljk3', + 'y': [1, 2] + }, + ], result) + + +if __name__ == '__main__': + tf.test.main() From 1d553019cc0d48409553716f3e3389825d633203 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 11:57:39 -0400 Subject: [PATCH 02/31] make master the same as before for comparison --- tfx_bsl/beam/run_inference_arrow.py | 72 ++++++++++++++---------- tfx_bsl/beam/run_inference_arrow_test.py | 6 +- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 316b65a5..98e45148 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -32,7 +32,6 @@ from absl import logging import apache_beam as beam -import pyarrow as pa from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.utils import retry @@ -80,7 +79,6 @@ _MetaGraphDef = Any _SavedModel = Any -# TODO (Maxine): what is this? _BulkInferResult = Union[prediction_log_pb2.PredictLog, Tuple[tf.train.Example, regression_pb2.Regression], Tuple[tf.train.Example, @@ -97,11 +95,9 @@ class OperationType(object): MULTIHEAD = 'MULTIHEAD' -# TODO (Me): pTransform from examples/sequence example here - -# TODO (Me): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, @@ -110,7 +106,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name """Implementation of RunInference API. Args: - examples: A PCollection containing RecordBatch. + examples: A PCollection containing examples. inference_spec_type: Model inference endpoint. 
Returns: @@ -144,7 +140,8 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -160,7 +157,8 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -176,7 +174,8 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -197,7 +196,8 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -261,7 +261,9 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None - def update(self, elements: List[str], latency_micro_secs: int) -> None: + def update(self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]], + latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) self._num_instances.inc(len(elements)) self._inference_counter.inc(len(elements)) @@ -278,14 +280,11 @@ def setup(self): self._clock = _ClockFactory.make_clock() def process( - self, elements: pa.RecordBatch + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] ) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter - # extract record batch from here, assuming first column - serialized_examples = elements.column(0) - outputs = self.run_inference(serialized_examples) - result = self._post_process(serialized_examples, outputs) + outputs = self.run_inference(elements) + result = self._post_process(elements, outputs) self._metrics_collector.update( elements, self._clock.get_current_time_in_microseconds() - batch_start_time) @@ -296,12 +295,14 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, elements: List[str] + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: raise NotImplementedError @abc.abstractmethod - def _post_process(self, elements: List[str], outputs: Any) -> Iterable[Any]: + def _post_process(self, elements: List[Union[tf.train.Example, + tf.train.SequenceExample]], + outputs: 
Any) -> Iterable[Any]: raise NotImplementedError @@ -320,8 +321,9 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): return (isinstance(exception, googleapiclient.errors.HttpError) and exception.resp.status in (503, 429)) -# TODO (Maxine): change all example to serialized -@beam.typehints.with_input_types(List[str]) + +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) # Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. # TODO(b/151468119): Consider to re-batch with online serving request size @@ -578,15 +580,22 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def run_inference( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Mapping[Text, np.ndarray]: self._check_elements(elements) outputs = self._run_tf_operations(elements) return outputs - def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def _run_tf_operations( + self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + ) -> Mapping[Text, np.ndarray]: + input_values = [] + for element in elements: + input_values.append(element.SerializeToString()) result = self._session.run( self._io_tensor_spec.output_alias_tensor_names, - feed_dict={self._io_tensor_spec.input_tensor_name: elements}) + feed_dict={self._io_tensor_spec.input_tensor_name: input_values}) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): raise RuntimeError('Output length does not match fetches') return result @@ -599,7 +608,8 @@ def _check_elements( raise NotImplementedError -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): @@ -629,7 +639,8 @@ def _post_process( return zip(elements, classifications) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, regression_pb2.Regression]) class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): @@ -652,7 +663,8 @@ def _post_process( return zip(elements, regressions) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on predict model.""" @@ -710,7 +722,8 @@ def _post_process( return result -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(List[Union[tf.train.Example, + tf.train.SequenceExample]]) @beam.typehints.with_output_types(Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): @@ -830,7 +843,6 @@ def process( yield result -# TODO (Maxine): moving these into class? 
def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index a4eed521..ce9ac4d0 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -71,12 +71,10 @@ def _prepare_predict_examples(self, example_path): output_file.write(example.SerializeToString()) -ARROW_INPUT_COLUMN = '__raw_record__' class RunOfflineInferenceTest(RunInferenceFixture): - + def setUp(self): super(RunOfflineInferenceTest, self).setUp() - self._predict_examples = [ text_format.Parse( """ @@ -108,8 +106,6 @@ def setUp(self): """, tf.train.Example()), ] - # TODO: Ask if these example can directly transform to recordBatch - def _prepare_multihead_examples(self, example_path): with tf.io.TFRecordWriter(example_path) as output_file: From c39a82db1d712fd6d2e817752d6fe4621188c653 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 12:05:06 -0400 Subject: [PATCH 03/31] add changes for base class --- tfx_bsl/beam/run_inference_arrow.py | 72 ++++++++++++----------------- 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 98e45148..316b65a5 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -32,6 +32,7 @@ from absl import logging import apache_beam as beam +import pyarrow as pa from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.utils import retry @@ -79,6 +80,7 @@ _MetaGraphDef = Any _SavedModel = Any +# TODO (Maxine): what is this? _BulkInferResult = Union[prediction_log_pb2.PredictLog, Tuple[tf.train.Example, regression_pb2.Regression], Tuple[tf.train.Example, @@ -95,9 +97,11 @@ class OperationType(object): MULTIHEAD = 'MULTIHEAD' +# TODO (Me): pTransform from examples/sequence example here + +# TODO (Me): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, @@ -106,7 +110,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name """Implementation of RunInference API. Args: - examples: A PCollection containing examples. + examples: A PCollection containing RecordBatch. inference_spec_type: Model inference endpoint. 
Returns: @@ -140,8 +144,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -157,8 +160,7 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -174,8 +176,7 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -196,8 +197,7 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType): @@ -261,9 +261,7 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None - def update(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - latency_micro_secs: int) -> None: + def update(self, elements: List[str], latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) self._num_instances.inc(len(elements)) self._inference_counter.inc(len(elements)) @@ -280,11 +278,14 @@ def setup(self): self._clock = _ClockFactory.make_clock() def process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + self, elements: pa.RecordBatch ) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - outputs = self.run_inference(elements) - result = self._post_process(elements, outputs) + # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter + # extract record batch from here, assuming first column + serialized_examples = elements.column(0) + outputs = self.run_inference(serialized_examples) + result = self._post_process(serialized_examples, outputs) self._metrics_collector.update( elements, self._clock.get_current_time_in_microseconds() - batch_start_time) @@ -295,14 +296,12 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + self, elements: List[str] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: raise NotImplementedError @abc.abstractmethod - def _post_process(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - outputs: Any) -> Iterable[Any]: + def _post_process(self, elements: List[str], outputs: 
Any) -> Iterable[Any]: raise NotImplementedError @@ -321,9 +320,8 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): return (isinstance(exception, googleapiclient.errors.HttpError) and exception.resp.status in (503, 429)) - -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +# TODO (Maxine): change all example to serialized +@beam.typehints.with_input_types(List[str]) # Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. # TODO(b/151468119): Consider to re-batch with online serving request size @@ -580,22 +578,15 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: + def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: self._check_elements(elements) outputs = self._run_tf_operations(elements) return outputs - def _run_tf_operations( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: - input_values = [] - for element in elements: - input_values.append(element.SerializeToString()) + def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: result = self._session.run( self._io_tensor_spec.output_alias_tensor_names, - feed_dict={self._io_tensor_spec.input_tensor_name: input_values}) + feed_dict={self._io_tensor_spec.input_tensor_name: elements}) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): raise RuntimeError('Output length does not match fetches') return result @@ -608,8 +599,7 @@ def _check_elements( raise NotImplementedError -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +@beam.typehints.with_input_types(List[str]) @beam.typehints.with_output_types(Tuple[tf.train.Example, classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): @@ -639,8 +629,7 @@ def _post_process( return zip(elements, classifications) -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +@beam.typehints.with_input_types(List[str]) @beam.typehints.with_output_types(Tuple[tf.train.Example, regression_pb2.Regression]) class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): @@ -663,8 +652,7 @@ def _post_process( return zip(elements, regressions) -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +@beam.typehints.with_input_types(List[str]) @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on predict model.""" @@ -722,8 +710,7 @@ def _post_process( return result -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +@beam.typehints.with_input_types(List[str]) @beam.typehints.with_output_types(Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): @@ -843,6 +830,7 @@ def process( yield result +# TODO (Maxine): moving these into class? 
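A note on the `serialized_examples = elements.column(0)` slice in the `process()` hunk earlier in this patch: with the pyarrow versions current here, indexing or iterating an Arrow array yields pyarrow scalar objects rather than Python values, and `to_pylist()` on a binary column yields `bytes` under Python 3 (not `str`), which is the conversion a later commit in this series adds. A minimal illustration, assuming only that pyarrow and TensorFlow are importable; the `__raw_record__` column name is used here only for the sketch:

    import pyarrow as pa
    import tensorflow as tf

    serialized = tf.train.Example().SerializeToString()  # an empty but valid Example
    batch = pa.RecordBatch.from_arrays(
        [pa.array([serialized], type=pa.binary())], ['__raw_record__'])

    column = batch.column(0)
    print(isinstance(column[0], bytes))            # False: a pyarrow scalar
    values = column.to_pylist()
    print(isinstance(values[0], bytes))            # True under Python 3
    print(tf.train.Example.FromString(values[0]))  # parses back successfully
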
def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] From ace3f73fe66f822853307e7b24c53db93d6772d8 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 12:09:35 -0400 Subject: [PATCH 04/31] add more changes --- tfx_bsl/beam/run_inference_arrow.py | 33 ++++++++++++++++------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 316b65a5..793b1532 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -80,7 +80,7 @@ _MetaGraphDef = Any _SavedModel = Any -# TODO (Maxine): what is this? +# TODO (Maxine): Change this to serialized? _BulkInferResult = Union[prediction_log_pb2.PredictLog, Tuple[tf.train.Example, regression_pb2.Regression], Tuple[tf.train.Example, @@ -97,9 +97,9 @@ class OperationType(object): MULTIHEAD = 'MULTIHEAD' -# TODO (Me): pTransform from examples/sequence example here +# TODO (Maxine): pTransform from examples/sequence example here -# TODO (Me): Union[bytes, pa.RecordBatch]? +# TODO (Maxine): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) @@ -320,7 +320,7 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): return (isinstance(exception, googleapiclient.errors.HttpError) and exception.resp.status in (503, 429)) -# TODO (Maxine): change all example to serialized + @beam.typehints.with_input_types(List[str]) # Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. @@ -398,13 +398,15 @@ def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: @classmethod def _prepare_instances( - cls, elements: List[tf.train.Example] + cls, elements: List[str] ) -> Generator[Mapping[Text, Any], None, None]: for example in elements: # TODO(b/151468119): support tf.train.SequenceExample - if not isinstance(example, tf.train.Example): - raise ValueError('Remote prediction only supports tf.train.Example') + if not isinstance(example, str): + # raise ValueError('Remote prediction only supports tf.train.Example') + raise ValueError('Example should be serialized before calling remote prediction') + # TODO (Maxine): Fix this part with serialized example instance = {} for input_name, feature in example.features.feature.items(): attr_name = feature.WhichOneof('kind') @@ -443,17 +445,14 @@ def _parse_feature_content(values: Sequence[Any], attr_name: Text, else: return values - def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Sequence[Mapping[Text, Any]]: + def run_inference(self, elements: List[str]) -> Sequence[Mapping[Text, Any]]: body = {'instances': list(self._prepare_instances(elements))} request = self._make_request(body) response = self._execute_request(request) return response['predictions'] def _post_process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]], - outputs: Sequence[Mapping[Text, Any]] + self, elements: List[str], outputs: Sequence[Mapping[Text, Any]] ) -> Iterable[prediction_log_pb2.PredictLog]: result = [] for output in outputs: @@ -474,6 +473,9 @@ def _post_process( # is fixed. # TODO(b/143484017): Add batch_size back off in the case there are functional # reasons large batch sizes cannot be handled. 
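One possible direction for the `_prepare_instances` TODO above ("Fix this part with serialized example") is to parse each serialized payload back into a `tf.train.Example` before building the JSON instances. The sketch below is not the approach this series ultimately takes; the helper name `_prepare_instances_from_serialized` and the `_bytes`-suffix convention for base64 fields are assumptions, the latter borrowed from `test_request_body_with_binary_data`:

    import base64
    from typing import Any, Generator, List, Mapping, Text

    import tensorflow as tf
    from google.protobuf import message

    def _prepare_instances_from_serialized(
        serialized_examples: List[bytes]
    ) -> Generator[Mapping[Text, Any], None, None]:
      """Sketch: parse serialized payloads, then build JSON-able instances."""
      for serialized in serialized_examples:
        try:
          example = tf.train.Example.FromString(serialized)
        except message.DecodeError:
          raise ValueError('Remote prediction only supports tf.train.Example')
        instance = {}
        for input_name, feature in example.features.feature.items():
          attr_name = feature.WhichOneof('kind')
          if attr_name is None:
            continue
          values = getattr(feature, attr_name).value
          if attr_name == 'bytes_list' and input_name.endswith('_bytes'):
            # The '_bytes' suffix convention for base64 fields is an assumption
            # taken from test_request_body_with_binary_data.
            instance[input_name] = {'b64': base64.b64encode(values[0]).decode()}
          elif attr_name == 'bytes_list':
            instance[input_name] = values[0].decode('utf-8')
          elif len(values) == 1:
            instance[input_name] = values[0]
          else:
            instance[input_name] = list(values)
        yield instance
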
+ +# TODO (Maxine): Anything I can do to check that the serialized string is an example or sequence example? +# converting it and then check? class _BaseBatchSavedModelDoFn(_BaseDoFn): """A DoFn that runs in-process batch inference with a model. @@ -592,13 +594,14 @@ def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: return result def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: + self, elements: List[str]) -> None: """Unimplemented.""" raise NotImplementedError +# TODO (Maxine): Haven't change other than typeints beyond this point +# should I change these to example inside the functions or keep them as serialized @beam.typehints.with_input_types(List[str]) @beam.typehints.with_output_types(Tuple[tf.train.Example, classification_pb2.Classifications]) @@ -834,7 +837,7 @@ def process( def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] -) -> Sequence[classification_pb2.Classifications]: + ) -> Sequence[classification_pb2.Classifications]: """Returns classifications from inference output.""" # This is to avoid error "The truth value of an array with From 56357a0423bb4ffc68fffa467414690bfdf5910b Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 19:07:16 -0400 Subject: [PATCH 05/31] modify batch functions --- .bazelrc | 2 + tfx_bsl/beam/run_inference_arrow.py | 137 ++++++++++++++-------------- 2 files changed, 73 insertions(+), 66 deletions(-) create mode 100644 .bazelrc diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 00000000..23842f86 --- /dev/null +++ b/.bazelrc @@ -0,0 +1,2 @@ +build --action_env ARROW_HEADER_DIR="/home/zhangmaxine/tfx-venv/lib/python3.7/site-packages/pyarrow/include" +build --action_env ARROW_SHARED_LIBRARY_DIR="/home/zhangmaxine/tfx-venv/lib/python3.7/site-packages/pyarrow" diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 793b1532..9316988e 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -74,19 +74,15 @@ _SECOND_TO_MICROSECOND = 1000000 _REMOTE_INFERENCE_NUM_RETRIES = 5 -# We define the following aliases of Any because the actual types are not -# public. +# We define the following aliases of Any because the actual types are not public. _SignatureDef = Any _MetaGraphDef = Any _SavedModel = Any -# TODO (Maxine): Change this to serialized? _BulkInferResult = Union[prediction_log_pb2.PredictLog, - Tuple[tf.train.Example, regression_pb2.Regression], - Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse], - Tuple[tf.train.Example, - classification_pb2.Classifications]] + Tuple[str, regression_pb2.Regression], + Tuple[str, inference_pb2.MultiInferenceResponse], + Tuple[str, classification_pb2.Classifications]] # TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 @@ -98,6 +94,12 @@ class OperationType(object): # TODO (Maxine): pTransform from examples/sequence example here +# remember input type? +# if (isinstance(element, tf.train.Example)) +# elif isinstance(element, tf.train.SequenceExample) + + +# TODO (Maxine): Work on remote # TODO (Maxine): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn @@ -110,7 +112,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name """Implementation of RunInference API. Args: - examples: A PCollection containing RecordBatch. + examples: A PCollection containing RecordBatch of serialized examples. 
inference_spec_type: Model inference endpoint. Returns: @@ -284,6 +286,20 @@ def process( # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter # extract record batch from here, assuming first column serialized_examples = elements.column(0) + + for element in serialized_examples: + if not isinstance(element, str): + raise ValueError('Expected a list of serialized examples (string type)') + + try: + example = tf.train.Example.FromString(element) + sequenceExample = tf.train.sequenceExample.FromString(element) + except: + raise ValueError( + 'RecordBatch should contain a serialized example of the type \ + tf.Train.Example or tf.Train.SequenceExample' + ) + outputs = self.run_inference(serialized_examples) result = self._post_process(serialized_examples, outputs) self._metrics_collector.update( @@ -402,9 +418,10 @@ def _prepare_instances( ) -> Generator[Mapping[Text, Any], None, None]: for example in elements: # TODO(b/151468119): support tf.train.SequenceExample - if not isinstance(example, str): - # raise ValueError('Remote prediction only supports tf.train.Example') - raise ValueError('Example should be serialized before calling remote prediction') + try: + example = tf.train.Example.FromString(example) + except: + raise ValueError('Remote prediction only supports tf.train.Example') # TODO (Maxine): Fix this part with serialized example instance = {} @@ -474,8 +491,6 @@ def _post_process( # TODO(b/143484017): Add batch_size back off in the case there are functional # reasons large batch sizes cannot be handled. -# TODO (Maxine): Anything I can do to check that the serialized string is an example or sequence example? -# converting it and then check? class _BaseBatchSavedModelDoFn(_BaseDoFn): """A DoFn that runs in-process batch inference with a model. @@ -547,6 +562,7 @@ def load(): def _pre_process(self) -> _IOTensorSpec: # Pre process functions will validate for each signature. 
+ # TODO (Maxine): having more than 1 input io_tensor_specs = [] for signature in self._signatures: if len(signature.signature_def.inputs) != 1: @@ -600,11 +616,8 @@ def _check_elements( raise NotImplementedError -# TODO (Maxine): Haven't change other than typeints beyond this point -# should I change these to example inside the functions or keep them as serialized @beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) +@beam.typehints.with_output_types(Tuple[str, classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on classification model.""" @@ -617,40 +630,40 @@ def setup(self): signature_def.method_name) super(_BatchClassifyDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Classify only supports tf.train.Example') + def _check_elements(self, elements: List[str]) -> None: + for element in serialized_examples: + try: + example = tf.train.Example.FromString(element) + except: + raise ValueError('Classify only supports tf.train.Example') def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, classification_pb2.Classifications]]: + self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[str, classification_pb2.Classifications]]: classifications = _post_process_classify( self._io_tensor_spec.output_alias_tensor_names, elements, outputs) return zip(elements, classifications) @beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - regression_pb2.Regression]) +@beam.typehints.with_output_types(Tuple[str, regression_pb2.Regression]) class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on regression model.""" def setup(self): super(_BatchRegressDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Regress only supports tf.train.Example') + def _check_elements(self, elements: List[str]) -> None: + for element in serialized_examples: + try: + example = tf.train.Example.FromString(element) + except: + raise ValueError('Regress only supports tf.train.Example') + def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, regression_pb2.Regression]]: + self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[str, regression_pb2.Regression]]: regressions = _post_process_regress(elements, outputs) return zip(elements, regressions) @@ -669,14 +682,11 @@ def setup(self): signature_def.method_name) super(_BatchPredictDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: + def _check_elements(self, elements: List[str]) -> None: pass def _post_process( - self, elements: Union[Sequence[tf.train.Example], - Sequence[tf.train.SequenceExample]], + self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] ) -> Iterable[prediction_log_pb2.PredictLog]: input_tensor_alias = self._io_tensor_spec.input_tensor_alias @@ -700,8 +710,7 @@ def 
_post_process( for i in range(batch_size): predict_log = prediction_log_pb2.PredictLog() predict_log.CopyFrom(predict_log_tmpl) - predict_log.request.inputs[input_tensor_alias].string_val.append( - elements[i].SerializeToString()) + predict_log.request.inputs[input_tensor_alias].string_val.append(elements[i]) for output_alias, output in outputs.items(): # Mimic tensor::Split tensor_proto = tf.make_tensor_proto( @@ -714,21 +723,21 @@ def _post_process( @beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_output_types(Tuple[str, inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on multi-head model.""" - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Multi inference only supports tf.train.Example') + def _check_elements(self, elements: List[str]) -> None: + for element in serialized_examples: + try: + example = tf.train.Example.FromString(element) + except: + raise ValueError('Multi-inference only supports tf.train.Example') + def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]]: + self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[str, inference_pb2.MultiInferenceResponse]]: classifications = None regressions = None for signature in self._signatures: @@ -767,14 +776,14 @@ def _post_process( return result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) +# TODO (Maxine): need to replace train example from this point on +@beam.typehints.with_input_types(Tuple[str, classification_pb2.Classifications]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): """A DoFn that builds prediction log from classifications.""" def process( - self, element: Tuple[tf.train.Example, classification_pb2.Classifications] + self, element: Tuple[str, classification_pb2.Classifications] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, classifications) = element result = prediction_log_pb2.PredictionLog() @@ -785,14 +794,13 @@ def process( yield result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - regression_pb2.Regression]) +@beam.typehints.with_input_types(Tuple[str, regression_pb2.Regression]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): """A DoFn that builds prediction log from regressions.""" def process( - self, element: Tuple[tf.train.Example, regression_pb2.Regression] + self, element: Tuple[str, regression_pb2.Regression] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, regression) = element result = prediction_log_pb2.PredictionLog() @@ -815,15 +823,13 @@ def process( yield result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_input_types(Tuple[str,inference_pb2.MultiInferenceResponse]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildMultiInferenceLogDoFn(beam.DoFn): """A DoFn that builds prediction log from multi-head inference result.""" def process( - 
self, element: Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse] + self, element: Tuple[str, inference_pb2.MultiInferenceResponse] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, multi_inference_response) = element result = prediction_log_pb2.PredictionLog() @@ -833,10 +839,9 @@ def process( yield result -# TODO (Maxine): moving these into class? def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], - elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] + elements: Sequence[str], outputs: Mapping[Text, np.ndarray] ) -> Sequence[classification_pb2.Classifications]: """Returns classifications from inference output.""" @@ -897,7 +902,7 @@ def _post_process_classify( def _post_process_regress( - elements: Sequence[tf.train.Example], + elements: Sequence[str], outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: """Returns regressions from inference output.""" From ee6e928db951dba2c9dc4a00ae9ffe4a9faef374 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 27 May 2020 19:11:10 -0400 Subject: [PATCH 06/31] remove extra files --- .bazelrc | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 .bazelrc diff --git a/.bazelrc b/.bazelrc deleted file mode 100644 index 23842f86..00000000 --- a/.bazelrc +++ /dev/null @@ -1,2 +0,0 @@ -build --action_env ARROW_HEADER_DIR="/home/zhangmaxine/tfx-venv/lib/python3.7/site-packages/pyarrow/include" -build --action_env ARROW_SHARED_LIBRARY_DIR="/home/zhangmaxine/tfx-venv/lib/python3.7/site-packages/pyarrow" From 622fbcf129696de67f4a70ab9f18ca34af05191f Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Mon, 1 Jun 2020 19:28:37 -0400 Subject: [PATCH 07/31] add column choice (so far only support one column still) --- tfx_bsl/beam/run_inference_arrow.py | 78 +++++--- tfx_bsl/beam/run_inference_arrow_test.py | 245 +++++++++++------------ 2 files changed, 173 insertions(+), 150 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 9316988e..ceaa46ef 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -46,7 +46,7 @@ from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ - Tuple, Union + Tuple, Union, Optional # TODO(b/140306674): stop using the internal TF API. from tensorflow.python.saved_model import loader_impl @@ -99,15 +99,14 @@ class OperationType(object): # elif isinstance(element, tf.train.SequenceExample) -# TODO (Maxine): Work on remote - # TODO (Maxine): Union[bytes, pa.RecordBatch]? @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType + inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None ) -> beam.pvalue.PCollection: """Implementation of RunInference API. 
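With `process_column` threaded through the transforms in this commit, a usage sketch of the intended entry point may help. It reflects the end state of the migration rather than this intermediate commit, assumes the module is imported as `run_inference` (as the tests do), and treats the paths and the `__raw_record__` column name as placeholders:

    import apache_beam as beam
    import pyarrow as pa
    from tensorflow_serving.apis import prediction_log_pb2
    from tfx_bsl.beam import run_inference_arrow as run_inference
    from tfx_bsl.public.proto import model_spec_pb2

    # Placeholders: point these at a real SavedModel and a TFRecord of examples.
    model_path = '/tmp/saved_model'
    example_path = '/tmp/examples.tfrecord'
    output_path = '/tmp/predictions'
    _EXAMPLES_COLUMN = '__raw_record__'  # arbitrary name, only for this sketch

    def _to_record_batch(serialized_example):
      # One-row RecordBatch per serialized example; a real pipeline would batch
      # more coarsely before converting.
      return pa.RecordBatch.from_arrays(
          [pa.array([serialized_example], type=pa.binary())], [_EXAMPLES_COLUMN])

    inference_spec_type = model_spec_pb2.InferenceSpecType(
        saved_model_spec=model_spec_pb2.SavedModelSpec(model_path=model_path))

    with beam.Pipeline() as p:
      _ = (
          p
          | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path)
          | 'ToRecordBatch' >> beam.Map(_to_record_batch)
          | 'RunInference' >> run_inference.RunInferenceImpl(
              inference_spec_type, process_column=_EXAMPLES_COLUMN)
          | 'WritePredictions' >> beam.io.WriteToTFRecord(
              output_path,
              coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))
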
@@ -126,14 +125,14 @@ def RunInferenceImpl( # pylint: disable=invalid-name batched_examples = examples | 'BatchExamples' >> beam.BatchElements() operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: - return batched_examples | 'Classify' >> _Classify(inference_spec_type) + return batched_examples | 'Classify' >> _Classify(inference_spec_type, process_column) elif operation_type == OperationType.REGRESSION: - return batched_examples | 'Regress' >> _Regress(inference_spec_type) + return batched_examples | 'Regress' >> _Regress(inference_spec_type, process_column) elif operation_type == OperationType.PREDICTION: - return batched_examples | 'Predict' >> _Predict(inference_spec_type) + return batched_examples | 'Predict' >> _Predict(inference_spec_type, process_column) elif operation_type == OperationType.MULTIHEAD: return (batched_examples - | 'MultiInference' >> _MultiInference(inference_spec_type)) + | 'MultiInference' >> _MultiInference(inference_spec_type, process_column)) else: raise ValueError('Unsupported operation_type %s' % operation_type) @@ -149,12 +148,13 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None): """Performs classify PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll | 'Classify' >> beam.ParDo( - _BatchClassifyDoFn(inference_spec_type, shared.Shared())) + _BatchClassifyDoFn(inference_spec_type, process_column, shared.Shared())) | 'BuildPredictionLogForClassifications' >> beam.ParDo( _BuildPredictionLogForClassificationsDoFn())) else: @@ -165,12 +165,13 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None): """Performs regress PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll | 'Regress' >> beam.ParDo( - _BatchRegressDoFn(inference_spec_type, shared.Shared())) + _BatchRegressDoFn(inference_spec_type, process_column, shared.Shared())) | 'BuildPredictionLogForRegressions' >> beam.ParDo( _BuildPredictionLogForRegressionsDoFn())) else: @@ -181,13 +182,14 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None): """Performs predict PTransform.""" if _using_in_process_inference(inference_spec_type): predictions = ( pcoll | 'Predict' >> beam.ParDo( - _BatchPredictDoFn(inference_spec_type, shared.Shared()))) + _BatchPredictDoFn(inference_spec_type, process_column, shared.Shared()))) else: predictions = ( pcoll @@ -202,13 +204,14 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: 
disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None): """Performs multi inference PTransform.""" if _using_in_process_inference(inference_spec_type): return ( pcoll | 'MultiInference' >> beam.ParDo( - _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())) + _BatchMultiInferenceDoFn(inference_spec_type, process_column, shared.Shared())) | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) else: raise NotImplementedError @@ -271,9 +274,13 @@ def update(self, elements: List[str], latency_micro_secs: int) -> None: self._inference_request_batch_byte_size.update( sum(element.ByteSize() for element in elements)) - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + # TODO (Maxine): just one col for now, later, will do a list of str + def __init__( + self, inference_spec_type: model_spec_pb2.InferenceSpecType, + process_column: Optional[str] = None): super(_BaseDoFn, self).__init__() self._clock = None + self._process_column = process_column self._metrics_collector = self._MetricsCollector(inference_spec_type) def setup(self): @@ -283,22 +290,39 @@ def process( self, elements: pa.RecordBatch ) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - # TODO (Maxine): set ARROW_INPUT_COLUMN or take as a parameter + # TODO (Maxine): take process as a parameter, should it be part of inference spec? # extract record batch from here, assuming first column - serialized_examples = elements.column(0) + + # what would record batch look like? (flatten or not) + # vs np.asarray(elements.column(0)) + if len(elements.columns) == 1: + serialized_examples = elements.column(0).to_pylist() + else: + if self._process_column is None: + raise ValueError('Must pass in a process column with multi-column RecordBatch') + serialized_examples = None + + for column_name, column_array in zip(elements.schema.names, elements.columns): + column_type = column_array.type + if column_name == self._process_column: + serialized_examples = column_array.to_pylist() + break for element in serialized_examples: if not isinstance(element, str): raise ValueError('Expected a list of serialized examples (string type)') + # TODO (Maxine): Is there a better way? 
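On the "Is there a better way?" question above: protobuf parsing fails with `google.protobuf.message.DecodeError` rather than an arbitrary exception, the values produced by `to_pylist()` on a binary column are `bytes` under Python 3, and the class is spelled `tf.train.SequenceExample`. A helper along the following lines could replace the nested try/except that continues below; the helper name is hypothetical, and since proto parsing is permissive the check is only best-effort:

    import tensorflow as tf
    from google.protobuf import message

    def _is_serialized_example_or_sequence_example(payload: bytes) -> bool:
      """Best-effort check that a payload parses as (Sequence)Example."""
      for proto_cls in (tf.train.Example, tf.train.SequenceExample):
        try:
          proto_cls.FromString(payload)
          return True
        except message.DecodeError:
          continue
      return False

    # Possible call site inside process():
    for element in serialized_examples:
      if not _is_serialized_example_or_sequence_example(element):
        raise ValueError(
            'RecordBatch should contain serialized tf.train.Example or '
            'tf.train.SequenceExample payloads')
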
try: example = tf.train.Example.FromString(element) - sequenceExample = tf.train.sequenceExample.FromString(element) except: - raise ValueError( - 'RecordBatch should contain a serialized example of the type \ - tf.Train.Example or tf.Train.SequenceExample' - ) + try: + sequenceExample = tf.train.sequenceExample.FromString(element) + except: + raise ValueError( + 'RecordBatch should contain a serialized example of the type \ + tf.Train.Example or tf.Train.SequenceExample' + ) outputs = self.run_inference(serialized_examples) result = self._post_process(serialized_examples, outputs) @@ -504,9 +528,9 @@ class _BaseBatchSavedModelDoFn(_BaseDoFn): def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, + shared_model_handle: shared.Shared, process_column: Optional[str] = None, ): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, process_column) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle self._model_path = inference_spec_type.saved_model_spec.model_path @@ -776,7 +800,7 @@ def _post_process( return result -# TODO (Maxine): need to replace train example from this point on + @beam.typehints.with_input_types(Tuple[str, classification_pb2.Classifications]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index ce9ac4d0..30416ae0 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -213,8 +213,7 @@ def _run_inference_with_beam(self, example_path, inference_spec_type, pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | - 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) + | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -450,127 +449,127 @@ def testTelemetry(self): load_model_latency_milli_secs['distributions'][0].result.sum, 0) -class RunRemoteInferenceTest(RunInferenceFixture): - - def setUp(self): - super(RunRemoteInferenceTest, self).setUp() - self.example_path = self._get_output_data_dir('example') - self._prepare_predict_examples(self.example_path) - # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
- self._discovery_testdata_dir = os.path.join( - os.path.join(os.path.dirname(__file__), 'testdata'), - 'ml_discovery.json') - - @staticmethod - def _make_response_body(content, successful): - if successful: - response_dict = {'predictions': content} - else: - response_dict = {'error': content} - return json.dumps(response_dict) - - def _set_up_pipeline(self, inference_spec_type): - self.pipeline = beam.Pipeline() - self.pcoll = ( - self.pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) - - def _run_inference_with_beam(self): - self.pipeline_result = self.pipeline.run() - self.pipeline_result.wait_until_finish() - - def test_model_predict(self): - predictions = [{'output_1': [0.901], 'output_2': [0.997]}] - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(predictions, successful=True)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.predict_log.response.outputs['output_1'].CopyFrom( - tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) - prediction_log.predict_log.response.outputs['output_2'].CopyFrom( - tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) - - self._set_up_pipeline(inference_spec_type) - assert_that(self.pcoll, equal_to([prediction_log])) - self._run_inference_with_beam() - - def test_exception_raised_when_response_body_contains_error_entry(self): - error_msg = 'Base64 decode failed.' - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(error_msg, successful=False)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - try: - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - except ValueError as exc: - actual_error_msg = str(exc) - self.assertTrue(actual_error_msg.startswith(error_msg)) - else: - self.fail('Test was expected to throw ValueError exception') - - def test_exception_raised_when_project_id_is_empty(self): - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec(model_name='test-model',)) - - with self.assertRaises(ValueError): - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - - def test_request_body_with_binary_data(self): - example = text_format.Parse( - """ - features { - feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} - feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} - feature { key: "y" value { int64_list { value: [1, 2] }}} - } - """, tf.train.Example()) - result = list( - run_inference._RemotePredictDoFn._prepare_instances([example])) - self.assertEqual([ - { - 'x_bytes': { - 'b64': 'QVNhOGFzZGY=' - }, - 'x': 'JLK7ljk3', - 'y': [1, 2] - }, - ], result) +# class RunRemoteInferenceTest(RunInferenceFixture): + +# def setUp(self): +# super(RunRemoteInferenceTest, self).setUp() +# self.example_path = self._get_output_data_dir('example') +# self._prepare_predict_examples(self.example_path) +# # This is from https://ml.googleapis.com/$discovery/rest?version=v1. +# self._discovery_testdata_dir = os.path.join( +# os.path.join(os.path.dirname(__file__), 'testdata'), +# 'ml_discovery.json') + +# @staticmethod +# def _make_response_body(content, successful): +# if successful: +# response_dict = {'predictions': content} +# else: +# response_dict = {'error': content} +# return json.dumps(response_dict) + +# def _set_up_pipeline(self, inference_spec_type): +# self.pipeline = beam.Pipeline() +# self.pcoll = ( +# self.pipeline +# | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) +# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) +# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + +# def _run_inference_with_beam(self): +# self.pipeline_result = self.pipeline.run() +# self.pipeline_result.wait_until_finish() + +# def test_model_predict(self): +# predictions = [{'output_1': [0.901], 'output_2': [0.997]}] +# builder = http.RequestMockBuilder({ +# 'ml.projects.predict': +# (None, self._make_response_body(predictions, successful=True)) +# }) +# resource = discovery.build( +# 'ml', +# 'v1', +# http=http.HttpMock(self._discovery_testdata_dir, +# {'status': http_client.OK}), +# requestBuilder=builder) +# with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: +# response_mock.side_effect = lambda service, version: resource +# inference_spec_type = model_spec_pb2.InferenceSpecType( +# ai_platform_prediction_model_spec=model_spec_pb2 +# .AIPlatformPredictionModelSpec( +# project_id='test-project', +# model_name='test-model', +# )) + +# prediction_log = prediction_log_pb2.PredictionLog() +# prediction_log.predict_log.response.outputs['output_1'].CopyFrom( +# tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) +# prediction_log.predict_log.response.outputs['output_2'].CopyFrom( +# tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + +# self._set_up_pipeline(inference_spec_type) +# assert_that(self.pcoll, equal_to([prediction_log])) +# self._run_inference_with_beam() + +# def test_exception_raised_when_response_body_contains_error_entry(self): +# error_msg = 'Base64 decode failed.' +# builder = http.RequestMockBuilder({ +# 'ml.projects.predict': +# (None, self._make_response_body(error_msg, successful=False)) +# }) +# resource = discovery.build( +# 'ml', +# 'v1', +# http=http.HttpMock(self._discovery_testdata_dir, +# {'status': http_client.OK}), +# requestBuilder=builder) +# with mock.patch('googleapiclient.discovery.' 'build') as response_mock: +# response_mock.side_effect = lambda service, version: resource +# inference_spec_type = model_spec_pb2.InferenceSpecType( +# ai_platform_prediction_model_spec=model_spec_pb2 +# .AIPlatformPredictionModelSpec( +# project_id='test-project', +# model_name='test-model', +# )) + +# try: +# self._set_up_pipeline(inference_spec_type) +# self._run_inference_with_beam() +# except ValueError as exc: +# actual_error_msg = str(exc) +# self.assertTrue(actual_error_msg.startswith(error_msg)) +# else: +# self.fail('Test was expected to throw ValueError exception') + +# def test_exception_raised_when_project_id_is_empty(self): +# inference_spec_type = model_spec_pb2.InferenceSpecType( +# ai_platform_prediction_model_spec=model_spec_pb2 +# .AIPlatformPredictionModelSpec(model_name='test-model',)) + +# with self.assertRaises(ValueError): +# self._set_up_pipeline(inference_spec_type) +# self._run_inference_with_beam() + +# def test_request_body_with_binary_data(self): +# example = text_format.Parse( +# """ +# features { +# feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} +# feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} +# feature { key: "y" value { int64_list { value: [1, 2] }}} +# } +# """, tf.train.Example()) +# result = list( +# run_inference._RemotePredictDoFn._prepare_instances([example])) +# self.assertEqual([ +# { +# 'x_bytes': { +# 'b64': 'QVNhOGFzZGY=' +# }, +# 'x': 'JLK7ljk3', +# 'y': [1, 2] +# }, +# ], result) if __name__ == '__main__': From 7562e15350ca388795aef220bbee8f0e3730d4d3 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Fri, 5 Jun 2020 15:02:47 -0400 Subject: [PATCH 08/31] make internal implementation with arrow, add input type, process column opption and tests --- tfx_bsl/beam/run_inference_arrow.py | 242 ++++++------ tfx_bsl/beam/run_inference_arrow_test.py | 478 +++++++++++++++++++++-- 2 files changed, 581 insertions(+), 139 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index ceaa46ef..7d7cecf5 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -79,11 +79,6 @@ _MetaGraphDef = Any _SavedModel = Any -_BulkInferResult = Union[prediction_log_pb2.PredictLog, - Tuple[str, regression_pb2.Regression], - Tuple[str, 
inference_pb2.MultiInferenceResponse], - Tuple[str, classification_pb2.Classifications]] - # TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 class OperationType(object): @@ -92,14 +87,13 @@ class OperationType(object): PREDICTION = 'PREDICTION' MULTIHEAD = 'MULTIHEAD' - -# TODO (Maxine): pTransform from examples/sequence example here -# remember input type? -# if (isinstance(element, tf.train.Example)) -# elif isinstance(element, tf.train.SequenceExample) +class DataType(object): + EXAMPLE = 'EXAMPLE' + SEQUENCEEXAMPLE = 'SEQUENCEEXAMPLE' -# TODO (Maxine): Union[bytes, pa.RecordBatch]? +# This API is private and called with only example or sequence example +# TODO (Maxine): pTransform from examples/sequence example here @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) @@ -122,17 +116,25 @@ def RunInferenceImpl( # pylint: disable=invalid-name """ logging.info('RunInference on model: %s', inference_spec_type) - batched_examples = examples | 'BatchExamples' >> beam.BatchElements() + # TODO (Maxine): uncomment this once we change the api to take input + # Union[tf.train.Example, tf.train.SequenceExample] + # data_type = _get_data_type(examples) + + data_type = DataType.EXAMPLE operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: - return batched_examples | 'Classify' >> _Classify(inference_spec_type, process_column) + return examples | 'Classify' >> _Classify( + inference_spec_type, data_type, process_column) elif operation_type == OperationType.REGRESSION: - return batched_examples | 'Regress' >> _Regress(inference_spec_type, process_column) + return examples | 'Regress' >> _Regress( + inference_spec_type, data_type, process_column) elif operation_type == OperationType.PREDICTION: - return batched_examples | 'Predict' >> _Predict(inference_spec_type, process_column) + return examples | 'Predict' >> _Predict( + inference_spec_type, data_type, process_column) elif operation_type == OperationType.MULTIHEAD: - return (batched_examples - | 'MultiInference' >> _MultiInference(inference_spec_type, process_column)) + return (examples + | 'MultiInference' >> _MultiInference( + inference_spec_type, data_type, process_column)) else: raise ValueError('Unsupported operation_type %s' % operation_type) @@ -149,12 +151,12 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None): + data_type, process_column: Optional[str] = None): """Performs classify PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Classify' >> beam.ParDo( - _BatchClassifyDoFn(inference_spec_type, process_column, shared.Shared())) + | 'Classify' >> beam.ParDo(_BatchClassifyDoFn( + inference_spec_type, shared.Shared(), data_type, process_column)) | 'BuildPredictionLogForClassifications' >> beam.ParDo( _BuildPredictionLogForClassificationsDoFn())) else: @@ -166,12 +168,12 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None): + data_type, 
process_column: Optional[str] = None): """Performs regress PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Regress' >> beam.ParDo( - _BatchRegressDoFn(inference_spec_type, process_column, shared.Shared())) + | 'Regress' >> beam.ParDo(_BatchRegressDoFn( + inference_spec_type, shared.Shared(), data_type, process_column)) | 'BuildPredictionLogForRegressions' >> beam.ParDo( _BuildPredictionLogForRegressionsDoFn())) else: @@ -183,18 +185,18 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None): + data_type, process_column: Optional[str] = None): """Performs predict PTransform.""" if _using_in_process_inference(inference_spec_type): predictions = ( pcoll - | 'Predict' >> beam.ParDo( - _BatchPredictDoFn(inference_spec_type, process_column, shared.Shared()))) + | 'Predict' >> beam.ParDo(_BatchPredictDoFn( + inference_spec_type, shared.Shared(), data_type, process_column))) else: predictions = ( pcoll - | 'RemotePredict' >> beam.ParDo( - _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options))) + | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( + inference_spec_type, pcoll.pipeline.options, data_type))) return (predictions | 'BuildPredictionLogForPredictions' >> beam.ParDo( _BuildPredictionLogForPredictionsDoFn())) @@ -205,13 +207,13 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None): + data_type, process_column: Optional[str] = None): """Performs multi inference PTransform.""" if _using_in_process_inference(inference_spec_type): return ( pcoll - | 'MultiInference' >> beam.ParDo( - _BatchMultiInferenceDoFn(inference_spec_type, process_column, shared.Shared())) + | 'MultiInference' >> beam.ParDo(_BatchMultiInferenceDoFn( + inference_spec_type, shared.Shared(), data_type, process_column)) | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) else: raise NotImplementedError @@ -266,13 +268,14 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None - def update(self, elements: List[str], latency_micro_secs: int) -> None: + def update( + self, elements: List[Union[str, bytes]], latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) self._num_instances.inc(len(elements)) self._inference_counter.inc(len(elements)) self._inference_request_batch_size.update(len(elements)) self._inference_request_batch_byte_size.update( - sum(element.ByteSize() for element in elements)) + sum(len(element) for element in elements)) # TODO (Maxine): just one col for now, later, will do a list of str def __init__( @@ -300,34 +303,25 @@ def process( else: if self._process_column is None: raise ValueError('Must pass in a process column with multi-column RecordBatch') - serialized_examples = None - for column_name, column_array in zip(elements.schema.names, elements.columns): - column_type = column_array.type - if column_name == self._process_column: - serialized_examples = column_array.to_pylist() - break - - for 
element in serialized_examples: - if not isinstance(element, str): - raise ValueError('Expected a list of serialized examples (string type)') - - # TODO (Maxine): Is there a better way? - try: - example = tf.train.Example.FromString(element) - except: - try: - sequenceExample = tf.train.sequenceExample.FromString(element) - except: - raise ValueError( - 'RecordBatch should contain a serialized example of the type \ - tf.Train.Example or tf.Train.SequenceExample' - ) + serialized_examples = None + for column_name, column_array in zip(elements.schema.names, elements.columns): + column_type = column_array.type + if column_name == self._process_column: + serialized_examples = column_array.to_pylist() + break + + for example in serialized_examples: + if not (isinstance(example, bytes) or isinstance(example, str)): + raise ValueError( + f'Expected a list of serialized examples in bytes or as a string, \ + got {type(example)}' + ) outputs = self.run_inference(serialized_examples) result = self._post_process(serialized_examples, outputs) self._metrics_collector.update( - elements, + serialized_examples, self._clock.get_current_time_in_microseconds() - batch_start_time) return result @@ -336,12 +330,13 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, elements: List[str] + self, elements: List[Union[str, bytes]] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: raise NotImplementedError @abc.abstractmethod - def _post_process(self, elements: List[str], outputs: Any) -> Iterable[Any]: + def _post_process( + self, elements: List[Union[str, bytes]], outputs: Any) -> Iterable[Any]: raise NotImplementedError @@ -361,7 +356,7 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): exception.resp.status in (503, 429)) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(pa.RecordBatch) # Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. 
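The process() hunk above is the core of the Arrow change: the DoFn now receives a pa.RecordBatch and pulls the serialized examples out of one of its columns before running inference. A minimal sketch of that extraction step, assuming a RecordBatch whose raw-record column holds serialized tf.train.Example bytes (the '__RAW_RECORD__' column name mirrors the tests; the helper name below is illustrative only, not part of the patch):

import pyarrow as pa
import tensorflow as tf

def _extract_serialized_examples(record_batch, process_column=None):
  # Single-column batches are used as-is; multi-column batches need a
  # process_column naming the column that holds the serialized examples.
  if record_batch.num_columns == 1:
    return record_batch.column(0).to_pylist()
  if process_column is None:
    raise ValueError('Must pass in a process column with multi-column RecordBatch')
  for name, column in zip(record_batch.schema.names, record_batch.columns):
    if name == process_column:
      return column.to_pylist()
  raise ValueError('Column %r not found in RecordBatch' % process_column)

serialized = [tf.train.Example().SerializeToString()]
batch = pa.RecordBatch.from_arrays([pa.array(serialized)], ['__RAW_RECORD__'])
assert _extract_serialized_examples(batch) == serialized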
# TODO(b/151468119): Consider to re-batch with online serving request size @@ -388,7 +383,7 @@ class _RemotePredictDoFn(_BaseDoFn): """ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: PipelineOptions): + pipeline_options: PipelineOptions, data_type): super(_RemotePredictDoFn, self).__init__(inference_spec_type) self._api_client = None @@ -438,18 +433,16 @@ def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: @classmethod def _prepare_instances( - cls, elements: List[str] + cls, elements: List[Union[str, bytes]] ) -> Generator[Mapping[Text, Any], None, None]: for example in elements: # TODO(b/151468119): support tf.train.SequenceExample - try: - example = tf.train.Example.FromString(example) - except: + if data_type != DataType.EXAMPLE: raise ValueError('Remote prediction only supports tf.train.Example') - # TODO (Maxine): Fix this part with serialized example instance = {} - for input_name, feature in example.features.feature.items(): + tfexample = tf.train.Example.FromString(example) + for input_name, feature in tfexample.features.feature.items(): attr_name = feature.WhichOneof('kind') if attr_name is None: continue @@ -486,14 +479,16 @@ def _parse_feature_content(values: Sequence[Any], attr_name: Text, else: return values - def run_inference(self, elements: List[str]) -> Sequence[Mapping[Text, Any]]: + def run_inference( + self, elements: List[Union[str, bytes]]) -> Sequence[Mapping[Text, Any]]: body = {'instances': list(self._prepare_instances(elements))} request = self._make_request(body) response = self._execute_request(request) return response['predictions'] def _post_process( - self, elements: List[str], outputs: Sequence[Mapping[Text, Any]] + self, elements: List[Union[str, bytes]], + outputs: Sequence[Mapping[Text, Any]] ) -> Iterable[prediction_log_pb2.PredictLog]: result = [] for output in outputs: @@ -528,7 +523,8 @@ class _BaseBatchSavedModelDoFn(_BaseDoFn): def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, process_column: Optional[str] = None, + shared_model_handle: shared.Shared, data_type, + process_column: Optional[str] = None, ): super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, process_column) self._inference_spec_type = inference_spec_type @@ -541,6 +537,7 @@ def __init__( _get_tags(inference_spec_type)) self._session = None self._io_tensor_spec = None + self._data_type = data_type def setup(self): """Load the model. 
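For context on the _RemotePredictDoFn._prepare_instances hunk above: each serialized tf.train.Example is converted into the JSON-style instance dict that the Cloud AI Platform prediction API expects. A simplified, illustrative sketch of that conversion (assumption: it collapses the _parse_feature_content details into the '*_bytes' base64 rule exercised by the request-body test earlier in the series; the helper name is not part of the patch):

import base64
import tensorflow as tf

def _example_to_instance(serialized_example):
  # Simplified: bytes features whose names end in '_bytes' are base64-wrapped,
  # other bytes features are decoded to text, and single values are unwrapped.
  example = tf.train.Example.FromString(serialized_example)
  instance = {}
  for name, feature in example.features.feature.items():
    kind = feature.WhichOneof('kind')
    if kind is None:
      continue
    values = list(getattr(feature, kind).value)
    if name.endswith('_bytes'):
      values = [{'b64': base64.b64encode(v).decode()} for v in values]
    elif kind == 'bytes_list':
      values = [v.decode() for v in values]
    instance[name] = values[0] if len(values) == 1 else values
  return instance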
@@ -620,12 +617,14 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def run_inference(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def run_inference( + self, elements: List[Union[str, bytes]]) -> Mapping[Text, np.ndarray]: self._check_elements(elements) outputs = self._run_tf_operations(elements) return outputs - def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: + def _run_tf_operations( + self, elements: List[Union[str, bytes]]) -> Mapping[Text, np.ndarray]: result = self._session.run( self._io_tensor_spec.output_alias_tensor_names, feed_dict={self._io_tensor_spec.input_tensor_name: elements}) @@ -633,15 +632,15 @@ def _run_tf_operations(self, elements: List[str]) -> Mapping[Text, np.ndarray]: raise RuntimeError('Output length does not match fetches') return result - def _check_elements( - self, elements: List[str]) -> None: + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: """Unimplemented.""" raise NotImplementedError -@beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[str, classification_pb2.Classifications]) +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on classification model.""" @@ -654,45 +653,43 @@ def setup(self): signature_def.method_name) super(_BatchClassifyDoFn, self).setup() - def _check_elements(self, elements: List[str]) -> None: - for element in serialized_examples: - try: - example = tf.train.Example.FromString(element) - except: + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + for element in elements: + if self._data_type != DataType.EXAMPLE: raise ValueError('Classify only supports tf.train.Example') def _post_process( - self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[str, classification_pb2.Classifications]]: + self, elements: Sequence[Union[str, bytes]], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], classification_pb2.Classifications]]: classifications = _post_process_classify( self._io_tensor_spec.output_alias_tensor_names, elements, outputs) return zip(elements, classifications) -@beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[str, regression_pb2.Regression]) +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + regression_pb2.Regression]) class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on regression model.""" def setup(self): super(_BatchRegressDoFn, self).setup() - def _check_elements(self, elements: List[str]) -> None: - for element in serialized_examples: - try: - example = tf.train.Example.FromString(element) - except: + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + for element in elements: + if self._data_type != DataType.EXAMPLE: raise ValueError('Regress only supports tf.train.Example') - def _post_process( - self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[str, regression_pb2.Regression]]: + self, elements: Sequence[Union[str, bytes]], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], regression_pb2.Regression]]: regressions = _post_process_regress(elements, outputs) 
return zip(elements, regressions) -@beam.typehints.with_input_types(List[str]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on predict model.""" @@ -706,11 +703,11 @@ def setup(self): signature_def.method_name) super(_BatchPredictDoFn, self).setup() - def _check_elements(self, elements: List[str]) -> None: + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: pass def _post_process( - self, elements: Sequence[str], + self, elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray] ) -> Iterable[prediction_log_pb2.PredictLog]: input_tensor_alias = self._io_tensor_spec.input_tensor_alias @@ -746,22 +743,21 @@ def _post_process( return result -@beam.typehints.with_input_types(List[str]) -@beam.typehints.with_output_types(Tuple[str, inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on multi-head model.""" - def _check_elements(self, elements: List[str]) -> None: - for element in serialized_examples: - try: - example = tf.train.Example.FromString(element) - except: + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + for element in elements: + if self._data_type != DataType.EXAMPLE: raise ValueError('Multi-inference only supports tf.train.Example') - def _post_process( - self, elements: Sequence[str], outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[str, inference_pb2.MultiInferenceResponse]]: + self, elements: Sequence[Union[str, bytes]], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], inference_pb2.MultiInferenceResponse]]: classifications = None regressions = None for signature in self._signatures: @@ -801,35 +797,38 @@ def _post_process( -@beam.typehints.with_input_types(Tuple[str, classification_pb2.Classifications]) +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + classification_pb2.Classifications]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): """A DoFn that builds prediction log from classifications.""" def process( - self, element: Tuple[str, classification_pb2.Classifications] + self, + element: Tuple[Union[str, bytes], classification_pb2.Classifications] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, classifications) = element result = prediction_log_pb2.PredictionLog() result.classify_log.request.input.example_list.examples.add().CopyFrom( - train_example) + tf.train.Example.FromString(train_example)) result.classify_log.response.result.classifications.add().CopyFrom( classifications) yield result -@beam.typehints.with_input_types(Tuple[str, regression_pb2.Regression]) +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + regression_pb2.Regression]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): """A DoFn that builds prediction log from regressions.""" def process( - self, element: Tuple[str, regression_pb2.Regression] + self, element: Tuple[Union[str, bytes], regression_pb2.Regression] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, regression) = element result = prediction_log_pb2.PredictionLog() 
result.regress_log.request.input.example_list.examples.add().CopyFrom( - train_example) + tf.train.Example.FromString(train_example)) result.regress_log.response.result.regressions.add().CopyFrom(regression) yield result @@ -847,25 +846,27 @@ def process( yield result -@beam.typehints.with_input_types(Tuple[str,inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildMultiInferenceLogDoFn(beam.DoFn): """A DoFn that builds prediction log from multi-head inference result.""" def process( - self, element: Tuple[str, inference_pb2.MultiInferenceResponse] + self, element: Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, multi_inference_response) = element result = prediction_log_pb2.PredictionLog() (result.multi_inference_log.request.input.example_list.examples.add() - .CopyFrom(train_example)) + .CopyFrom(tf.train.Example.FromString(train_example))) result.multi_inference_log.response.CopyFrom(multi_inference_response) yield result def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], - elements: Sequence[str], outputs: Mapping[Text, np.ndarray] + elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray] ) -> Sequence[classification_pb2.Classifications]: """Returns classifications from inference output.""" @@ -926,7 +927,7 @@ def _post_process_classify( def _post_process_regress( - elements: Sequence[str], + elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: """Returns regressions from inference output.""" @@ -1127,6 +1128,15 @@ def _get_operation_type( return OperationType.PREDICTION +def _get_data_type(elements: Sequence[Any]) -> Text: + if all(isinstance(elements, tf.train.Example)): + return DataType.EXAMPLE + elif all(isinstance(element, tf.train.SequenceExample)): + return DataType.SEQUENCEEXAMPLE + else: + raise ValueError(f'Unsupported DataType {type(elements)}') + + def _get_meta_graph_def(saved_model_pb: _SavedModel, tags: Sequence[Text]) -> _MetaGraphDef: """Returns MetaGraphDef from SavedModel.""" diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 30416ae0..056d5674 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -26,6 +26,7 @@ import mock import apache_beam as beam +import pyarrow as pa from apache_beam.metrics.metric import MetricsFilter from apache_beam.testing.util import assert_that from apache_beam.testing.util import equal_to @@ -33,18 +34,18 @@ from googleapiclient import http from six.moves import http_client import tensorflow as tf -from tfx_bsl.beam import run_inference +from tfx_bsl.beam import run_inference_arrow from tfx_bsl.public.proto import model_spec_pb2 +from tfx_bsl.tfxio import raw_tf_record from google.protobuf import text_format - from tensorflow_serving.apis import prediction_log_pb2 -class RunInferenceFixture(tf.test.TestCase): +class RunInferenceArrowFixture(tf.test.TestCase): def setUp(self): - super(RunInferenceFixture, self).setUp() + super(RunInferenceArrowFixture, self).setUp() self._predict_examples = [ text_format.Parse( """ @@ -70,11 +71,10 @@ def _prepare_predict_examples(self, example_path): for example in self._predict_examples: output_file.write(example.SerializeToString()) - -class 
RunOfflineInferenceTest(RunInferenceFixture): +class RunOfflineInferenceArrowTest(RunInferenceArrowFixture): def setUp(self): - super(RunOfflineInferenceTest, self).setUp() + super(RunOfflineInferenceArrowTest, self).setUp() self._predict_examples = [ text_format.Parse( """ @@ -107,6 +107,21 @@ def setUp(self): ] + serialized_example = [] + for example in self._predict_examples: + serialized_example.append(example.SerializeToString()) + self.record_batch = pa.RecordBatch.from_arrays( + [serialized_example, ], ["__RAW_RECORD__", ] + ) + + serialized_example_multi = [] + for example in self._multihead_examples: + serialized_example_multi.append(example.SerializeToString()) + self.record_batch_multihead = pa.RecordBatch.from_arrays( + [serialized_example_multi, ], ["__RAW_RECORD__", ] + ) + + def _prepare_multihead_examples(self, example_path): with tf.io.TFRecordWriter(example_path) as output_file: for example in self._multihead_examples: @@ -206,14 +221,23 @@ def _build_multihead_model(self, model_path): signature_def_map=signature_def_map) builder.save() - def _run_inference_with_beam(self, example_path, inference_spec_type, + def _run_inference_with_beam(self, example_type, inference_spec_type, prediction_log_path): - with beam.Pipeline() as pipeline: - _ = ( + if example_type == 'multi': + with beam.Pipeline() as pipeline: + _ = ( pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) + | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + else: + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | "createRecordBatch" >> beam.Create([self.record_batch]) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -234,7 +258,7 @@ def testModelPathInvalid(self): prediction_log_path = self._get_output_data_dir('predictions') with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): self._run_inference_with_beam( - example_path, + 'predict', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=self._get_output_data_dir())), prediction_log_path) @@ -246,7 +270,7 @@ def testEstimatorModelPredict(self): self._build_predict_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - example_path, + 'predict', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)), prediction_log_path) @@ -255,7 +279,7 @@ def testEstimatorModelPredict(self): self.assertLen(results, 2) self.assertEqual( results[0].predict_log.request.inputs[ - run_inference._DEFAULT_INPUT_KEY].string_val[0], + run_inference_arrow._DEFAULT_INPUT_KEY].string_val[0], self._predict_examples[0].SerializeToString()) self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, tf.float32) @@ -275,7 +299,7 @@ def testClassifyModel(self): self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - example_path, + 'multi', model_spec_pb2.InferenceSpecType( 
saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path, signature_name=['classify_sum'])), @@ -299,7 +323,7 @@ def testRegressModel(self): self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - example_path, + 'multi', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path, signature_name=['regress_diff'])), @@ -322,7 +346,7 @@ def testMultiInferenceModel(self): self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - example_path, + 'multi', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path, @@ -394,7 +418,7 @@ def call(self, serialized_example): self._prepare_predict_examples(example_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - example_path, + 'predict', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)), prediction_log_path) @@ -412,9 +436,9 @@ def testTelemetry(self): model_path=model_path, signature_name=['classify_sum'])) pipeline = beam.Pipeline() _ = ( - pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + pipeline + | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type)) run_result = pipeline.run() run_result.wait_until_finish() @@ -449,6 +473,414 @@ def testTelemetry(self): load_model_latency_milli_secs['distributions'][0].result.sum, 0) +# class RunInferenceFixture(tf.test.TestCase): + +# def setUp(self): +# super(RunInferenceFixture, self).setUp() +# self._predict_examples = [ +# text_format.Parse( +# """ +# features { +# feature { key: "input1" value { float_list { value: 0 }}} +# } +# """, tf.train.Example()), +# ] + +# def _get_output_data_dir(self, sub_dir=None): +# test_dir = self._testMethodName +# path = os.path.join( +# os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), +# test_dir) +# if not tf.io.gfile.exists(path): +# tf.io.gfile.makedirs(path) +# if sub_dir is not None: +# path = os.path.join(path, sub_dir) +# return path + +# def _prepare_predict_examples(self, example_path): +# with tf.io.TFRecordWriter(example_path) as output_file: +# for example in self._predict_examples: +# output_file.write(example.SerializeToString()) + + +# class RunOfflineInferenceTest(RunInferenceFixture): + +# def setUp(self): +# super(RunOfflineInferenceTest, self).setUp() +# self._predict_examples = [ +# text_format.Parse( +# """ +# features { +# feature { key: "input1" value { float_list { value: 0 }}} +# } +# """, tf.train.Example()), +# text_format.Parse( +# """ +# features { +# feature { key: "input1" value { float_list { value: 1 }}} +# } +# """, tf.train.Example()), +# ] +# self._multihead_examples = [ +# text_format.Parse( +# """ +# features { +# feature {key: "x" value { float_list { value: 0.8 }}} +# feature {key: "y" value { float_list { value: 0.2 }}} +# } +# """, tf.train.Example()), +# text_format.Parse( +# """ +# features { +# feature {key: "x" value { float_list { value: 0.6 }}} +# feature {key: "y" value { float_list { value: 0.1 }}} +# } +# """, tf.train.Example()), +# ] + + +# def _prepare_multihead_examples(self, example_path): 
+# with tf.io.TFRecordWriter(example_path) as output_file: +# for example in self._multihead_examples: +# output_file.write(example.SerializeToString()) + +# def _build_predict_model(self, model_path): +# """Exports the dummy sum predict model.""" + +# with tf.compat.v1.Graph().as_default(): +# input_tensors = { +# 'x': tf.compat.v1.io.FixedLenFeature( +# [1], dtype=tf.float32, default_value=0) +# } +# serving_receiver = ( +# tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( +# input_tensors)()) +# output_tensors = {'y': serving_receiver.features['x'] * 2} +# sess = tf.compat.v1.Session() +# sess.run(tf.compat.v1.initializers.global_variables()) +# signature_def = tf.compat.v1.estimator.export.PredictOutput( +# output_tensors).as_signature_def(serving_receiver.receiver_tensors) +# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) +# builder.add_meta_graph_and_variables( +# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], +# signature_def_map={ +# tf.compat.v1.saved_model.signature_constants +# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: +# signature_def, +# }) +# builder.save() + +# def _build_regression_signature(self, input_tensor, output_tensor): +# """Helper function for building a regression SignatureDef.""" +# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( +# input_tensor) +# signature_inputs = { +# tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: +# input_tensor_info +# } +# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( +# output_tensor) +# signature_outputs = { +# tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: +# output_tensor_info +# } +# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( +# signature_inputs, signature_outputs, +# tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) + +# def _build_classification_signature(self, input_tensor, scores_tensor): +# """Helper function for building a classification SignatureDef.""" +# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( +# input_tensor) +# signature_inputs = { +# tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: +# input_tensor_info +# } +# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( +# scores_tensor) +# signature_outputs = { +# tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: +# output_tensor_info +# } +# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( +# signature_inputs, signature_outputs, +# tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + +# def _build_multihead_model(self, model_path): +# with tf.compat.v1.Graph().as_default(): +# input_example = tf.compat.v1.placeholder( +# tf.string, name='input_examples_tensor') +# config = { +# 'x': tf.compat.v1.io.FixedLenFeature( +# [1], dtype=tf.float32, default_value=0), +# 'y': tf.compat.v1.io.FixedLenFeature( +# [1], dtype=tf.float32, default_value=0), +# } +# features = tf.compat.v1.parse_example(input_example, config) +# x = features['x'] +# y = features['y'] +# sum_pred = x + y +# diff_pred = tf.abs(x - y) +# sess = tf.compat.v1.Session() +# sess.run(tf.compat.v1.initializers.global_variables()) +# signature_def_map = { +# 'regress_diff': +# self._build_regression_signature(input_example, diff_pred), +# 'classify_sum': +# self._build_classification_signature(input_example, sum_pred), +# tf.compat.v1.saved_model.signature_constants +# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: +# 
self._build_regression_signature(input_example, sum_pred) +# } +# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) +# builder.add_meta_graph_and_variables( +# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], +# signature_def_map=signature_def_map) +# builder.save() + +# def _run_inference_with_beam(self, example_path, inference_spec_type, +# prediction_log_path): +# with beam.Pipeline() as pipeline: +# _ = ( +# pipeline +# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) +# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) +# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) +# | 'WritePredictions' >> beam.io.WriteToTFRecord( +# prediction_log_path, +# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + +# def _get_results(self, prediction_log_path): +# results = [] +# for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): +# record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) +# for record_string in record_iterator: +# prediction_log = prediction_log_pb2.PredictionLog() +# prediction_log.MergeFromString(record_string) +# results.append(prediction_log) +# return results + +# def testModelPathInvalid(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_predict_examples(example_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=self._get_output_data_dir())), prediction_log_path) + +# def testEstimatorModelPredict(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_predict_examples(example_path) +# model_path = self._get_output_data_dir('model') +# self._build_predict_model(model_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path)), prediction_log_path) + +# results = self._get_results(prediction_log_path) +# self.assertLen(results, 2) +# self.assertEqual( +# results[0].predict_log.request.inputs[ +# run_inference._DEFAULT_INPUT_KEY].string_val[0], +# self._predict_examples[0].SerializeToString()) +# self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, +# tf.float32) +# self.assertLen( +# results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) +# self.assertEqual( +# results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, +# 1) +# self.assertEqual( +# results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, +# 1) + +# def testClassifyModel(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_multihead_examples(example_path) +# model_path = self._get_output_data_dir('model') +# self._build_multihead_model(model_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path, signature_name=['classify_sum'])), +# prediction_log_path) + +# results = self._get_results(prediction_log_path) +# self.assertLen(results, 2) +# classify_log = results[0].classify_log +# self.assertLen(classify_log.request.input.example_list.examples, 1) +# 
self.assertEqual(classify_log.request.input.example_list.examples[0], +# self._multihead_examples[0]) +# self.assertLen(classify_log.response.result.classifications, 1) +# self.assertLen(classify_log.response.result.classifications[0].classes, 1) +# self.assertAlmostEqual( +# classify_log.response.result.classifications[0].classes[0].score, 1.0) + +# def testRegressModel(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_multihead_examples(example_path) +# model_path = self._get_output_data_dir('model') +# self._build_multihead_model(model_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path, signature_name=['regress_diff'])), +# prediction_log_path) + +# results = self._get_results(prediction_log_path) +# self.assertLen(results, 2) +# regress_log = results[0].regress_log +# self.assertLen(regress_log.request.input.example_list.examples, 1) +# self.assertEqual(regress_log.request.input.example_list.examples[0], +# self._multihead_examples[0]) +# self.assertLen(regress_log.response.result.regressions, 1) +# self.assertAlmostEqual(regress_log.response.result.regressions[0].value, +# 0.6) + +# def testMultiInferenceModel(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_multihead_examples(example_path) +# model_path = self._get_output_data_dir('model') +# self._build_multihead_model(model_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path, +# signature_name=['regress_diff', 'classify_sum'])), +# prediction_log_path) +# results = self._get_results(prediction_log_path) +# self.assertLen(results, 2) +# multi_inference_log = results[0].multi_inference_log +# self.assertLen(multi_inference_log.request.input.example_list.examples, 1) +# self.assertEqual(multi_inference_log.request.input.example_list.examples[0], +# self._multihead_examples[0]) +# self.assertLen(multi_inference_log.response.results, 2) +# signature_names = [] +# for result in multi_inference_log.response.results: +# signature_names.append(result.model_spec.signature_name) +# self.assertIn('regress_diff', signature_names) +# self.assertIn('classify_sum', signature_names) +# result = multi_inference_log.response.results[0] +# self.assertEqual(result.model_spec.signature_name, 'regress_diff') +# self.assertLen(result.regression_result.regressions, 1) +# self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) +# result = multi_inference_log.response.results[1] +# self.assertEqual(result.model_spec.signature_name, 'classify_sum') +# self.assertLen(result.classification_result.classifications, 1) +# self.assertLen(result.classification_result.classifications[0].classes, 1) +# self.assertAlmostEqual( +# result.classification_result.classifications[0].classes[0].score, 1.0) + +# def testKerasModelPredict(self): +# inputs = tf.keras.Input(shape=(1,), name='input1') +# output1 = tf.keras.layers.Dense( +# 1, activation=tf.nn.sigmoid, name='output1')( +# inputs) +# output2 = tf.keras.layers.Dense( +# 1, activation=tf.nn.sigmoid, name='output2')( +# inputs) +# inference_model = tf.keras.models.Model(inputs, [output1, output2]) + +# class TestKerasModel(tf.keras.Model): + +# def __init__(self, 
inference_model): +# super(TestKerasModel, self).__init__(name='test_keras_model') +# self.inference_model = inference_model + +# @tf.function(input_signature=[ +# tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') +# ]) +# def call(self, serialized_example): +# features = { +# 'input1': +# tf.compat.v1.io.FixedLenFeature([1], +# dtype=tf.float32, +# default_value=0) +# } +# input_tensor_dict = tf.io.parse_example(serialized_example, features) +# return inference_model(input_tensor_dict['input1']) + +# model = TestKerasModel(inference_model) +# model.compile( +# optimizer=tf.keras.optimizers.Adam(lr=.001), +# loss=tf.keras.losses.binary_crossentropy, +# metrics=['accuracy']) + +# model_path = self._get_output_data_dir('model') +# tf.compat.v1.keras.experimental.export_saved_model( +# model, model_path, serving_only=True) + +# example_path = self._get_output_data_dir('examples') +# self._prepare_predict_examples(example_path) +# prediction_log_path = self._get_output_data_dir('predictions') +# self._run_inference_with_beam( +# example_path, +# model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path)), prediction_log_path) + +# results = self._get_results(prediction_log_path) +# self.assertLen(results, 2) + +# def testTelemetry(self): +# example_path = self._get_output_data_dir('examples') +# self._prepare_multihead_examples(example_path) +# model_path = self._get_output_data_dir('model') +# self._build_multihead_model(model_path) +# inference_spec_type = model_spec_pb2.InferenceSpecType( +# saved_model_spec=model_spec_pb2.SavedModelSpec( +# model_path=model_path, signature_name=['classify_sum'])) +# pipeline = beam.Pipeline() +# _ = ( +# pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) +# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) +# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) +# run_result = pipeline.run() +# run_result.wait_until_finish() + +# num_inferences = run_result.metrics().query( +# MetricsFilter().with_name('num_inferences')) +# self.assertTrue(num_inferences['counters']) +# self.assertEqual(num_inferences['counters'][0].result, 2) +# num_instances = run_result.metrics().query( +# MetricsFilter().with_name('num_instances')) +# self.assertTrue(num_instances['counters']) +# self.assertEqual(num_instances['counters'][0].result, 2) +# inference_request_batch_size = run_result.metrics().query( +# MetricsFilter().with_name('inference_request_batch_size')) +# self.assertTrue(inference_request_batch_size['distributions']) +# self.assertEqual( +# inference_request_batch_size['distributions'][0].result.sum, 2) +# inference_request_batch_byte_size = run_result.metrics().query( +# MetricsFilter().with_name('inference_request_batch_byte_size')) +# self.assertTrue(inference_request_batch_byte_size['distributions']) +# self.assertEqual( +# inference_request_batch_byte_size['distributions'][0].result.sum, +# sum(element.ByteSize() for element in self._multihead_examples)) +# inference_batch_latency_micro_secs = run_result.metrics().query( +# MetricsFilter().with_name('inference_batch_latency_micro_secs')) +# self.assertTrue(inference_batch_latency_micro_secs['distributions']) +# self.assertGreaterEqual( +# inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) +# load_model_latency_milli_secs = run_result.metrics().query( +# MetricsFilter().with_name('load_model_latency_milli_secs')) +# self.assertTrue(load_model_latency_milli_secs['distributions']) +# 
self.assertGreaterEqual( +# load_model_latency_milli_secs['distributions'][0].result.sum, 0) + + # class RunRemoteInferenceTest(RunInferenceFixture): # def setUp(self): From 037f3b62bbff87ef726ec85c2a4199179efa7e75 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Mon, 8 Jun 2020 10:16:39 -0400 Subject: [PATCH 09/31] fix spacing --- tfx_bsl/beam/run_inference_arrow_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 056d5674..4104d1f4 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -323,7 +323,7 @@ def testRegressModel(self): self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( - 'multi', + 'multi', model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path, signature_name=['regress_diff'])), From 44af0587062dd4e6a9bd1acfe2e2c42bbd6c3a31 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 10 Jun 2020 17:57:18 -0400 Subject: [PATCH 10/31] remove unecessary loop --- tfx_bsl/beam/run_inference_arrow.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 7d7cecf5..f5d2800f 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -65,6 +65,7 @@ except ImportError: pass +_RECORDBATCH_COLUMN = '__RAW_RECORD__' _DEFAULT_INPUT_KEY = 'examples' _METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' _METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' @@ -442,6 +443,8 @@ def _prepare_instances( instance = {} tfexample = tf.train.Example.FromString(example) + + # TODO (Maxine): consider leveraging recordbatch columns for input_name, feature in tfexample.features.feature.items(): attr_name = feature.WhichOneof('kind') if attr_name is None: @@ -654,9 +657,8 @@ def setup(self): super(_BatchClassifyDoFn, self).setup() def _check_elements(self, elements: List[Union[str, bytes]]) -> None: - for element in elements: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Classify only supports tf.train.Example') + if self._data_type != DataType.EXAMPLE: + raise ValueError('Classify only supports tf.train.Example') def _post_process( self, elements: Sequence[Union[str, bytes]], @@ -677,9 +679,8 @@ def setup(self): super(_BatchRegressDoFn, self).setup() def _check_elements(self, elements: List[Union[str, bytes]]) -> None: - for element in elements: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Regress only supports tf.train.Example') + if self._data_type != DataType.EXAMPLE: + raise ValueError('Regress only supports tf.train.Example') def _post_process( self, elements: Sequence[Union[str, bytes]], @@ -750,9 +751,8 @@ class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on multi-head model.""" def _check_elements(self, elements: List[Union[str, bytes]]) -> None: - for element in elements: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Multi-inference only supports tf.train.Example') + if self._data_type != DataType.EXAMPLE: + raise ValueError('Multi-inference only supports tf.train.Example') def _post_process( self, elements: Sequence[Union[str, bytes]], @@ -864,6 +864,7 @@ def process( yield result + def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], elements: Sequence[Union[str, bytes]], 
outputs: Mapping[Text, np.ndarray] From 3b64e745b7b64ee6ea483aefc35f32a9c9a169af Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 11 Jun 2020 18:21:40 -0400 Subject: [PATCH 11/31] modify and add tests for remote prediction --- tfx_bsl/beam/run_inference_arrow.py | 11 +- tfx_bsl/beam/run_inference_arrow_test.py | 656 +++++------------------ 2 files changed, 134 insertions(+), 533 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index f5d2800f..c0cc0521 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -387,6 +387,7 @@ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, pipeline_options: PipelineOptions, data_type): super(_RemotePredictDoFn, self).__init__(inference_spec_type) self._api_client = None + self._data_type = data_type project_id = ( inference_spec_type.ai_platform_prediction_model_spec.project_id or @@ -437,10 +438,6 @@ def _prepare_instances( cls, elements: List[Union[str, bytes]] ) -> Generator[Mapping[Text, Any], None, None]: for example in elements: - # TODO(b/151468119): support tf.train.SequenceExample - if data_type != DataType.EXAMPLE: - raise ValueError('Remote prediction only supports tf.train.Example') - instance = {} tfexample = tf.train.Example.FromString(example) @@ -482,8 +479,14 @@ def _parse_feature_content(values: Sequence[Any], attr_name: Text, else: return values + def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + # TODO(b/151468119): support tf.train.SequenceExample + if self._data_type != DataType.EXAMPLE: + raise ValueError('Remote prediction only supports tf.train.Example') + def run_inference( self, elements: List[Union[str, bytes]]) -> Sequence[Mapping[Text, Any]]: + self._check_elements(elements) body = {'instances': list(self._prepare_instances(elements))} request = self._make_request(body) response = self._execute_request(request) diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 4104d1f4..c502ad8c 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -55,6 +55,13 @@ def setUp(self): """, tf.train.Example()), ] + serialized_example = [] + for example in self._predict_examples: + serialized_example.append(example.SerializeToString()) + self.record_batch = pa.RecordBatch.from_arrays( + [serialized_example, ], ["__RAW_RECORD__", ] + ) + def _get_output_data_dir(self, sub_dir=None): test_dir = self._testMethodName path = os.path.join( @@ -473,535 +480,126 @@ def testTelemetry(self): load_model_latency_milli_secs['distributions'][0].result.sum, 0) -# class RunInferenceFixture(tf.test.TestCase): - -# def setUp(self): -# super(RunInferenceFixture, self).setUp() -# self._predict_examples = [ -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 0 }}} -# } -# """, tf.train.Example()), -# ] - -# def _get_output_data_dir(self, sub_dir=None): -# test_dir = self._testMethodName -# path = os.path.join( -# os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), -# test_dir) -# if not tf.io.gfile.exists(path): -# tf.io.gfile.makedirs(path) -# if sub_dir is not None: -# path = os.path.join(path, sub_dir) -# return path - -# def _prepare_predict_examples(self, example_path): -# with tf.io.TFRecordWriter(example_path) as output_file: -# for example in self._predict_examples: -# output_file.write(example.SerializeToString()) - - -# class 
RunOfflineInferenceTest(RunInferenceFixture): - -# def setUp(self): -# super(RunOfflineInferenceTest, self).setUp() -# self._predict_examples = [ -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 0 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 1 }}} -# } -# """, tf.train.Example()), -# ] -# self._multihead_examples = [ -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.8 }}} -# feature {key: "y" value { float_list { value: 0.2 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.6 }}} -# feature {key: "y" value { float_list { value: 0.1 }}} -# } -# """, tf.train.Example()), -# ] - - -# def _prepare_multihead_examples(self, example_path): -# with tf.io.TFRecordWriter(example_path) as output_file: -# for example in self._multihead_examples: -# output_file.write(example.SerializeToString()) - -# def _build_predict_model(self, model_path): -# """Exports the dummy sum predict model.""" - -# with tf.compat.v1.Graph().as_default(): -# input_tensors = { -# 'x': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, default_value=0) -# } -# serving_receiver = ( -# tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( -# input_tensors)()) -# output_tensors = {'y': serving_receiver.features['x'] * 2} -# sess = tf.compat.v1.Session() -# sess.run(tf.compat.v1.initializers.global_variables()) -# signature_def = tf.compat.v1.estimator.export.PredictOutput( -# output_tensors).as_signature_def(serving_receiver.receiver_tensors) -# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) -# builder.add_meta_graph_and_variables( -# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], -# signature_def_map={ -# tf.compat.v1.saved_model.signature_constants -# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: -# signature_def, -# }) -# builder.save() - -# def _build_regression_signature(self, input_tensor, output_tensor): -# """Helper function for building a regression SignatureDef.""" -# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# input_tensor) -# signature_inputs = { -# tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: -# input_tensor_info -# } -# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# output_tensor) -# signature_outputs = { -# tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: -# output_tensor_info -# } -# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( -# signature_inputs, signature_outputs, -# tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) - -# def _build_classification_signature(self, input_tensor, scores_tensor): -# """Helper function for building a classification SignatureDef.""" -# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# input_tensor) -# signature_inputs = { -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: -# input_tensor_info -# } -# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# scores_tensor) -# signature_outputs = { -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: -# output_tensor_info -# } -# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( -# signature_inputs, signature_outputs, -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) - -# def 
_build_multihead_model(self, model_path): -# with tf.compat.v1.Graph().as_default(): -# input_example = tf.compat.v1.placeholder( -# tf.string, name='input_examples_tensor') -# config = { -# 'x': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, default_value=0), -# 'y': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, default_value=0), -# } -# features = tf.compat.v1.parse_example(input_example, config) -# x = features['x'] -# y = features['y'] -# sum_pred = x + y -# diff_pred = tf.abs(x - y) -# sess = tf.compat.v1.Session() -# sess.run(tf.compat.v1.initializers.global_variables()) -# signature_def_map = { -# 'regress_diff': -# self._build_regression_signature(input_example, diff_pred), -# 'classify_sum': -# self._build_classification_signature(input_example, sum_pred), -# tf.compat.v1.saved_model.signature_constants -# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: -# self._build_regression_signature(input_example, sum_pred) -# } -# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) -# builder.add_meta_graph_and_variables( -# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], -# signature_def_map=signature_def_map) -# builder.save() - -# def _run_inference_with_beam(self, example_path, inference_spec_type, -# prediction_log_path): -# with beam.Pipeline() as pipeline: -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) -# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) -# | 'WritePredictions' >> beam.io.WriteToTFRecord( -# prediction_log_path, -# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - -# def _get_results(self, prediction_log_path): -# results = [] -# for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): -# record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) -# for record_string in record_iterator: -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.MergeFromString(record_string) -# results.append(prediction_log) -# return results - -# def testModelPathInvalid(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=self._get_output_data_dir())), prediction_log_path) - -# def testEstimatorModelPredict(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_predict_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# self.assertEqual( -# results[0].predict_log.request.inputs[ -# run_inference._DEFAULT_INPUT_KEY].string_val[0], -# self._predict_examples[0].SerializeToString()) -# self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, -# tf.float32) -# self.assertLen( -# results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) -# self.assertEqual( -# 
results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, -# 1) -# self.assertEqual( -# results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, -# 1) - -# def testClassifyModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['classify_sum'])), -# prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# classify_log = results[0].classify_log -# self.assertLen(classify_log.request.input.example_list.examples, 1) -# self.assertEqual(classify_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(classify_log.response.result.classifications, 1) -# self.assertLen(classify_log.response.result.classifications[0].classes, 1) -# self.assertAlmostEqual( -# classify_log.response.result.classifications[0].classes[0].score, 1.0) - -# def testRegressModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['regress_diff'])), -# prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# regress_log = results[0].regress_log -# self.assertLen(regress_log.request.input.example_list.examples, 1) -# self.assertEqual(regress_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(regress_log.response.result.regressions, 1) -# self.assertAlmostEqual(regress_log.response.result.regressions[0].value, -# 0.6) - -# def testMultiInferenceModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, -# signature_name=['regress_diff', 'classify_sum'])), -# prediction_log_path) -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# multi_inference_log = results[0].multi_inference_log -# self.assertLen(multi_inference_log.request.input.example_list.examples, 1) -# self.assertEqual(multi_inference_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(multi_inference_log.response.results, 2) -# signature_names = [] -# for result in multi_inference_log.response.results: -# signature_names.append(result.model_spec.signature_name) -# self.assertIn('regress_diff', signature_names) -# self.assertIn('classify_sum', signature_names) -# result = multi_inference_log.response.results[0] -# self.assertEqual(result.model_spec.signature_name, 'regress_diff') -# 
self.assertLen(result.regression_result.regressions, 1) -# self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) -# result = multi_inference_log.response.results[1] -# self.assertEqual(result.model_spec.signature_name, 'classify_sum') -# self.assertLen(result.classification_result.classifications, 1) -# self.assertLen(result.classification_result.classifications[0].classes, 1) -# self.assertAlmostEqual( -# result.classification_result.classifications[0].classes[0].score, 1.0) - -# def testKerasModelPredict(self): -# inputs = tf.keras.Input(shape=(1,), name='input1') -# output1 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output1')( -# inputs) -# output2 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output2')( -# inputs) -# inference_model = tf.keras.models.Model(inputs, [output1, output2]) - -# class TestKerasModel(tf.keras.Model): - -# def __init__(self, inference_model): -# super(TestKerasModel, self).__init__(name='test_keras_model') -# self.inference_model = inference_model - -# @tf.function(input_signature=[ -# tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') -# ]) -# def call(self, serialized_example): -# features = { -# 'input1': -# tf.compat.v1.io.FixedLenFeature([1], -# dtype=tf.float32, -# default_value=0) -# } -# input_tensor_dict = tf.io.parse_example(serialized_example, features) -# return inference_model(input_tensor_dict['input1']) - -# model = TestKerasModel(inference_model) -# model.compile( -# optimizer=tf.keras.optimizers.Adam(lr=.001), -# loss=tf.keras.losses.binary_crossentropy, -# metrics=['accuracy']) - -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) - -# def testTelemetry(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['classify_sum'])) -# pipeline = beam.Pipeline() -# _ = ( -# pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) -# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) -# run_result = pipeline.run() -# run_result.wait_until_finish() - -# num_inferences = run_result.metrics().query( -# MetricsFilter().with_name('num_inferences')) -# self.assertTrue(num_inferences['counters']) -# self.assertEqual(num_inferences['counters'][0].result, 2) -# num_instances = run_result.metrics().query( -# MetricsFilter().with_name('num_instances')) -# self.assertTrue(num_instances['counters']) -# self.assertEqual(num_instances['counters'][0].result, 2) -# inference_request_batch_size = run_result.metrics().query( -# MetricsFilter().with_name('inference_request_batch_size')) -# self.assertTrue(inference_request_batch_size['distributions']) -# self.assertEqual( -# 
inference_request_batch_size['distributions'][0].result.sum, 2) -# inference_request_batch_byte_size = run_result.metrics().query( -# MetricsFilter().with_name('inference_request_batch_byte_size')) -# self.assertTrue(inference_request_batch_byte_size['distributions']) -# self.assertEqual( -# inference_request_batch_byte_size['distributions'][0].result.sum, -# sum(element.ByteSize() for element in self._multihead_examples)) -# inference_batch_latency_micro_secs = run_result.metrics().query( -# MetricsFilter().with_name('inference_batch_latency_micro_secs')) -# self.assertTrue(inference_batch_latency_micro_secs['distributions']) -# self.assertGreaterEqual( -# inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) -# load_model_latency_milli_secs = run_result.metrics().query( -# MetricsFilter().with_name('load_model_latency_milli_secs')) -# self.assertTrue(load_model_latency_milli_secs['distributions']) -# self.assertGreaterEqual( -# load_model_latency_milli_secs['distributions'][0].result.sum, 0) - - -# class RunRemoteInferenceTest(RunInferenceFixture): - -# def setUp(self): -# super(RunRemoteInferenceTest, self).setUp() -# self.example_path = self._get_output_data_dir('example') -# self._prepare_predict_examples(self.example_path) -# # This is from https://ml.googleapis.com/$discovery/rest?version=v1. -# self._discovery_testdata_dir = os.path.join( -# os.path.join(os.path.dirname(__file__), 'testdata'), -# 'ml_discovery.json') - -# @staticmethod -# def _make_response_body(content, successful): -# if successful: -# response_dict = {'predictions': content} -# else: -# response_dict = {'error': content} -# return json.dumps(response_dict) - -# def _set_up_pipeline(self, inference_spec_type): -# self.pipeline = beam.Pipeline() -# self.pcoll = ( -# self.pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) -# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) -# | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) - -# def _run_inference_with_beam(self): -# self.pipeline_result = self.pipeline.run() -# self.pipeline_result.wait_until_finish() - -# def test_model_predict(self): -# predictions = [{'output_1': [0.901], 'output_2': [0.997]}] -# builder = http.RequestMockBuilder({ -# 'ml.projects.predict': -# (None, self._make_response_body(predictions, successful=True)) -# }) -# resource = discovery.build( -# 'ml', -# 'v1', -# http=http.HttpMock(self._discovery_testdata_dir, -# {'status': http_client.OK}), -# requestBuilder=builder) -# with mock.patch('googleapiclient.discovery.' 'build') as response_mock: -# response_mock.side_effect = lambda service, version: resource -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# ai_platform_prediction_model_spec=model_spec_pb2 -# .AIPlatformPredictionModelSpec( -# project_id='test-project', -# model_name='test-model', -# )) - -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.predict_log.response.outputs['output_1'].CopyFrom( -# tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) -# prediction_log.predict_log.response.outputs['output_2'].CopyFrom( -# tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) - -# self._set_up_pipeline(inference_spec_type) -# assert_that(self.pcoll, equal_to([prediction_log])) -# self._run_inference_with_beam() - -# def test_exception_raised_when_response_body_contains_error_entry(self): -# error_msg = 'Base64 decode failed.' 
-# builder = http.RequestMockBuilder({ -# 'ml.projects.predict': -# (None, self._make_response_body(error_msg, successful=False)) -# }) -# resource = discovery.build( -# 'ml', -# 'v1', -# http=http.HttpMock(self._discovery_testdata_dir, -# {'status': http_client.OK}), -# requestBuilder=builder) -# with mock.patch('googleapiclient.discovery.' 'build') as response_mock: -# response_mock.side_effect = lambda service, version: resource -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# ai_platform_prediction_model_spec=model_spec_pb2 -# .AIPlatformPredictionModelSpec( -# project_id='test-project', -# model_name='test-model', -# )) - -# try: -# self._set_up_pipeline(inference_spec_type) -# self._run_inference_with_beam() -# except ValueError as exc: -# actual_error_msg = str(exc) -# self.assertTrue(actual_error_msg.startswith(error_msg)) -# else: -# self.fail('Test was expected to throw ValueError exception') - -# def test_exception_raised_when_project_id_is_empty(self): -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# ai_platform_prediction_model_spec=model_spec_pb2 -# .AIPlatformPredictionModelSpec(model_name='test-model',)) - -# with self.assertRaises(ValueError): -# self._set_up_pipeline(inference_spec_type) -# self._run_inference_with_beam() - -# def test_request_body_with_binary_data(self): -# example = text_format.Parse( -# """ -# features { -# feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} -# feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} -# feature { key: "y" value { int64_list { value: [1, 2] }}} -# } -# """, tf.train.Example()) -# result = list( -# run_inference._RemotePredictDoFn._prepare_instances([example])) -# self.assertEqual([ -# { -# 'x_bytes': { -# 'b64': 'QVNhOGFzZGY=' -# }, -# 'x': 'JLK7ljk3', -# 'y': [1, 2] -# }, -# ], result) +class RunRemoteInferenceArrowTest(RunInferenceArrowFixture): + + def setUp(self): + super(RunRemoteInferenceArrowTest, self).setUp() + self.example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self.example_path) + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. + self._discovery_testdata_dir = os.path.join( + os.path.join(os.path.dirname(__file__), 'testdata'), + 'ml_discovery.json') + + @staticmethod + def _make_response_body(content, successful): + if successful: + response_dict = {'predictions': content} + else: + response_dict = {'error': content} + return json.dumps(response_dict) + + def _set_up_pipeline(self, inference_spec_type): + self.pipeline = beam.Pipeline() + self.pcoll = ( + self.pipeline + | "createRecordBatch" >> beam.Create([self.record_batch]) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type)) + + def _run_inference_with_beam(self): + self.pipeline_result = self.pipeline.run() + self.pipeline_result.wait_until_finish() + + def test_model_predict(self): + predictions = [{'output_1': [0.901], 'output_2': [0.997]}] + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(predictions, successful=True)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.predict_log.response.outputs['output_1'].CopyFrom( + tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) + prediction_log.predict_log.response.outputs['output_2'].CopyFrom( + tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + + self._set_up_pipeline(inference_spec_type) + assert_that(self.pcoll, equal_to([prediction_log])) + self._run_inference_with_beam() + + def test_exception_raised_when_response_body_contains_error_entry(self): + error_msg = 'Base64 decode failed.' + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(error_msg, successful=False)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + try: + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def test_exception_raised_when_project_id_is_empty(self): + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec(model_name='test-model',)) + + with self.assertRaises(ValueError): + self._set_up_pipeline(inference_spec_type) + self._run_inference_with_beam() + + def test_request_body_with_binary_data(self): + example = text_format.Parse( + """ + features { + feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} + feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} + feature { key: "y" value { int64_list { value: [1, 2] }}} + } + """, tf.train.Example()) + result = list( + run_inference_arrow._RemotePredictDoFn._prepare_instances([example.SerializeToString()])) + self.assertEqual([ + { + 'x_bytes': { + 'b64': 'QVNhOGFzZGY=' + }, + 'x': 'JLK7ljk3', + 'y': [1, 2] + }, + ], result) if __name__ == '__main__': From f874ce3fa430fdb739fde2f74e80b9d7c94b3bd1 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Mon, 22 Jun 2020 12:25:59 -0400 Subject: [PATCH 12/31] add config param, declared IOspec (foundation for multi-tensor) --- tfx_bsl/beam/run_inference_arrow.py | 109 ++++++++++----------- tfx_bsl/beam/run_inference_arrow_test.py | 117 ++++++++++++++++------- 2 files changed, 134 insertions(+), 92 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index c0cc0521..f73add3c 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -45,6 +45,7 @@ from tfx_bsl.beam import shared from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util +from tfx_bsl.tfxio import tensor_adapter from typing import Any, Generator, Iterable, List, 
Mapping, Sequence, Text, \ Tuple, Union, Optional @@ -93,21 +94,21 @@ class DataType(object): SEQUENCEEXAMPLE = 'SEQUENCEEXAMPLE' -# This API is private and called with only example or sequence example -# TODO (Maxine): pTransform from examples/sequence example here @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None + tensor_adapter_config: tensor_adapter.TensorAdapterConfig ) -> beam.pvalue.PCollection: """Implementation of RunInference API. Args: examples: A PCollection containing RecordBatch of serialized examples. inference_spec_type: Model inference endpoint. + tensor_adapter_config: Tensor adapter config which specifies how to obtain + tensors from the Arrow RecordBatch. Returns: A PCollection containing prediction logs. @@ -117,25 +118,24 @@ def RunInferenceImpl( # pylint: disable=invalid-name """ logging.info('RunInference on model: %s', inference_spec_type) - # TODO (Maxine): uncomment this once we change the api to take input - # Union[tf.train.Example, tf.train.SequenceExample] + # TODO (Maxine): either determine data type or take it as an input # data_type = _get_data_type(examples) data_type = DataType.EXAMPLE operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: return examples | 'Classify' >> _Classify( - inference_spec_type, data_type, process_column) + inference_spec_type, tensor_adapter_config, data_type) elif operation_type == OperationType.REGRESSION: return examples | 'Regress' >> _Regress( - inference_spec_type, data_type, process_column) + inference_spec_type,tensor_adapter_config, data_type) elif operation_type == OperationType.PREDICTION: return examples | 'Predict' >> _Predict( - inference_spec_type, data_type, process_column) + inference_spec_type, tensor_adapter_config, data_type) elif operation_type == OperationType.MULTIHEAD: return (examples | 'MultiInference' >> _MultiInference( - inference_spec_type, data_type, process_column)) + inference_spec_type, tensor_adapter_config, data_type)) else: raise ValueError('Unsupported operation_type %s' % operation_type) @@ -152,12 +152,12 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - data_type, process_column: Optional[str] = None): + tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): """Performs classify PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Classify' >> beam.ParDo(_BatchClassifyDoFn( - inference_spec_type, shared.Shared(), data_type, process_column)) + | 'Classify' >> beam.ParDo( + _BatchClassifyDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) | 'BuildPredictionLogForClassifications' >> beam.ParDo( _BuildPredictionLogForClassificationsDoFn())) else: @@ -169,12 +169,12 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - data_type, process_column: Optional[str] = None): + 
tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): """Performs regress PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Regress' >> beam.ParDo(_BatchRegressDoFn( - inference_spec_type, shared.Shared(), data_type, process_column)) + | 'Regress' >> beam.ParDo( + _BatchRegressDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) | 'BuildPredictionLogForRegressions' >> beam.ParDo( _BuildPredictionLogForRegressionsDoFn())) else: @@ -186,18 +186,18 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - data_type, process_column: Optional[str] = None): + tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): """Performs predict PTransform.""" if _using_in_process_inference(inference_spec_type): predictions = ( pcoll - | 'Predict' >> beam.ParDo(_BatchPredictDoFn( - inference_spec_type, shared.Shared(), data_type, process_column))) + | 'Predict' >> beam.ParDo( + _BatchPredictDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type))) else: predictions = ( pcoll - | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( - inference_spec_type, pcoll.pipeline.options, data_type))) + | 'RemotePredict' >> beam.ParDo( + _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options, tensor_adapter_config, data_type))) return (predictions | 'BuildPredictionLogForPredictions' >> beam.ParDo( _BuildPredictionLogForPredictionsDoFn())) @@ -208,13 +208,13 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name inference_spec_type: model_spec_pb2.InferenceSpecType, - data_type, process_column: Optional[str] = None): + tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): """Performs multi inference PTransform.""" if _using_in_process_inference(inference_spec_type): return ( pcoll - | 'MultiInference' >> beam.ParDo(_BatchMultiInferenceDoFn( - inference_spec_type, shared.Shared(), data_type, process_column)) + | 'MultiInference' >> beam.ParDo( + _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) else: raise NotImplementedError @@ -278,47 +278,44 @@ def update( self._inference_request_batch_byte_size.update( sum(len(element) for element in elements)) - # TODO (Maxine): just one col for now, later, will do a list of str + def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, - process_column: Optional[str] = None): + tensor_adapter_config: tensor_adapter.TensorAdapterConfig): super(_BaseDoFn, self).__init__() self._clock = None - self._process_column = process_column self._metrics_collector = self._MetricsCollector(inference_spec_type) + self._tensor_adapter = tensor_adapter.TensorAdapter(tensor_adapter_config) + self._io_tensor_spec = None # This value may be None if the model is remote def setup(self): self._clock = _ClockFactory.make_clock() - def process( - self, elements: pa.RecordBatch - ) -> Iterable[Any]: - batch_start_time = self._clock.get_current_time_in_microseconds() - # TODO (Maxine): take process as a parameter, should it be part of inference 
spec? - # extract record batch from here, assuming first column - - # what would record batch look like? (flatten or not) - # vs np.asarray(elements.column(0)) + def _extract_from_recordBatch(self, elements: pa.RecordBatch): if len(elements.columns) == 1: - serialized_examples = elements.column(0).to_pylist() + serialized_examples = elements.column(0).flatten().to_pylist() else: - if self._process_column is None: - raise ValueError('Must pass in a process column with multi-column RecordBatch') - serialized_examples = None for column_name, column_array in zip(elements.schema.names, elements.columns): column_type = column_array.type - if column_name == self._process_column: - serialized_examples = column_array.to_pylist() + if column_name == _RECORDBATCH_COLUMN: + serialized_examples = column_array.flatten().to_pylist() break + if (serialized_examples is None): + raise ValueError('Raw examples not found.') + for example in serialized_examples: if not (isinstance(example, bytes) or isinstance(example, str)): raise ValueError( - f'Expected a list of serialized examples in bytes or as a string, \ - got {type(example)}' - ) + 'Expected a list of serialized examples in bytes or as a string, got %s' % + type(example)) + + return serialized_examples + def process(self, elements: pa.RecordBatch) -> Iterable[Any]: + batch_start_time = self._clock.get_current_time_in_microseconds() + serialized_examples = self._extract_from_recordBatch(elements) outputs = self.run_inference(serialized_examples) result = self._post_process(serialized_examples, outputs) self._metrics_collector.update( @@ -384,8 +381,9 @@ class _RemotePredictDoFn(_BaseDoFn): """ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: PipelineOptions, data_type): - super(_RemotePredictDoFn, self).__init__(inference_spec_type) + pipeline_options: PipelineOptions, + tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + super(_RemotePredictDoFn, self).__init__(inference_spec_type, tensor_adapter_config) self._api_client = None self._data_type = data_type @@ -527,22 +525,19 @@ class _BaseBatchSavedModelDoFn(_BaseDoFn): """ def __init__( - self, - inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, data_type, - process_column: Optional[str] = None, - ): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, process_column) + self, inference_spec_type: model_spec_pb2.InferenceSpecType, + shared_model_handle: shared.Shared, + tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, tensor_adapter_config) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle self._model_path = inference_spec_type.saved_model_spec.model_path self._tags = None self._signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) self._session = None - self._io_tensor_spec = None self._data_type = data_type def setup(self): @@ -1138,7 +1133,7 @@ def _get_data_type(elements: Sequence[Any]) -> Text: elif all(isinstance(element, tf.train.SequenceExample)): return DataType.SEQUENCEEXAMPLE else: - raise ValueError(f'Unsupported DataType {type(elements)}') + return DataType.EXAMPLE def 
_get_meta_graph_def(saved_model_pb: _SavedModel, diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index c502ad8c..e6554758 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -36,10 +36,12 @@ import tensorflow as tf from tfx_bsl.beam import run_inference_arrow from tfx_bsl.public.proto import model_spec_pb2 -from tfx_bsl.tfxio import raw_tf_record +from tfx_bsl.tfxio import test_util +from tfx_bsl.tfxio import tensor_adapter from google.protobuf import text_format from tensorflow_serving.apis import prediction_log_pb2 +from tensorflow_metadata.proto.v0 import schema_pb2 class RunInferenceArrowFixture(tf.test.TestCase): @@ -57,10 +59,27 @@ def setUp(self): serialized_example = [] for example in self._predict_examples: - serialized_example.append(example.SerializeToString()) + serialized_example.append([example.SerializeToString()]) self.record_batch = pa.RecordBatch.from_arrays( - [serialized_example, ], ["__RAW_RECORD__", ] - ) + [ + pa.array([[0]], type=pa.list_(pa.float32())), + serialized_example + ], + ['input1', '__RAW_RECORD__'] +) + + tfxio = test_util.InMemoryTFExampleRecord( + schema=text_format.Parse( + """ + feature { + name: "input1" + type: FLOAT + } + """, schema_pb2.Schema()), + raw_record_column_name='__RAW_RECORD__') + self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) def _get_output_data_dir(self, sub_dir=None): test_dir = self._testMethodName @@ -96,6 +115,32 @@ def setUp(self): } """, tf.train.Example()), ] + + serialized_example = [] + for example in self._predict_examples: + serialized_example.append([example.SerializeToString()]) + self.record_batch = pa.RecordBatch.from_arrays( + [ + pa.array([[0], [1]], type=pa.list_(pa.float32())), + serialized_example + ], + ['input1', '__RAW_RECORD__'] + ) + + tfxio = test_util.InMemoryTFExampleRecord( + schema=text_format.Parse( + """ + feature { + name: "input1" + type: FLOAT + } + """, schema_pb2.Schema()), + raw_record_column_name='__RAW_RECORD__') + self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + self._multihead_examples = [ text_format.Parse( """ @@ -113,21 +158,35 @@ def setUp(self): """, tf.train.Example()), ] - - serialized_example = [] - for example in self._predict_examples: - serialized_example.append(example.SerializeToString()) - self.record_batch = pa.RecordBatch.from_arrays( - [serialized_example, ], ["__RAW_RECORD__", ] - ) - serialized_example_multi = [] for example in self._multihead_examples: - serialized_example_multi.append(example.SerializeToString()) + serialized_example_multi.append([example.SerializeToString()]) self.record_batch_multihead = pa.RecordBatch.from_arrays( - [serialized_example_multi, ], ["__RAW_RECORD__", ] + [ + pa.array([[0.8], [0.6]], type=pa.list_(pa.float32())), + pa.array([[0.2], [0.1]], type=pa.list_(pa.float32())), + serialized_example_multi + ], + ['x', 'y', '__RAW_RECORD__'] ) + tfxio_multi = test_util.InMemoryTFExampleRecord( + schema=text_format.Parse( + """ + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()), + raw_record_column_name='__RAW_RECORD__') + self.tensor_adapter_config_multihead = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio_multi.ArrowSchema(), + 
tensor_representations=tfxio_multi.TensorRepresentations()) + def _prepare_multihead_examples(self, example_path): with tf.io.TFRecordWriter(example_path) as output_file: @@ -235,7 +294,8 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, _ = ( pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, self.tensor_adapter_config_multihead) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -244,7 +304,8 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, _ = ( pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, self.tensor_adapter_config) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -260,8 +321,6 @@ def _get_results(self, prediction_log_path): return results def testModelPathInvalid(self): - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) prediction_log_path = self._get_output_data_dir('predictions') with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): self._run_inference_with_beam( @@ -271,8 +330,6 @@ def testModelPathInvalid(self): model_path=self._get_output_data_dir())), prediction_log_path) def testEstimatorModelPredict(self): - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) model_path = self._get_output_data_dir('model') self._build_predict_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') @@ -300,8 +357,6 @@ def testEstimatorModelPredict(self): 1) def testClassifyModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) model_path = self._get_output_data_dir('model') self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') @@ -324,8 +379,6 @@ def testClassifyModel(self): classify_log.response.result.classifications[0].classes[0].score, 1.0) def testRegressModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) model_path = self._get_output_data_dir('model') self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') @@ -347,8 +400,6 @@ def testRegressModel(self): 0.6) def testMultiInferenceModel(self): - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) model_path = self._get_output_data_dir('model') self._build_multihead_model(model_path) prediction_log_path = self._get_output_data_dir('predictions') @@ -421,8 +472,6 @@ def call(self, serialized_example): tf.compat.v1.keras.experimental.export_saved_model( model, model_path, serving_only=True) - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( 'predict', @@ -434,8 +483,6 @@ def call(self, serialized_example): self.assertLen(results, 2) def testTelemetry(self): - example_path = 
self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) model_path = self._get_output_data_dir('model') self._build_multihead_model(model_path) inference_spec_type = model_spec_pb2.InferenceSpecType( @@ -445,7 +492,8 @@ def testTelemetry(self): _ = ( pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type)) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, self.tensor_adapter_config_multihead)) run_result = pipeline.run() run_result.wait_until_finish() @@ -484,8 +532,6 @@ class RunRemoteInferenceArrowTest(RunInferenceArrowFixture): def setUp(self): super(RunRemoteInferenceArrowTest, self).setUp() - self.example_path = self._get_output_data_dir('example') - self._prepare_predict_examples(self.example_path) # This is from https://ml.googleapis.com/$discovery/rest?version=v1. self._discovery_testdata_dir = os.path.join( os.path.join(os.path.dirname(__file__), 'testdata'), @@ -504,7 +550,8 @@ def _set_up_pipeline(self, inference_spec_type): self.pcoll = ( self.pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl(inference_spec_type)) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, self.tensor_adapter_config)) def _run_inference_with_beam(self): self.pipeline_result = self.pipeline.run() @@ -582,7 +629,7 @@ def test_exception_raised_when_project_id_is_empty(self): def test_request_body_with_binary_data(self): example = text_format.Parse( - """ + """ features { feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} From 300d8c9eca87bda83622ffd801a9fa9b99c76343 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 25 Jun 2020 19:57:03 -0400 Subject: [PATCH 13/31] add signature checking with multi-tensor model --- tfx_bsl/beam/run_inference_arrow.py | 240 ++++++++++++----------- tfx_bsl/beam/run_inference_arrow_test.py | 17 +- 2 files changed, 131 insertions(+), 126 deletions(-) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index f73add3c..9fafadd4 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -100,15 +100,16 @@ class DataType(object): def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None ) -> beam.pvalue.PCollection: """Implementation of RunInference API. Args: examples: A PCollection containing RecordBatch of serialized examples. inference_spec_type: Model inference endpoint. - tensor_adapter_config: Tensor adapter config which specifies how to obtain - tensors from the Arrow RecordBatch. + tensor_adapter_config [Optional]: Tensor adapter config which specifies how to + obtain tensors from the Arrow RecordBatch. + - Not required when running inference with remote model or 1 input Returns: A PCollection containing prediction logs. 
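Note (illustrative only, not part of the patch): a minimal sketch of how the updated RunInferenceImpl signature is meant to be called, mirroring the test fixtures in run_inference_arrow_test.py. The model path, output path, the 'input1' feature, and the '__RAW_RECORD__' column name are placeholder assumptions, and test_util.InMemoryTFExampleRecord stands in for whatever TFXIO source produces the RecordBatches in a real pipeline.

import apache_beam as beam
import pyarrow as pa
import tensorflow as tf

from google.protobuf import text_format
from tensorflow_metadata.proto.v0 import schema_pb2
from tensorflow_serving.apis import prediction_log_pb2
from tfx_bsl.beam import run_inference_arrow
from tfx_bsl.public.proto import model_spec_pb2
from tfx_bsl.tfxio import tensor_adapter
from tfx_bsl.tfxio import test_util

# One serialized tf.train.Example plus its decoded 'input1' column, packed
# into a single-row Arrow RecordBatch that carries the raw-record column.
example = text_format.Parse(
    'features { feature { key: "input1" value { float_list { value: 0 } } } }',
    tf.train.Example())
record_batch = pa.RecordBatch.from_arrays(
    [pa.array([[0.0]], type=pa.list_(pa.float32())),
     pa.array([[example.SerializeToString()]])],
    ['input1', '__RAW_RECORD__'])

# TensorAdapterConfig describing how to turn the RecordBatch into tensors.
tfxio = test_util.InMemoryTFExampleRecord(
    schema=text_format.Parse('feature { name: "input1" type: FLOAT }',
                             schema_pb2.Schema()),
    raw_record_column_name='__RAW_RECORD__')
adapter_config = tensor_adapter.TensorAdapterConfig(
    arrow_schema=tfxio.ArrowSchema(),
    tensor_representations=tfxio.TensorRepresentations())

inference_spec = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/tmp/saved_model'))  # placeholder path

with beam.Pipeline() as pipeline:
  _ = (pipeline
       | 'CreateRecordBatch' >> beam.Create([record_batch])
       | 'RunInference' >> run_inference_arrow.RunInferenceImpl(
           inference_spec, adapter_config)
       | 'WritePredictions' >> beam.io.WriteToTFRecord(
           '/tmp/predictions',  # placeholder path
           coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))

As described in the docstring above, the tensor_adapter_config argument can be omitted when running remote inference or a single-input model.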
@@ -125,24 +126,23 @@ def RunInferenceImpl( # pylint: disable=invalid-name operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: return examples | 'Classify' >> _Classify( - inference_spec_type, tensor_adapter_config, data_type) + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.REGRESSION: return examples | 'Regress' >> _Regress( - inference_spec_type,tensor_adapter_config, data_type) + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.PREDICTION: return examples | 'Predict' >> _Predict( - inference_spec_type, tensor_adapter_config, data_type) + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.MULTIHEAD: - return (examples - | 'MultiInference' >> _MultiInference( - inference_spec_type, tensor_adapter_config, data_type)) + return (examples | 'MultiInference' >> _MultiInference( + inference_spec_type, data_type, tensor_adapter_config)) else: raise ValueError('Unsupported operation_type %s' % operation_type) _IOTensorSpec = collections.namedtuple( '_IOTensorSpec', - ['input_tensor_alias', 'input_tensor_name', 'output_alias_tensor_names']) + ['input_tensor_alias', 'input_tensor_names', 'input_tensor_types', 'output_alias_tensor_names']) _Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) @@ -151,13 +151,13 @@ def RunInferenceImpl( # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs classify PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Classify' >> beam.ParDo( - _BatchClassifyDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) + | 'Classify' >> beam.ParDo(_BatchClassifyDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildPredictionLogForClassifications' >> beam.ParDo( _BuildPredictionLogForClassificationsDoFn())) else: @@ -168,13 +168,13 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs regress PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Regress' >> beam.ParDo( - _BatchRegressDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) + | 'Regress' >> beam.ParDo(_BatchRegressDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildPredictionLogForRegressions' >> beam.ParDo( _BuildPredictionLogForRegressionsDoFn())) else: @@ -185,19 +185,19 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name 
@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs predict PTransform.""" if _using_in_process_inference(inference_spec_type): predictions = ( pcoll - | 'Predict' >> beam.ParDo( - _BatchPredictDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type))) + | 'Predict' >> beam.ParDo(_BatchPredictDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config))) else: predictions = ( pcoll - | 'RemotePredict' >> beam.ParDo( - _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options, tensor_adapter_config, data_type))) + | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( + inference_spec_type, pcoll.pipeline.options, data_type, tensor_adapter_config))) return (predictions | 'BuildPredictionLogForPredictions' >> beam.ParDo( _BuildPredictionLogForPredictionsDoFn())) @@ -207,14 +207,14 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs multi inference PTransform.""" if _using_in_process_inference(inference_spec_type): return ( pcoll - | 'MultiInference' >> beam.ParDo( - _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared(), tensor_adapter_config, data_type)) + | 'MultiInference' >> beam.ParDo(_BatchMultiInferenceDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) else: raise NotImplementedError @@ -281,42 +281,52 @@ def update( def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig): + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): super(_BaseDoFn, self).__init__() self._clock = None self._metrics_collector = self._MetricsCollector(inference_spec_type) - self._tensor_adapter = tensor_adapter.TensorAdapter(tensor_adapter_config) + self._tensor_adapter_config = tensor_adapter_config self._io_tensor_spec = None # This value may be None if the model is remote def setup(self): self._clock = _ClockFactory.make_clock() def _extract_from_recordBatch(self, elements: pa.RecordBatch): - if len(elements.columns) == 1: - serialized_examples = elements.column(0).flatten().to_pylist() - else: - serialized_examples = None - for column_name, column_array in zip(elements.schema.names, elements.columns): - column_type = column_array.type - if column_name == _RECORDBATCH_COLUMN: - serialized_examples = column_array.flatten().to_pylist() - break + """ + Function to extract the compatible input with model signature + """ + serialized_examples = None + for column_name, column_array in zip(elements.schema.names, elements.columns): + if 
column_name == _RECORDBATCH_COLUMN:
+        column_type = column_array.flatten().type
+        if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)):
+          raise ValueError('Expected a list of serialized examples in bytes or as a string, got %s' % column_type)
+        serialized_examples = column_array.flatten().to_pylist()
+        break

     if (serialized_examples is None):
       raise ValueError('Raw examples not found.')

-    for example in serialized_examples:
-      if not (isinstance(example, bytes) or isinstance(example, str)):
-        raise ValueError(
-            'Expected a list of serialized examples in bytes or as a string, got %s' %
-            type(example))
+    model_input = None
+    if self._io_tensor_spec is None:  # Case when we are running remote inference
+      model_input = serialized_examples
+    elif (len(self._io_tensor_spec.input_tensor_names) == 1):
+      model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples}
+    else:
+      if (self._tensor_adapter_config is None):
+        raise ValueError('Tensor adapter config is required with a multi-input model')
+      _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config)
+      dict_of_tensors = _tensor_adapter.ToBatchTensors(elements)
+      if self._io_tensor_spec:
+        model_input = model_util.filter_tensors_by_input_names(
+            dict_of_tensors, self._io_tensor_spec.input_tensor_names)

-    return serialized_examples
+    return serialized_examples, model_input

   def process(self, elements: pa.RecordBatch) -> Iterable[Any]:
     batch_start_time = self._clock.get_current_time_in_microseconds()
-    serialized_examples = self._extract_from_recordBatch(elements)
-    outputs = self.run_inference(serialized_examples)
+    serialized_examples, model_input = self._extract_from_recordBatch(elements)
+    outputs = self.run_inference(model_input)
     result = self._post_process(serialized_examples, outputs)
     self._metrics_collector.update(
         serialized_examples,
@@ -328,7 +338,7 @@ def finish_bundle(self):

   @abc.abstractmethod
   def run_inference(
-      self, elements: List[Union[str, bytes]]
+      self, tensors: Mapping[Any, Any]
   ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]:
     raise NotImplementedError

@@ -381,8 +391,8 @@ class _RemotePredictDoFn(_BaseDoFn):
   """

   def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType,
-               pipeline_options: PipelineOptions,
-               tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type):
+               pipeline_options: PipelineOptions, data_type,
+               tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None):
     super(_RemotePredictDoFn, self).__init__(inference_spec_type, tensor_adapter_config)
     self._api_client = None
     self._data_type = data_type
@@ -439,7 +449,6 @@ def _prepare_instances(
       instance = {}
       tfexample = tf.train.Example.FromString(example)

-      # TODO (Maxine): consider leveraging recordbatch columns
       for input_name, feature in tfexample.features.feature.items():
         attr_name = feature.WhichOneof('kind')
         if attr_name is None:
@@ -477,14 +486,14 @@ def _parse_feature_content(values: Sequence[Any], attr_name: Text,
     else:
       return values

-  def _check_elements(self, elements: List[Union[str, bytes]]) -> None:
+  def _check_elements(self) -> None:
     # TODO(b/151468119): support tf.train.SequenceExample
     if self._data_type != DataType.EXAMPLE:
       raise ValueError('Remote prediction only supports tf.train.Example')

   def run_inference(
       self, elements: List[Union[str, bytes]]) -> Sequence[Mapping[Text, Any]]:
-    self._check_elements(elements)
+    self._check_elements()
     body = {'instances': list(self._prepare_instances(elements))}
     request =
self._make_request(body) response = self._execute_request(request) @@ -526,8 +535,8 @@ class _BaseBatchSavedModelDoFn(_BaseDoFn): def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, - tensor_adapter_config: tensor_adapter.TensorAdapterConfig, data_type): + shared_model_handle: shared.Shared, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, tensor_adapter_config) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle @@ -584,56 +593,55 @@ def load(): def _pre_process(self) -> _IOTensorSpec: # Pre process functions will validate for each signature. - # TODO (Maxine): having more than 1 input io_tensor_specs = [] for signature in self._signatures: - if len(signature.signature_def.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - if (list(signature.signature_def.inputs.values())[0].dtype != + if (len(signature.signature_def.inputs) == 1 and + list(signature.signature_def.inputs.values())[0].dtype != tf.string.as_datatype_enum): raise ValueError( - 'Input dtype is expected to be %s, got %s' % + 'With 1 input, dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, list(signature.signature_def.inputs.values())[0].dtype) io_tensor_specs.append(_signature_pre_process(signature.signature_def)) - input_tensor_name = '' - input_tensor_alias = '' + input_tensor_names = [] + input_tensor_alias = [] + input_tensor_types = {} output_alias_tensor_names = {} for io_tensor_spec in io_tensor_specs: - if not input_tensor_name: - input_tensor_name = io_tensor_spec.input_tensor_name + if not input_tensor_names: + input_tensor_names = io_tensor_spec.input_tensor_names input_tensor_alias = io_tensor_spec.input_tensor_alias - elif input_tensor_name != io_tensor_spec.input_tensor_name: + elif input_tensor_names != io_tensor_spec.input_tensor_names: raise ValueError('Input tensor must be the same for all Signatures.') - for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items( - ): + for alias, tensor_type in io_tensor_spec.input_tensor_types.items(): + input_tensor_types[alias] = tensor_type + for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items(): output_alias_tensor_names[alias] = tensor_name - if (not output_alias_tensor_names or not input_tensor_name or + if (not output_alias_tensor_names or not input_tensor_names or not input_tensor_alias): raise ValueError('No valid fetch tensors or feed tensors.') - return _IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) + return _IOTensorSpec(input_tensor_alias, input_tensor_names, + input_tensor_types, output_alias_tensor_names) def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) def run_inference( - self, elements: List[Union[str, bytes]]) -> Mapping[Text, np.ndarray]: - self._check_elements(elements) - outputs = self._run_tf_operations(elements) + self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: + self._check_elements() + outputs = self._run_tf_operations(tensors) return outputs def _run_tf_operations( - self, elements: List[Union[str, bytes]]) -> Mapping[Text, np.ndarray]: + self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: result = self._session.run( - self._io_tensor_spec.output_alias_tensor_names, - 
feed_dict={self._io_tensor_spec.input_tensor_name: elements}) + self._io_tensor_spec.output_alias_tensor_names, feed_dict=tensors) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): raise RuntimeError('Output length does not match fetches') return result - def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + def _check_elements(self) -> None: """Unimplemented.""" raise NotImplementedError @@ -654,7 +662,7 @@ def setup(self): signature_def.method_name) super(_BatchClassifyDoFn, self).setup() - def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + def _check_elements(self) -> None: if self._data_type != DataType.EXAMPLE: raise ValueError('Classify only supports tf.train.Example') @@ -676,7 +684,7 @@ class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): def setup(self): super(_BatchRegressDoFn, self).setup() - def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + def _check_elements(self) -> None: if self._data_type != DataType.EXAMPLE: raise ValueError('Regress only supports tf.train.Example') @@ -702,14 +710,16 @@ def setup(self): signature_def.method_name) super(_BatchPredictDoFn, self).setup() - def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + def _check_elements(self) -> None: pass def _post_process( self, elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray] ) -> Iterable[prediction_log_pb2.PredictLog]: - input_tensor_alias = self._io_tensor_spec.input_tensor_alias + if not self._io_tensor_spec.input_tensor_types: + raise ValueError('No valid tensor types.') + input_tensor_types = self._io_tensor_spec.input_tensor_types signature_name = self._signatures[0].name batch_size = len(elements) for output_alias, output in outputs.items(): @@ -722,15 +732,16 @@ def _post_process( predict_log_tmpl = prediction_log_pb2.PredictLog() predict_log_tmpl.request.model_spec.signature_name = signature_name predict_log_tmpl.response.model_spec.signature_name = signature_name - input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias] - input_tensor_proto.dtype = tf.string.as_datatype_enum - input_tensor_proto.tensor_shape.dim.add().size = 1 - + for alias, tensor_type in input_tensor_types.items(): + input_tensor_proto = predict_log_tmpl.request.inputs[alias] + input_tensor_proto.dtype = tf.as_dtype(tensor_type).as_datatype_enum + input_tensor_proto.tensor_shape.dim.add().size = 1 + # TODO (Maxine): Fix here result = [] for i in range(batch_size): predict_log = prediction_log_pb2.PredictLog() predict_log.CopyFrom(predict_log_tmpl) - predict_log.request.inputs[input_tensor_alias].string_val.append(elements[i]) + predict_log.request.inputs[list(input_tensor_types)[0]].string_val.append(elements[i]) for output_alias, output in outputs.items(): # Mimic tensor::Split tensor_proto = tf.make_tensor_proto( @@ -748,7 +759,7 @@ def _post_process( class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on multi-head model.""" - def _check_elements(self, elements: List[Union[str, bytes]]) -> None: + def _check_elements(self) -> None: if self._data_type != DataType.EXAMPLE: raise ValueError('Multi-inference only supports tf.train.Example') @@ -963,28 +974,27 @@ def _post_process_regress( def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: """Returns IOTensorSpec from signature.""" - - if len(signature.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - input_tensor_alias = list(signature.inputs.keys())[0] - if 
list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum: + if (len(signature.inputs) == 1 and + list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum): raise ValueError( - 'Input dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, - list(signature.inputs.values())[0].dtype) + 'With 1 input, dtype is expected to be %s, got %s' % + tf.string.as_datatype_enum, + list(signature.inputs.values())[0].dtype) + input_tensor_alias = [signature.inputs.keys()] if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_classify(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_classify(signature)) elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_predict(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_predict(signature)) elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_regress(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_regress(signature)) else: raise ValueError('Signature method %s is not supported' % - signature.method_name) - return _IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) + signature.method_name) + return _IOTensorSpec(input_tensor_alias, input_tensor_names, + input_tensor_types, output_alias_tensor_names) def _signature_pre_process_classify( @@ -997,13 +1007,14 @@ def _signature_pre_process_classify( Returns: A tuple of input tensor name and output alias tensor names. """ - + if len(signature.inputs) != 1: + raise ValueError('Classify signature should have 1 and only 1 inputs') if len(signature.outputs) != 1 and len(signature.outputs) != 2: raise ValueError('Classify signature should have 1 or 2 outputs') if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: raise ValueError('No classification inputs found in SignatureDef: %s' % signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name + input_tensor_names = [signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name] output_alias_tensor_names = {} if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): @@ -1018,7 +1029,7 @@ def _signature_pre_process_classify( if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, {}, output_alias_tensor_names def _signature_pre_process_predict( @@ -1031,12 +1042,14 @@ def _signature_pre_process_predict( Returns: A tuple of input tensor name and output alias tensor names. 
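For orientation, a small self-contained sketch of the three structures this reworked signature pre-processing is meant to produce for a hypothetical two-input predict signature (the tensor and alias names are invented; the real values come from the SavedModel's SignatureDef):

import tensorflow as tf
from tensorflow.core.protobuf import meta_graph_pb2

# A toy predict SignatureDef with two float inputs and a single output.
signature = meta_graph_pb2.SignatureDef(
    method_name=tf.saved_model.PREDICT_METHOD_NAME)
signature.inputs['x'].name = 'serving_default_x:0'
signature.inputs['x'].dtype = tf.float32.as_datatype_enum
signature.inputs['y'].name = 'serving_default_y:0'
signature.inputs['y'].dtype = tf.float32.as_datatype_enum
signature.outputs['output'].name = 'StatefulPartitionedCall:0'

# The same comprehensions the patched _signature_pre_process_predict uses.
input_tensor_names = [value.name for value in signature.inputs.values()]
input_tensor_types = {key: value.dtype for key, value in signature.inputs.items()}
output_alias_tensor_names = {
    key: output.name for key, output in signature.outputs.items()}
# input_tensor_names        -> ['serving_default_x:0', 'serving_default_y:0']
# input_tensor_types        -> {'x': <DT_FLOAT enum>, 'y': <DT_FLOAT enum>}
# output_alias_tensor_names -> {'output': 'StatefulPartitionedCall:0'}
# (proto map iteration order is not guaranteed, so the list order may vary.)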
""" - - input_tensor_name = list(signature.inputs.values())[0].name + input_tensor_names = [value.name for value in signature.inputs.values()] + input_tensor_types = dict([ + (key, value.dtype) for key, value in signature.inputs.items() + ]) output_alias_tensor_names = dict([ - (key, output.name) for key, output in signature.outputs.items() + (key, output.name) for key, output in signature.outputs.items() ]) - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, input_tensor_types, output_alias_tensor_names def _signature_pre_process_regress( @@ -1049,13 +1062,14 @@ def _signature_pre_process_regress( Returns: A tuple of input tensor name and output alias tensor names. """ - + if len(signature.inputs) != 1: + raise ValueError('Regress signature should have 1 and only 1 inputs') if len(signature.outputs) != 1: raise ValueError('Regress signature should have 1 output') if tf.saved_model.REGRESS_INPUTS not in signature.inputs: raise ValueError('No regression inputs found in SignatureDef: %s' % signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name + input_tensor_names = [signature.inputs[tf.saved_model.REGRESS_INPUTS].name] if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: raise ValueError('No regression outputs found in SignatureDef: %s' % signature.outputs) @@ -1063,7 +1077,7 @@ def _signature_pre_process_regress( tf.saved_model.REGRESS_OUTPUTS: signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name } - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, {}, output_alias_tensor_names def _using_in_process_inference( diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index e6554758..cc4494ee 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -92,10 +92,6 @@ def _get_output_data_dir(self, sub_dir=None): path = os.path.join(path, sub_dir) return path - def _prepare_predict_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._predict_examples: - output_file.write(example.SerializeToString()) class RunOfflineInferenceArrowTest(RunInferenceArrowFixture): @@ -188,11 +184,6 @@ def setUp(self): tensor_representations=tfxio_multi.TensorRepresentations()) - def _prepare_multihead_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._multihead_examples: - output_file.write(example.SerializeToString()) - def _build_predict_model(self, model_path): """Exports the dummy sum predict model.""" @@ -295,7 +286,7 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, self.tensor_adapter_config_multihead) + inference_spec_type) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -305,7 +296,7 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, self.tensor_adapter_config) + inference_spec_type) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -493,7 +484,7 @@ def testTelemetry(self): 
pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, self.tensor_adapter_config_multihead)) + inference_spec_type)) run_result = pipeline.run() run_result.wait_until_finish() @@ -551,7 +542,7 @@ def _set_up_pipeline(self, inference_spec_type): self.pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, self.tensor_adapter_config)) + inference_spec_type)) def _run_inference_with_beam(self): self.pipeline_result = self.pipeline.run() From a45716bdb6de577cfbebae79b6fa29cb8f865587 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Fri, 10 Jul 2020 15:38:47 -0400 Subject: [PATCH 14/31] complete case 2 --- tfx_bsl/beam/run_inference_arrow.py | 96 ++++++++++---- tfx_bsl/beam/run_inference_arrow_test.py | 154 ++++++++++++++++------- tfx_bsl/beam/util.py | 59 +++++++++ 3 files changed, 239 insertions(+), 70 deletions(-) create mode 100644 tfx_bsl/beam/util.py diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 9fafadd4..7d502d2c 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -40,6 +40,7 @@ from googleapiclient import discovery from googleapiclient import http import numpy as np +import json import six import tensorflow as tf from tfx_bsl.beam import shared @@ -55,7 +56,7 @@ from tensorflow_serving.apis import inference_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_serving.apis import regression_pb2 - +from tensorflow_model_analysis import model_util # TODO(b/131873699): Remove once 1.x support is dropped. # pylint: disable=g-import-not-at-top @@ -66,6 +67,7 @@ except ImportError: pass + _RECORDBATCH_COLUMN = '__RAW_RECORD__' _DEFAULT_INPUT_KEY = 'examples' _METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' @@ -269,6 +271,7 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None + # For feature inputs, using serialized example for batch size def update( self, elements: List[Union[str, bytes]], latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) @@ -300,7 +303,9 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): if column_name == _RECORDBATCH_COLUMN: column_type = column_array.flatten().type if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)): - raise ValueError('Expected a list of serialized examples in bytes or as a string, got %s' % type(example)) + raise ValueError( + 'Expected a list of serialized examples in bytes or as a string, got %s' % + type(example)) serialized_examples = column_array.flatten().to_pylist() break @@ -315,19 +320,25 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): else: if (self._tensor_adapter_config is None): raise ValueError('Tensor adaptor config is required with a multi-input model') + + input_tensor_names = self._io_tensor_spec.input_tensor_names + input_tensor_alias = self._io_tensor_spec.input_tensor_alias _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) - dict_of_tensors = self._tensor_adapter.ToBatchTensors(elements) - if self._io_tensor_spec: - model_input = model_util.filter_tensors_by_input_names( - dict_of_tensors, self._io_tensor_spec.input_tensor_names) - + dict_of_tensors = _tensor_adapter.ToBatchTensors( + elements, produce_eager_tensors = False) + 
filtered_tensors = model_util.filter_tensors_by_input_names( + dict_of_tensors, input_tensor_alias) + + model_input = {} + for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): + model_input[tensor_name] = filtered_tensors[feature] return serialized_examples, model_input def process(self, elements: pa.RecordBatch) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() serialized_examples, model_input = self._extract_from_recordBatch(elements) outputs = self.run_inference(model_input) - result = self._post_process(serialized_examples, outputs) + result = self._post_process(model_input, outputs) self._metrics_collector.update( serialized_examples, self._clock.get_current_time_in_microseconds() - batch_start_time) @@ -667,12 +678,14 @@ def _check_elements(self) -> None: raise ValueError('Classify only supports tf.train.Example') def _post_process( - self, elements: Sequence[Union[str, bytes]], + self, elements: Mapping[Any, Any], outputs: Mapping[Text, np.ndarray] ) -> Iterable[Tuple[Union[str, bytes], classification_pb2.Classifications]]: + serialized_examples, = elements.values() classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) - return zip(elements, classifications) + self._io_tensor_spec.output_alias_tensor_names, + serialized_examples, outputs) + return zip(serialized_examples, classifications) @beam.typehints.with_input_types(pa.RecordBatch) @@ -689,11 +702,12 @@ def _check_elements(self) -> None: raise ValueError('Regress only supports tf.train.Example') def _post_process( - self, elements: Sequence[Union[str, bytes]], + self, elements: Mapping[Any, Any], outputs: Mapping[Text, np.ndarray] ) -> Iterable[Tuple[Union[str, bytes], regression_pb2.Regression]]: - regressions = _post_process_regress(elements, outputs) - return zip(elements, regressions) + serialized_examples, = elements.values() + regressions = _post_process_regress(serialized_examples, outputs) + return zip(serialized_examples, regressions) @beam.typehints.with_input_types(pa.RecordBatch) @@ -714,14 +728,37 @@ def _check_elements(self) -> None: pass def _post_process( - self, elements: Sequence[Union[str, bytes]], + self, elements: Mapping[Any, Any], outputs: Mapping[Text, np.ndarray] ) -> Iterable[prediction_log_pb2.PredictLog]: if not self._io_tensor_spec.input_tensor_types: raise ValueError('No valid tensor types.') + input_tensor_names = self._io_tensor_spec.input_tensor_names + input_tensor_alias = self._io_tensor_spec.input_tensor_alias input_tensor_types = self._io_tensor_spec.input_tensor_types signature_name = self._signatures[0].name - batch_size = len(elements) + + if len(input_tensor_alias) != len(input_tensor_names): + raise ValueError('Expected to have one name and one alias per tensor') + + include_request = True + if len(input_tensor_names) == 1: + serialized_examples, = elements.values() + batch_size = len(serialized_examples) + process_elements = serialized_examples + else: + # Only include request in the predictLog when the all tensors are dense + # is there a better way to check this? 
+ for tensor_name, tensor in elements.items(): + if not isinstance(tensor, np.ndarray): + include_request = False + break + + if include_request: + batch_size = len(elements[input_tensor_names[0]]) + else: + batch_size = elements[input_tensor_names[0]].shape[0] + for output_alias, output in outputs.items(): if len(output.shape) < 1 or output.shape[0] != batch_size: raise ValueError( @@ -735,13 +772,22 @@ def _post_process( for alias, tensor_type in input_tensor_types.items(): input_tensor_proto = predict_log_tmpl.request.inputs[alias] input_tensor_proto.dtype = tf.as_dtype(tensor_type).as_datatype_enum + # TODO (Maxine): fix dimension? input_tensor_proto.tensor_shape.dim.add().size = 1 - # TODO (Maxine): Fix here + result = [] for i in range(batch_size): predict_log = prediction_log_pb2.PredictLog() predict_log.CopyFrom(predict_log_tmpl) - predict_log.request.inputs[list(input_tensor_types)[0]].string_val.append(elements[i]) + + if include_request: + if len(input_tensor_alias) == 1: + alias = input_tensor_alias[0] + predict_log.request.inputs[alias].string_val.append(process_elements[i]) + else: + for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + predict_log.request.inputs[alias].float_val.append(elements[tensor_name][i]) + for output_alias, output in outputs.items(): # Mimic tensor::Split tensor_proto = tf.make_tensor_proto( @@ -764,23 +810,25 @@ def _check_elements(self) -> None: raise ValueError('Multi-inference only supports tf.train.Example') def _post_process( - self, elements: Sequence[Union[str, bytes]], + self, elements: Mapping[Any, Any], outputs: Mapping[Text, np.ndarray] ) -> Iterable[Tuple[Union[str, bytes], inference_pb2.MultiInferenceResponse]]: classifications = None regressions = None + serialized_examples, = elements.values() for signature in self._signatures: signature_def = signature.signature_def if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + self._io_tensor_spec.output_alias_tensor_names, + serialized_examples, outputs) elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: - regressions = _post_process_regress(elements, outputs) + regressions = _post_process_regress(serialized_examples, outputs) else: raise ValueError('Signature method %s is not supported for ' 'multi inference' % signature_def.method_name) result = [] - for i in range(len(elements)): + for i in range(len(serialized_examples)): response = inference_pb2.MultiInferenceResponse() for signature in self._signatures: signature_def = signature.signature_def @@ -801,7 +849,7 @@ def _post_process( if len(response.results) != len(self._signatures): raise RuntimeError('Multi inference response result length does not ' 'match the number of signatures') - result.append((elements[i], response)) + result.append((serialized_examples[i], response)) return result @@ -980,7 +1028,7 @@ def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: 'With 1 input, dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, list(signature.inputs.values())[0].dtype) - input_tensor_alias = [signature.inputs.keys()] + input_tensor_alias = [alias for alias in signature.inputs.keys()] if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: input_tensor_names, input_tensor_types, output_alias_tensor_names = ( _signature_pre_process_classify(signature)) diff --git a/tfx_bsl/beam/run_inference_arrow_test.py 
b/tfx_bsl/beam/run_inference_arrow_test.py index cc4494ee..61d50362 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -65,21 +65,8 @@ def setUp(self): pa.array([[0]], type=pa.list_(pa.float32())), serialized_example ], - ['input1', '__RAW_RECORD__'] -) + ['input1', '__RAW_RECORD__']) - tfxio = test_util.InMemoryTFExampleRecord( - schema=text_format.Parse( - """ - feature { - name: "input1" - type: FLOAT - } - """, schema_pb2.Schema()), - raw_record_column_name='__RAW_RECORD__') - self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) def _get_output_data_dir(self, sub_dir=None): test_dir = self._testMethodName @@ -123,19 +110,6 @@ def setUp(self): ['input1', '__RAW_RECORD__'] ) - tfxio = test_util.InMemoryTFExampleRecord( - schema=text_format.Parse( - """ - feature { - name: "input1" - type: FLOAT - } - """, schema_pb2.Schema()), - raw_record_column_name='__RAW_RECORD__') - self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) - self._multihead_examples = [ text_format.Parse( @@ -166,22 +140,75 @@ def setUp(self): ['x', 'y', '__RAW_RECORD__'] ) - tfxio_multi = test_util.InMemoryTFExampleRecord( - schema=text_format.Parse( + + self._multi_input_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + serialized_example_multi_input = [] + for example in self._multi_input_examples: + serialized_example_multi_input.append([example.SerializeToString()]) + self.record_batch_multi_input = pa.RecordBatch.from_arrays( + [ + pa.array([[0.8], [0.6]], type=pa.list_(pa.float32())), + pa.array([[0.2], [0.1]], type=pa.list_(pa.float32())), + serialized_example_multi_input + ], + ['x', 'y', '__RAW_RECORD__'] + ) + + tfxio = test_util.InMemoryTFExampleRecord( + schema = text_format.Parse( """ - feature { - name: "x" - type: FLOAT - } - feature { - name: "y" - type: FLOAT + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } """, schema_pb2.Schema()), raw_record_column_name='__RAW_RECORD__') - self.tensor_adapter_config_multihead = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio_multi.ArrowSchema(), - tensor_representations=tfxio_multi.TensorRepresentations()) + self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) def _build_predict_model(self, model_path): @@ -279,8 +306,18 @@ def _build_multihead_model(self, model_path): builder.save() def _run_inference_with_beam(self, example_type, inference_spec_type, - prediction_log_path): - if example_type == 'multi': + prediction_log_path, include_config = False): + if include_config: + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 
"createRecordBatch" >> beam.Create([self.record_batch_multi_input]) + | 'RunInference' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, self.tensor_adapter_config) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + elif example_type == 'multi': with beam.Pipeline() as pipeline: _ = ( pipeline @@ -435,7 +472,6 @@ def testKerasModelPredict(self): inference_model = tf.keras.models.Model(inputs, [output1, output2]) class TestKerasModel(tf.keras.Model): - def __init__(self, inference_model): super(TestKerasModel, self).__init__(name='test_keras_model') self.inference_model = inference_model @@ -445,10 +481,9 @@ def __init__(self, inference_model): ]) def call(self, serialized_example): features = { - 'input1': - tf.compat.v1.io.FixedLenFeature([1], - dtype=tf.float32, - default_value=0) + 'input1': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, + default_value=0) } input_tensor_dict = tf.io.parse_example(serialized_example, features) return inference_model(input_tensor_dict['input1']) @@ -470,8 +505,35 @@ def call(self, serialized_example): saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)), prediction_log_path) + results = self._get_results(prediction_log_path)git st + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + 'multi', + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_config = True) + results = self._get_results(prediction_log_path) self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertEqual(list(result.predict_log.request.inputs), list(['x','y'])) def testTelemetry(self): model_path = self._get_output_data_dir('model') diff --git a/tfx_bsl/beam/util.py b/tfx_bsl/beam/util.py new file mode 100644 index 00000000..4bdf4ba3 --- /dev/null +++ b/tfx_bsl/beam/util.py @@ -0,0 +1,59 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""TensorAdapter.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + + +import numpy as np +import pyarrow as pa +import pandas as pd +import typing +import json +from typing import Dict + + +_RECORDBATCH_COLUMN = '__RAW_RECORD__' + +class JSONAdapter(object): + """A JSONAdapter converts a RecordBatch to a JSON strings. + + The conversion will take in a recordbatch that contains features from a + tf.train.Example and will return a list of dict like string (JSON) where + each item represent + The conversion is determined by both the Arrow schema and the + TensorRepresentations, which must be provided at the initialization time. + Each TensorRepresentation contains the information needed to translates one + or more columns in a RecordBatch of the given Arrow schema into a TF Tensor + or CompositeTensor. They are contained in a Dict whose keys are + the names of the tensors, which will be the keys of the Dict produced by + ToBatchTensors(). + """ + + + def ToJSON(self, record_batch: pa.RecordBatch) -> Dict[Text, Any]: + """Returns a JSON string translated from `record_batch`. + + Args: + record_batch: input RecordBatch. + """ + + df = record_batch.to_pandas() + if _RECORDBATCH_COLUMN in df.columns: + df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) + + return json.loads(df.to_json(orient='records')) \ No newline at end of file From c42ce34647c17057294973fecbe364f8d02f8a8c Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Mon, 13 Jul 2020 12:27:29 -0400 Subject: [PATCH 15/31] fix typo and renamed util to avoid conflict --- tfx_bsl/beam/{util.py => inference_util.py} | 4 ++-- tfx_bsl/beam/run_inference_arrow.py | 5 ++++- tfx_bsl/beam/run_inference_arrow_test.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) rename tfx_bsl/beam/{util.py => inference_util.py} (95%) diff --git a/tfx_bsl/beam/util.py b/tfx_bsl/beam/inference_util.py similarity index 95% rename from tfx_bsl/beam/util.py rename to tfx_bsl/beam/inference_util.py index 4bdf4ba3..53d3c98c 100644 --- a/tfx_bsl/beam/util.py +++ b/tfx_bsl/beam/inference_util.py @@ -24,7 +24,7 @@ import pandas as pd import typing import json -from typing import Dict +from typing import List, Text _RECORDBATCH_COLUMN = '__RAW_RECORD__' @@ -45,7 +45,7 @@ class JSONAdapter(object): """ - def ToJSON(self, record_batch: pa.RecordBatch) -> Dict[Text, Any]: + def ToJSON(self, record_batch: pa.RecordBatch) -> List[Text]: """Returns a JSON string translated from `record_batch`. 
Args: diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 7d502d2c..4ab52d82 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -44,6 +44,7 @@ import six import tensorflow as tf from tfx_bsl.beam import shared +from tfx_bsl.beam import inference_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util from tfx_bsl.tfxio import tensor_adapter @@ -314,6 +315,8 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): model_input = None if self._io_tensor_spec is None: # Case when we are running remote inference + # _jsonAdaptor = inference_util.JSONAdapter() + # model_input = _jsonAdaptor.ToJSON(elements) model_input = serialized_examples elif (len(self._io_tensor_spec.input_tensor_names) == 1): model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} @@ -355,7 +358,7 @@ def run_inference( @abc.abstractmethod def _post_process( - self, elements: List[Union[str, bytes]], outputs: Any) -> Iterable[Any]: + self, elements: Mapping[Any, Any], outputs: Any) -> Iterable[Any]: raise NotImplementedError diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 61d50362..2bea6066 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -505,7 +505,7 @@ def call(self, serialized_example): saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)), prediction_log_path) - results = self._get_results(prediction_log_path)git st + results = self._get_results(prediction_log_path) self.assertLen(results, 2) def testKerasModelPredictMultiTensor(self): From 4e8651c618bcbf30b7955b460b6d069815e025c5 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 15 Jul 2020 12:29:44 -0400 Subject: [PATCH 16/31] add APIs and use recordbatch to json module --- tfx_bsl/beam/inference_util.py | 5 +- tfx_bsl/beam/run_inference_arrow.py | 46 +------------- tfx_bsl/beam/run_inference_arrow_test.py | 18 +++++- tfx_bsl/public/beam/run_inference.py | 80 +++++++++++++++++++++++- 4 files changed, 101 insertions(+), 48 deletions(-) diff --git a/tfx_bsl/beam/inference_util.py b/tfx_bsl/beam/inference_util.py index 53d3c98c..57cfd321 100644 --- a/tfx_bsl/beam/inference_util.py +++ b/tfx_bsl/beam/inference_util.py @@ -22,8 +22,9 @@ import numpy as np import pyarrow as pa import pandas as pd -import typing +import base64 import json +import typing from typing import List, Text @@ -53,6 +54,8 @@ def ToJSON(self, record_batch: pa.RecordBatch) -> List[Text]: """ df = record_batch.to_pandas() + as_binary = df.columns.str.endswith("_bytes") + df.loc[:, as_binary] = df.loc[:, as_binary].applymap(lambda x: {'b64': base64.b64encode(x).decode()}) if _RECORDBATCH_COLUMN in df.columns: df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index 4ab52d82..cf667d94 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -19,7 +19,6 @@ from __future__ import print_function import abc -import base64 import collections import os import platform @@ -315,9 +314,8 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): model_input = None if self._io_tensor_spec is None: # Case when we are running remote inference - # _jsonAdaptor = inference_util.JSONAdapter() - # model_input = _jsonAdaptor.ToJSON(elements) - model_input = serialized_examples + _jsonAdaptor = 
inference_util.JSONAdapter() + model_input = _jsonAdaptor.ToJSON(elements) elif (len(self._io_tensor_spec.input_tensor_names) == 1): model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} else: @@ -459,47 +457,9 @@ def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: def _prepare_instances( cls, elements: List[Union[str, bytes]] ) -> Generator[Mapping[Text, Any], None, None]: - for example in elements: - instance = {} - tfexample = tf.train.Example.FromString(example) - - for input_name, feature in tfexample.features.feature.items(): - attr_name = feature.WhichOneof('kind') - if attr_name is None: - continue - attr = getattr(feature, attr_name) - values = cls._parse_feature_content(attr.value, attr_name, - cls._sending_as_binary(input_name)) - # Flatten a sequence if its length is 1 - values = (values[0] if len(values) == 1 else values) - instance[input_name] = values + for instance in elements: yield instance - @staticmethod - def _sending_as_binary(input_name: Text) -> bool: - """Whether data should be sent as binary.""" - return input_name.endswith('_bytes') - - @staticmethod - def _parse_feature_content(values: Sequence[Any], attr_name: Text, - as_binary: bool) -> Sequence[Any]: - """Parse the content of tf.train.Feature object. - - If bytes_list, parse a list of bytes-like objects to a list of strings so - that it would be JSON serializable. - - If float_list or int64_list, do nothing. - - If data should be sent as binary, mark it as binary by replacing it with - a single attribute named 'b64'. - """ - if as_binary: - return [{'b64': base64.b64encode(x).decode()} for x in values] - elif attr_name == 'bytes_list': - return [x.decode() for x in values] - else: - return values - def _check_elements(self) -> None: # TODO(b/151468119): support tf.train.SequenceExample if self._data_type != DataType.EXAMPLE: diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 2bea6066..452b4dab 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -35,6 +35,7 @@ from six.moves import http_client import tensorflow as tf from tfx_bsl.beam import run_inference_arrow +from tfx_bsl.beam import inference_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter @@ -533,7 +534,7 @@ def testKerasModelPredictMultiTensor(self): self.assertLen(results, 2) for result in results: self.assertLen(result.predict_log.request.inputs, 2) - self.assertEqual(list(result.predict_log.request.inputs), list(['x','y'])) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) def testTelemetry(self): model_path = self._get_output_data_dir('model') @@ -689,8 +690,19 @@ def test_request_body_with_binary_data(self): feature { key: "y" value { int64_list { value: [1, 2] }}} } """, tf.train.Example()) - result = list( - run_inference_arrow._RemotePredictDoFn._prepare_instances([example.SerializeToString()])) + + serialized_example_remote = [example.SerializeToString()] + record_batch_remote = pa.RecordBatch.from_arrays( + [ + pa.array(["ASa8asdf"], type=pa.binary()), + pa.array(["JLK7ljk3"], type=pa.utf8()), + pa.array([[1, 2]], type=pa.list_(pa.float32())), + ], + ['x_bytes', 'x', 'y'] + ) + + _jsonAdaptor = inference_util.JSONAdapter() + result = list(_jsonAdaptor.ToJSON(record_batch_remote)) self.assertEqual([ { 'x_bytes': { diff --git a/tfx_bsl/public/beam/run_inference.py 
b/tfx_bsl/public/beam/run_inference.py index d27ab453..93941273 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -21,12 +21,20 @@ import apache_beam as beam import tensorflow as tf +import pyarrow as pa +from typing import Union, Optional +from tfx_bsl.tfxio import test_util +from tfx_bsl.tfxio import tensor_adapter +from tfx_bsl.tfxio import raw_tf_record from tfx_bsl.beam import run_inference +from tfx_bsl.beam import run_inference_arrow from tfx_bsl.public.proto import model_spec_pb2 -from typing import Union from tensorflow_serving.apis import prediction_log_pb2 +from tensorflow_metadata.proto.v0 import schema_pb2 +_RECORDBATCH_COLUMN = '__RAW_RECORD__' + @beam.ptransform_fn @beam.typehints.with_input_types(Union[tf.train.Example, tf.train.SequenceExample]) @@ -60,3 +68,73 @@ def RunInference( # pylint: disable=invalid-name return ( examples | 'RunInferenceImpl' >> run_inference.RunInferenceImpl(inference_spec_type)) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(Union[tf.train.Example, + tf.train.SequenceExample]) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceArrow( # pylint: disable=invalid-name + file_path, + inference_spec_type: model_spec_pb2.InferenceSpecType, + schema: Optional[schema_pb2.Schema] = None +) -> beam.pvalue.PCollection: + """Run inference with a model. + + There are two types of inference you can perform using this PTransform: + 1. In-process inference from a SavedModel instance. Used when + `saved_model_spec` field is set in `inference_spec_type`. + 2. Remote inference by using a service endpoint. Used when + `ai_platform_prediction_model_spec` field is set in + `inference_spec_type`. + + Args: + file_path: File Path for which the examples are stored. + inference_spec_type: Model inference endpoint. + + Returns: + A PCollection containing prediction logs. + """ + with beam.Pipeline(options=PipelineOptions()) as pipeline: + tfxio = test_util.InMemoryTFExampleRecord( + schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + converter = raw_tf_record.RawTfRecordTFXIO( + file_path, raw_record_column_name=_RECORDBATCH_COLUMN) + + return (pipeline + | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() + | "RunInferenceImpl" >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, tensor_adapter_config=tensor_adapter_config)) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceRecord( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None +) -> beam.pvalue.PCollection: + """Run inference with a model. + + There are two types of inference you can perform using this PTransform: + 1. In-process inference from a SavedModel instance. Used when + `saved_model_spec` field is set in `inference_spec_type`. + 2. Remote inference by using a service endpoint. Used when + `ai_platform_prediction_model_spec` field is set in + `inference_spec_type`. + + Args: + examples: A PCollection containing RecordBatch. + inference_spec_type: Model inference endpoint. + + Returns: + A PCollection containing prediction logs. 
+ """ + + return ( + examples | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, tensor_adapter_config)) \ No newline at end of file From 89878b3af8a59b855799b5552ee70b99cf185649 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 16 Jul 2020 13:02:14 -0400 Subject: [PATCH 17/31] fix docstring --- tfx_bsl/beam/inference_util.py | 11 +++-------- tfx_bsl/public/beam/run_inference.py | 4 ++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/tfx_bsl/beam/inference_util.py b/tfx_bsl/beam/inference_util.py index 57cfd321..af3b555c 100644 --- a/tfx_bsl/beam/inference_util.py +++ b/tfx_bsl/beam/inference_util.py @@ -35,14 +35,9 @@ class JSONAdapter(object): The conversion will take in a recordbatch that contains features from a tf.train.Example and will return a list of dict like string (JSON) where - each item represent - The conversion is determined by both the Arrow schema and the - TensorRepresentations, which must be provided at the initialization time. - Each TensorRepresentation contains the information needed to translates one - or more columns in a RecordBatch of the given Arrow schema into a TF Tensor - or CompositeTensor. They are contained in a Dict whose keys are - the names of the tensors, which will be the keys of the Dict produced by - ToBatchTensors(). + each item is a JSON representation of an example. + + - return format: [{ feature1: value1, ... }, ...] """ diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 93941273..80953a27 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -22,7 +22,7 @@ import apache_beam as beam import tensorflow as tf import pyarrow as pa -from typing import Union, Optional +from typing import Union, Text, Optional from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter from tfx_bsl.tfxio import raw_tf_record @@ -75,7 +75,7 @@ def RunInference( # pylint: disable=invalid-name tf.train.SequenceExample]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceArrow( # pylint: disable=invalid-name - file_path, + file_path: Text, inference_spec_type: model_spec_pb2.InferenceSpecType, schema: Optional[schema_pb2.Schema] = None ) -> beam.pvalue.PCollection: From 0cdf874a9f89bf134b336e44a5320fe23260f67c Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 16 Jul 2020 14:07:55 -0400 Subject: [PATCH 18/31] add missing case --- tfx_bsl/public/beam/run_inference.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 80953a27..672ebf59 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -91,19 +91,22 @@ def RunInferenceArrow( # pylint: disable=invalid-name Args: file_path: File Path for which the examples are stored. inference_spec_type: Model inference endpoint. + Schema [optional]: required for models that requires + multi-tensor inputs. Returns: A PCollection containing prediction logs. 
""" - with beam.Pipeline(options=PipelineOptions()) as pipeline: + converter = raw_tf_record.RawTfRecordTFXIO( + file_path, raw_record_column_name=_RECORDBATCH_COLUMN) + if schema: tfxio = test_util.InMemoryTFExampleRecord( schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = tensor_adapter.TensorAdapterConfig( arrow_schema=tfxio.ArrowSchema(), tensor_representations=tfxio.TensorRepresentations()) - converter = raw_tf_record.RawTfRecordTFXIO( - file_path, raw_record_column_name=_RECORDBATCH_COLUMN) + with beam.Pipeline() as pipeline: return (pipeline | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() | "RunInferenceImpl" >> run_inference_arrow.RunInferenceImpl( @@ -137,4 +140,4 @@ def RunInferenceRecord( # pylint: disable=invalid-name return ( examples | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, tensor_adapter_config)) \ No newline at end of file + inference_spec_type, tensor_adapter_config)) From c7e2237bc9da7158172b4b19e0312c33e2859f93 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Fri, 17 Jul 2020 15:41:59 -0400 Subject: [PATCH 19/31] add model analysis function to util --- tfx_bsl/beam/bsl_util.py | 97 ++++++++++++++++++++++++ tfx_bsl/beam/inference_util.py | 57 -------------- tfx_bsl/beam/run_inference_arrow.py | 8 +- tfx_bsl/beam/run_inference_arrow_test.py | 5 +- tfx_bsl/public/beam/run_inference.py | 2 +- 5 files changed, 103 insertions(+), 66 deletions(-) create mode 100644 tfx_bsl/beam/bsl_util.py delete mode 100644 tfx_bsl/beam/inference_util.py diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py new file mode 100644 index 00000000..25633365 --- /dev/null +++ b/tfx_bsl/beam/bsl_util.py @@ -0,0 +1,97 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""TensorAdapter.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + + +import numpy as np +import pyarrow as pa +import pandas as pd +import base64 +import json +import typing +from typing import Dict, List, Text, Any, Set, Optional + +_RECORDBATCH_COLUMN = '__RAW_RECORD__' +KERAS_INPUT_SUFFIX = '_input' + + +def RecordToJSON(record_batch: pa.RecordBatch) -> List[Text]: + """Returns a JSON string translated from `record_batch`. + + The conversion will take in a recordbatch that contains features from a + tf.train.Example and will return a list of dict like string (JSON) where + each item is a JSON representation of an example. + - return format: [{ feature1: value1, ... }, ...] + + Args: + record_batch: input RecordBatch. 
+ """ + df = record_batch.to_pandas() + as_binary = df.columns.str.endswith("_bytes") + df.loc[:, as_binary] = df.loc[:, as_binary].applymap(lambda x: {'b64': base64.b64encode(x).decode()}) + if _RECORDBATCH_COLUMN in df.columns: + df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) + + return json.loads(df.to_json(orient='records')) + +def find_input_name_in_features(features: Set[Text], + input_name: Text) -> Optional[Text]: + """Maps input name to an entry in features. Returns None if not found.""" + if input_name in features: + return input_name + # Some keras models prepend '_input' to the names of the inputs + # so try under '_input' as well. + elif (input_name.endswith(KERAS_INPUT_SUFFIX) and + input_name[:-len(KERAS_INPUT_SUFFIX)] in features): + return input_name[:-len(KERAS_INPUT_SUFFIX)] + return None + + +def filter_tensors_by_input_names( + tensors: Dict[Text, Any], + input_names: List[Text]) -> Optional[Dict[Text, Any]]: + """Filter tensors by input names. + In case we don't find the specified input name in the tensors and there + exists only one input name, we assume we are feeding serialized examples to + the model and return None. + Args: + tensors: Dict of tensors. + input_names: List of input names. + Returns: + Filtered tensors. + Raises: + RuntimeError: When the specified input tensor cannot be found. + """ + + if not input_names: + return None + result = {} + tensor_keys = set(tensors.keys()) + for name in input_names: + tensor_name = find_input_name_in_features(tensor_keys, name) + if tensor_name is None: + # This should happen only in the case where the model takes serialized + # examples as input. Else raise an exception. + if len(input_names) == 1: + return None + raise RuntimeError( + 'Input tensor not found: {}. Existing keys: {}.'.format( + name, ','.join(tensors.keys()))) + result[name] = tensors[tensor_name] + return result diff --git a/tfx_bsl/beam/inference_util.py b/tfx_bsl/beam/inference_util.py deleted file mode 100644 index af3b555c..00000000 --- a/tfx_bsl/beam/inference_util.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""TensorAdapter.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - - -import numpy as np -import pyarrow as pa -import pandas as pd -import base64 -import json -import typing -from typing import List, Text - - -_RECORDBATCH_COLUMN = '__RAW_RECORD__' - -class JSONAdapter(object): - """A JSONAdapter converts a RecordBatch to a JSON strings. - - The conversion will take in a recordbatch that contains features from a - tf.train.Example and will return a list of dict like string (JSON) where - each item is a JSON representation of an example. - - - return format: [{ feature1: value1, ... }, ...] - """ - - - def ToJSON(self, record_batch: pa.RecordBatch) -> List[Text]: - """Returns a JSON string translated from `record_batch`. - - Args: - record_batch: input RecordBatch. 
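To make the `[{ feature1: value1, ... }]` return format above concrete, here is a self-contained sketch of the conversion RecordToJSON performs, using a toy RecordBatch modelled on the remote-inference test fixture (column names are illustrative; only `*_bytes` columns are base64-wrapped, and the raw-record column is dropped if present):

import base64
import json

import pyarrow as pa

# A toy batch: one binary feature, one string feature, one float-list feature.
record_batch = pa.RecordBatch.from_arrays(
    [
        pa.array([b'ASa8asdf'], type=pa.binary()),
        pa.array(['JLK7ljk3'], type=pa.utf8()),
        pa.array([[1.0, 2.0]], type=pa.list_(pa.float32())),
    ],
    ['x_bytes', 'x', 'y'])

# The same steps RecordToJSON applies.
df = record_batch.to_pandas()
as_binary = df.columns.str.endswith('_bytes')
df.loc[:, as_binary] = df.loc[:, as_binary].applymap(
    lambda value: {'b64': base64.b64encode(value).decode()})
df = df.drop(labels='__RAW_RECORD__', axis=1, errors='ignore')
instances = json.loads(df.to_json(orient='records'))
# instances == [{'x_bytes': {'b64': 'QVNhOGFzZGY='},
#                'x': 'JLK7ljk3',
#                'y': [1.0, 2.0]}]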
- """ - - df = record_batch.to_pandas() - as_binary = df.columns.str.endswith("_bytes") - df.loc[:, as_binary] = df.loc[:, as_binary].applymap(lambda x: {'b64': base64.b64encode(x).decode()}) - if _RECORDBATCH_COLUMN in df.columns: - df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) - - return json.loads(df.to_json(orient='records')) \ No newline at end of file diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index cf667d94..a701db1e 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -43,7 +43,7 @@ import six import tensorflow as tf from tfx_bsl.beam import shared -from tfx_bsl.beam import inference_util +from tfx_bsl.beam import bsl_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util from tfx_bsl.tfxio import tensor_adapter @@ -56,7 +56,6 @@ from tensorflow_serving.apis import inference_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_serving.apis import regression_pb2 -from tensorflow_model_analysis import model_util # TODO(b/131873699): Remove once 1.x support is dropped. # pylint: disable=g-import-not-at-top @@ -314,8 +313,7 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): model_input = None if self._io_tensor_spec is None: # Case when we are running remote inference - _jsonAdaptor = inference_util.JSONAdapter() - model_input = _jsonAdaptor.ToJSON(elements) + model_input = bsl_util.RecordToJSON(elements) elif (len(self._io_tensor_spec.input_tensor_names) == 1): model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} else: @@ -327,7 +325,7 @@ def _extract_from_recordBatch(self, elements: pa.RecordBatch): _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) dict_of_tensors = _tensor_adapter.ToBatchTensors( elements, produce_eager_tensors = False) - filtered_tensors = model_util.filter_tensors_by_input_names( + filtered_tensors = bsl_util.filter_tensors_by_input_names( dict_of_tensors, input_tensor_alias) model_input = {} diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index 452b4dab..c0d9b5c3 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -35,7 +35,7 @@ from six.moves import http_client import tensorflow as tf from tfx_bsl.beam import run_inference_arrow -from tfx_bsl.beam import inference_util +from tfx_bsl.beam import bsl_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter @@ -701,8 +701,7 @@ def test_request_body_with_binary_data(self): ['x_bytes', 'x', 'y'] ) - _jsonAdaptor = inference_util.JSONAdapter() - result = list(_jsonAdaptor.ToJSON(record_batch_remote)) + result = list(bsl_util.RecordToJSON(record_batch_remote)) self.assertEqual([ { 'x_bytes': { diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 672ebf59..6cf85bc2 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -91,7 +91,7 @@ def RunInferenceArrow( # pylint: disable=invalid-name Args: file_path: File Path for which the examples are stored. inference_spec_type: Model inference endpoint. - Schema [optional]: required for models that requires + Schema [optional]: required for models that requires multi-tensor inputs. 
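Relatedly, `bsl_util.filter_tensors_by_input_names` (added above) is what lets the DoFn line up TensorAdapter output with the model's input names, including the `_input` suffix that Keras sometimes appends. A rough sketch of its intended behaviour, assuming the patched tfx_bsl is importable and using plain lists in place of real tensors:

from tfx_bsl.beam import bsl_util

tensors = {'x': [[0.8], [0.6]], 'y': [[0.2], [0.1]]}

# Exact-name match: every requested input is found as-is.
assert bsl_util.filter_tensors_by_input_names(tensors, ['x', 'y']) == tensors

# Keras-style names: 'x_input' falls back to the 'x' feature.
assert bsl_util.filter_tensors_by_input_names(tensors, ['x_input']) == {
    'x_input': [[0.8], [0.6]]}

# A single unknown input name signals a model that takes serialized examples.
assert bsl_util.filter_tensors_by_input_names(tensors, ['examples']) is None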
Returns: From 42fab7e67d6f3dfa69ff9d96384cf3486151afaa Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 22 Jul 2020 12:17:48 -0400 Subject: [PATCH 20/31] update API and create constate file --- tfx_bsl/beam/bsl_constants.py | 6 ++ tfx_bsl/beam/bsl_util.py | 8 +-- tfx_bsl/beam/run_inference_arrow.py | 22 ++----- tfx_bsl/beam/run_inference_arrow_test.py | 15 ++--- tfx_bsl/beam/run_inference_test.py | 1 - tfx_bsl/public/beam/run_inference.py | 74 +++++++++++++++--------- 6 files changed, 70 insertions(+), 56 deletions(-) create mode 100644 tfx_bsl/beam/bsl_constants.py diff --git a/tfx_bsl/beam/bsl_constants.py b/tfx_bsl/beam/bsl_constants.py new file mode 100644 index 00000000..caaba5aa --- /dev/null +++ b/tfx_bsl/beam/bsl_constants.py @@ -0,0 +1,6 @@ +_RECORDBATCH_COLUMN = '__RAW_RECORD__' +KERAS_INPUT_SUFFIX = '_input' + +class DataType(object): + EXAMPLE = 'EXAMPLE' + SEQUENCEEXAMPLE = 'SEQUENCEEXAMPLE' diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index 25633365..ee06384d 100644 --- a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""TensorAdapter.""" +"""TFX-BSL util""" from __future__ import absolute_import from __future__ import division @@ -26,9 +26,8 @@ import json import typing from typing import Dict, List, Text, Any, Set, Optional - -_RECORDBATCH_COLUMN = '__RAW_RECORD__' -KERAS_INPUT_SUFFIX = '_input' +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN +from tfx_bsl.beam.bsl_constants import KERAS_INPUT_SUFFIX def RecordToJSON(record_batch: pa.RecordBatch) -> List[Text]: @@ -50,6 +49,7 @@ def RecordToJSON(record_batch: pa.RecordBatch) -> List[Text]: return json.loads(df.to_json(orient='records')) + def find_input_name_in_features(features: Set[Text], input_name: Text) -> Optional[Text]: """Maps input name to an entry in features. Returns None if not found.""" diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py index a701db1e..0ffbc7b5 100644 --- a/tfx_bsl/beam/run_inference_arrow.py +++ b/tfx_bsl/beam/run_inference_arrow.py @@ -50,6 +50,9 @@ from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ Tuple, Union, Optional +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN +from tfx_bsl.beam.bsl_constants import DataType + # TODO(b/140306674): stop using the internal TF API. from tensorflow.python.saved_model import loader_impl from tensorflow_serving.apis import classification_pb2 @@ -67,7 +70,6 @@ pass -_RECORDBATCH_COLUMN = '__RAW_RECORD__' _DEFAULT_INPUT_KEY = 'examples' _METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' _METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' @@ -90,10 +92,6 @@ class OperationType(object): PREDICTION = 'PREDICTION' MULTIHEAD = 'MULTIHEAD' -class DataType(object): - EXAMPLE = 'EXAMPLE' - SEQUENCEEXAMPLE = 'SEQUENCEEXAMPLE' - @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @@ -101,6 +99,7 @@ class DataType(object): def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, + data_type: DataType, tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None ) -> beam.pvalue.PCollection: """Implementation of RunInference API. 
@@ -120,10 +119,6 @@ def RunInferenceImpl( # pylint: disable=invalid-name """ logging.info('RunInference on model: %s', inference_spec_type) - # TODO (Maxine): either determine data type or take it as an input - # data_type = _get_data_type(examples) - - data_type = DataType.EXAMPLE operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: return examples | 'Classify' >> _Classify( @@ -1150,15 +1145,6 @@ def _get_operation_type( return OperationType.PREDICTION -def _get_data_type(elements: Sequence[Any]) -> Text: - if all(isinstance(elements, tf.train.Example)): - return DataType.EXAMPLE - elif all(isinstance(element, tf.train.SequenceExample)): - return DataType.SEQUENCEEXAMPLE - else: - return DataType.EXAMPLE - - def _get_meta_graph_def(saved_model_pb: _SavedModel, tags: Sequence[Text]) -> _MetaGraphDef: """Returns MetaGraphDef from SavedModel.""" diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py index c0d9b5c3..441060e0 100644 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ b/tfx_bsl/beam/run_inference_arrow_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for tfx_bsl.run_inference.""" +"""Tests for tfx_bsl.run_inference_arrow.""" from __future__ import absolute_import from __future__ import division @@ -34,8 +34,9 @@ from googleapiclient import http from six.moves import http_client import tensorflow as tf -from tfx_bsl.beam import run_inference_arrow from tfx_bsl.beam import bsl_util +from tfx_bsl.beam import run_inference_arrow +from tfx_bsl.beam.bsl_constants import DataType from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter @@ -314,7 +315,7 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multi_input]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, self.tensor_adapter_config) + inference_spec_type, DataType.EXAMPLE, self.tensor_adapter_config) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -324,7 +325,7 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type) + inference_spec_type, DataType.EXAMPLE) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -334,7 +335,7 @@ def _run_inference_with_beam(self, example_type, inference_spec_type, pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type) + inference_spec_type, DataType.EXAMPLE) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -547,7 +548,7 @@ def testTelemetry(self): pipeline | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type)) + inference_spec_type, DataType.EXAMPLE)) run_result = pipeline.run() run_result.wait_until_finish() @@ -605,7 +606,7 @@ def 
_set_up_pipeline(self, inference_spec_type): self.pipeline | "createRecordBatch" >> beam.Create([self.record_batch]) | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type)) + inference_spec_type, DataType.EXAMPLE)) def _run_inference_with_beam(self): self.pipeline_result = self.pipeline.run() diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 73251603..8601dc30 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -37,7 +37,6 @@ from tfx_bsl.public.proto import model_spec_pb2 from google.protobuf import text_format - from tensorflow_serving.apis import prediction_log_pb2 diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 6cf85bc2..788235e0 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -25,23 +25,25 @@ from typing import Union, Text, Optional from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter -from tfx_bsl.tfxio import raw_tf_record +from tfx_bsl.tfxio import tf_example_record +from tfx_bsl.tfxio import tf_sequence_example_record from tfx_bsl.beam import run_inference from tfx_bsl.beam import run_inference_arrow from tfx_bsl.public.proto import model_spec_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_metadata.proto.v0 import schema_pb2 +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN +from tfx_bsl.beam.bsl_constants import DataType -_RECORDBATCH_COLUMN = '__RAW_RECORD__' @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(tf.train.Example) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInference( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType + inference_spec_type: model_spec_pb2.InferenceSpecType, + schema: Optional[schema_pb2.Schema] = None ) -> beam.pvalue.PCollection: """Run inference with a model. @@ -52,30 +54,43 @@ def RunInference( # pylint: disable=invalid-name `ai_platform_prediction_model_spec` field is set in `inference_spec_type`. - TODO(b/131873699): Add support for the following features: - 1. Bytes as Input. - 2. PTable Input. - 3. Models as SideInput. - Args: examples: A PCollection containing examples. inference_spec_type: Model inference endpoint. + Schema [optional]: required for models that requires + multi-tensor inputs. Returns: A PCollection containing prediction logs. 
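For illustration, the reworked public transform above could be applied roughly like this (a sketch only: the model path and output prefix are placeholders, and a schema would be passed for multi-tensor models):

import apache_beam as beam
import tensorflow as tf
from tensorflow_serving.apis import prediction_log_pb2

from tfx_bsl.public.beam import run_inference
from tfx_bsl.public.proto import model_spec_pb2

# A hypothetical serving SavedModel that accepts serialized tf.Examples.
inference_spec_type = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/tmp/my_saved_model'))

example = tf.train.Example()
example.features.feature['input1'].float_list.value.append(0.5)

with beam.Pipeline() as pipeline:
  _ = (
      pipeline
      | 'CreateExamples' >> beam.Create([example])
      # schema=... would be supplied here for a multi-tensor model.
      | 'RunInference' >> run_inference.RunInference(inference_spec_type)
      | 'WritePredictions' >> beam.io.WriteToTFRecord(
          '/tmp/predictions',
          coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))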
""" - return ( - examples | - 'RunInferenceImpl' >> run_inference.RunInferenceImpl(inference_spec_type)) + data_type = DataType.EXAMPLE + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + + if schema: + tfxio = test_util.InMemoryTFExampleRecord( + schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + return (examples + | 'ParseExamples' >> beam.Map(tf.train.Example.SerializeToString) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, data_type, + tensor_adapter_config=tensor_adapter_config)) @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(tf.train.SequenceExample) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceArrow( # pylint: disable=invalid-name - file_path: Text, +def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, schema: Optional[schema_pb2.Schema] = None ) -> beam.pvalue.PCollection: @@ -89,7 +104,7 @@ def RunInferenceArrow( # pylint: disable=invalid-name `inference_spec_type`. Args: - file_path: File Path for which the examples are stored. + examples: A PCollection containing sequence examples. inference_spec_type: Model inference endpoint. Schema [optional]: required for models that requires multi-tensor inputs. @@ -97,8 +112,14 @@ def RunInferenceArrow( # pylint: disable=invalid-name Returns: A PCollection containing prediction logs. 
""" - converter = raw_tf_record.RawTfRecordTFXIO( - file_path, raw_record_column_name=_RECORDBATCH_COLUMN) + + data_type = DataType.SEQUENCEEXAMPLE + converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + if schema: tfxio = test_util.InMemoryTFExampleRecord( schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) @@ -106,17 +127,18 @@ def RunInferenceArrow( # pylint: disable=invalid-name arrow_schema=tfxio.ArrowSchema(), tensor_representations=tfxio.TensorRepresentations()) - with beam.Pipeline() as pipeline: - return (pipeline - | "GetRawRecordAndConvertToRecordBatch" >> converter.BeamSource() - | "RunInferenceImpl" >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, tensor_adapter_config=tensor_adapter_config)) + return (examples + | 'ParseExamples' >> beam.Map(tf.train.Example.SerializeToString) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + inference_spec_type, data_type, + tensor_adapter_config=tensor_adapter_config)) @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceRecord( # pylint: disable=invalid-name +def RunInferenceOnRecordBatch( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None From 353604a2c365737b11472034da498e7ed1a871c4 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 23 Jul 2020 18:02:09 -0400 Subject: [PATCH 21/31] include TFXIO module in tests and create and tested APIS --- tfx_bsl/beam/bsl_util.py | 9 +- tfx_bsl/beam/run_inference.py | 522 +++++---- tfx_bsl/beam/run_inference_arrow.py | 1216 -------------------- tfx_bsl/beam/run_inference_arrow_test.py | 718 ------------ tfx_bsl/beam/run_inference_record_batch.py | 57 + tfx_bsl/beam/run_inference_test.py | 217 +++- tfx_bsl/public/beam/run_inference.py | 43 +- 7 files changed, 534 insertions(+), 2248 deletions(-) delete mode 100644 tfx_bsl/beam/run_inference_arrow.py delete mode 100644 tfx_bsl/beam/run_inference_arrow_test.py create mode 100644 tfx_bsl/beam/run_inference_record_batch.py diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index ee06384d..b7d46576 100644 --- a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -41,12 +41,19 @@ def RecordToJSON(record_batch: pa.RecordBatch) -> List[Text]: Args: record_batch: input RecordBatch. 
""" + def flatten(element: List[Any]): + if len(element) == 1: + return element[0] + return element + df = record_batch.to_pandas() as_binary = df.columns.str.endswith("_bytes") - df.loc[:, as_binary] = df.loc[:, as_binary].applymap(lambda x: {'b64': base64.b64encode(x).decode()}) + df.loc[:, as_binary] = df.loc[:, as_binary].applymap( + lambda values: [{'b64': base64.b64encode(x).decode()} for x in values]) if _RECORDBATCH_COLUMN in df.columns: df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) + df = df.applymap(lambda x: flatten(x)) return json.loads(df.to_json(orient='records')) diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index 6987a15d..320ac1da 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -19,7 +19,6 @@ from __future__ import print_function import abc -import base64 import collections import os import platform @@ -32,6 +31,7 @@ from absl import logging import apache_beam as beam +import pyarrow as pa from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.utils import retry @@ -39,13 +39,19 @@ from googleapiclient import discovery from googleapiclient import http import numpy as np +import json import six import tensorflow as tf from tfx_bsl.beam import shared +from tfx_bsl.beam import bsl_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util +from tfx_bsl.tfxio import tensor_adapter from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ - Tuple, Union + Tuple, Union, Optional + +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN +from tfx_bsl.beam.bsl_constants import DataType # TODO(b/140306674): stop using the internal TF API. from tensorflow.python.saved_model import loader_impl @@ -54,7 +60,6 @@ from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_serving.apis import regression_pb2 - # TODO(b/131873699): Remove once 1.x support is dropped. # pylint: disable=g-import-not-at-top try: @@ -64,6 +69,7 @@ except ImportError: pass + _DEFAULT_INPUT_KEY = 'examples' _METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' _METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' @@ -73,19 +79,11 @@ _SECOND_TO_MICROSECOND = 1000000 _REMOTE_INFERENCE_NUM_RETRIES = 5 -# We define the following aliases of Any because the actual types are not -# public. +# We define the following aliases of Any because the actual types are not public. _SignatureDef = Any _MetaGraphDef = Any _SavedModel = Any -_BulkInferResult = Union[prediction_log_pb2.PredictLog, - Tuple[tf.train.Example, regression_pb2.Regression], - Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse], - Tuple[tf.train.Example, - classification_pb2.Classifications]] - # TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 class OperationType(object): @@ -96,18 +94,21 @@ class OperationType(object): @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceImpl( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type: Text, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None ) -> beam.pvalue.PCollection: """Implementation of RunInference API. 
Args: - examples: A PCollection containing examples. + examples: A PCollection containing RecordBatch of serialized examples. inference_spec_type: Model inference endpoint. + tensor_adapter_config [Optional]: Tensor adapter config which specifies how to + obtain tensors from the Arrow RecordBatch. + - Not required when running inference with remote model or 1 input Returns: A PCollection containing prediction logs. @@ -117,39 +118,41 @@ def RunInferenceImpl( # pylint: disable=invalid-name """ logging.info('RunInference on model: %s', inference_spec_type) - batched_examples = examples | 'BatchExamples' >> beam.BatchElements() operation_type = _get_operation_type(inference_spec_type) if operation_type == OperationType.CLASSIFICATION: - return batched_examples | 'Classify' >> _Classify(inference_spec_type) + return examples | 'Classify' >> _Classify( + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.REGRESSION: - return batched_examples | 'Regress' >> _Regress(inference_spec_type) + return examples | 'Regress' >> _Regress( + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.PREDICTION: - return batched_examples | 'Predict' >> _Predict(inference_spec_type) + return examples | 'Predict' >> _Predict( + inference_spec_type, data_type, tensor_adapter_config) elif operation_type == OperationType.MULTIHEAD: - return (batched_examples - | 'MultiInference' >> _MultiInference(inference_spec_type)) + return (examples | 'MultiInference' >> _MultiInference( + inference_spec_type, data_type, tensor_adapter_config)) else: raise ValueError('Unsupported operation_type %s' % operation_type) _IOTensorSpec = collections.namedtuple( '_IOTensorSpec', - ['input_tensor_alias', 'input_tensor_name', 'output_alias_tensor_names']) + ['input_tensor_alias', 'input_tensor_names', 'input_tensor_types', 'output_alias_tensor_names']) _Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs classify PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Classify' >> beam.ParDo( - _BatchClassifyDoFn(inference_spec_type, shared.Shared())) + | 'Classify' >> beam.ParDo(_BatchClassifyDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildPredictionLogForClassifications' >> beam.ParDo( _BuildPredictionLogForClassificationsDoFn())) else: @@ -157,16 +160,16 @@ def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs regress 
PTransform.""" if _using_in_process_inference(inference_spec_type): return (pcoll - | 'Regress' >> beam.ParDo( - _BatchRegressDoFn(inference_spec_type, shared.Shared())) + | 'Regress' >> beam.ParDo(_BatchRegressDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildPredictionLogForRegressions' >> beam.ParDo( _BuildPredictionLogForRegressionsDoFn())) else: @@ -174,39 +177,39 @@ def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs predict PTransform.""" if _using_in_process_inference(inference_spec_type): predictions = ( pcoll - | 'Predict' >> beam.ParDo( - _BatchPredictDoFn(inference_spec_type, shared.Shared()))) + | 'Predict' >> beam.ParDo(_BatchPredictDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config))) else: predictions = ( pcoll - | 'RemotePredict' >> beam.ParDo( - _RemotePredictDoFn(inference_spec_type, pcoll.pipeline.options))) + | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( + inference_spec_type, pcoll.pipeline.options, data_type, tensor_adapter_config))) return (predictions | 'BuildPredictionLogForPredictions' >> beam.ParDo( _BuildPredictionLogForPredictionsDoFn())) @beam.ptransform_fn -@beam.typehints.with_input_types(Union[tf.train.Example, - tf.train.SequenceExample]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType): + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): """Performs multi inference PTransform.""" if _using_in_process_inference(inference_spec_type): return ( pcoll - | 'MultiInference' >> beam.ParDo( - _BatchMultiInferenceDoFn(inference_spec_type, shared.Shared())) + | 'MultiInference' >> beam.ParDo(_BatchMultiInferenceDoFn( + inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) else: raise NotImplementedError @@ -261,32 +264,76 @@ def update_metrics_with_cache(self): self._model_byte_size.update(self.model_byte_size_cache) self.model_byte_size_cache = None - def update(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - latency_micro_secs: int) -> None: + # For feature inputs, using serialized example for batch size + def update( + self, elements: List[Union[str, bytes]], latency_micro_secs: int) -> None: self._inference_batch_latency_micro_secs.update(latency_micro_secs) self._num_instances.inc(len(elements)) self._inference_counter.inc(len(elements)) self._inference_request_batch_size.update(len(elements)) self._inference_request_batch_byte_size.update( - sum(element.ByteSize() for element in elements)) + sum(len(element) for element in elements)) + - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): + def __init__( + self, 
inference_spec_type: model_spec_pb2.InferenceSpecType, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): super(_BaseDoFn, self).__init__() self._clock = None self._metrics_collector = self._MetricsCollector(inference_spec_type) + self._tensor_adapter_config = tensor_adapter_config + self._io_tensor_spec = None # This value may be None if the model is remote def setup(self): self._clock = _ClockFactory.make_clock() - def process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Iterable[Any]: + def _extract_from_recordBatch(self, elements: pa.RecordBatch): + """ + Function to extract the compatible input with model signature + """ + serialized_examples = None + for column_name, column_array in zip(elements.schema.names, elements.columns): + if column_name == _RECORDBATCH_COLUMN: + column_type = column_array.flatten().type + if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)): + raise ValueError( + 'Expected a list of serialized examples in bytes or as a string, got %s' % + type(example)) + serialized_examples = column_array.flatten().to_pylist() + break + + if (serialized_examples is None): + raise ValueError('Raw examples not found.') + + model_input = None + if self._io_tensor_spec is None: # Case when we are running remote inference + model_input = bsl_util.RecordToJSON(elements) + elif (len(self._io_tensor_spec.input_tensor_names) == 1): + model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} + else: + if (self._tensor_adapter_config is None): + raise ValueError('Tensor adaptor config is required with a multi-input model') + + input_tensor_names = self._io_tensor_spec.input_tensor_names + input_tensor_alias = self._io_tensor_spec.input_tensor_alias + _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) + dict_of_tensors = _tensor_adapter.ToBatchTensors( + elements, produce_eager_tensors = False) + filtered_tensors = bsl_util.filter_tensors_by_input_names( + dict_of_tensors, input_tensor_alias) + + model_input = {} + for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): + model_input[tensor_name] = filtered_tensors[feature] + return serialized_examples, model_input + + def process(self, elements: pa.RecordBatch) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - outputs = self.run_inference(elements) - result = self._post_process(elements, outputs) + serialized_examples, model_input = self._extract_from_recordBatch(elements) + outputs = self.run_inference(model_input) + result = self._post_process(model_input, outputs) self._metrics_collector.update( - elements, + serialized_examples, self._clock.get_current_time_in_microseconds() - batch_start_time) return result @@ -295,14 +342,13 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] + self, tensors: Mapping[Any, Any] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: raise NotImplementedError @abc.abstractmethod - def _post_process(self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]], - outputs: Any) -> Iterable[Any]: + def _post_process( + self, elements: Mapping[Any, Any], outputs: Any) -> Iterable[Any]: raise NotImplementedError @@ -322,8 +368,7 @@ def _retry_on_unavailable_and_resource_error_filter(exception: Exception): exception.resp.status in (503, 429)) -@beam.typehints.with_input_types(List[Union[tf.train.Example, - 
tf.train.SequenceExample]]) +@beam.typehints.with_input_types(pa.RecordBatch) # Using output typehints triggers NotImplementedError('BEAM-2717)' on # streaming mode on Dataflow runner. # TODO(b/151468119): Consider to re-batch with online serving request size @@ -350,9 +395,11 @@ class _RemotePredictDoFn(_BaseDoFn): """ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: PipelineOptions): - super(_RemotePredictDoFn, self).__init__(inference_spec_type) + pipeline_options: PipelineOptions, data_type: Text, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): + super(_RemotePredictDoFn, self).__init__(inference_spec_type, tensor_adapter_config) self._api_client = None + self._data_type = data_type project_id = ( inference_spec_type.ai_platform_prediction_model_spec.project_id or @@ -387,8 +434,7 @@ def setup(self): num_retries=_REMOTE_INFERENCE_NUM_RETRIES, retry_filter=_retry_on_unavailable_and_resource_error_filter) def _execute_request( - self, - request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: + self, request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: result = request.execute() if 'error' in result: raise ValueError(result['error']) @@ -400,63 +446,26 @@ def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: @classmethod def _prepare_instances( - cls, elements: List[tf.train.Example] + cls, elements: List[Union[str, bytes]] ) -> Generator[Mapping[Text, Any], None, None]: - for example in elements: - # TODO(b/151468119): support tf.train.SequenceExample - if not isinstance(example, tf.train.Example): - raise ValueError('Remote prediction only supports tf.train.Example') - - instance = {} - for input_name, feature in example.features.feature.items(): - attr_name = feature.WhichOneof('kind') - if attr_name is None: - continue - attr = getattr(feature, attr_name) - values = cls._parse_feature_content(attr.value, attr_name, - cls._sending_as_binary(input_name)) - # Flatten a sequence if its length is 1 - values = (values[0] if len(values) == 1 else values) - instance[input_name] = values + for instance in elements: yield instance - @staticmethod - def _sending_as_binary(input_name: Text) -> bool: - """Whether data should be sent as binary.""" - return input_name.endswith('_bytes') - - @staticmethod - def _parse_feature_content(values: Sequence[Any], attr_name: Text, - as_binary: bool) -> List[Any]: - """Parse the content of tf.train.Feature object. - - If bytes_list, parse a list of bytes-like objects to a list of strings so - that it would be JSON serializable. - - If float_list or int64_list, do nothing. - - If data should be sent as binary, mark it as binary by replacing it with - a single attribute named 'b64'. 
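With the per-feature parsing removed here (it now lives in bsl_util.RecordToJSON), _prepare_instances simply yields prebuilt JSON instances; a hedged sketch of the request body handed to the prediction service, with hypothetical feature names and values:

# Illustrative body built by _RemotePredictDoFn.run_inference.
body = {
    'instances': [
        {'age': 42.0, 'image_bytes': {'b64': 'AAE='}},
        {'age': 7.0, 'image_bytes': {'b64': 'AgM='}},
    ]
}
# request = api_client.projects().predict(name=full_model_name, body=body)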
- """ - if as_binary: - return [{'b64': base64.b64encode(x).decode()} for x in values] - elif attr_name == 'bytes_list': - return [x.decode() for x in values] - else: - # Converts proto RepeatedScalarContainer to list so it is - # JSON-serializable - return list(values) + def _check_elements(self) -> None: + # TODO(b/151468119): support tf.train.SequenceExample + if self._data_type != DataType.EXAMPLE: + raise ValueError('Remote prediction only supports tf.train.Example') def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Sequence[Mapping[Text, Any]]: + self, elements: List[Union[str, bytes]]) -> Sequence[Mapping[Text, Any]]: + self._check_elements() body = {'instances': list(self._prepare_instances(elements))} request = self._make_request(body) response = self._execute_request(request) return response['predictions'] def _post_process( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]], + self, elements: List[Union[str, bytes]], outputs: Sequence[Mapping[Text, Any]] ) -> Iterable[prediction_log_pb2.PredictLog]: result = [] @@ -478,6 +487,7 @@ def _post_process( # is fixed. # TODO(b/143484017): Add batch_size back off in the case there are functional # reasons large batch sizes cannot be handled. + class _BaseBatchSavedModelDoFn(_BaseDoFn): """A DoFn that runs in-process batch inference with a model. @@ -489,21 +499,20 @@ class _BaseBatchSavedModelDoFn(_BaseDoFn): """ def __init__( - self, - inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, - ): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) + self, inference_spec_type: model_spec_pb2.InferenceSpecType, + shared_model_handle: shared.Shared, data_type, + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, tensor_adapter_config) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle self._model_path = inference_spec_type.saved_model_spec.model_path self._tags = None self._signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) + inference_spec_type.saved_model_spec.model_path, + inference_spec_type.saved_model_spec.signature_name, + _get_tags(inference_spec_type)) self._session = None - self._io_tensor_spec = None + self._data_type = data_type def setup(self): """Load the model. @@ -551,69 +560,61 @@ def _pre_process(self) -> _IOTensorSpec: # Pre process functions will validate for each signature. 
io_tensor_specs = [] for signature in self._signatures: - if len(signature.signature_def.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - if (list(signature.signature_def.inputs.values())[0].dtype != + if (len(signature.signature_def.inputs) == 1 and + list(signature.signature_def.inputs.values())[0].dtype != tf.string.as_datatype_enum): raise ValueError( - 'Input dtype is expected to be %s, got %s' % + 'With 1 input, dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, list(signature.signature_def.inputs.values())[0].dtype) io_tensor_specs.append(_signature_pre_process(signature.signature_def)) - input_tensor_name = '' - input_tensor_alias = '' + input_tensor_names = [] + input_tensor_alias = [] + input_tensor_types = {} output_alias_tensor_names = {} for io_tensor_spec in io_tensor_specs: - if not input_tensor_name: - input_tensor_name = io_tensor_spec.input_tensor_name + if not input_tensor_names: + input_tensor_names = io_tensor_spec.input_tensor_names input_tensor_alias = io_tensor_spec.input_tensor_alias - elif input_tensor_name != io_tensor_spec.input_tensor_name: + elif input_tensor_names != io_tensor_spec.input_tensor_names: raise ValueError('Input tensor must be the same for all Signatures.') - for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items( - ): + for alias, tensor_type in io_tensor_spec.input_tensor_types.items(): + input_tensor_types[alias] = tensor_type + for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items(): output_alias_tensor_names[alias] = tensor_name - if (not output_alias_tensor_names or not input_tensor_name or + if (not output_alias_tensor_names or not input_tensor_names or not input_tensor_alias): raise ValueError('No valid fetch tensors or feed tensors.') - return _IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) + return _IOTensorSpec(input_tensor_alias, input_tensor_names, + input_tensor_types, output_alias_tensor_names) def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) def run_inference( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: - self._check_elements(elements) - outputs = self._run_tf_operations(elements) + self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: + self._check_elements() + outputs = self._run_tf_operations(tensors) return outputs def _run_tf_operations( - self, elements: List[Union[tf.train.Example, tf.train.SequenceExample]] - ) -> Mapping[Text, np.ndarray]: - input_values = [] - for element in elements: - input_values.append(element.SerializeToString()) + self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: result = self._session.run( - self._io_tensor_spec.output_alias_tensor_names, - feed_dict={self._io_tensor_spec.input_tensor_name: input_values}) + self._io_tensor_spec.output_alias_tensor_names, feed_dict=tensors) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): raise RuntimeError('Output length does not match fetches') return result - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: + def _check_elements(self) -> None: """Unimplemented.""" raise NotImplementedError -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) 
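run_inference now receives a ready feed_dict instead of a list of protos; a sketch of the two shapes it may be given (single serialized-string input vs. dense multi-tensor input), with hypothetical tensor names:

import numpy as np

# Single string input: the serialized examples are fed directly.
feed_dict_single = {
    'input_example_tensor:0': [b'<serialized Example>', b'<serialized Example>']}

# Multi-tensor input: dense values produced by the TensorAdapter, keyed by the
# tensor names resolved from the signature.
feed_dict_multi = {
    'serving_default_age:0': np.array([[42.0], [7.0]]),
    'serving_default_weight:0': np.array([[61.5], [80.2]])}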
+@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + classification_pb2.Classifications]) class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on classification model.""" @@ -626,47 +627,44 @@ def setup(self): signature_def.method_name) super(_BatchClassifyDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): + def _check_elements(self) -> None: + if self._data_type != DataType.EXAMPLE: raise ValueError('Classify only supports tf.train.Example') def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, classification_pb2.Classifications]]: + self, elements: Mapping[Any, Any], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], classification_pb2.Classifications]]: + serialized_examples, = elements.values() classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) - return zip(elements, classifications) + self._io_tensor_spec.output_alias_tensor_names, + serialized_examples, outputs) + return zip(serialized_examples, classifications) -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - regression_pb2.Regression]) +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + regression_pb2.Regression]) class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): """A DoFn that run inference on regression model.""" def setup(self): super(_BatchRegressDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): + def _check_elements(self) -> None: + if self._data_type != DataType.EXAMPLE: raise ValueError('Regress only supports tf.train.Example') def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, regression_pb2.Regression]]: - regressions = _post_process_regress(elements, outputs) - return zip(elements, regressions) + self, elements: Mapping[Any, Any], + outputs: Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], regression_pb2.Regression]]: + serialized_examples, = elements.values() + regressions = _post_process_regress(serialized_examples, outputs) + return zip(serialized_examples, regressions) -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) +@beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on predict model.""" @@ -680,19 +678,39 @@ def setup(self): signature_def.method_name) super(_BatchPredictDoFn, self).setup() - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: + def _check_elements(self) -> None: pass def _post_process( - self, elements: Union[Sequence[tf.train.Example], - Sequence[tf.train.SequenceExample]], + self, elements: Mapping[Any, Any], outputs: Mapping[Text, np.ndarray] ) -> Iterable[prediction_log_pb2.PredictLog]: + if not 
self._io_tensor_spec.input_tensor_types: + raise ValueError('No valid tensor types.') + input_tensor_names = self._io_tensor_spec.input_tensor_names input_tensor_alias = self._io_tensor_spec.input_tensor_alias + input_tensor_types = self._io_tensor_spec.input_tensor_types signature_name = self._signatures[0].name - batch_size = len(elements) + + if len(input_tensor_alias) != len(input_tensor_names): + raise ValueError('Expected to have one name and one alias per tensor') + + include_request = True + if len(input_tensor_names) == 1: + serialized_examples, = elements.values() + batch_size = len(serialized_examples) + process_elements = serialized_examples + else: + for tensor_name, tensor in elements.items(): + if not isinstance(tensor, np.ndarray): + include_request = False + break + + if include_request: + batch_size = len(elements[input_tensor_names[0]]) + else: + batch_size = elements[input_tensor_names[0]].shape[0] + for output_alias, output in outputs.items(): if len(output.shape) < 1 or output.shape[0] != batch_size: raise ValueError( @@ -703,16 +721,25 @@ def _post_process( predict_log_tmpl = prediction_log_pb2.PredictLog() predict_log_tmpl.request.model_spec.signature_name = signature_name predict_log_tmpl.response.model_spec.signature_name = signature_name - input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias] - input_tensor_proto.dtype = tf.string.as_datatype_enum - input_tensor_proto.tensor_shape.dim.add().size = 1 + for alias, tensor_type in input_tensor_types.items(): + input_tensor_proto = predict_log_tmpl.request.inputs[alias] + input_tensor_proto.dtype = tf.as_dtype(tensor_type).as_datatype_enum + # TODO (Maxine): fix dimension? + input_tensor_proto.tensor_shape.dim.add().size = 1 result = [] for i in range(batch_size): predict_log = prediction_log_pb2.PredictLog() predict_log.CopyFrom(predict_log_tmpl) - predict_log.request.inputs[input_tensor_alias].string_val.append( - elements[i].SerializeToString()) + + if include_request: + if len(input_tensor_alias) == 1: + alias = input_tensor_alias[0] + predict_log.request.inputs[alias].string_val.append(process_elements[i]) + else: + for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + predict_log.request.inputs[alias].float_val.append(elements[tensor_name][i]) + for output_alias, output in outputs.items(): # Mimic tensor::Split tensor_proto = tf.make_tensor_proto( @@ -724,37 +751,36 @@ def _post_process( return result -@beam.typehints.with_input_types(List[Union[tf.train.Example, - tf.train.SequenceExample]]) -@beam.typehints.with_output_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse]) class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): """A DoFn that runs inference on multi-head model.""" - def _check_elements( - self, elements: List[Union[tf.train.Example, - tf.train.SequenceExample]]) -> None: - if not all(isinstance(element, tf.train.Example) for element in elements): - raise ValueError('Multi inference only supports tf.train.Example') + def _check_elements(self) -> None: + if self._data_type != DataType.EXAMPLE: + raise ValueError('Multi-inference only supports tf.train.Example') def _post_process( - self, elements: Sequence[tf.train.Example], outputs: Mapping[Text, - np.ndarray] - ) -> Iterable[Tuple[tf.train.Example, inference_pb2.MultiInferenceResponse]]: + self, elements: Mapping[Any, Any], + outputs: 
Mapping[Text, np.ndarray] + ) -> Iterable[Tuple[Union[str, bytes], inference_pb2.MultiInferenceResponse]]: classifications = None regressions = None + serialized_examples, = elements.values() for signature in self._signatures: signature_def = signature.signature_def if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, elements, outputs) + self._io_tensor_spec.output_alias_tensor_names, + serialized_examples, outputs) elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: - regressions = _post_process_regress(elements, outputs) + regressions = _post_process_regress(serialized_examples, outputs) else: raise ValueError('Signature method %s is not supported for ' 'multi inference' % signature_def.method_name) result = [] - for i in range(len(elements)): + for i in range(len(serialized_examples)): response = inference_pb2.MultiInferenceResponse() for signature in self._signatures: signature_def = signature.signature_def @@ -775,41 +801,42 @@ def _post_process( if len(response.results) != len(self._signatures): raise RuntimeError('Multi inference response result length does not ' 'match the number of signatures') - result.append((elements[i], response)) + result.append((serialized_examples[i], response)) return result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - classification_pb2.Classifications]) + +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + classification_pb2.Classifications]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): """A DoFn that builds prediction log from classifications.""" def process( - self, element: Tuple[tf.train.Example, classification_pb2.Classifications] + self, element: Tuple[Union[str, bytes], classification_pb2.Classifications] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, classifications) = element result = prediction_log_pb2.PredictionLog() result.classify_log.request.input.example_list.examples.add().CopyFrom( - train_example) + tf.train.Example.FromString(train_example)) result.classify_log.response.result.classifications.add().CopyFrom( classifications) yield result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - regression_pb2.Regression]) +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + regression_pb2.Regression]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): """A DoFn that builds prediction log from regressions.""" def process( - self, element: Tuple[tf.train.Example, regression_pb2.Regression] + self, element: Tuple[Union[str, bytes], regression_pb2.Regression] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, regression) = element result = prediction_log_pb2.PredictionLog() result.regress_log.request.input.example_list.examples.add().CopyFrom( - train_example) + tf.train.Example.FromString(train_example)) result.regress_log.response.result.regressions.add().CopyFrom(regression) yield result @@ -827,28 +854,28 @@ def process( yield result -@beam.typehints.with_input_types(Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse]) +@beam.typehints.with_input_types(Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse]) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) class _BuildMultiInferenceLogDoFn(beam.DoFn): """A DoFn that builds prediction log from multi-head 
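The log builders above now receive serialized example bytes and restore the proto with tf.train.Example.FromString; a small round-trip sketch:

import tensorflow as tf

example = tf.train.Example()
example.features.feature['age'].float_list.value.append(42.0)
serialized = example.SerializeToString()
assert tf.train.Example.FromString(serialized) == example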
inference result.""" def process( - self, element: Tuple[tf.train.Example, - inference_pb2.MultiInferenceResponse] + self, element: Tuple[Union[str, bytes], + inference_pb2.MultiInferenceResponse] ) -> Iterable[prediction_log_pb2.PredictionLog]: (train_example, multi_inference_response) = element result = prediction_log_pb2.PredictionLog() (result.multi_inference_log.request.input.example_list.examples.add() - .CopyFrom(train_example)) + .CopyFrom(tf.train.Example.FromString(train_example))) result.multi_inference_log.response.CopyFrom(multi_inference_response) yield result def _post_process_classify( output_alias_tensor_names: Mapping[Text, Text], - elements: Sequence[tf.train.Example], outputs: Mapping[Text, np.ndarray] -) -> Sequence[classification_pb2.Classifications]: + elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray] + ) -> Sequence[classification_pb2.Classifications]: """Returns classifications from inference output.""" # This is to avoid error "The truth value of an array with @@ -908,7 +935,7 @@ def _post_process_classify( def _post_process_regress( - elements: Sequence[tf.train.Example], + elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: """Returns regressions from inference output.""" @@ -945,28 +972,27 @@ def _post_process_regress( def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: """Returns IOTensorSpec from signature.""" - - if len(signature.inputs) != 1: - raise ValueError('Signature should have 1 and only 1 inputs') - input_tensor_alias = list(signature.inputs.keys())[0] - if list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum: + if (len(signature.inputs) == 1 and + list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum): raise ValueError( - 'Input dtype is expected to be %s, got %s' % tf.string.as_datatype_enum, - list(signature.inputs.values())[0].dtype) + 'With 1 input, dtype is expected to be %s, got %s' % + tf.string.as_datatype_enum, + list(signature.inputs.values())[0].dtype) + input_tensor_alias = [alias for alias in signature.inputs.keys()] if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_classify(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_classify(signature)) elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_predict(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_predict(signature)) elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: - input_tensor_name, output_alias_tensor_names = ( - _signature_pre_process_regress(signature)) + input_tensor_names, input_tensor_types, output_alias_tensor_names = ( + _signature_pre_process_regress(signature)) else: raise ValueError('Signature method %s is not supported' % - signature.method_name) - return _IOTensorSpec(input_tensor_alias, input_tensor_name, - output_alias_tensor_names) + signature.method_name) + return _IOTensorSpec(input_tensor_alias, input_tensor_names, + input_tensor_types, output_alias_tensor_names) def _signature_pre_process_classify( @@ -979,13 +1005,14 @@ def _signature_pre_process_classify( Returns: A tuple of input tensor name and output alias tensor names. 
""" - + if len(signature.inputs) != 1: + raise ValueError('Classify signature should have 1 and only 1 inputs') if len(signature.outputs) != 1 and len(signature.outputs) != 2: raise ValueError('Classify signature should have 1 or 2 outputs') if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: raise ValueError('No classification inputs found in SignatureDef: %s' % signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name + input_tensor_names = [signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name] output_alias_tensor_names = {} if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): @@ -1000,7 +1027,7 @@ def _signature_pre_process_classify( if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, {}, output_alias_tensor_names def _signature_pre_process_predict( @@ -1013,12 +1040,14 @@ def _signature_pre_process_predict( Returns: A tuple of input tensor name and output alias tensor names. """ - - input_tensor_name = list(signature.inputs.values())[0].name + input_tensor_names = [value.name for value in signature.inputs.values()] + input_tensor_types = dict([ + (key, value.dtype) for key, value in signature.inputs.items() + ]) output_alias_tensor_names = dict([ - (key, output.name) for key, output in signature.outputs.items() + (key, output.name) for key, output in signature.outputs.items() ]) - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, input_tensor_types, output_alias_tensor_names def _signature_pre_process_regress( @@ -1031,13 +1060,14 @@ def _signature_pre_process_regress( Returns: A tuple of input tensor name and output alias tensor names. """ - + if len(signature.inputs) != 1: + raise ValueError('Regress signature should have 1 and only 1 inputs') if len(signature.outputs) != 1: raise ValueError('Regress signature should have 1 output') if tf.saved_model.REGRESS_INPUTS not in signature.inputs: raise ValueError('No regression inputs found in SignatureDef: %s' % signature.inputs) - input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name + input_tensor_names = [signature.inputs[tf.saved_model.REGRESS_INPUTS].name] if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: raise ValueError('No regression outputs found in SignatureDef: %s' % signature.outputs) @@ -1045,7 +1075,7 @@ def _signature_pre_process_regress( tf.saved_model.REGRESS_OUTPUTS: signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name } - return input_tensor_name, output_alias_tensor_names + return input_tensor_names, {}, output_alias_tensor_names def _using_in_process_inference( diff --git a/tfx_bsl/beam/run_inference_arrow.py b/tfx_bsl/beam/run_inference_arrow.py deleted file mode 100644 index 0ffbc7b5..00000000 --- a/tfx_bsl/beam/run_inference_arrow.py +++ /dev/null @@ -1,1216 +0,0 @@ -# Copyright 2019 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Run batch inference on saved model.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import abc -import collections -import os -import platform -import sys -import time -try: - import resource -except ImportError: - resource = None - -from absl import logging -import apache_beam as beam -import pyarrow as pa -from apache_beam.options.pipeline_options import GoogleCloudOptions -from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.utils import retry -import googleapiclient -from googleapiclient import discovery -from googleapiclient import http -import numpy as np -import json -import six -import tensorflow as tf -from tfx_bsl.beam import shared -from tfx_bsl.beam import bsl_util -from tfx_bsl.public.proto import model_spec_pb2 -from tfx_bsl.telemetry import util -from tfx_bsl.tfxio import tensor_adapter -from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ - Tuple, Union, Optional - -from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN -from tfx_bsl.beam.bsl_constants import DataType - -# TODO(b/140306674): stop using the internal TF API. -from tensorflow.python.saved_model import loader_impl -from tensorflow_serving.apis import classification_pb2 -from tensorflow_serving.apis import inference_pb2 -from tensorflow_serving.apis import prediction_log_pb2 -from tensorflow_serving.apis import regression_pb2 - -# TODO(b/131873699): Remove once 1.x support is dropped. -# pylint: disable=g-import-not-at-top -try: - # We need to import this in order to register all quantiles ops, even though - # it's not directly used. - from tensorflow.contrib.boosted_trees.python.ops import quantile_ops as _ # pylint: disable=unused-import -except ImportError: - pass - - -_DEFAULT_INPUT_KEY = 'examples' -_METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer' -_METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess' -_METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION = 'CloudAIPlatformPrediction' -_MILLISECOND_TO_MICROSECOND = 1000 -_MICROSECOND_TO_NANOSECOND = 1000 -_SECOND_TO_MICROSECOND = 1000000 -_REMOTE_INFERENCE_NUM_RETRIES = 5 - -# We define the following aliases of Any because the actual types are not public. -_SignatureDef = Any -_MetaGraphDef = Any -_SavedModel = Any - - -# TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 -class OperationType(object): - CLASSIFICATION = 'CLASSIFICATION' - REGRESSION = 'REGRESSION' - PREDICTION = 'PREDICTION' - MULTIHEAD = 'MULTIHEAD' - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceImpl( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType, - data_type: DataType, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None -) -> beam.pvalue.PCollection: - """Implementation of RunInference API. - - Args: - examples: A PCollection containing RecordBatch of serialized examples. 
- inference_spec_type: Model inference endpoint. - tensor_adapter_config [Optional]: Tensor adapter config which specifies how to - obtain tensors from the Arrow RecordBatch. - - Not required when running inference with remote model or 1 input - - Returns: - A PCollection containing prediction logs. - - Raises: - ValueError; when operation is not supported. - """ - logging.info('RunInference on model: %s', inference_spec_type) - - operation_type = _get_operation_type(inference_spec_type) - if operation_type == OperationType.CLASSIFICATION: - return examples | 'Classify' >> _Classify( - inference_spec_type, data_type, tensor_adapter_config) - elif operation_type == OperationType.REGRESSION: - return examples | 'Regress' >> _Regress( - inference_spec_type, data_type, tensor_adapter_config) - elif operation_type == OperationType.PREDICTION: - return examples | 'Predict' >> _Predict( - inference_spec_type, data_type, tensor_adapter_config) - elif operation_type == OperationType.MULTIHEAD: - return (examples | 'MultiInference' >> _MultiInference( - inference_spec_type, data_type, tensor_adapter_config)) - else: - raise ValueError('Unsupported operation_type %s' % operation_type) - - -_IOTensorSpec = collections.namedtuple( - '_IOTensorSpec', - ['input_tensor_alias', 'input_tensor_names', 'input_tensor_types', 'output_alias_tensor_names']) - -_Signature = collections.namedtuple('_Signature', ['name', 'signature_def']) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Classify(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - """Performs classify PTransform.""" - if _using_in_process_inference(inference_spec_type): - return (pcoll - | 'Classify' >> beam.ParDo(_BatchClassifyDoFn( - inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) - | 'BuildPredictionLogForClassifications' >> beam.ParDo( - _BuildPredictionLogForClassificationsDoFn())) - else: - raise NotImplementedError - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Regress(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - """Performs regress PTransform.""" - if _using_in_process_inference(inference_spec_type): - return (pcoll - | 'Regress' >> beam.ParDo(_BatchRegressDoFn( - inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) - | 'BuildPredictionLogForRegressions' >> beam.ParDo( - _BuildPredictionLogForRegressionsDoFn())) - else: - raise NotImplementedError - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - """Performs predict PTransform.""" - if _using_in_process_inference(inference_spec_type): - predictions = ( - pcoll - | 'Predict' >> beam.ParDo(_BatchPredictDoFn( - inference_spec_type, shared.Shared(), data_type, tensor_adapter_config))) - else: - predictions = ( - pcoll 
- | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( - inference_spec_type, pcoll.pipeline.options, data_type, tensor_adapter_config))) - return (predictions - | 'BuildPredictionLogForPredictions' >> beam.ParDo( - _BuildPredictionLogForPredictionsDoFn())) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def _MultiInference(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name - inference_spec_type: model_spec_pb2.InferenceSpecType, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - """Performs multi inference PTransform.""" - if _using_in_process_inference(inference_spec_type): - return ( - pcoll - | 'MultiInference' >> beam.ParDo(_BatchMultiInferenceDoFn( - inference_spec_type, shared.Shared(), data_type, tensor_adapter_config)) - | 'BuildMultiInferenceLog' >> beam.ParDo(_BuildMultiInferenceLogDoFn())) - else: - raise NotImplementedError - - -@six.add_metaclass(abc.ABCMeta) -class _BaseDoFn(beam.DoFn): - """Base DoFn that performs bulk inference.""" - - class _MetricsCollector(object): - """A collector for beam metrics.""" - - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType): - operation_type = _get_operation_type(inference_spec_type) - proximity_descriptor = ( - _METRICS_DESCRIPTOR_IN_PROCESS - if _using_in_process_inference(inference_spec_type) else - _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) - namespace = util.MakeTfxNamespace( - [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor]) - - # Metrics - self._inference_counter = beam.metrics.Metrics.counter( - namespace, 'num_inferences') - self._num_instances = beam.metrics.Metrics.counter( - namespace, 'num_instances') - self._inference_request_batch_size = beam.metrics.Metrics.distribution( - namespace, 'inference_request_batch_size') - self._inference_request_batch_byte_size = ( - beam.metrics.Metrics.distribution( - namespace, 'inference_request_batch_byte_size')) - # Batch inference latency in microseconds. - self._inference_batch_latency_micro_secs = ( - beam.metrics.Metrics.distribution( - namespace, 'inference_batch_latency_micro_secs')) - self._model_byte_size = beam.metrics.Metrics.distribution( - namespace, 'model_byte_size') - # Model load latency in milliseconds. 
- self._load_model_latency_milli_secs = beam.metrics.Metrics.distribution( - namespace, 'load_model_latency_milli_secs') - - # Metrics cache - self.load_model_latency_milli_secs_cache = None - self.model_byte_size_cache = None - - def update_metrics_with_cache(self): - if self.load_model_latency_milli_secs_cache is not None: - self._load_model_latency_milli_secs.update( - self.load_model_latency_milli_secs_cache) - self.load_model_latency_milli_secs_cache = None - if self.model_byte_size_cache is not None: - self._model_byte_size.update(self.model_byte_size_cache) - self.model_byte_size_cache = None - - # For feature inputs, using serialized example for batch size - def update( - self, elements: List[Union[str, bytes]], latency_micro_secs: int) -> None: - self._inference_batch_latency_micro_secs.update(latency_micro_secs) - self._num_instances.inc(len(elements)) - self._inference_counter.inc(len(elements)) - self._inference_request_batch_size.update(len(elements)) - self._inference_request_batch_byte_size.update( - sum(len(element) for element in elements)) - - - def __init__( - self, inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - super(_BaseDoFn, self).__init__() - self._clock = None - self._metrics_collector = self._MetricsCollector(inference_spec_type) - self._tensor_adapter_config = tensor_adapter_config - self._io_tensor_spec = None # This value may be None if the model is remote - - def setup(self): - self._clock = _ClockFactory.make_clock() - - def _extract_from_recordBatch(self, elements: pa.RecordBatch): - """ - Function to extract the compatible input with model signature - """ - serialized_examples = None - for column_name, column_array in zip(elements.schema.names, elements.columns): - if column_name == _RECORDBATCH_COLUMN: - column_type = column_array.flatten().type - if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)): - raise ValueError( - 'Expected a list of serialized examples in bytes or as a string, got %s' % - type(example)) - serialized_examples = column_array.flatten().to_pylist() - break - - if (serialized_examples is None): - raise ValueError('Raw examples not found.') - - model_input = None - if self._io_tensor_spec is None: # Case when we are running remote inference - model_input = bsl_util.RecordToJSON(elements) - elif (len(self._io_tensor_spec.input_tensor_names) == 1): - model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} - else: - if (self._tensor_adapter_config is None): - raise ValueError('Tensor adaptor config is required with a multi-input model') - - input_tensor_names = self._io_tensor_spec.input_tensor_names - input_tensor_alias = self._io_tensor_spec.input_tensor_alias - _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) - dict_of_tensors = _tensor_adapter.ToBatchTensors( - elements, produce_eager_tensors = False) - filtered_tensors = bsl_util.filter_tensors_by_input_names( - dict_of_tensors, input_tensor_alias) - - model_input = {} - for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): - model_input[tensor_name] = filtered_tensors[feature] - return serialized_examples, model_input - - def process(self, elements: pa.RecordBatch) -> Iterable[Any]: - batch_start_time = self._clock.get_current_time_in_microseconds() - serialized_examples, model_input = self._extract_from_recordBatch(elements) - outputs = self.run_inference(model_input) - result = 
self._post_process(model_input, outputs) - self._metrics_collector.update( - serialized_examples, - self._clock.get_current_time_in_microseconds() - batch_start_time) - return result - - def finish_bundle(self): - self._metrics_collector.update_metrics_with_cache() - - @abc.abstractmethod - def run_inference( - self, tensors: Mapping[Any, Any] - ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: - raise NotImplementedError - - @abc.abstractmethod - def _post_process( - self, elements: Mapping[Any, Any], outputs: Any) -> Iterable[Any]: - raise NotImplementedError - - -def _retry_on_unavailable_and_resource_error_filter(exception: Exception): - """Retries for HttpError. - - Retries if error is unavailable (503) or resource exhausted (429). - Resource exhausted may happen when qps or bandwidth exceeds quota. - - Args: - exception: Exception from inference http request execution. - Returns: - A boolean of whether retry. - """ - - return (isinstance(exception, googleapiclient.errors.HttpError) and - exception.resp.status in (503, 429)) - - -@beam.typehints.with_input_types(pa.RecordBatch) -# Using output typehints triggers NotImplementedError('BEAM-2717)' on -# streaming mode on Dataflow runner. -# TODO(b/151468119): Consider to re-batch with online serving request size -# limit, and re-batch with RPC failures(InvalidArgument) regarding request size. -# @beam.typehints.with_output_types(prediction_log_pb2.PredictLog) -class _RemotePredictDoFn(_BaseDoFn): - """A DoFn that performs predictions from a cloud-hosted TensorFlow model. - - Supports both batch and streaming processing modes. - NOTE: Does not work on DirectRunner for streaming jobs [BEAM-7885]. - - In order to request predictions, you must deploy your trained model to AI - Platform Prediction in the TensorFlow SavedModel format. See - [Exporting a SavedModel for prediction] - (https://cloud.google.com/ai-platform/prediction/docs/exporting-savedmodel-for-prediction) - for more details. - - To send binary data, you have to make sure that the name of an input ends in - `_bytes`. - - NOTE: The returned `PredictLog` instances do not have `PredictRequest` part - filled. The reason is that it is difficult to determine the input tensor name - without having access to cloud-hosted model's signatures. - """ - - def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: PipelineOptions, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - super(_RemotePredictDoFn, self).__init__(inference_spec_type, tensor_adapter_config) - self._api_client = None - self._data_type = data_type - - project_id = ( - inference_spec_type.ai_platform_prediction_model_spec.project_id or - pipeline_options.view_as(GoogleCloudOptions).project) - if not project_id: - raise ValueError('Either a non-empty project id or project flag in ' - ' beam pipeline options needs be provided.') - - model_name = ( - inference_spec_type.ai_platform_prediction_model_spec.model_name) - if not model_name: - raise ValueError('A non-empty model name must be provided.') - - version_name = ( - inference_spec_type.ai_platform_prediction_model_spec.version_name) - name_spec = 'projects/{}/models/{}' - # If version is not specified, the default version for a model is used. 
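# Illustration only (hypothetical project/model/version names): the fully
# qualified resource name that _RemotePredictDoFn builds for the AI Platform
# predict call, with and without an explicit version.
name_spec = 'projects/{}/models/{}'
print(name_spec.format('my-project', 'my-model'))
# -> projects/my-project/models/my-model  (the model's default version is used)
print((name_spec + '/versions/{}').format('my-project', 'my-model', 'v1'))
# -> projects/my-project/models/my-model/versions/v1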
- if version_name: - name_spec += '/versions/{}' - self._full_model_name = name_spec.format(project_id, model_name, - version_name) - - def setup(self): - super(_RemotePredictDoFn, self).setup() - # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to - # user agent once custom header is supported in googleapiclient. - self._api_client = discovery.build('ml', 'v1') - - # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. - @retry.with_exponential_backoff( - initial_delay_secs=1.0, - num_retries=_REMOTE_INFERENCE_NUM_RETRIES, - retry_filter=_retry_on_unavailable_and_resource_error_filter) - def _execute_request( - self, - request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]: - result = request.execute() - if 'error' in result: - raise ValueError(result['error']) - return result - - def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: - return self._api_client.projects().predict( - name=self._full_model_name, body=body) - - @classmethod - def _prepare_instances( - cls, elements: List[Union[str, bytes]] - ) -> Generator[Mapping[Text, Any], None, None]: - for instance in elements: - yield instance - - def _check_elements(self) -> None: - # TODO(b/151468119): support tf.train.SequenceExample - if self._data_type != DataType.EXAMPLE: - raise ValueError('Remote prediction only supports tf.train.Example') - - def run_inference( - self, elements: List[Union[str, bytes]]) -> Sequence[Mapping[Text, Any]]: - self._check_elements() - body = {'instances': list(self._prepare_instances(elements))} - request = self._make_request(body) - response = self._execute_request(request) - return response['predictions'] - - def _post_process( - self, elements: List[Union[str, bytes]], - outputs: Sequence[Mapping[Text, Any]] - ) -> Iterable[prediction_log_pb2.PredictLog]: - result = [] - for output in outputs: - predict_log = prediction_log_pb2.PredictLog() - for output_alias, values in output.items(): - values = np.array(values) - tensor_proto = tf.make_tensor_proto( - values=values, - dtype=tf.as_dtype(values.dtype).as_datatype_enum, - shape=np.expand_dims(values, axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) - return result - - -# TODO(b/131873699): Add typehints once -# [BEAM-8381](https://issues.apache.org/jira/browse/BEAM-8381) -# is fixed. -# TODO(b/143484017): Add batch_size back off in the case there are functional -# reasons large batch sizes cannot be handled. - -class _BaseBatchSavedModelDoFn(_BaseDoFn): - """A DoFn that runs in-process batch inference with a model. - - Models need to have the required serving signature as mentioned in - [Tensorflow Serving](https://www.tensorflow.org/tfx/serving/signature_defs) - - This function will check model signatures first. Then it will load and run - model inference in batch. 
- """ - - def __init__( - self, inference_spec_type: model_spec_pb2.InferenceSpecType, - shared_model_handle: shared.Shared, data_type, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, tensor_adapter_config) - self._inference_spec_type = inference_spec_type - self._shared_model_handle = shared_model_handle - self._model_path = inference_spec_type.saved_model_spec.model_path - self._tags = None - self._signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) - self._session = None - self._data_type = data_type - - def setup(self): - """Load the model. - - Note that worker may crash if exception is thrown in setup due - to b/139207285. - """ - - super(_BaseBatchSavedModelDoFn, self).setup() - self._tags = _get_tags(self._inference_spec_type) - self._io_tensor_spec = self._pre_process() - - if self._has_tpu_tag(): - # TODO(b/131873699): Support TPU inference. - raise ValueError('TPU inference is not supported yet.') - self._session = self._load_model() - - def _load_model(self): - """Load a saved model into memory. - - Returns: - Session instance. - """ - - def load(): - """Function for constructing shared LoadedModel.""" - # TODO(b/143484017): Do warmup and other heavy model construction here. - result = tf.compat.v1.Session(graph=tf.compat.v1.Graph()) - memory_before = _get_current_process_memory_in_bytes() - start_time = self._clock.get_current_time_in_microseconds() - tf.compat.v1.saved_model.loader.load(result, self._tags, self._model_path) - end_time = self._clock.get_current_time_in_microseconds() - memory_after = _get_current_process_memory_in_bytes() - self._metrics_collector.load_model_latency_milli_secs_cache = ( - (end_time - start_time) / _MILLISECOND_TO_MICROSECOND) - self._metrics_collector.model_byte_size_cache = ( - memory_after - memory_before) - return result - - if not self._model_path: - raise ValueError('Model path is not valid.') - return self._shared_model_handle.acquire(load) - - def _pre_process(self) -> _IOTensorSpec: - # Pre process functions will validate for each signature. 
- io_tensor_specs = [] - for signature in self._signatures: - if (len(signature.signature_def.inputs) == 1 and - list(signature.signature_def.inputs.values())[0].dtype != - tf.string.as_datatype_enum): - raise ValueError( - 'With 1 input, dtype is expected to be %s, got %s' % - tf.string.as_datatype_enum, - list(signature.signature_def.inputs.values())[0].dtype) - io_tensor_specs.append(_signature_pre_process(signature.signature_def)) - input_tensor_names = [] - input_tensor_alias = [] - input_tensor_types = {} - output_alias_tensor_names = {} - for io_tensor_spec in io_tensor_specs: - if not input_tensor_names: - input_tensor_names = io_tensor_spec.input_tensor_names - input_tensor_alias = io_tensor_spec.input_tensor_alias - elif input_tensor_names != io_tensor_spec.input_tensor_names: - raise ValueError('Input tensor must be the same for all Signatures.') - for alias, tensor_type in io_tensor_spec.input_tensor_types.items(): - input_tensor_types[alias] = tensor_type - for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items(): - output_alias_tensor_names[alias] = tensor_name - if (not output_alias_tensor_names or not input_tensor_names or - not input_tensor_alias): - raise ValueError('No valid fetch tensors or feed tensors.') - return _IOTensorSpec(input_tensor_alias, input_tensor_names, - input_tensor_types, output_alias_tensor_names) - - def _has_tpu_tag(self) -> bool: - return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and - tf.saved_model.TPU in self._tags) - - def run_inference( - self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: - self._check_elements() - outputs = self._run_tf_operations(tensors) - return outputs - - def _run_tf_operations( - self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: - result = self._session.run( - self._io_tensor_spec.output_alias_tensor_names, feed_dict=tensors) - if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): - raise RuntimeError('Output length does not match fetches') - return result - - def _check_elements(self) -> None: - """Unimplemented.""" - - raise NotImplementedError - - -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(Tuple[Union[str, bytes], - classification_pb2.Classifications]) -class _BatchClassifyDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that run inference on classification model.""" - - def setup(self): - signature_def = self._signatures[0].signature_def - if signature_def.method_name != tf.saved_model.CLASSIFY_METHOD_NAME: - raise ValueError( - 'BulkInferrerClassifyDoFn requires signature method ' - 'name %s, got: %s' % tf.saved_model.CLASSIFY_METHOD_NAME, - signature_def.method_name) - super(_BatchClassifyDoFn, self).setup() - - def _check_elements(self) -> None: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Classify only supports tf.train.Example') - - def _post_process( - self, elements: Mapping[Any, Any], - outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[Union[str, bytes], classification_pb2.Classifications]]: - serialized_examples, = elements.values() - classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, - serialized_examples, outputs) - return zip(serialized_examples, classifications) - - -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(Tuple[Union[str, bytes], - regression_pb2.Regression]) -class _BatchRegressDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that run inference on regression model.""" - - def 
setup(self): - super(_BatchRegressDoFn, self).setup() - - def _check_elements(self) -> None: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Regress only supports tf.train.Example') - - def _post_process( - self, elements: Mapping[Any, Any], - outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[Union[str, bytes], regression_pb2.Regression]]: - serialized_examples, = elements.values() - regressions = _post_process_regress(serialized_examples, outputs) - return zip(serialized_examples, regressions) - - -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictLog) -class _BatchPredictDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that runs inference on predict model.""" - - def setup(self): - signature_def = self._signatures[0].signature_def - if signature_def.method_name != tf.saved_model.PREDICT_METHOD_NAME: - raise ValueError( - 'BulkInferrerPredictDoFn requires signature method ' - 'name %s, got: %s' % tf.saved_model.PREDICT_METHOD_NAME, - signature_def.method_name) - super(_BatchPredictDoFn, self).setup() - - def _check_elements(self) -> None: - pass - - def _post_process( - self, elements: Mapping[Any, Any], - outputs: Mapping[Text, np.ndarray] - ) -> Iterable[prediction_log_pb2.PredictLog]: - if not self._io_tensor_spec.input_tensor_types: - raise ValueError('No valid tensor types.') - input_tensor_names = self._io_tensor_spec.input_tensor_names - input_tensor_alias = self._io_tensor_spec.input_tensor_alias - input_tensor_types = self._io_tensor_spec.input_tensor_types - signature_name = self._signatures[0].name - - if len(input_tensor_alias) != len(input_tensor_names): - raise ValueError('Expected to have one name and one alias per tensor') - - include_request = True - if len(input_tensor_names) == 1: - serialized_examples, = elements.values() - batch_size = len(serialized_examples) - process_elements = serialized_examples - else: - # Only include request in the predictLog when the all tensors are dense - # is there a better way to check this? - for tensor_name, tensor in elements.items(): - if not isinstance(tensor, np.ndarray): - include_request = False - break - - if include_request: - batch_size = len(elements[input_tensor_names[0]]) - else: - batch_size = elements[input_tensor_names[0]].shape[0] - - for output_alias, output in outputs.items(): - if len(output.shape) < 1 or output.shape[0] != batch_size: - raise ValueError( - 'Expected output tensor %s to have at least one ' - 'dimension, with the first having a size equal to the input batch ' - 'size %s. Instead found %s' % - (output_alias, batch_size, output.shape)) - predict_log_tmpl = prediction_log_pb2.PredictLog() - predict_log_tmpl.request.model_spec.signature_name = signature_name - predict_log_tmpl.response.model_spec.signature_name = signature_name - for alias, tensor_type in input_tensor_types.items(): - input_tensor_proto = predict_log_tmpl.request.inputs[alias] - input_tensor_proto.dtype = tf.as_dtype(tensor_type).as_datatype_enum - # TODO (Maxine): fix dimension? 
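# Illustration only (hypothetical alias 'x'): how a request input can be
# attached to a PredictLog as a TensorProto, mirroring the template built
# above.
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis import prediction_log_pb2

predict_log = prediction_log_pb2.PredictLog()
values = np.array([0.5], dtype=np.float32)
predict_log.request.inputs['x'].CopyFrom(
    tf.make_tensor_proto(
        values=values,
        dtype=tf.as_dtype(values.dtype).as_datatype_enum,
        shape=values.shape))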
- input_tensor_proto.tensor_shape.dim.add().size = 1 - - result = [] - for i in range(batch_size): - predict_log = prediction_log_pb2.PredictLog() - predict_log.CopyFrom(predict_log_tmpl) - - if include_request: - if len(input_tensor_alias) == 1: - alias = input_tensor_alias[0] - predict_log.request.inputs[alias].string_val.append(process_elements[i]) - else: - for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): - predict_log.request.inputs[alias].float_val.append(elements[tensor_name][i]) - - for output_alias, output in outputs.items(): - # Mimic tensor::Split - tensor_proto = tf.make_tensor_proto( - values=output[i], - dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, - shape=np.expand_dims(output[i], axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) - return result - - -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(Tuple[Union[str, bytes], - inference_pb2.MultiInferenceResponse]) -class _BatchMultiInferenceDoFn(_BaseBatchSavedModelDoFn): - """A DoFn that runs inference on multi-head model.""" - - def _check_elements(self) -> None: - if self._data_type != DataType.EXAMPLE: - raise ValueError('Multi-inference only supports tf.train.Example') - - def _post_process( - self, elements: Mapping[Any, Any], - outputs: Mapping[Text, np.ndarray] - ) -> Iterable[Tuple[Union[str, bytes], inference_pb2.MultiInferenceResponse]]: - classifications = None - regressions = None - serialized_examples, = elements.values() - for signature in self._signatures: - signature_def = signature.signature_def - if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - classifications = _post_process_classify( - self._io_tensor_spec.output_alias_tensor_names, - serialized_examples, outputs) - elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME: - regressions = _post_process_regress(serialized_examples, outputs) - else: - raise ValueError('Signature method %s is not supported for ' - 'multi inference' % signature_def.method_name) - result = [] - for i in range(len(serialized_examples)): - response = inference_pb2.MultiInferenceResponse() - for signature in self._signatures: - signature_def = signature.signature_def - inference_result = response.results.add() - if (signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME and - classifications): - inference_result.classification_result.classifications.add().CopyFrom( - classifications[i]) - elif ( - signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME and - regressions): - inference_result.regression_result.regressions.add().CopyFrom( - regressions[i]) - else: - raise ValueError('Signature method %s is not supported for ' - 'multi inference' % signature_def.method_name) - inference_result.model_spec.signature_name = signature.name - if len(response.results) != len(self._signatures): - raise RuntimeError('Multi inference response result length does not ' - 'match the number of signatures') - result.append((serialized_examples[i], response)) - return result - - - -@beam.typehints.with_input_types(Tuple[Union[str, bytes], - classification_pb2.Classifications]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForClassificationsDoFn(beam.DoFn): - """A DoFn that builds prediction log from classifications.""" - - def process( - self, - element: Tuple[Union[str, bytes], classification_pb2.Classifications] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - 
(train_example, classifications) = element - result = prediction_log_pb2.PredictionLog() - result.classify_log.request.input.example_list.examples.add().CopyFrom( - tf.train.Example.FromString(train_example)) - result.classify_log.response.result.classifications.add().CopyFrom( - classifications) - yield result - - -@beam.typehints.with_input_types(Tuple[Union[str, bytes], - regression_pb2.Regression]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForRegressionsDoFn(beam.DoFn): - """A DoFn that builds prediction log from regressions.""" - - def process( - self, element: Tuple[Union[str, bytes], regression_pb2.Regression] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - (train_example, regression) = element - result = prediction_log_pb2.PredictionLog() - result.regress_log.request.input.example_list.examples.add().CopyFrom( - tf.train.Example.FromString(train_example)) - result.regress_log.response.result.regressions.add().CopyFrom(regression) - yield result - - -@beam.typehints.with_input_types(prediction_log_pb2.PredictLog) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildPredictionLogForPredictionsDoFn(beam.DoFn): - """A DoFn that builds prediction log from predictions.""" - - def process( - self, element: prediction_log_pb2.PredictLog - ) -> Iterable[prediction_log_pb2.PredictionLog]: - result = prediction_log_pb2.PredictionLog() - result.predict_log.CopyFrom(element) - yield result - - -@beam.typehints.with_input_types(Tuple[Union[str, bytes], - inference_pb2.MultiInferenceResponse]) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -class _BuildMultiInferenceLogDoFn(beam.DoFn): - """A DoFn that builds prediction log from multi-head inference result.""" - - def process( - self, element: Tuple[Union[str, bytes], - inference_pb2.MultiInferenceResponse] - ) -> Iterable[prediction_log_pb2.PredictionLog]: - (train_example, multi_inference_response) = element - result = prediction_log_pb2.PredictionLog() - (result.multi_inference_log.request.input.example_list.examples.add() - .CopyFrom(tf.train.Example.FromString(train_example))) - result.multi_inference_log.response.CopyFrom(multi_inference_response) - yield result - - - -def _post_process_classify( - output_alias_tensor_names: Mapping[Text, Text], - elements: Sequence[Union[str, bytes]], outputs: Mapping[Text, np.ndarray] - ) -> Sequence[classification_pb2.Classifications]: - """Returns classifications from inference output.""" - - # This is to avoid error "The truth value of an array with - # more than one element is ambiguous." - has_classes = False - has_scores = False - if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in output_alias_tensor_names: - classes = outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] - has_classes = True - if tf.saved_model.CLASSIFY_OUTPUT_SCORES in output_alias_tensor_names: - scores = outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES] - has_scores = True - if has_classes: - if classes.ndim != 2: - raise ValueError('Expected Tensor shape: [batch_size num_classes] but ' - 'got %s' % classes.shape) - if classes.dtype != tf.string.as_numpy_dtype: - raise ValueError('Expected classes Tensor of %s. 
Got: %s' % - (tf.string.as_numpy_dtype, classes.dtype)) - if classes.shape[0] != len(elements): - raise ValueError('Expected classes output batch size of %s, got %s' % - (len(elements), classes.shape[0])) - if has_scores: - if scores.ndim != 2: - raise ValueError("""Expected Tensor shape: [batch_size num_classes] but - got %s""" % scores.shape) - if scores.dtype != tf.float32.as_numpy_dtype: - raise ValueError('Expected classes Tensor of %s. Got: %s' % - (tf.float32.as_numpy_dtype, scores.dtype)) - if scores.shape[0] != len(elements): - raise ValueError('Expected classes output batch size of %s, got %s' % - (len(elements), scores.shape[0])) - num_classes = 0 - if has_classes and has_scores: - if scores.shape[1] != classes.shape[1]: - raise ValueError('Tensors class and score should match in shape[1]. ' - 'Got %s vs %s' % (classes.shape[1], scores.shape[1])) - num_classes = classes.shape[1] - elif has_classes: - num_classes = classes.shape[1] - elif has_scores: - num_classes = scores.shape[1] - - result = [] - for i in range(len(elements)): - a_classification = classification_pb2.Classifications() - for c in range(num_classes): - a_class = a_classification.classes.add() - if has_classes: - a_class.label = classes[i][c] - if has_scores: - a_class.score = scores[i][c] - result.append(a_classification) - if len(result) != len(elements): - raise RuntimeError('Classifications length does not match elements') - return result - - -def _post_process_regress( - elements: Sequence[Union[str, bytes]], - outputs: Mapping[Text, np.ndarray]) -> Sequence[regression_pb2.Regression]: - """Returns regressions from inference output.""" - - if tf.saved_model.REGRESS_OUTPUTS not in outputs: - raise ValueError('No regression outputs found in outputs: %s' % - outputs.keys()) - output = outputs[tf.saved_model.REGRESS_OUTPUTS] - batch_size = len(elements) - if not (output.ndim == 1 or (output.ndim == 2 and output.shape[1] == 1)): - raise ValueError("""Expected output Tensor shape to be either [batch_size] - or [batch_size, 1] but got %s""" % output.shape) - if batch_size != output.shape[0]: - raise ValueError( - 'Input batch size did not match output batch size: %s vs %s' % - (batch_size, output.shape[0])) - if output.dtype != tf.float32.as_numpy_dtype: - raise ValueError('Expected output Tensor of %s. Got: %s' % - (tf.float32.as_numpy_dtype, output.dtype)) - if output.size != batch_size: - raise ValueError('Expected output batch size to be %s. Got: %s' % - (batch_size, output.size)) - flatten_output = output.flatten() - result = [] - for regression_result in flatten_output: - regression = regression_pb2.Regression() - regression.value = regression_result - result.append(regression) - - # Add additional check to save downstream consumer checks. 
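# Toy walkthrough (illustration only) of the regression handling above: a
# [batch_size, 1] float32 output is flattened into one Regression per row.
import numpy as np
from tensorflow_serving.apis import regression_pb2

output = np.array([[0.6], [1.4]], dtype=np.float32)
regressions = []
for value in output.flatten():
  regression = regression_pb2.Regression()
  regression.value = float(value)
  regressions.append(regression)
assert len(regressions) == output.shape[0]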
- if len(result) != len(elements): - raise RuntimeError('Regression length does not match elements') - return result - - -def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec: - """Returns IOTensorSpec from signature.""" - if (len(signature.inputs) == 1 and - list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum): - raise ValueError( - 'With 1 input, dtype is expected to be %s, got %s' % - tf.string.as_datatype_enum, - list(signature.inputs.values())[0].dtype) - input_tensor_alias = [alias for alias in signature.inputs.keys()] - if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - input_tensor_names, input_tensor_types, output_alias_tensor_names = ( - _signature_pre_process_classify(signature)) - elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME: - input_tensor_names, input_tensor_types, output_alias_tensor_names = ( - _signature_pre_process_predict(signature)) - elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME: - input_tensor_names, input_tensor_types, output_alias_tensor_names = ( - _signature_pre_process_regress(signature)) - else: - raise ValueError('Signature method %s is not supported' % - signature.method_name) - return _IOTensorSpec(input_tensor_alias, input_tensor_names, - input_tensor_types, output_alias_tensor_names) - - -def _signature_pre_process_classify( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. - """ - if len(signature.inputs) != 1: - raise ValueError('Classify signature should have 1 and only 1 inputs') - if len(signature.outputs) != 1 and len(signature.outputs) != 2: - raise ValueError('Classify signature should have 1 or 2 outputs') - if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs: - raise ValueError('No classification inputs found in SignatureDef: %s' % - signature.inputs) - input_tensor_names = [signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name] - output_alias_tensor_names = {} - if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and - tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs): - raise ValueError( - """Expected classification signature outputs to contain at - least one of %s or %s. Signature was: %s""" % - tf.saved_model.CLASSIFY_OUTPUT_CLASSES, - tf.saved_model.CLASSIFY_OUTPUT_SCORES, signature) - if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in signature.outputs: - output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] = ( - signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES].name) - if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs: - output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = ( - signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name) - return input_tensor_names, {}, output_alias_tensor_names - - -def _signature_pre_process_predict( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. 
- """ - input_tensor_names = [value.name for value in signature.inputs.values()] - input_tensor_types = dict([ - (key, value.dtype) for key, value in signature.inputs.items() - ]) - output_alias_tensor_names = dict([ - (key, output.name) for key, output in signature.outputs.items() - ]) - return input_tensor_names, input_tensor_types, output_alias_tensor_names - - -def _signature_pre_process_regress( - signature: _SignatureDef) -> Tuple[Text, Mapping[Text, Text]]: - """Returns input tensor name and output alias tensor names from signature. - - Args: - signature: SignatureDef - - Returns: - A tuple of input tensor name and output alias tensor names. - """ - if len(signature.inputs) != 1: - raise ValueError('Regress signature should have 1 and only 1 inputs') - if len(signature.outputs) != 1: - raise ValueError('Regress signature should have 1 output') - if tf.saved_model.REGRESS_INPUTS not in signature.inputs: - raise ValueError('No regression inputs found in SignatureDef: %s' % - signature.inputs) - input_tensor_names = [signature.inputs[tf.saved_model.REGRESS_INPUTS].name] - if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs: - raise ValueError('No regression outputs found in SignatureDef: %s' % - signature.outputs) - output_alias_tensor_names = { - tf.saved_model.REGRESS_OUTPUTS: - signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name - } - return input_tensor_names, {}, output_alias_tensor_names - - -def _using_in_process_inference( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> bool: - return inference_spec_type.WhichOneof('type') == 'saved_model_spec' - - -def _get_signatures(model_path: Text, signatures: Sequence[Text], - tags: Sequence[Text]) -> Sequence[_Signature]: - """Returns a sequence of {model_signature_name: signature}.""" - - if signatures: - signature_names = signatures - else: - signature_names = [tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - - saved_model_pb = loader_impl.parse_saved_model(model_path) - meta_graph_def = _get_meta_graph_def(saved_model_pb, tags) - result = [] - for signature_name in signature_names: - if signature_name in meta_graph_def.signature_def: - result.append( - _Signature(signature_name, - meta_graph_def.signature_def[signature_name])) - else: - raise RuntimeError('Signature %s could not be found in SavedModel' % - signature_name) - return result - - -def _get_operation_type( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: - if _using_in_process_inference(inference_spec_type): - signatures = _get_signatures( - inference_spec_type.saved_model_spec.model_path, - inference_spec_type.saved_model_spec.signature_name, - _get_tags(inference_spec_type)) - if not signatures: - raise ValueError('Model does not have valid signature to use') - - if len(signatures) == 1: - method_name = signatures[0].signature_def.method_name - if method_name == tf.saved_model.CLASSIFY_METHOD_NAME: - return OperationType.CLASSIFICATION - elif method_name == tf.saved_model.REGRESS_METHOD_NAME: - return OperationType.REGRESSION - elif method_name == tf.saved_model.PREDICT_METHOD_NAME: - return OperationType.PREDICTION - else: - raise ValueError('Unsupported signature method_name %s' % method_name) - else: - for signature in signatures: - method_name = signature.signature_def.method_name - if (method_name != tf.saved_model.CLASSIFY_METHOD_NAME and - method_name != tf.saved_model.REGRESS_METHOD_NAME): - raise ValueError('Unsupported signature method_name for multi-head ' - 'model inference: %s' % method_name) - return 
OperationType.MULTIHEAD - else: - # Remote inference supports predictions only. - return OperationType.PREDICTION - - -def _get_meta_graph_def(saved_model_pb: _SavedModel, - tags: Sequence[Text]) -> _MetaGraphDef: - """Returns MetaGraphDef from SavedModel.""" - - for meta_graph_def in saved_model_pb.meta_graphs: - if set(meta_graph_def.meta_info_def.tags) == set(tags): - return meta_graph_def - raise RuntimeError('MetaGraphDef associated with tags %s could not be ' - 'found in SavedModel' % tags) - - -def _get_current_process_memory_in_bytes(): - """Returns memory usage in bytes.""" - - if resource is not None: - usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - if _is_darwin(): - return usage - return usage * 1024 - else: - logging.warning('Resource module is not available for current platform, ' - 'memory usage cannot be fetched.') - return 0 - - -def _get_tags( - inference_spec_type: model_spec_pb2.InferenceSpecType) -> Sequence[Text]: - """Returns tags from ModelSpec.""" - - if inference_spec_type.saved_model_spec.tag: - return list(inference_spec_type.saved_model_spec.tag) - else: - return [tf.saved_model.SERVING] - - -def _is_darwin() -> bool: - return sys.platform == 'darwin' - - -def _is_windows() -> bool: - return platform.system() == 'Windows' or os.name == 'nt' - - -def _is_cygwin() -> bool: - return platform.system().startswith('CYGWIN_NT') - - -class _Clock(object): - - def get_current_time_in_microseconds(self) -> int: - return int(time.time() * _SECOND_TO_MICROSECOND) - - -class _FineGrainedClock(_Clock): - - def get_current_time_in_microseconds(self) -> int: - return int( - time.clock_gettime_ns(time.CLOCK_REALTIME) / # pytype: disable=module-attr - _MICROSECOND_TO_NANOSECOND) - - -class _ClockFactory(object): - - @staticmethod - def make_clock() -> _Clock: - if (hasattr(time, 'clock_gettime_ns') and not _is_windows() - and not _is_cygwin()): - return _FineGrainedClock() - return _Clock() diff --git a/tfx_bsl/beam/run_inference_arrow_test.py b/tfx_bsl/beam/run_inference_arrow_test.py deleted file mode 100644 index 441060e0..00000000 --- a/tfx_bsl/beam/run_inference_arrow_test.py +++ /dev/null @@ -1,718 +0,0 @@ -# Copyright 2019 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
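# Sketch (illustration only) of the clock selection in _ClockFactory above:
# prefer time.clock_gettime_ns for finer resolution where the platform
# provides it (the factory also excludes Windows and Cygwin), otherwise fall
# back to time.time().
import time

_SECOND_TO_MICROSECOND = 1000000
_MICROSECOND_TO_NANOSECOND = 1000


def current_time_micros() -> int:
  if hasattr(time, 'clock_gettime_ns'):
    return int(time.clock_gettime_ns(time.CLOCK_REALTIME) //
               _MICROSECOND_TO_NANOSECOND)
  return int(time.time() * _SECOND_TO_MICROSECOND)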
-"""Tests for tfx_bsl.run_inference_arrow.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import json -import os -try: - import unittest.mock as mock -except ImportError: - import mock - -import apache_beam as beam -import pyarrow as pa -from apache_beam.metrics.metric import MetricsFilter -from apache_beam.testing.util import assert_that -from apache_beam.testing.util import equal_to -from googleapiclient import discovery -from googleapiclient import http -from six.moves import http_client -import tensorflow as tf -from tfx_bsl.beam import bsl_util -from tfx_bsl.beam import run_inference_arrow -from tfx_bsl.beam.bsl_constants import DataType -from tfx_bsl.public.proto import model_spec_pb2 -from tfx_bsl.tfxio import test_util -from tfx_bsl.tfxio import tensor_adapter - -from google.protobuf import text_format -from tensorflow_serving.apis import prediction_log_pb2 -from tensorflow_metadata.proto.v0 import schema_pb2 - - -class RunInferenceArrowFixture(tf.test.TestCase): - - def setUp(self): - super(RunInferenceArrowFixture, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - ] - - serialized_example = [] - for example in self._predict_examples: - serialized_example.append([example.SerializeToString()]) - self.record_batch = pa.RecordBatch.from_arrays( - [ - pa.array([[0]], type=pa.list_(pa.float32())), - serialized_example - ], - ['input1', '__RAW_RECORD__']) - - - def _get_output_data_dir(self, sub_dir=None): - test_dir = self._testMethodName - path = os.path.join( - os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), - test_dir) - if not tf.io.gfile.exists(path): - tf.io.gfile.makedirs(path) - if sub_dir is not None: - path = os.path.join(path, sub_dir) - return path - - -class RunOfflineInferenceArrowTest(RunInferenceArrowFixture): - - def setUp(self): - super(RunOfflineInferenceArrowTest, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, tf.train.Example()), - ] - - serialized_example = [] - for example in self._predict_examples: - serialized_example.append([example.SerializeToString()]) - self.record_batch = pa.RecordBatch.from_arrays( - [ - pa.array([[0], [1]], type=pa.list_(pa.float32())), - serialized_example - ], - ['input1', '__RAW_RECORD__'] - ) - - - self._multihead_examples = [ - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.8 }}} - feature {key: "y" value { float_list { value: 0.2 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.6 }}} - feature {key: "y" value { float_list { value: 0.1 }}} - } - """, tf.train.Example()), - ] - - serialized_example_multi = [] - for example in self._multihead_examples: - serialized_example_multi.append([example.SerializeToString()]) - self.record_batch_multihead = pa.RecordBatch.from_arrays( - [ - pa.array([[0.8], [0.6]], type=pa.list_(pa.float32())), - pa.array([[0.2], [0.1]], type=pa.list_(pa.float32())), - serialized_example_multi - ], - ['x', 'y', '__RAW_RECORD__'] - ) - - - self._multi_input_examples = [ - text_format.Parse( - """ - features { - 
feature {key: "x" value { float_list { value: 0.8 }}} - feature {key: "y" value { float_list { value: 0.2 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.6 }}} - feature {key: "y" value { float_list { value: 0.1 }}} - } - """, tf.train.Example()), - ] - - serialized_example_multi_input = [] - for example in self._multi_input_examples: - serialized_example_multi_input.append([example.SerializeToString()]) - self.record_batch_multi_input = pa.RecordBatch.from_arrays( - [ - pa.array([[0.8], [0.6]], type=pa.list_(pa.float32())), - pa.array([[0.2], [0.1]], type=pa.list_(pa.float32())), - serialized_example_multi_input - ], - ['x', 'y', '__RAW_RECORD__'] - ) - - tfxio = test_util.InMemoryTFExampleRecord( - schema = text_format.Parse( - """ - tensor_representation_group { - key: "" - value { - tensor_representation { - key: "x" - value { - dense_tensor { - column_name: "x" - shape { dim { size: 1 } } - } - } - } - tensor_representation { - key: "y" - value { - dense_tensor { - column_name: "y" - shape { dim { size: 1 } } - } - } - } - } - } - feature { - name: "x" - type: FLOAT - } - feature { - name: "y" - type: FLOAT - } - """, schema_pb2.Schema()), - raw_record_column_name='__RAW_RECORD__') - self.tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) - - - def _build_predict_model(self, model_path): - """Exports the dummy sum predict model.""" - - with tf.compat.v1.Graph().as_default(): - input_tensors = { - 'x': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0) - } - serving_receiver = ( - tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( - input_tensors)()) - output_tensors = {'y': serving_receiver.features['x'] * 2} - sess = tf.compat.v1.Session() - sess.run(tf.compat.v1.initializers.global_variables()) - signature_def = tf.compat.v1.estimator.export.PredictOutput( - output_tensors).as_signature_def(serving_receiver.receiver_tensors) - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) - builder.add_meta_graph_and_variables( - sess, [tf.compat.v1.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.compat.v1.saved_model.signature_constants - .DEFAULT_SERVING_SIGNATURE_DEF_KEY: - signature_def, - }) - builder.save() - - def _build_regression_signature(self, input_tensor, output_tensor): - """Helper function for building a regression SignatureDef.""" - input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - input_tensor) - signature_inputs = { - tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: - input_tensor_info - } - output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - output_tensor) - signature_outputs = { - tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: - output_tensor_info - } - return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( - signature_inputs, signature_outputs, - tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) - - def _build_classification_signature(self, input_tensor, scores_tensor): - """Helper function for building a classification SignatureDef.""" - input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - input_tensor) - signature_inputs = { - tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: - input_tensor_info - } - output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( - 
scores_tensor) - signature_outputs = { - tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: - output_tensor_info - } - return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( - signature_inputs, signature_outputs, - tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) - - def _build_multihead_model(self, model_path): - with tf.compat.v1.Graph().as_default(): - input_example = tf.compat.v1.placeholder( - tf.string, name='input_examples_tensor') - config = { - 'x': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0), - 'y': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, default_value=0), - } - features = tf.compat.v1.parse_example(input_example, config) - x = features['x'] - y = features['y'] - sum_pred = x + y - diff_pred = tf.abs(x - y) - sess = tf.compat.v1.Session() - sess.run(tf.compat.v1.initializers.global_variables()) - signature_def_map = { - 'regress_diff': - self._build_regression_signature(input_example, diff_pred), - 'classify_sum': - self._build_classification_signature(input_example, sum_pred), - tf.compat.v1.saved_model.signature_constants - .DEFAULT_SERVING_SIGNATURE_DEF_KEY: - self._build_regression_signature(input_example, sum_pred) - } - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) - builder.add_meta_graph_and_variables( - sess, [tf.compat.v1.saved_model.tag_constants.SERVING], - signature_def_map=signature_def_map) - builder.save() - - def _run_inference_with_beam(self, example_type, inference_spec_type, - prediction_log_path, include_config = False): - if include_config: - with beam.Pipeline() as pipeline: - _ = ( - pipeline - | "createRecordBatch" >> beam.Create([self.record_batch_multi_input]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE, self.tensor_adapter_config) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - elif example_type == 'multi': - with beam.Pipeline() as pipeline: - _ = ( - pipeline - | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - else: - with beam.Pipeline() as pipeline: - _ = ( - pipeline - | "createRecordBatch" >> beam.Create([self.record_batch]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - - def _get_results(self, prediction_log_path): - results = [] - for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): - record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) - for record_string in record_iterator: - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.MergeFromString(record_string) - results.append(prediction_log) - return results - - def testModelPathInvalid(self): - prediction_log_path = self._get_output_data_dir('predictions') - with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): - self._run_inference_with_beam( - 'predict', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=self._get_output_data_dir())), prediction_log_path) - 
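# Illustration only (hypothetical model path): the in-process spec these tests
# build; remote inference instead sets ai_platform_prediction_model_spec.
from tfx_bsl.public.proto import model_spec_pb2

spec = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/tmp/saved_model',
        signature_name=['classify_sum']))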
- def testEstimatorModelPredict(self): - model_path = self._get_output_data_dir('model') - self._build_predict_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'predict', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - self.assertEqual( - results[0].predict_log.request.inputs[ - run_inference_arrow._DEFAULT_INPUT_KEY].string_val[0], - self._predict_examples[0].SerializeToString()) - self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, - tf.float32) - self.assertLen( - results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) - self.assertEqual( - results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, - 1) - self.assertEqual( - results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, - 1) - - def testClassifyModel(self): - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'multi', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['classify_sum'])), - prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - classify_log = results[0].classify_log - self.assertLen(classify_log.request.input.example_list.examples, 1) - self.assertEqual(classify_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - self.assertLen(classify_log.response.result.classifications, 1) - self.assertLen(classify_log.response.result.classifications[0].classes, 1) - self.assertAlmostEqual( - classify_log.response.result.classifications[0].classes[0].score, 1.0) - - def testRegressModel(self): - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'multi', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['regress_diff'])), - prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - regress_log = results[0].regress_log - self.assertLen(regress_log.request.input.example_list.examples, 1) - self.assertEqual(regress_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - self.assertLen(regress_log.response.result.regressions, 1) - self.assertAlmostEqual(regress_log.response.result.regressions[0].value, - 0.6) - - def testMultiInferenceModel(self): - model_path = self._get_output_data_dir('model') - self._build_multihead_model(model_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'multi', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, - signature_name=['regress_diff', 'classify_sum'])), - prediction_log_path) - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - multi_inference_log = results[0].multi_inference_log - self.assertLen(multi_inference_log.request.input.example_list.examples, 1) - self.assertEqual(multi_inference_log.request.input.example_list.examples[0], - self._multihead_examples[0]) - 
self.assertLen(multi_inference_log.response.results, 2) - signature_names = [] - for result in multi_inference_log.response.results: - signature_names.append(result.model_spec.signature_name) - self.assertIn('regress_diff', signature_names) - self.assertIn('classify_sum', signature_names) - result = multi_inference_log.response.results[0] - self.assertEqual(result.model_spec.signature_name, 'regress_diff') - self.assertLen(result.regression_result.regressions, 1) - self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) - result = multi_inference_log.response.results[1] - self.assertEqual(result.model_spec.signature_name, 'classify_sum') - self.assertLen(result.classification_result.classifications, 1) - self.assertLen(result.classification_result.classifications[0].classes, 1) - self.assertAlmostEqual( - result.classification_result.classifications[0].classes[0].score, 1.0) - - def testKerasModelPredict(self): - inputs = tf.keras.Input(shape=(1,), name='input1') - output1 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output1')( - inputs) - output2 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output2')( - inputs) - inference_model = tf.keras.models.Model(inputs, [output1, output2]) - - class TestKerasModel(tf.keras.Model): - def __init__(self, inference_model): - super(TestKerasModel, self).__init__(name='test_keras_model') - self.inference_model = inference_model - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') - ]) - def call(self, serialized_example): - features = { - 'input1': tf.compat.v1.io.FixedLenFeature( - [1], dtype=tf.float32, - default_value=0) - } - input_tensor_dict = tf.io.parse_example(serialized_example, features) - return inference_model(input_tensor_dict['input1']) - - model = TestKerasModel(inference_model) - model.compile( - optimizer=tf.keras.optimizers.Adam(lr=.001), - loss=tf.keras.losses.binary_crossentropy, - metrics=['accuracy']) - - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'predict', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - - def testKerasModelPredictMultiTensor(self): - input1 = tf.keras.layers.Input((1,), name='x') - input2 = tf.keras.layers.Input((1,), name='y') - - x1 = tf.keras.layers.Dense(10)(input1) - x2 = tf.keras.layers.Dense(10)(input2) - output = tf.keras.layers.Dense(5, name='output')(x2) - - model = tf.keras.models.Model([input1, input2], output) - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - 'multi', - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), - prediction_log_path, include_config = True) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - for result in results: - self.assertLen(result.predict_log.request.inputs, 2) - self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) - - def testTelemetry(self): - model_path = self._get_output_data_dir('model') - 
self._build_multihead_model(model_path) - inference_spec_type = model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path, signature_name=['classify_sum'])) - pipeline = beam.Pipeline() - _ = ( - pipeline - | "createRecordBatch" >> beam.Create([self.record_batch_multihead]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE)) - run_result = pipeline.run() - run_result.wait_until_finish() - - num_inferences = run_result.metrics().query( - MetricsFilter().with_name('num_inferences')) - self.assertTrue(num_inferences['counters']) - self.assertEqual(num_inferences['counters'][0].result, 2) - num_instances = run_result.metrics().query( - MetricsFilter().with_name('num_instances')) - self.assertTrue(num_instances['counters']) - self.assertEqual(num_instances['counters'][0].result, 2) - inference_request_batch_size = run_result.metrics().query( - MetricsFilter().with_name('inference_request_batch_size')) - self.assertTrue(inference_request_batch_size['distributions']) - self.assertEqual( - inference_request_batch_size['distributions'][0].result.sum, 2) - inference_request_batch_byte_size = run_result.metrics().query( - MetricsFilter().with_name('inference_request_batch_byte_size')) - self.assertTrue(inference_request_batch_byte_size['distributions']) - self.assertEqual( - inference_request_batch_byte_size['distributions'][0].result.sum, - sum(element.ByteSize() for element in self._multihead_examples)) - inference_batch_latency_micro_secs = run_result.metrics().query( - MetricsFilter().with_name('inference_batch_latency_micro_secs')) - self.assertTrue(inference_batch_latency_micro_secs['distributions']) - self.assertGreaterEqual( - inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) - load_model_latency_milli_secs = run_result.metrics().query( - MetricsFilter().with_name('load_model_latency_milli_secs')) - self.assertTrue(load_model_latency_milli_secs['distributions']) - self.assertGreaterEqual( - load_model_latency_milli_secs['distributions'][0].result.sum, 0) - - -class RunRemoteInferenceArrowTest(RunInferenceArrowFixture): - - def setUp(self): - super(RunRemoteInferenceArrowTest, self).setUp() - # This is from https://ml.googleapis.com/$discovery/rest?version=v1. - self._discovery_testdata_dir = os.path.join( - os.path.join(os.path.dirname(__file__), 'testdata'), - 'ml_discovery.json') - - @staticmethod - def _make_response_body(content, successful): - if successful: - response_dict = {'predictions': content} - else: - response_dict = {'error': content} - return json.dumps(response_dict) - - def _set_up_pipeline(self, inference_spec_type): - self.pipeline = beam.Pipeline() - self.pcoll = ( - self.pipeline - | "createRecordBatch" >> beam.Create([self.record_batch]) - | 'RunInference' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE)) - - def _run_inference_with_beam(self): - self.pipeline_result = self.pipeline.run() - self.pipeline_result.wait_until_finish() - - def test_model_predict(self): - predictions = [{'output_1': [0.901], 'output_2': [0.997]}] - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(predictions, successful=True)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.predict_log.response.outputs['output_1'].CopyFrom( - tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) - prediction_log.predict_log.response.outputs['output_2'].CopyFrom( - tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) - - self._set_up_pipeline(inference_spec_type) - assert_that(self.pcoll, equal_to([prediction_log])) - self._run_inference_with_beam() - - def test_exception_raised_when_response_body_contains_error_entry(self): - error_msg = 'Base64 decode failed.' - builder = http.RequestMockBuilder({ - 'ml.projects.predict': - (None, self._make_response_body(error_msg, successful=False)) - }) - resource = discovery.build( - 'ml', - 'v1', - http=http.HttpMock(self._discovery_testdata_dir, - {'status': http_client.OK}), - requestBuilder=builder) - with mock.patch('googleapiclient.discovery.' 'build') as response_mock: - response_mock.side_effect = lambda service, version: resource - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test-project', - model_name='test-model', - )) - - try: - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - except ValueError as exc: - actual_error_msg = str(exc) - self.assertTrue(actual_error_msg.startswith(error_msg)) - else: - self.fail('Test was expected to throw ValueError exception') - - def test_exception_raised_when_project_id_is_empty(self): - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec(model_name='test-model',)) - - with self.assertRaises(ValueError): - self._set_up_pipeline(inference_spec_type) - self._run_inference_with_beam() - - def test_request_body_with_binary_data(self): - example = text_format.Parse( - """ - features { - feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} - feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} - feature { key: "y" value { int64_list { value: [1, 2] }}} - } - """, tf.train.Example()) - - serialized_example_remote = [example.SerializeToString()] - record_batch_remote = pa.RecordBatch.from_arrays( - [ - pa.array(["ASa8asdf"], type=pa.binary()), - pa.array(["JLK7ljk3"], type=pa.utf8()), - pa.array([[1, 2]], type=pa.list_(pa.float32())), - ], - ['x_bytes', 'x', 'y'] - ) - - result = list(bsl_util.RecordToJSON(record_batch_remote)) - self.assertEqual([ - { - 'x_bytes': { - 'b64': 'QVNhOGFzZGY=' - }, - 'x': 'JLK7ljk3', - 'y': [1, 2] - }, - ], result) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tfx_bsl/beam/run_inference_record_batch.py b/tfx_bsl/beam/run_inference_record_batch.py new file mode 100644 index 00000000..ca4543ff --- /dev/null +++ b/tfx_bsl/beam/run_inference_record_batch.py @@ -0,0 +1,57 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Lint as: python3 +"""Private API of inference.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import apache_beam as beam +import tensorflow as tf +import pyarrow as pa +from typing import Text, Optional +from tfx_bsl.beam import run_inference +from tfx_bsl.public.proto import model_spec_pb2 +from tensorflow_serving.apis import prediction_log_pb2 + +@beam.ptransform_fn +@beam.typehints.with_input_types(pa.RecordBatch) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceOnRecordBatch( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType, data_type: Text + tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None +) -> beam.pvalue.PCollection: + """Run inference with a model. + + There are two types of inference you can perform using this PTransform: + 1. In-process inference from a SavedModel instance. Used when + `saved_model_spec` field is set in `inference_spec_type`. + 2. Remote inference by using a service endpoint. Used when + `ai_platform_prediction_model_spec` field is set in + `inference_spec_type`. + + Args: + examples: A PCollection containing RecordBatch. + inference_spec_type: Model inference endpoint. + + Returns: + A PCollection containing prediction logs. 
+ """ + + return ( + examples | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( + inference_spec_type, data_type, tensor_adapter_config)) diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 8601dc30..d3580788 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -26,6 +26,7 @@ import mock import apache_beam as beam +import pyarrow as pa from apache_beam.metrics.metric import MetricsFilter from apache_beam.testing.util import assert_that from apache_beam.testing.util import equal_to @@ -33,17 +34,24 @@ from googleapiclient import http from six.moves import http_client import tensorflow as tf +from tfx_bsl.beam import bsl_util from tfx_bsl.beam import run_inference +from tfx_bsl.beam.bsl_constants import DataType +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN from tfx_bsl.public.proto import model_spec_pb2 +from tfx_bsl.tfxio import test_util +from tfx_bsl.tfxio import tensor_adapter +from tfx_bsl.tfxio import tf_example_record from google.protobuf import text_format from tensorflow_serving.apis import prediction_log_pb2 +from tensorflow_metadata.proto.v0 import schema_pb2 -class RunInferenceFixture(tf.test.TestCase): +class RunInferenceArrowFixture(tf.test.TestCase): def setUp(self): - super(RunInferenceFixture, self).setUp() + super(RunInferenceArrowFixture, self).setUp() self._predict_examples = [ text_format.Parse( """ @@ -70,10 +78,10 @@ def _prepare_predict_examples(self, example_path): output_file.write(example.SerializeToString()) -class RunOfflineInferenceTest(RunInferenceFixture): +class RunOfflineInferenceArrowTest(RunInferenceArrowFixture): def setUp(self): - super(RunOfflineInferenceTest, self).setUp() + super(RunOfflineInferenceArrowTest, self).setUp() self._predict_examples = [ text_format.Parse( """ @@ -88,6 +96,7 @@ def setUp(self): } """, tf.train.Example()), ] + self._multihead_examples = [ text_format.Parse( """ @@ -105,12 +114,47 @@ def setUp(self): """, tf.train.Example()), ] + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) def _prepare_multihead_examples(self, example_path): with tf.io.TFRecordWriter(example_path) as output_file: for example in self._multihead_examples: output_file.write(example.SerializeToString()) + def _build_predict_model(self, model_path): """Exports the dummy sum predict model.""" @@ -206,14 +250,37 @@ def _build_multihead_model(self, model_path): builder.save() def _run_inference_with_beam(self, example_path, inference_spec_type, - prediction_log_path): - with beam.Pipeline() as pipeline: - _ = ( + prediction_log_path, include_config = False): + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) + + if include_config: + tfxio = test_util.InMemoryTFExampleRecord( + schema=self.schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + with beam.Pipeline() as pipeline: + _ = ( pipeline | 'ReadExamples' 
>> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | - 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceImpl( + inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + else: + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceImpl( + inference_spec_type, DataType.EXAMPLE) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -362,7 +429,6 @@ def testKerasModelPredict(self): inference_model = tf.keras.models.Model(inputs, [output1, output2]) class TestKerasModel(tf.keras.Model): - def __init__(self, inference_model): super(TestKerasModel, self).__init__(name='test_keras_model') self.inference_model = inference_model @@ -372,10 +438,9 @@ def __init__(self, inference_model): ]) def call(self, serialized_example): features = { - 'input1': - tf.compat.v1.io.FixedLenFeature([1], - dtype=tf.float32, - default_value=0) + 'input1': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, + default_value=0) } input_tensor_dict = tf.io.parse_example(serialized_example, features) return inference_model(input_tensor_dict['input1']) @@ -386,12 +451,12 @@ def call(self, serialized_example): loss=tf.keras.losses.binary_crossentropy, metrics=['accuracy']) + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) model_path = self._get_output_data_dir('model') tf.compat.v1.keras.experimental.export_saved_model( model, model_path, serving_only=True) - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) prediction_log_path = self._get_output_data_dir('predictions') self._run_inference_with_beam( example_path, @@ -402,6 +467,66 @@ def call(self, serialized_example): results = self._get_results(prediction_log_path) self.assertLen(results, 2) + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_config = True) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + def testMultiTensorError(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), 
name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + + error_msg = 'Tensor adaptor config is required with a multi-input model' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_config = False) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + def testTelemetry(self): example_path = self._get_output_data_dir('examples') self._prepare_multihead_examples(example_path) @@ -410,11 +535,18 @@ def testTelemetry(self): inference_spec_type = model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path, signature_name=['classify_sum'])) + pipeline = beam.Pipeline() + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) _ = ( - pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceImpl( + inference_spec_type, DataType.EXAMPLE)) run_result = pipeline.run() run_result.wait_until_finish() @@ -449,13 +581,13 @@ def testTelemetry(self): load_model_latency_milli_secs['distributions'][0].result.sum, 0) -class RunRemoteInferenceTest(RunInferenceFixture): +class RunRemoteInferenceArrowTest(RunInferenceArrowFixture): def setUp(self): - super(RunRemoteInferenceTest, self).setUp() + super(RunRemoteInferenceArrowTest, self).setUp() + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. self.example_path = self._get_output_data_dir('example') self._prepare_predict_examples(self.example_path) - # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
self._discovery_testdata_dir = os.path.join( os.path.join(os.path.dirname(__file__), 'testdata'), 'ml_discovery.json') @@ -470,11 +602,16 @@ def _make_response_body(content, successful): def _set_up_pipeline(self, inference_spec_type): self.pipeline = beam.Pipeline() + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) self.pcoll = ( self.pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceImpl( + inference_spec_type, DataType.EXAMPLE)) def _run_inference_with_beam(self): self.pipeline_result = self.pipeline.run() @@ -582,18 +719,25 @@ def test_can_format_requests(self): } """, tf.train.Example()) + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) + self.pipeline = beam.Pipeline() self.pcoll = ( self.pipeline - | 'ReadExamples' >> beam.Create([example]) - | - 'RunInference' >> run_inference.RunInferenceImpl(inference_spec_type)) + | 'CreateExamples' >> beam.Create([example]) + | 'ParseExamples' >> beam.Map(lambda x: x.SerializeToString()) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceImpl( + inference_spec_type, DataType.EXAMPLE)) self._run_inference_with_beam() def test_request_body_with_binary_data(self): example = text_format.Parse( - """ + """ features { feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} @@ -601,8 +745,19 @@ def test_request_body_with_binary_data(self): feature { key: "z" value { float_list { value: [4.5, 5, 5.5] }}} } """, tf.train.Example()) - result = list( - run_inference._RemotePredictDoFn._prepare_instances([example])) + + serialized_example_remote = [example.SerializeToString()] + record_batch_remote = pa.RecordBatch.from_arrays( + [ + pa.array([["ASa8asdf"]], type=pa.list_(pa.binary())), + pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), + pa.array([[1, 2]], type=pa.list_(pa.int32())), + pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())) + ], + ['x_bytes', 'x', 'y', 'z'] + ) + + result = list(bsl_util.RecordToJSON(record_batch_remote)) self.assertEqual([ { 'x_bytes': { diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 788235e0..58633f95 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# Lint as: python3 -"""Publich API of batch inference.""" +"""Public API of batch inference.""" from __future__ import absolute_import from __future__ import division @@ -28,7 +28,6 @@ from tfx_bsl.tfxio import tf_example_record from tfx_bsl.tfxio import tf_sequence_example_record from tfx_bsl.beam import run_inference -from tfx_bsl.beam import run_inference_arrow from tfx_bsl.public.proto import model_spec_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_metadata.proto.v0 import schema_pb2 @@ -71,6 +70,7 @@ def RunInference( # pylint: disable=invalid-name schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = None if schema: tfxio = test_util.InMemoryTFExampleRecord( schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) @@ -79,9 +79,9 @@ def RunInference( # pylint: disable=invalid-name tensor_representations=tfxio.TensorRepresentations()) return (examples - | 'ParseExamples' >> beam.Map(tf.train.Example.SerializeToString) + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( inference_spec_type, data_type, tensor_adapter_config=tensor_adapter_config)) @@ -120,6 +120,7 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = None if schema: tfxio = test_util.InMemoryTFExampleRecord( schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) @@ -128,38 +129,8 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name tensor_representations=tfxio.TensorRepresentations()) return (examples - | 'ParseExamples' >> beam.Map(tf.train.Example.SerializeToString) + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( + | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( inference_spec_type, data_type, tensor_adapter_config=tensor_adapter_config)) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceOnRecordBatch( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None -) -> beam.pvalue.PCollection: - """Run inference with a model. - - There are two types of inference you can perform using this PTransform: - 1. In-process inference from a SavedModel instance. Used when - `saved_model_spec` field is set in `inference_spec_type`. - 2. Remote inference by using a service endpoint. Used when - `ai_platform_prediction_model_spec` field is set in - `inference_spec_type`. - - Args: - examples: A PCollection containing RecordBatch. - inference_spec_type: Model inference endpoint. - - Returns: - A PCollection containing prediction logs. 
- """ - - return ( - examples | 'RunInferenceImpl' >> run_inference_arrow.RunInferenceImpl( - inference_spec_type, tensor_adapter_config)) From f172fe379fb9ff7d6b3e9034d5842b347cb21373 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Fri, 24 Jul 2020 18:27:39 -0400 Subject: [PATCH 22/31] move private APIs and test them --- tfx_bsl/beam/run_inference.py | 115 +++++- tfx_bsl/beam/run_inference_record_batch.py | 57 --- tfx_bsl/beam/run_inference_test.py | 454 ++++++++++++++++++++- tfx_bsl/public/beam/run_inference.py | 59 +-- 4 files changed, 565 insertions(+), 120 deletions(-) delete mode 100644 tfx_bsl/beam/run_inference_record_batch.py diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index 320ac1da..075b0d07 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Run batch inference on saved model.""" +"""Run batch inference on saved model and private APIs of inference.""" from __future__ import absolute_import from __future__ import division @@ -46,7 +46,10 @@ from tfx_bsl.beam import bsl_util from tfx_bsl.public.proto import model_spec_pb2 from tfx_bsl.telemetry import util +from tfx_bsl.tfxio import test_util from tfx_bsl.tfxio import tensor_adapter +from tfx_bsl.tfxio import tf_example_record +from tfx_bsl.tfxio import tf_sequence_example_record from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ Tuple, Union, Optional @@ -59,6 +62,7 @@ from tensorflow_serving.apis import inference_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_serving.apis import regression_pb2 +from tensorflow_metadata.proto.v0 import schema_pb2 # TODO(b/131873699): Remove once 1.x support is dropped. # pylint: disable=g-import-not-at-top @@ -93,10 +97,115 @@ class OperationType(object): MULTIHEAD = 'MULTIHEAD' +@beam.ptransform_fn +@beam.typehints.with_input_types(tf.train.Example) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceOnExamples( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType, + schema: Optional[schema_pb2.Schema] = None +) -> beam.pvalue.PCollection: + """Run inference with a model. + + There are two types of inference you can perform using this PTransform: + 1. In-process inference from a SavedModel instance. Used when + `saved_model_spec` field is set in `inference_spec_type`. + 2. Remote inference by using a service endpoint. Used when + `ai_platform_prediction_model_spec` field is set in + `inference_spec_type`. + + TODO(b/131873699): Add support for the following features: + 1. Bytes as Input. + 2. PTable Input. + 3. Models as SideInput. + + Args: + examples: A PCollection containing examples. + inference_spec_type: Model inference endpoint. + Schema [optional]: required for models that requires + multi-tensor inputs. + + Returns: + A PCollection containing prediction logs. 
+ """ + + data_type = DataType.EXAMPLE + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + + tensor_adapter_config = None + if schema: + tfxio = test_util.InMemoryTFExampleRecord( + schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + return (examples + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> RunInferenceOnRecordBatch( + inference_spec_type, data_type, + tensor_adapter_config=tensor_adapter_config)) + + +@beam.ptransform_fn +@beam.typehints.with_input_types(tf.train.SequenceExample) +@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) +def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name + examples: beam.pvalue.PCollection, + inference_spec_type: model_spec_pb2.InferenceSpecType, + schema: Optional[schema_pb2.Schema] = None +) -> beam.pvalue.PCollection: + """Run inference with a model. + + There are two types of inference you can perform using this PTransform: + 1. In-process inference from a SavedModel instance. Used when + `saved_model_spec` field is set in `inference_spec_type`. + 2. Remote inference by using a service endpoint. Used when + `ai_platform_prediction_model_spec` field is set in + `inference_spec_type`. + + Args: + examples: A PCollection containing sequence examples. + inference_spec_type: Model inference endpoint. + Schema [optional]: required for models that requires + multi-tensor inputs. + + Returns: + A PCollection containing prediction logs. + """ + + data_type = DataType.SEQUENCEEXAMPLE + converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + + tensor_adapter_config = None + if schema: + tfxio = test_util.InMemoryTFExampleRecord( + schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + return (examples + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> RunInferenceOnRecordBatch( + inference_spec_type, data_type, + tensor_adapter_config=tensor_adapter_config)) + + @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceImpl( # pylint: disable=invalid-name +def RunInferenceOnRecordBatch( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, data_type: Text, tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None @@ -104,7 +213,7 @@ def RunInferenceImpl( # pylint: disable=invalid-name """Implementation of RunInference API. Args: - examples: A PCollection containing RecordBatch of serialized examples. + examples: A PCollection containing RecordBatch of serialized examples and features. inference_spec_type: Model inference endpoint. tensor_adapter_config [Optional]: Tensor adapter config which specifies how to obtain tensors from the Arrow RecordBatch. 
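
For reference, a minimal sketch of how the relocated private API might be wired into a pipeline, following the test code later in this patch series. This is not part of the patch itself: the model path and example path are placeholders, `schema` stands in for a schema_pb2.Schema like the ones built in the tests, and the imports assume the bsl_constants and tfxio modules shown in the tests are available as named there. The tensor adapter config is only needed for multi-tensor models.

import apache_beam as beam

from tfx_bsl.beam import run_inference
from tfx_bsl.beam.bsl_constants import DataType
from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN
from tfx_bsl.public.proto import model_spec_pb2
from tfx_bsl.tfxio import tensor_adapter
from tfx_bsl.tfxio import test_util
from tfx_bsl.tfxio import tf_example_record

inference_spec_type = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/path/to/model'))  # placeholder path

# Converter that turns serialized tf.Examples into Arrow RecordBatches,
# keeping the raw record bytes in a dedicated column.
converter = tf_example_record.TFExampleBeamRecord(
    physical_format='inmem',
    telemetry_descriptors=[],
    raw_record_column_name=_RECORDBATCH_COLUMN)

# Only required for multi-tensor models: describes how to obtain dense
# tensors from RecordBatch columns. `schema` is a placeholder
# schema_pb2.Schema, e.g. the x/y schema built in the tests above.
tfxio = test_util.InMemoryTFExampleRecord(
    schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN)
tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
    arrow_schema=tfxio.ArrowSchema(),
    tensor_representations=tfxio.TensorRepresentations())

with beam.Pipeline() as pipeline:
  _ = (
      pipeline
      | 'ReadExamples' >> beam.io.ReadFromTFRecord('/path/to/examples')
      | 'ConvertToRecordBatch' >> converter.BeamSource()
      | 'RunInference' >> run_inference.RunInferenceOnRecordBatch(
          inference_spec_type, DataType.EXAMPLE,
          tensor_adapter_config=tensor_adapter_config))

The public RunInference wrapper (see the changes to tfx_bsl/public/beam/run_inference.py below) performs the same serialize-and-convert steps internally, so callers who start from tf.train.Example PCollections only pass the inference spec and an optional schema.
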
diff --git a/tfx_bsl/beam/run_inference_record_batch.py b/tfx_bsl/beam/run_inference_record_batch.py deleted file mode 100644 index ca4543ff..00000000 --- a/tfx_bsl/beam/run_inference_record_batch.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2019 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Lint as: python3 -"""Private API of inference.""" - -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import apache_beam as beam -import tensorflow as tf -import pyarrow as pa -from typing import Text, Optional -from tfx_bsl.beam import run_inference -from tfx_bsl.public.proto import model_spec_pb2 -from tensorflow_serving.apis import prediction_log_pb2 - -@beam.ptransform_fn -@beam.typehints.with_input_types(pa.RecordBatch) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceOnRecordBatch( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType, data_type: Text - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None -) -> beam.pvalue.PCollection: - """Run inference with a model. - - There are two types of inference you can perform using this PTransform: - 1. In-process inference from a SavedModel instance. Used when - `saved_model_spec` field is set in `inference_spec_type`. - 2. Remote inference by using a service endpoint. Used when - `ai_platform_prediction_model_spec` field is set in - `inference_spec_type`. - - Args: - examples: A PCollection containing RecordBatch. - inference_spec_type: Model inference endpoint. - - Returns: - A PCollection containing prediction logs. 
- """ - - return ( - examples | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( - inference_spec_type, data_type, tensor_adapter_config)) diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index d3580788..94e218de 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -48,10 +48,10 @@ from tensorflow_metadata.proto.v0 import schema_pb2 -class RunInferenceArrowFixture(tf.test.TestCase): +class RunInferenceFixture(tf.test.TestCase): def setUp(self): - super(RunInferenceArrowFixture, self).setUp() + super(RunInferenceFixture, self).setUp() self._predict_examples = [ text_format.Parse( """ @@ -78,7 +78,438 @@ def _prepare_predict_examples(self, example_path): output_file.write(example.SerializeToString()) -class RunOfflineInferenceArrowTest(RunInferenceArrowFixture): +class RunOfflineInferenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceExamplesTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path, include_schema = False): + schema = None + if include_schema: + schema = self.schema + + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceOnExamples( + inference_spec_type, schema=schema) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = 
tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_schema = True) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + +class RunRemoteInferenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunRemoteInferenceExamplesTest, self).setUp() + self.example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self.example_path) + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
+ self._discovery_testdata_dir = os.path.join( + os.path.join(os.path.dirname(__file__), 'testdata'), + 'ml_discovery.json') + + @staticmethod + def _make_response_body(content, successful): + if successful: + response_dict = {'predictions': content} + else: + response_dict = {'error': content} + return json.dumps(response_dict) + + def _set_up_pipeline(self, inference_spec_type): + self.pipeline = beam.Pipeline() + self.pcoll = ( + self.pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceOnExamples(inference_spec_type)) + + def _run_inference_with_beam(self): + self.pipeline_result = self.pipeline.run() + self.pipeline_result.wait_until_finish() + + def test_model_predict(self): + predictions = [{'output_1': [0.901], 'output_2': [0.997]}] + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(predictions, successful=True)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.predict_log.response.outputs['output_1'].CopyFrom( + tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) + prediction_log.predict_log.response.outputs['output_2'].CopyFrom( + tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + + self._set_up_pipeline(inference_spec_type) + assert_that(self.pcoll, equal_to([prediction_log])) + self._run_inference_with_beam() + + +class RunOfflineInferenceSequenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceSequenceExamplesTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _run_inference_with_beam(self, 
example_path, inference_spec_type, + prediction_log_path, include_schema = False): + schema = None + if include_schema: + schema = self.schema + + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) + | 'RunInference' >> run_inference.RunInferenceOnSequenceExamples( + inference_spec_type, schema=schema) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_schema = True) + + results = self._get_results(prediction_log_path) 
+ self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + +class RunOfflineInferenceArrowTest(RunInferenceFixture): def setUp(self): super(RunOfflineInferenceArrowTest, self).setUp() @@ -251,6 +682,7 @@ def _build_multihead_model(self, model_path): def _run_inference_with_beam(self, example_path, inference_spec_type, prediction_log_path, include_config = False): + # test RunInferenceOnRecordBatch converter = tf_example_record.TFExampleBeamRecord( physical_format="inmem", telemetry_descriptors=[], @@ -268,8 +700,8 @@ def _run_inference_with_beam(self, example_path, inference_spec_type, pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceImpl( - inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) @@ -279,7 +711,7 @@ def _run_inference_with_beam(self, example_path, inference_spec_type, pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceImpl( + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, @@ -488,7 +920,7 @@ def testKerasModelPredictMultiTensor(self): model_spec_pb2.InferenceSpecType( saved_model_spec=model_spec_pb2.SavedModelSpec( model_path=model_path)), - prediction_log_path, include_config = True) + prediction_log_path, include_config = True) results = self._get_results(prediction_log_path) self.assertLen(results, 2) @@ -545,7 +977,7 @@ def testTelemetry(self): pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceImpl( + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) run_result = pipeline.run() run_result.wait_until_finish() @@ -581,7 +1013,7 @@ def testTelemetry(self): load_model_latency_milli_secs['distributions'][0].result.sum, 0) -class RunRemoteInferenceArrowTest(RunInferenceArrowFixture): +class RunRemoteInferenceArrowTest(RunInferenceFixture): def setUp(self): super(RunRemoteInferenceArrowTest, self).setUp() @@ -610,7 +1042,7 @@ def _set_up_pipeline(self, inference_spec_type): self.pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceImpl( + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) def _run_inference_with_beam(self): @@ -730,7 +1162,7 @@ def test_can_format_requests(self): | 'CreateExamples' >> beam.Create([example]) | 'ParseExamples' >> beam.Map(lambda x: x.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceImpl( + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) self._run_inference_with_beam() diff --git a/tfx_bsl/public/beam/run_inference.py 
b/tfx_bsl/public/beam/run_inference.py index 58633f95..8e173d5d 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -22,20 +22,12 @@ import apache_beam as beam import tensorflow as tf import pyarrow as pa -from typing import Union, Text, Optional -from tfx_bsl.tfxio import test_util -from tfx_bsl.tfxio import tensor_adapter -from tfx_bsl.tfxio import tf_example_record -from tfx_bsl.tfxio import tf_sequence_example_record +from typing import Text, Optional from tfx_bsl.beam import run_inference from tfx_bsl.public.proto import model_spec_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_metadata.proto.v0 import schema_pb2 -from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN -from tfx_bsl.beam.bsl_constants import DataType - - @beam.ptransform_fn @beam.typehints.with_input_types(tf.train.Example) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) @@ -53,6 +45,11 @@ def RunInference( # pylint: disable=invalid-name `ai_platform_prediction_model_spec` field is set in `inference_spec_type`. + TODO(b/131873699): Add support for the following features: + 1. Bytes as Input. + 2. PTable Input. + 3. Models as SideInput. + Args: examples: A PCollection containing examples. inference_spec_type: Model inference endpoint. @@ -63,27 +60,9 @@ def RunInference( # pylint: disable=invalid-name A PCollection containing prediction logs. """ - data_type = DataType.EXAMPLE - converter = tf_example_record.TFExampleBeamRecord( - physical_format="inmem", - telemetry_descriptors=[], - schema=schema, - raw_record_column_name=_RECORDBATCH_COLUMN) - - tensor_adapter_config = None - if schema: - tfxio = test_util.InMemoryTFExampleRecord( - schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) - tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) - return (examples - | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) - | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( - inference_spec_type, data_type, - tensor_adapter_config=tensor_adapter_config)) + | 'RunInferenceOnExamples' >> run_inference.RunInferenceOnExamples( + inference_spec_type, schema=schema)) @beam.ptransform_fn @@ -113,24 +92,6 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name A PCollection containing prediction logs. 
""" - data_type = DataType.SEQUENCEEXAMPLE - converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( - physical_format="inmem", - telemetry_descriptors=[], - schema=schema, - raw_record_column_name=_RECORDBATCH_COLUMN) - - tensor_adapter_config = None - if schema: - tfxio = test_util.InMemoryTFExampleRecord( - schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) - tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) - return (examples - | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) - | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> run_inference.RunInferenceImpl( - inference_spec_type, data_type, - tensor_adapter_config=tensor_adapter_config)) + | 'RunInferenceOnSequenceExamples' >> run_inference.RunInferenceOnSequenceExamples( + inference_spec_type, schema=schema)) From daf394e0faaf189711ca1aa69f01711cb4ee712e Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 30 Jul 2020 13:47:42 -0400 Subject: [PATCH 23/31] fix test --- tfx_bsl/beam/bsl_util.py | 9 +- tfx_bsl/beam/run_inference_test.py | 1864 ++++++++++++++-------------- 2 files changed, 934 insertions(+), 939 deletions(-) diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index 496f9eb4..3bc8c624 100644 --- a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -51,13 +51,8 @@ def flatten(element: List[Any]): return [{'b64': base64.b64encode(value).decode()} for value in df[_RECORDBATCH_COLUMN]] else: as_binary = df.columns.str.endswith("_bytes") - # Handles the case where there is only one entry - if len(df) == 1: - df.loc[:, as_binary] = df.loc[:, as_binary].applymap( - lambda feature: [{'b64': base64.b64encode(feature).decode()}]) - else: - df.loc[:, as_binary] = df.loc[:, as_binary].applymap( - lambda feature: [{'b64': base64.b64encode(value).decode()} for value in feature]) + df.loc[:, as_binary] = df.loc[:, as_binary].applymap( + lambda feature: [{'b64': base64.b64encode(value).decode()} for value in feature]) if _RECORDBATCH_COLUMN in df.columns: df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 7dee9811..990e243e 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -79,939 +79,939 @@ def _prepare_predict_examples(self, example_path): output_file.write(example.SerializeToString()) -# class RunOfflineInferenceExamplesTest(RunInferenceFixture): - -# def setUp(self): -# super(RunOfflineInferenceExamplesTest, self).setUp() -# self._predict_examples = [ -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 0 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 1 }}} -# } -# """, tf.train.Example()), -# ] -# self._multihead_examples = [ -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.8 }}} -# feature {key: "y" value { float_list { value: 0.2 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.6 }}} -# feature {key: "y" value { float_list { value: 0.1 }}} -# } -# """, tf.train.Example()), -# ] - -# self.schema = text_format.Parse( -# """ -# tensor_representation_group { -# key: "" -# value { -# tensor_representation { -# key: "x" -# value { -# dense_tensor { -# 
column_name: "x" -# shape { dim { size: 1 } } -# } -# } -# } -# tensor_representation { -# key: "y" -# value { -# dense_tensor { -# column_name: "y" -# shape { dim { size: 1 } } -# } -# } -# } -# } -# } -# feature { -# name: "x" -# type: FLOAT -# } -# feature { -# name: "y" -# type: FLOAT -# } -# """, schema_pb2.Schema()) - -# def _prepare_multihead_examples(self, example_path): -# with tf.io.TFRecordWriter(example_path) as output_file: -# for example in self._multihead_examples: -# output_file.write(example.SerializeToString()) - -# def _run_inference_with_beam(self, example_path, inference_spec_type, -# prediction_log_path, include_schema = False): -# schema = None -# if include_schema: -# schema = self.schema - -# with beam.Pipeline() as pipeline: -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) -# | 'RunInference' >> run_inference.RunInferenceOnExamples( -# inference_spec_type, schema=schema) -# | 'WritePredictions' >> beam.io.WriteToTFRecord( -# prediction_log_path, -# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - -# def _get_results(self, prediction_log_path): -# results = [] -# for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): -# record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) -# for record_string in record_iterator: -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.MergeFromString(record_string) -# results.append(prediction_log) -# return results - - -# def testKerasModelPredict(self): -# inputs = tf.keras.Input(shape=(1,), name='input1') -# output1 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output1')( -# inputs) -# output2 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output2')( -# inputs) -# inference_model = tf.keras.models.Model(inputs, [output1, output2]) - -# class TestKerasModel(tf.keras.Model): - -# def __init__(self, inference_model): -# super(TestKerasModel, self).__init__(name='test_keras_model') -# self.inference_model = inference_model - -# @tf.function(input_signature=[ -# tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') -# ]) -# def call(self, serialized_example): -# features = { -# 'input1': -# tf.compat.v1.io.FixedLenFeature([1], -# dtype=tf.float32, -# default_value=0) -# } -# input_tensor_dict = tf.io.parse_example(serialized_example, features) -# return inference_model(input_tensor_dict['input1']) - -# model = TestKerasModel(inference_model) -# model.compile( -# optimizer=tf.keras.optimizers.Adam(lr=.001), -# loss=tf.keras.losses.binary_crossentropy, -# metrics=['accuracy']) - -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) - -# def testKerasModelPredictMultiTensor(self): -# input1 = tf.keras.layers.Input((1,), name='x') -# input2 = tf.keras.layers.Input((1,), name='y') - -# x1 = tf.keras.layers.Dense(10)(input1) -# x2 = tf.keras.layers.Dense(10)(input2) -# output = tf.keras.layers.Dense(5, name='output')(x2) - -# 
model = tf.keras.models.Model([input1, input2], output) -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), -# prediction_log_path, include_schema = True) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# for result in results: -# self.assertLen(result.predict_log.request.inputs, 2) -# self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) - - -# class RunRemoteInferenceExamplesTest(RunInferenceFixture): - -# def setUp(self): -# super(RunRemoteInferenceExamplesTest, self).setUp() -# self.example_path = self._get_output_data_dir('example') -# self._prepare_predict_examples(self.example_path) -# # This is from https://ml.googleapis.com/$discovery/rest?version=v1. -# self._discovery_testdata_dir = os.path.join( -# os.path.join(os.path.dirname(__file__), 'testdata'), -# 'ml_discovery.json') - -# @staticmethod -# def _make_response_body(content, successful): -# if successful: -# response_dict = {'predictions': content} -# else: -# response_dict = {'error': content} -# return json.dumps(response_dict) - -# def _set_up_pipeline(self, inference_spec_type): -# self.pipeline = beam.Pipeline() -# self.pcoll = ( -# self.pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) -# | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) -# | 'RunInference' >> run_inference.RunInferenceOnExamples(inference_spec_type)) - -# def _run_inference_with_beam(self): -# self.pipeline_result = self.pipeline.run() -# self.pipeline_result.wait_until_finish() - -# def test_model_predict(self): -# predictions = [{'output_1': [0.901], 'output_2': [0.997]}] -# builder = http.RequestMockBuilder({ -# 'ml.projects.predict': -# (None, self._make_response_body(predictions, successful=True)) -# }) -# resource = discovery.build( -# 'ml', -# 'v1', -# http=http.HttpMock(self._discovery_testdata_dir, -# {'status': http_client.OK}), -# requestBuilder=builder) -# with mock.patch('googleapiclient.discovery.' 
'build') as response_mock: -# response_mock.side_effect = lambda service, version: resource -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# ai_platform_prediction_model_spec=model_spec_pb2 -# .AIPlatformPredictionModelSpec( -# project_id='test-project', -# model_name='test-model', -# )) - -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.predict_log.response.outputs['output_1'].CopyFrom( -# tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) -# prediction_log.predict_log.response.outputs['output_2'].CopyFrom( -# tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) - -# self._set_up_pipeline(inference_spec_type) -# assert_that(self.pcoll, equal_to([prediction_log])) -# self._run_inference_with_beam() - - -# class RunOfflineInferenceSequenceExamplesTest(RunInferenceFixture): - -# def setUp(self): -# super(RunOfflineInferenceSequenceExamplesTest, self).setUp() -# self._predict_examples = [ -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 0 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 1 }}} -# } -# """, tf.train.Example()), -# ] -# self._multihead_examples = [ -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.8 }}} -# feature {key: "y" value { float_list { value: 0.2 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.6 }}} -# feature {key: "y" value { float_list { value: 0.1 }}} -# } -# """, tf.train.Example()), -# ] - -# self.schema = text_format.Parse( -# """ -# tensor_representation_group { -# key: "" -# value { -# tensor_representation { -# key: "x" -# value { -# dense_tensor { -# column_name: "x" -# shape { dim { size: 1 } } -# } -# } -# } -# tensor_representation { -# key: "y" -# value { -# dense_tensor { -# column_name: "y" -# shape { dim { size: 1 } } -# } -# } -# } -# } -# } -# feature { -# name: "x" -# type: FLOAT -# } -# feature { -# name: "y" -# type: FLOAT -# } -# """, schema_pb2.Schema()) - -# def _prepare_multihead_examples(self, example_path): -# with tf.io.TFRecordWriter(example_path) as output_file: -# for example in self._multihead_examples: -# output_file.write(example.SerializeToString()) - -# def _run_inference_with_beam(self, example_path, inference_spec_type, -# prediction_log_path, include_schema = False): -# schema = None -# if include_schema: -# schema = self.schema - -# with beam.Pipeline() as pipeline: -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) -# | 'RunInference' >> run_inference.RunInferenceOnSequenceExamples( -# inference_spec_type, schema=schema) -# | 'WritePredictions' >> beam.io.WriteToTFRecord( -# prediction_log_path, -# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - -# def _get_results(self, prediction_log_path): -# results = [] -# for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): -# record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) -# for record_string in record_iterator: -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.MergeFromString(record_string) -# results.append(prediction_log) -# return results - - -# def testKerasModelPredict(self): -# inputs = tf.keras.Input(shape=(1,), name='input1') -# output1 = tf.keras.layers.Dense( -# 1, 
activation=tf.nn.sigmoid, name='output1')( -# inputs) -# output2 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output2')( -# inputs) -# inference_model = tf.keras.models.Model(inputs, [output1, output2]) - -# class TestKerasModel(tf.keras.Model): - -# def __init__(self, inference_model): -# super(TestKerasModel, self).__init__(name='test_keras_model') -# self.inference_model = inference_model - -# @tf.function(input_signature=[ -# tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') -# ]) -# def call(self, serialized_example): -# features = { -# 'input1': -# tf.compat.v1.io.FixedLenFeature([1], -# dtype=tf.float32, -# default_value=0) -# } -# input_tensor_dict = tf.io.parse_example(serialized_example, features) -# return inference_model(input_tensor_dict['input1']) - -# model = TestKerasModel(inference_model) -# model.compile( -# optimizer=tf.keras.optimizers.Adam(lr=.001), -# loss=tf.keras.losses.binary_crossentropy, -# metrics=['accuracy']) - -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) - -# def testKerasModelPredictMultiTensor(self): -# input1 = tf.keras.layers.Input((1,), name='x') -# input2 = tf.keras.layers.Input((1,), name='y') - -# x1 = tf.keras.layers.Dense(10)(input1) -# x2 = tf.keras.layers.Dense(10)(input2) -# output = tf.keras.layers.Dense(5, name='output')(x2) - -# model = tf.keras.models.Model([input1, input2], output) -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), -# prediction_log_path, include_schema = True) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# for result in results: -# self.assertLen(result.predict_log.request.inputs, 2) -# self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) - - -# class RunOfflineInferenceArrowTest(RunInferenceFixture): - -# def setUp(self): -# super(RunOfflineInferenceArrowTest, self).setUp() -# self._predict_examples = [ -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 0 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature { key: "input1" value { float_list { value: 1 }}} -# } -# """, tf.train.Example()), -# ] - -# self._multihead_examples = [ -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.8 }}} -# feature {key: "y" value { float_list { value: 0.2 }}} -# } -# """, tf.train.Example()), -# text_format.Parse( -# """ -# features { -# feature {key: "x" value { float_list { value: 0.6 }}} -# feature {key: "y" value { float_list { value: 0.1 }}} -# } -# """, 
tf.train.Example()), -# ] - -# self.schema = text_format.Parse( -# """ -# tensor_representation_group { -# key: "" -# value { -# tensor_representation { -# key: "x" -# value { -# dense_tensor { -# column_name: "x" -# shape { dim { size: 1 } } -# } -# } -# } -# tensor_representation { -# key: "y" -# value { -# dense_tensor { -# column_name: "y" -# shape { dim { size: 1 } } -# } -# } -# } -# } -# } -# feature { -# name: "x" -# type: FLOAT -# } -# feature { -# name: "y" -# type: FLOAT -# } -# """, schema_pb2.Schema()) - -# def _prepare_multihead_examples(self, example_path): -# with tf.io.TFRecordWriter(example_path) as output_file: -# for example in self._multihead_examples: -# output_file.write(example.SerializeToString()) - - -# def _build_predict_model(self, model_path): -# """Exports the dummy sum predict model.""" - -# with tf.compat.v1.Graph().as_default(): -# input_tensors = { -# 'x': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, default_value=0) -# } -# serving_receiver = ( -# tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( -# input_tensors)()) -# output_tensors = {'y': serving_receiver.features['x'] * 2} -# sess = tf.compat.v1.Session() -# sess.run(tf.compat.v1.initializers.global_variables()) -# signature_def = tf.compat.v1.estimator.export.PredictOutput( -# output_tensors).as_signature_def(serving_receiver.receiver_tensors) -# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) -# builder.add_meta_graph_and_variables( -# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], -# signature_def_map={ -# tf.compat.v1.saved_model.signature_constants -# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: -# signature_def, -# }) -# builder.save() - -# def _build_regression_signature(self, input_tensor, output_tensor): -# """Helper function for building a regression SignatureDef.""" -# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# input_tensor) -# signature_inputs = { -# tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: -# input_tensor_info -# } -# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# output_tensor) -# signature_outputs = { -# tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: -# output_tensor_info -# } -# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( -# signature_inputs, signature_outputs, -# tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) - -# def _build_classification_signature(self, input_tensor, scores_tensor): -# """Helper function for building a classification SignatureDef.""" -# input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# input_tensor) -# signature_inputs = { -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: -# input_tensor_info -# } -# output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( -# scores_tensor) -# signature_outputs = { -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: -# output_tensor_info -# } -# return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( -# signature_inputs, signature_outputs, -# tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) - -# def _build_multihead_model(self, model_path): -# with tf.compat.v1.Graph().as_default(): -# input_example = tf.compat.v1.placeholder( -# tf.string, name='input_examples_tensor') -# config = { -# 'x': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, default_value=0), -# 'y': tf.compat.v1.io.FixedLenFeature( -# [1], 
dtype=tf.float32, default_value=0), -# } -# features = tf.compat.v1.parse_example(input_example, config) -# x = features['x'] -# y = features['y'] -# sum_pred = x + y -# diff_pred = tf.abs(x - y) -# sess = tf.compat.v1.Session() -# sess.run(tf.compat.v1.initializers.global_variables()) -# signature_def_map = { -# 'regress_diff': -# self._build_regression_signature(input_example, diff_pred), -# 'classify_sum': -# self._build_classification_signature(input_example, sum_pred), -# tf.compat.v1.saved_model.signature_constants -# .DEFAULT_SERVING_SIGNATURE_DEF_KEY: -# self._build_regression_signature(input_example, sum_pred) -# } -# builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) -# builder.add_meta_graph_and_variables( -# sess, [tf.compat.v1.saved_model.tag_constants.SERVING], -# signature_def_map=signature_def_map) -# builder.save() - -# def _run_inference_with_beam(self, example_path, inference_spec_type, -# prediction_log_path, include_config = False): -# # test RunInferenceOnRecordBatch -# converter = tf_example_record.TFExampleBeamRecord( -# physical_format="inmem", -# telemetry_descriptors=[], -# raw_record_column_name=_RECORDBATCH_COLUMN) - -# if include_config: -# tfxio = test_util.InMemoryTFExampleRecord( -# schema=self.schema, raw_record_column_name=_RECORDBATCH_COLUMN) -# tensor_adapter_config = tensor_adapter.TensorAdapterConfig( -# arrow_schema=tfxio.ArrowSchema(), -# tensor_representations=tfxio.TensorRepresentations()) - -# with beam.Pipeline() as pipeline: -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ConvertToRecordBatch' >> converter.BeamSource() -# | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( -# inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) -# | 'WritePredictions' >> beam.io.WriteToTFRecord( -# prediction_log_path, -# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) -# else: -# with beam.Pipeline() as pipeline: -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ConvertToRecordBatch' >> converter.BeamSource() -# | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( -# inference_spec_type, DataType.EXAMPLE) -# | 'WritePredictions' >> beam.io.WriteToTFRecord( -# prediction_log_path, -# coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - -# def _get_results(self, prediction_log_path): -# results = [] -# for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): -# record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) -# for record_string in record_iterator: -# prediction_log = prediction_log_pb2.PredictionLog() -# prediction_log.MergeFromString(record_string) -# results.append(prediction_log) -# return results - -# def testModelPathInvalid(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=self._get_output_data_dir())), prediction_log_path) - -# def testEstimatorModelPredict(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_predict_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# 
self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# self.assertEqual( -# results[0].predict_log.request.inputs[ -# run_inference._DEFAULT_INPUT_KEY].string_val[0], -# self._predict_examples[0].SerializeToString()) -# self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, -# tf.float32) -# self.assertLen( -# results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) -# self.assertEqual( -# results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, -# 1) -# self.assertEqual( -# results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, -# 1) - -# def testClassifyModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['classify_sum'])), -# prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# classify_log = results[0].classify_log -# self.assertLen(classify_log.request.input.example_list.examples, 1) -# self.assertEqual(classify_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(classify_log.response.result.classifications, 1) -# self.assertLen(classify_log.response.result.classifications[0].classes, 1) -# self.assertAlmostEqual( -# classify_log.response.result.classifications[0].classes[0].score, 1.0) - -# def testRegressModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['regress_diff'])), -# prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# regress_log = results[0].regress_log -# self.assertLen(regress_log.request.input.example_list.examples, 1) -# self.assertEqual(regress_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(regress_log.response.result.regressions, 1) -# self.assertAlmostEqual(regress_log.response.result.regressions[0].value, -# 0.6) - -# def testMultiInferenceModel(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, -# signature_name=['regress_diff', 'classify_sum'])), -# prediction_log_path) -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# multi_inference_log = results[0].multi_inference_log -# 
self.assertLen(multi_inference_log.request.input.example_list.examples, 1) -# self.assertEqual(multi_inference_log.request.input.example_list.examples[0], -# self._multihead_examples[0]) -# self.assertLen(multi_inference_log.response.results, 2) -# signature_names = [] -# for result in multi_inference_log.response.results: -# signature_names.append(result.model_spec.signature_name) -# self.assertIn('regress_diff', signature_names) -# self.assertIn('classify_sum', signature_names) -# result = multi_inference_log.response.results[0] -# self.assertEqual(result.model_spec.signature_name, 'regress_diff') -# self.assertLen(result.regression_result.regressions, 1) -# self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) -# result = multi_inference_log.response.results[1] -# self.assertEqual(result.model_spec.signature_name, 'classify_sum') -# self.assertLen(result.classification_result.classifications, 1) -# self.assertLen(result.classification_result.classifications[0].classes, 1) -# self.assertAlmostEqual( -# result.classification_result.classifications[0].classes[0].score, 1.0) - -# def testKerasModelPredict(self): -# inputs = tf.keras.Input(shape=(1,), name='input1') -# output1 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output1')( -# inputs) -# output2 = tf.keras.layers.Dense( -# 1, activation=tf.nn.sigmoid, name='output2')( -# inputs) -# inference_model = tf.keras.models.Model(inputs, [output1, output2]) - -# class TestKerasModel(tf.keras.Model): -# def __init__(self, inference_model): -# super(TestKerasModel, self).__init__(name='test_keras_model') -# self.inference_model = inference_model - -# @tf.function(input_signature=[ -# tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') -# ]) -# def call(self, serialized_example): -# features = { -# 'input1': tf.compat.v1.io.FixedLenFeature( -# [1], dtype=tf.float32, -# default_value=0) -# } -# input_tensor_dict = tf.io.parse_example(serialized_example, features) -# return inference_model(input_tensor_dict['input1']) - -# model = TestKerasModel(inference_model) -# model.compile( -# optimizer=tf.keras.optimizers.Adam(lr=.001), -# loss=tf.keras.losses.binary_crossentropy, -# metrics=['accuracy']) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_predict_examples(example_path) -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), prediction_log_path) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) - -# def testKerasModelPredictMultiTensor(self): -# input1 = tf.keras.layers.Input((1,), name='x') -# input2 = tf.keras.layers.Input((1,), name='y') - -# x1 = tf.keras.layers.Dense(10)(input1) -# x2 = tf.keras.layers.Dense(10)(input2) -# output = tf.keras.layers.Dense(5, name='output')(x2) - -# model = tf.keras.models.Model([input1, input2], output) -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') -# self._run_inference_with_beam( -# example_path, -# 
model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), -# prediction_log_path, include_config = True) - -# results = self._get_results(prediction_log_path) -# self.assertLen(results, 2) -# for result in results: -# self.assertLen(result.predict_log.request.inputs, 2) -# self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) - -# def testMultiTensorError(self): -# input1 = tf.keras.layers.Input((1,), name='x') -# input2 = tf.keras.layers.Input((1,), name='y') - -# x1 = tf.keras.layers.Dense(10)(input1) -# x2 = tf.keras.layers.Dense(10)(input2) -# output = tf.keras.layers.Dense(5, name='output')(x2) - -# model = tf.keras.models.Model([input1, input2], output) -# model_path = self._get_output_data_dir('model') -# tf.compat.v1.keras.experimental.export_saved_model( -# model, model_path, serving_only=True) - -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# prediction_log_path = self._get_output_data_dir('predictions') - -# error_msg = 'Tensor adaptor config is required with a multi-input model' -# try: -# self._run_inference_with_beam( -# example_path, -# model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path)), -# prediction_log_path, include_config = False) -# except ValueError as exc: -# actual_error_msg = str(exc) -# self.assertTrue(actual_error_msg.startswith(error_msg)) -# else: -# self.fail('Test was expected to throw ValueError exception') - -# def testTelemetry(self): -# example_path = self._get_output_data_dir('examples') -# self._prepare_multihead_examples(example_path) -# model_path = self._get_output_data_dir('model') -# self._build_multihead_model(model_path) -# inference_spec_type = model_spec_pb2.InferenceSpecType( -# saved_model_spec=model_spec_pb2.SavedModelSpec( -# model_path=model_path, signature_name=['classify_sum'])) +class RunOfflineInferenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceExamplesTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _run_inference_with_beam(self, example_path, inference_spec_type, + 
prediction_log_path, include_schema = False): + schema = None + if include_schema: + schema = self.schema + + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceOnExamples( + inference_spec_type, schema=schema) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_schema = True) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + for result in results: 
+ self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + +class RunRemoteInferenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunRemoteInferenceExamplesTest, self).setUp() + self.example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self.example_path) + # This is from https://ml.googleapis.com/$discovery/rest?version=v1. + self._discovery_testdata_dir = os.path.join( + os.path.join(os.path.dirname(__file__), 'testdata'), + 'ml_discovery.json') + + @staticmethod + def _make_response_body(content, successful): + if successful: + response_dict = {'predictions': content} + else: + response_dict = {'error': content} + return json.dumps(response_dict) + + def _set_up_pipeline(self, inference_spec_type): + self.pipeline = beam.Pipeline() + self.pcoll = ( + self.pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) + | 'RunInference' >> run_inference.RunInferenceOnExamples(inference_spec_type)) + + def _run_inference_with_beam(self): + self.pipeline_result = self.pipeline.run() + self.pipeline_result.wait_until_finish() + + def test_model_predict(self): + predictions = [{'output_1': [0.901], 'output_2': [0.997]}] + builder = http.RequestMockBuilder({ + 'ml.projects.predict': + (None, self._make_response_body(predictions, successful=True)) + }) + resource = discovery.build( + 'ml', + 'v1', + http=http.HttpMock(self._discovery_testdata_dir, + {'status': http_client.OK}), + requestBuilder=builder) + with mock.patch('googleapiclient.discovery.' 'build') as response_mock: + response_mock.side_effect = lambda service, version: resource + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test-project', + model_name='test-model', + )) + + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.predict_log.response.outputs['output_1'].CopyFrom( + tf.make_tensor_proto(values=[0.901], dtype=tf.double, shape=(1, 1))) + prediction_log.predict_log.response.outputs['output_2'].CopyFrom( + tf.make_tensor_proto(values=[0.997], dtype=tf.double, shape=(1, 1))) + + self._set_up_pipeline(inference_spec_type) + assert_that(self.pcoll, equal_to([prediction_log])) + self._run_inference_with_beam() + + +class RunOfflineInferenceSequenceExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceSequenceExamplesTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + 
value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path, include_schema = False): + schema = None + if include_schema: + schema = self.schema + + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) + | 'RunInference' >> run_inference.RunInferenceOnSequenceExamples( + inference_spec_type, schema=schema) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, 
serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_schema = True) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + +class RunOfflineInferenceArrowTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceArrowTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), + ] + + self._multihead_examples = [ + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.8 }}} + feature {key: "y" value { float_list { value: 0.2 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + features { + feature {key: "x" value { float_list { value: 0.6 }}} + feature {key: "y" value { float_list { value: 0.1 }}} + } + """, tf.train.Example()), + ] + + self.schema = text_format.Parse( + """ + tensor_representation_group { + key: "" + value { + tensor_representation { + key: "x" + value { + dense_tensor { + column_name: "x" + shape { dim { size: 1 } } + } + } + } + tensor_representation { + key: "y" + value { + dense_tensor { + column_name: "y" + shape { dim { size: 1 } } + } + } + } + } + } + feature { + name: "x" + type: FLOAT + } + feature { + name: "y" + type: FLOAT + } + """, schema_pb2.Schema()) + + def _prepare_multihead_examples(self, example_path): + with tf.io.TFRecordWriter(example_path) as output_file: + for example in self._multihead_examples: + output_file.write(example.SerializeToString()) + + + def _build_predict_model(self, model_path): + """Exports the dummy sum predict model.""" + + with tf.compat.v1.Graph().as_default(): + input_tensors = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0) + } + serving_receiver = ( + tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn( + input_tensors)()) + output_tensors = {'y': serving_receiver.features['x'] * 2} + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def = tf.compat.v1.estimator.export.PredictOutput( + output_tensors).as_signature_def(serving_receiver.receiver_tensors) + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature_def, + }) + builder.save() + + def _build_regression_signature(self, input_tensor, output_tensor): + """Helper function for building a regression SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + output_tensor) + signature_outputs = 
{ + tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) + + def _build_classification_signature(self, input_tensor, scores_tensor): + """Helper function for building a classification SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + scores_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + + def _build_multihead_model(self, model_path): + with tf.compat.v1.Graph().as_default(): + input_example = tf.compat.v1.placeholder( + tf.string, name='input_examples_tensor') + config = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + 'y': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + } + features = tf.compat.v1.parse_example(input_example, config) + x = features['x'] + y = features['y'] + sum_pred = x + y + diff_pred = tf.abs(x - y) + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def_map = { + 'regress_diff': + self._build_regression_signature(input_example, diff_pred), + 'classify_sum': + self._build_classification_signature(input_example, sum_pred), + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + self._build_regression_signature(input_example, sum_pred) + } + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map) + builder.save() + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path, include_config = False): + # test RunInferenceOnRecordBatch + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) + + if include_config: + tfxio = test_util.InMemoryTFExampleRecord( + schema=self.schema, raw_record_column_name=_RECORDBATCH_COLUMN) + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=tfxio.ArrowSchema(), + tensor_representations=tfxio.TensorRepresentations()) + + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + else: + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + 
prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def _get_results(self, prediction_log_path): + results = [] + for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): + record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) + for record_string in record_iterator: + prediction_log = prediction_log_pb2.PredictionLog() + prediction_log.MergeFromString(record_string) + results.append(prediction_log) + return results + + def testModelPathInvalid(self): + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + with self.assertRaisesRegexp(IOError, 'SavedModel file does not exist.*'): + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=self._get_output_data_dir())), prediction_log_path) + + def testEstimatorModelPredict(self): + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_predict_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + self.assertEqual( + results[0].predict_log.request.inputs[ + run_inference._DEFAULT_INPUT_KEY].string_val[0], + self._predict_examples[0].SerializeToString()) + self.assertEqual(results[0].predict_log.response.outputs['y'].dtype, + tf.float32) + self.assertLen( + results[0].predict_log.response.outputs['y'].tensor_shape.dim, 2) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[0].size, + 1) + self.assertEqual( + results[0].predict_log.response.outputs['y'].tensor_shape.dim[1].size, + 1) + + def testClassifyModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + classify_log = results[0].classify_log + self.assertLen(classify_log.request.input.example_list.examples, 1) + self.assertEqual(classify_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(classify_log.response.result.classifications, 1) + self.assertLen(classify_log.response.result.classifications[0].classes, 1) + self.assertAlmostEqual( + classify_log.response.result.classifications[0].classes[0].score, 1.0) + + def testRegressModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, 
signature_name=['regress_diff'])), + prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + regress_log = results[0].regress_log + self.assertLen(regress_log.request.input.example_list.examples, 1) + self.assertEqual(regress_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(regress_log.response.result.regressions, 1) + self.assertAlmostEqual(regress_log.response.result.regressions[0].value, + 0.6) + + def testMultiInferenceModel(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, + signature_name=['regress_diff', 'classify_sum'])), + prediction_log_path) + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + multi_inference_log = results[0].multi_inference_log + self.assertLen(multi_inference_log.request.input.example_list.examples, 1) + self.assertEqual(multi_inference_log.request.input.example_list.examples[0], + self._multihead_examples[0]) + self.assertLen(multi_inference_log.response.results, 2) + signature_names = [] + for result in multi_inference_log.response.results: + signature_names.append(result.model_spec.signature_name) + self.assertIn('regress_diff', signature_names) + self.assertIn('classify_sum', signature_names) + result = multi_inference_log.response.results[0] + self.assertEqual(result.model_spec.signature_name, 'regress_diff') + self.assertLen(result.regression_result.regressions, 1) + self.assertAlmostEqual(result.regression_result.regressions[0].value, 0.6) + result = multi_inference_log.response.results[1] + self.assertEqual(result.model_spec.signature_name, 'classify_sum') + self.assertLen(result.classification_result.classifications, 1) + self.assertLen(result.classification_result.classifications[0].classes, 1) + self.assertAlmostEqual( + result.classification_result.classifications[0].classes[0].score, 1.0) + + def testKerasModelPredict(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + 
model, model_path, serving_only=True) + + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + + def testKerasModelPredictMultiTensor(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_config = True) + + results = self._get_results(prediction_log_path) + self.assertLen(results, 2) + for result in results: + self.assertLen(result.predict_log.request.inputs, 2) + self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) + + def testMultiTensorError(self): + input1 = tf.keras.layers.Input((1,), name='x') + input2 = tf.keras.layers.Input((1,), name='y') + + x1 = tf.keras.layers.Dense(10)(input1) + x2 = tf.keras.layers.Dense(10)(input2) + output = tf.keras.layers.Dense(5, name='output')(x2) + + model = tf.keras.models.Model([input1, input2], output) + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + prediction_log_path = self._get_output_data_dir('predictions') + + error_msg = 'Tensor adaptor config is required with a multi-input model' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), + prediction_log_path, include_config = False) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def testTelemetry(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + inference_spec_type = model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])) -# pipeline = beam.Pipeline() -# converter = tf_example_record.TFExampleBeamRecord( -# physical_format="inmem", -# telemetry_descriptors=[], -# raw_record_column_name=_RECORDBATCH_COLUMN) -# _ = ( -# pipeline -# | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) -# | 'ConvertToRecordBatch' >> converter.BeamSource() -# | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( -# inference_spec_type, DataType.EXAMPLE)) -# run_result = pipeline.run() -# run_result.wait_until_finish() - -# num_inferences = run_result.metrics().query( -# 
MetricsFilter().with_name('num_inferences')) -# self.assertTrue(num_inferences['counters']) -# self.assertEqual(num_inferences['counters'][0].result, 2) -# num_instances = run_result.metrics().query( -# MetricsFilter().with_name('num_instances')) -# self.assertTrue(num_instances['counters']) -# self.assertEqual(num_instances['counters'][0].result, 2) -# inference_request_batch_size = run_result.metrics().query( -# MetricsFilter().with_name('inference_request_batch_size')) -# self.assertTrue(inference_request_batch_size['distributions']) -# self.assertEqual( -# inference_request_batch_size['distributions'][0].result.sum, 2) -# inference_request_batch_byte_size = run_result.metrics().query( -# MetricsFilter().with_name('inference_request_batch_byte_size')) -# self.assertTrue(inference_request_batch_byte_size['distributions']) -# self.assertEqual( -# inference_request_batch_byte_size['distributions'][0].result.sum, -# sum(element.ByteSize() for element in self._multihead_examples)) -# inference_batch_latency_micro_secs = run_result.metrics().query( -# MetricsFilter().with_name('inference_batch_latency_micro_secs')) -# self.assertTrue(inference_batch_latency_micro_secs['distributions']) -# self.assertGreaterEqual( -# inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) -# load_model_latency_milli_secs = run_result.metrics().query( -# MetricsFilter().with_name('load_model_latency_milli_secs')) -# self.assertTrue(load_model_latency_milli_secs['distributions']) -# self.assertGreaterEqual( -# load_model_latency_milli_secs['distributions'][0].result.sum, 0) + pipeline = beam.Pipeline() + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[], + raw_record_column_name=_RECORDBATCH_COLUMN) + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE)) + run_result = pipeline.run() + run_result.wait_until_finish() + + num_inferences = run_result.metrics().query( + MetricsFilter().with_name('num_inferences')) + self.assertTrue(num_inferences['counters']) + self.assertEqual(num_inferences['counters'][0].result, 2) + num_instances = run_result.metrics().query( + MetricsFilter().with_name('num_instances')) + self.assertTrue(num_instances['counters']) + self.assertEqual(num_instances['counters'][0].result, 2) + inference_request_batch_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_size')) + self.assertTrue(inference_request_batch_size['distributions']) + self.assertEqual( + inference_request_batch_size['distributions'][0].result.sum, 2) + inference_request_batch_byte_size = run_result.metrics().query( + MetricsFilter().with_name('inference_request_batch_byte_size')) + self.assertTrue(inference_request_batch_byte_size['distributions']) + self.assertEqual( + inference_request_batch_byte_size['distributions'][0].result.sum, + sum(element.ByteSize() for element in self._multihead_examples)) + inference_batch_latency_micro_secs = run_result.metrics().query( + MetricsFilter().with_name('inference_batch_latency_micro_secs')) + self.assertTrue(inference_batch_latency_micro_secs['distributions']) + self.assertGreaterEqual( + inference_batch_latency_micro_secs['distributions'][0].result.sum, 0) + load_model_latency_milli_secs = run_result.metrics().query( + MetricsFilter().with_name('load_model_latency_milli_secs')) + 
self.assertTrue(load_model_latency_milli_secs['distributions']) + self.assertGreaterEqual( + load_model_latency_milli_secs['distributions'][0].result.sum, 0) class RunRemoteInferenceArrowTest(RunInferenceFixture): From 9bc26b443b351017267ad1bdaab8792ede0cc5e9 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 30 Jul 2020 13:49:09 -0400 Subject: [PATCH 24/31] Delete temp test --- tfx_bsl/public/beam/test_api.py | 251 -------------------------------- 1 file changed, 251 deletions(-) delete mode 100644 tfx_bsl/public/beam/test_api.py diff --git a/tfx_bsl/public/beam/test_api.py b/tfx_bsl/public/beam/test_api.py deleted file mode 100644 index 89646148..00000000 --- a/tfx_bsl/public/beam/test_api.py +++ /dev/null @@ -1,251 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -# Standard __future__ imports -from __future__ import print_function - -import json -import os -try: - import unittest.mock as mock -except ImportError: - import mock - -import apache_beam as beam -import pyarrow as pa -from apache_beam.metrics.metric import MetricsFilter -from apache_beam.testing.util import assert_that -from apache_beam.testing.util import equal_to -from googleapiclient import discovery -from googleapiclient import http -from six.moves import http_client -import tensorflow as tf -from tfx_bsl.beam import bsl_util -from tfx_bsl.public.beam import run_inference -from tfx_bsl.beam.bsl_constants import DataType -from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN -from tfx_bsl.public.proto import model_spec_pb2 -from tfx_bsl.tfxio import test_util -from tfx_bsl.tfxio import tensor_adapter -from tfx_bsl.tfxio import tf_example_record - -from google.protobuf import text_format -from tensorflow_serving.apis import prediction_log_pb2 -from tensorflow_metadata.proto.v0 import schema_pb2 - - -class RunInferenceFixture(tf.test.TestCase): - - def setUp(self): - super(RunInferenceFixture, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - ] - - def _get_output_data_dir(self, sub_dir=None): - test_dir = self._testMethodName - path = os.path.join( - os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()), - test_dir) - if not tf.io.gfile.exists(path): - tf.io.gfile.makedirs(path) - if sub_dir is not None: - path = os.path.join(path, sub_dir) - return path - - def _prepare_predict_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._predict_examples: - output_file.write(example.SerializeToString()) - - -class RunOfflineInferenceExamplesTest(RunInferenceFixture): - - def setUp(self): - super(RunOfflineInferenceExamplesTest, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, tf.train.Example()), - ] - self._multihead_examples = [ - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.8 }}} - feature {key: "y" value { float_list { value: 0.2 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - features { - feature {key: "x" value { float_list { value: 0.6 }}} - feature {key: "y" value { float_list { value: 0.1 }}} - } - """, tf.train.Example()), - ] - - self.schema = text_format.Parse( - """ - 
tensor_representation_group { - key: "" - value { - tensor_representation { - key: "x" - value { - dense_tensor { - column_name: "x" - shape { dim { size: 1 } } - } - } - } - tensor_representation { - key: "y" - value { - dense_tensor { - column_name: "y" - shape { dim { size: 1 } } - } - } - } - } - } - feature { - name: "x" - type: FLOAT - } - feature { - name: "y" - type: FLOAT - } - """, schema_pb2.Schema()) - - def _prepare_multihead_examples(self, example_path): - with tf.io.TFRecordWriter(example_path) as output_file: - for example in self._multihead_examples: - output_file.write(example.SerializeToString()) - - def _run_inference_with_beam(self, example_path, inference_spec_type, - prediction_log_path, include_schema = False): - schema = None - if include_schema: - schema = self.schema - - with beam.Pipeline() as pipeline: - _ = ( - pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) - | 'RunInference' >> run_inference.RunInference( - inference_spec_type, schema=schema) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - - def _get_results(self, prediction_log_path): - results = [] - for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'): - record_iterator = tf.compat.v1.io.tf_record_iterator(path=f) - for record_string in record_iterator: - prediction_log = prediction_log_pb2.PredictionLog() - prediction_log.MergeFromString(record_string) - results.append(prediction_log) - return results - - - def testKerasModelPredict(self): - inputs = tf.keras.Input(shape=(1,), name='input1') - output1 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output1')( - inputs) - output2 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output2')( - inputs) - inference_model = tf.keras.models.Model(inputs, [output1, output2]) - - class TestKerasModel(tf.keras.Model): - - def __init__(self, inference_model): - super(TestKerasModel, self).__init__(name='test_keras_model') - self.inference_model = inference_model - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') - ]) - def call(self, serialized_example): - features = { - 'input1': - tf.compat.v1.io.FixedLenFeature([1], - dtype=tf.float32, - default_value=0) - } - input_tensor_dict = tf.io.parse_example(serialized_example, features) - return inference_model(input_tensor_dict['input1']) - - model = TestKerasModel(inference_model) - model.compile( - optimizer=tf.keras.optimizers.Adam(lr=.001), - loss=tf.keras.losses.binary_crossentropy, - metrics=['accuracy']) - - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - - def testKerasModelPredictMultiTensor(self): - input1 = tf.keras.layers.Input((1,), name='x') - input2 = tf.keras.layers.Input((1,), name='y') - - x1 = tf.keras.layers.Dense(10)(input1) - x2 = tf.keras.layers.Dense(10)(input2) - output = tf.keras.layers.Dense(5, name='output')(x2) 
- - model = tf.keras.models.Model([input1, input2], output) - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - example_path = self._get_output_data_dir('examples') - self._prepare_multihead_examples(example_path) - prediction_log_path = self._get_output_data_dir('predictions') - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), - prediction_log_path, include_schema = True) - - results = self._get_results(prediction_log_path) - self.assertLen(results, 2) - for result in results: - self.assertLen(result.predict_log.request.inputs, 2) - self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) - - -if __name__ == '__main__': - tf.test.main() From 25b8631e12e1b0e7762934258f25132372f68210 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 30 Jul 2020 18:17:40 -0400 Subject: [PATCH 25/31] add test for serialized example --- tfx_bsl/beam/run_inference_test.py | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 990e243e..192bd58f 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -1199,6 +1199,40 @@ def test_request_body_with_binary_data(self): }, ], result) + def test_request_serialized_example(self): + example = text_format.Parse( + """ + features { + feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} + feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} + feature { key: "y" value { int64_list { value: [1, 2] }}} + } + """, tf.train.Example()) + inference_spec_type = model_spec_pb2.InferenceSpecType( + ai_platform_prediction_model_spec=model_spec_pb2 + .AIPlatformPredictionModelSpec( + project_id='test_project', + model_name='test_model', + version_name='test_version', + use_serialization_config=True)) + + serialized_example_remote = [example.SerializeToString()] + record_batch_remote = pa.RecordBatch.from_arrays( + [ + pa.array([["ASa8asdf"]], type=pa.list_(pa.binary())), + pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), + pa.array([[1, 2]], type=pa.list_(pa.int32())), + pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())), + serialized_example_remote + ], + ['x_bytes', 'x', 'y', 'z', _RECORDBATCH_COLUMN] + ) + + result = list(bsl_util.RecordToJSON(record_batch_remote, True)) + self.assertEqual(result, [{ + 'b64': base64.b64encode(example.SerializeToString()).decode() + }]) + if __name__ == '__main__': tf.test.main() From b2e66895af7a9c31dc336107426dbb98c39ccbe2 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Tue, 4 Aug 2020 17:00:42 -0400 Subject: [PATCH 26/31] address comments and fix post-process --- tfx_bsl/beam/bsl_constants.py | 2 +- tfx_bsl/beam/bsl_util.py | 44 +++++++--- tfx_bsl/beam/run_inference.py | 133 ++++++++++++++--------------- tfx_bsl/beam/run_inference_test.py | 31 +++---- 4 files changed, 110 insertions(+), 100 deletions(-) diff --git a/tfx_bsl/beam/bsl_constants.py b/tfx_bsl/beam/bsl_constants.py index caaba5aa..4f797b15 100644 --- a/tfx_bsl/beam/bsl_constants.py +++ b/tfx_bsl/beam/bsl_constants.py @@ -1,5 +1,5 @@ _RECORDBATCH_COLUMN = '__RAW_RECORD__' -KERAS_INPUT_SUFFIX = '_input' +_KERAS_INPUT_SUFFIX = '_input' class DataType(object): EXAMPLE = 'EXAMPLE' diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index 3bc8c624..3d2a7929 100644 --- 
a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -27,9 +27,27 @@ import typing from typing import Dict, List, Text, Any, Set, Optional from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN -from tfx_bsl.beam.bsl_constants import KERAS_INPUT_SUFFIX +from tfx_bsl.beam.bsl_constants import _KERAS_INPUT_SUFFIX +def ExtractSerializedExampleFromRecordBatch(elements: pa.RecordBatch) -> List[Text]: + serialized_examples = None + for column_name, column_array in zip(elements.schema.names, elements.columns): + if column_name == _RECORDBATCH_COLUMN: + column_type = column_array.flatten().type + if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)): + raise ValueError( + 'Expected a list of serialized examples in bytes or as a string, got %s' % + type(example)) + serialized_examples = column_array.flatten().to_pylist() + break + + if (serialized_examples is None): + raise ValueError('Raw examples not found.') + + return serialized_examples + + def RecordToJSON(record_batch: pa.RecordBatch, prepare_instances_serialized) -> List[Text]: """Returns a JSON string translated from `record_batch`. @@ -41,6 +59,9 @@ def RecordToJSON(record_batch: pa.RecordBatch, prepare_instances_serialized) -> Args: record_batch: input RecordBatch. """ + + # TODO (b/155912552): Handle this for sequence example. + def flatten(element: List[Any]): if len(element) == 1: return element[0] @@ -60,16 +81,16 @@ def flatten(element: List[Any]): return json.loads(df.to_json(orient='records')) -def find_input_name_in_features(features: Set[Text], - input_name: Text) -> Optional[Text]: +def _find_input_name_in_features(features: Set[Text], + input_name: Text) -> Optional[Text]: """Maps input name to an entry in features. Returns None if not found.""" if input_name in features: return input_name # Some keras models prepend '_input' to the names of the inputs # so try under '_input' as well. - elif (input_name.endswith(KERAS_INPUT_SUFFIX) and - input_name[:-len(KERAS_INPUT_SUFFIX)] in features): - return input_name[:-len(KERAS_INPUT_SUFFIX)] + elif (input_name.endswith(_KERAS_INPUT_SUFFIX) and + input_name[:-len(_KERAS_INPUT_SUFFIX)] in features): + return input_name[:-len(_KERAS_INPUT_SUFFIX)] return None @@ -93,13 +114,14 @@ def filter_tensors_by_input_names( return None result = {} tensor_keys = set(tensors.keys()) + + # The case where the model takes serialized examples as input. + if len(input_names) == 1 and _find_input_name_in_features(tensor_keys, input_names[0]): + return None + for name in input_names: - tensor_name = find_input_name_in_features(tensor_keys, name) + tensor_name = _find_input_name_in_features(tensor_keys, name) if tensor_name is None: - # This should happen only in the case where the model takes serialized - # examples as input. Else raise an exception. - if len(input_names) == 1: - return None raise RuntimeError( 'Input tensor not found: {}. 
Existing keys: {}.'.format( name, ','.join(tensors.keys()))) diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index 9011dd54..f3bdb5f4 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -301,7 +301,7 @@ def _Predict(pcoll: beam.pvalue.PCollection, # pylint: disable=invalid-name predictions = ( pcoll | 'RemotePredict' >> beam.ParDo(_RemotePredictDoFn( - inference_spec_type, pcoll.pipeline.options, data_type, tensor_adapter_config))) + inference_spec_type, pcoll.pipeline.options, data_type))) return (predictions | 'BuildPredictionLogForPredictions' >> beam.ParDo( _BuildPredictionLogForPredictionsDoFn())) @@ -385,59 +385,24 @@ def update( def __init__( - self, inference_spec_type: model_spec_pb2.InferenceSpecType, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): + self, inference_spec_type: model_spec_pb2.InferenceSpecType): super(_BaseDoFn, self).__init__() self._clock = None self.inference_spec_type = inference_spec_type self._metrics_collector = self._MetricsCollector(inference_spec_type) - self._tensor_adapter_config = tensor_adapter_config - self._io_tensor_spec = None # This value may be None if the model is remote def setup(self): self._clock = _ClockFactory.make_clock() + @abc.abstractmethod def _extract_from_recordBatch(self, elements: pa.RecordBatch): """ Function to extract the compatible input with model signature + return: + - serialized examples for metrics + - model input for processing and post processing """ - serialized_examples = None - for column_name, column_array in zip(elements.schema.names, elements.columns): - if column_name == _RECORDBATCH_COLUMN: - column_type = column_array.flatten().type - if not (pa.types.is_binary(column_type) or pa.types.is_string(column_type)): - raise ValueError( - 'Expected a list of serialized examples in bytes or as a string, got %s' % - type(example)) - serialized_examples = column_array.flatten().to_pylist() - break - - if (serialized_examples is None): - raise ValueError('Raw examples not found.') - - model_input = None - if self._io_tensor_spec is None: # Case when we are running remote inference - prepare_instances_serialized = ( - self.inference_spec_type.ai_platform_prediction_model_spec.use_serialization_config) - model_input = bsl_util.RecordToJSON(elements, prepare_instances_serialized) - elif (len(self._io_tensor_spec.input_tensor_names) == 1): - model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} - else: - if (self._tensor_adapter_config is None): - raise ValueError('Tensor adaptor config is required with a multi-input model') - - input_tensor_names = self._io_tensor_spec.input_tensor_names - input_tensor_alias = self._io_tensor_spec.input_tensor_alias - _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) - dict_of_tensors = _tensor_adapter.ToBatchTensors( - elements, produce_eager_tensors = False) - filtered_tensors = bsl_util.filter_tensors_by_input_names( - dict_of_tensors, input_tensor_alias) - - model_input = {} - for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): - model_input[tensor_name] = filtered_tensors[feature] - return serialized_examples, model_input + raise NotImplementedError def process(self, elements: pa.RecordBatch) -> Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() @@ -507,9 +472,8 @@ class _RemotePredictDoFn(_BaseDoFn): """ def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType, - pipeline_options: 
PipelineOptions, data_type: Text, - tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - super(_RemotePredictDoFn, self).__init__(inference_spec_type, tensor_adapter_config) + pipeline_options: PipelineOptions, data_type: Text): + super(_RemotePredictDoFn, self).__init__(inference_spec_type) self._api_client = None self._data_type = data_type @@ -540,6 +504,13 @@ def setup(self): # user agent once custom header is supported in googleapiclient. self._api_client = discovery.build('ml', 'v1') + def _extract_from_recordBatch(self, elements: pa.RecordBatch): + serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) + prepare_instances_serialized = ( + self.inference_spec_type.ai_platform_prediction_model_spec.use_serialization_config) + model_input = bsl_util.RecordToJSON(elements, prepare_instances_serialized) + return serialized_examples, model_input + # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. @retry.with_exponential_backoff( initial_delay_secs=1.0, @@ -614,7 +585,7 @@ def __init__( self, inference_spec_type: model_spec_pb2.InferenceSpecType, shared_model_handle: shared.Shared, data_type, tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None): - super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type, tensor_adapter_config) + super(_BaseBatchSavedModelDoFn, self).__init__(inference_spec_type) self._inference_spec_type = inference_spec_type self._shared_model_handle = shared_model_handle self._model_path = inference_spec_type.saved_model_spec.model_path @@ -625,6 +596,7 @@ def __init__( _get_tags(inference_spec_type)) self._session = None self._data_type = data_type + self._tensor_adapter_config = tensor_adapter_config def setup(self): """Load the model. @@ -704,6 +676,29 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) + def _extract_from_recordBatch(self, elements: pa.RecordBatch): + serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) + + model_input = None + if (len(self._io_tensor_spec.input_tensor_names) == 1): + model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} + else: + if (self._tensor_adapter_config is None): + raise ValueError('Tensor adaptor config is required with a multi-input model') + + input_tensor_names = self._io_tensor_spec.input_tensor_names + input_tensor_alias = self._io_tensor_spec.input_tensor_alias + _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) + dict_of_tensors = _tensor_adapter.ToBatchTensors( + elements, produce_eager_tensors = False) + filtered_tensors = bsl_util.filter_tensors_by_input_names( + dict_of_tensors, input_tensor_alias) + + model_input = {} + for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): + model_input[tensor_name] = filtered_tensors[feature] + return serialized_examples, model_input + def run_inference( self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: self._check_elements() @@ -830,21 +825,23 @@ def _post_process( 'dimension, with the first having a size equal to the input batch ' 'size %s. 
Instead found %s' % (output_alias, batch_size, output.shape)) - predict_log_tmpl = prediction_log_pb2.PredictLog() - predict_log_tmpl.request.model_spec.signature_name = signature_name - predict_log_tmpl.response.model_spec.signature_name = signature_name - for alias, tensor_type in input_tensor_types.items(): - input_tensor_proto = predict_log_tmpl.request.inputs[alias] - input_tensor_proto.dtype = tf.as_dtype(tensor_type).as_datatype_enum - # TODO (Maxine): fix dimension? - input_tensor_proto.tensor_shape.dim.add().size = 1 - - result = [] - for i in range(batch_size): - predict_log = prediction_log_pb2.PredictLog() - predict_log.CopyFrom(predict_log_tmpl) + + if include_request: + predict_log_tmpl = prediction_log_pb2.PredictLog() + predict_log_tmpl.request.model_spec.signature_name = signature_name + predict_log_tmpl.response.model_spec.signature_name = signature_name + for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + input_tensor_proto = predict_log_tmpl.request.inputs[alias] + input_tensor_proto.dtype = tf.as_dtype(input_tensor_types[alias]).as_datatype_enum + if len(input_tensor_alias) == 1: + input_tensor_proto.tensor_shape.dim.add().size = 1 + else: + input_tensor_proto.tensor_shape.dim.add().size = len(elements[tensor_name][0]) - if include_request: + result = [] + for i in range(batch_size): + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) if len(input_tensor_alias) == 1: alias = input_tensor_alias[0] predict_log.request.inputs[alias].string_val.append(process_elements[i]) @@ -852,14 +849,14 @@ def _post_process( for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): predict_log.request.inputs[alias].float_val.append(elements[tensor_name][i]) - for output_alias, output in outputs.items(): - # Mimic tensor::Split - tensor_proto = tf.make_tensor_proto( - values=output[i], - dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, - shape=np.expand_dims(output[i], axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) + for output_alias, output in outputs.items(): + # Mimic tensor::Split + tensor_proto = tf.make_tensor_proto( + values=output[i], + dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, + shape=np.expand_dims(output[i], axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) return result diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 192bd58f..f28ff1bb 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -267,8 +267,8 @@ class RunRemoteInferenceExamplesTest(RunInferenceFixture): def setUp(self): super(RunRemoteInferenceExamplesTest, self).setUp() - self.example_path = self._get_output_data_dir('example') - self._prepare_predict_examples(self.example_path) + self._example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self._example_path) # This is from https://ml.googleapis.com/$discovery/rest?version=v1. 
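The response handling above splits each batched output tensor row by row into per-example PredictLogs. A condensed, self-contained sketch of that split, assuming a single dense output named 'output1' (an illustration, not the exact implementation):

    # Illustrative only: split one batched output into per-example PredictLogs.
    import numpy as np
    import tensorflow as tf
    from tensorflow_serving.apis import prediction_log_pb2

    outputs = {'output1': np.array([[0.3], [0.7]], dtype=np.float32)}
    batch_size = 2

    logs = []
    for i in range(batch_size):
      log = prediction_log_pb2.PredictLog()
      for alias, output in outputs.items():
        # Mimic tensor::Split: one row per example, keeping a leading dim of 1.
        log.response.outputs[alias].CopyFrom(
            tf.make_tensor_proto(
                values=output[i],
                dtype=tf.as_dtype(output[i].dtype).as_datatype_enum,
                shape=np.expand_dims(output[i], axis=0).shape))
      logs.append(log)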
self._discovery_testdata_dir = os.path.join( os.path.join(os.path.dirname(__file__), 'testdata'), @@ -286,7 +286,7 @@ def _set_up_pipeline(self, inference_spec_type): self.pipeline = beam.Pipeline() self.pcoll = ( self.pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self._example_path) | 'ParseExamples' >> beam.Map(tf.train.Example.FromString) | 'RunInference' >> run_inference.RunInferenceOnExamples(inference_spec_type)) @@ -1018,9 +1018,9 @@ class RunRemoteInferenceArrowTest(RunInferenceFixture): def setUp(self): super(RunRemoteInferenceArrowTest, self).setUp() + self._example_path = self._get_output_data_dir('example') + self._prepare_predict_examples(self._example_path) # This is from https://ml.googleapis.com/$discovery/rest?version=v1. - self.example_path = self._get_output_data_dir('example') - self._prepare_predict_examples(self.example_path) self._discovery_testdata_dir = os.path.join( os.path.join(os.path.dirname(__file__), 'testdata'), 'ml_discovery.json') @@ -1041,7 +1041,7 @@ def _set_up_pipeline(self, inference_spec_type): raw_record_column_name=_RECORDBATCH_COLUMN) self.pcoll = ( self.pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(self.example_path) + | 'ReadExamples' >> beam.io.ReadFromTFRecord(self._example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) @@ -1167,19 +1167,9 @@ def test_can_format_requests(self): self._run_inference_with_beam() def test_request_body_with_binary_data(self): - example = text_format.Parse( - """ - features { - feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} - feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} - feature { key: "y" value { int64_list { value: [1, 2] }}} - feature { key: "z" value { float_list { value: [4.5, 5, 5.5] }}} - } - """, tf.train.Example()) - serialized_example_remote = [example.SerializeToString()] record_batch_remote = pa.RecordBatch.from_arrays( [ - pa.array([["ASa8asdf"]], type=pa.list_(pa.binary())), + pa.array([["ASa8asdf", "ASa8asdf"]], type=pa.list_(pa.binary())), pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), pa.array([[1, 2]], type=pa.list_(pa.int32())), pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())) @@ -1190,9 +1180,10 @@ def test_request_body_with_binary_data(self): result = list(bsl_util.RecordToJSON(record_batch_remote, False)) self.assertEqual([ { - 'x_bytes': { - 'b64': 'QVNhOGFzZGY=' - }, + 'x_bytes': [ + {'b64': 'QVNhOGFzZGY='}, + {'b64': 'QVNhOGFzZGY='} + ], 'x': 'JLK7ljk3', 'y': [1, 2], 'z': [4.5, 5, 5.5] From dc9c513811d15c49241bff98638acb0993894169 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 6 Aug 2020 15:48:23 -0400 Subject: [PATCH 27/31] add pytypes for returns and complete comments --- tfx_bsl/beam/bsl_constants.py | 1 - tfx_bsl/beam/bsl_util.py | 19 ++++++---- tfx_bsl/beam/run_inference.py | 60 ++++++++++++++++++++---------- tfx_bsl/beam/run_inference_test.py | 23 +++++------- 4 files changed, 62 insertions(+), 41 deletions(-) diff --git a/tfx_bsl/beam/bsl_constants.py b/tfx_bsl/beam/bsl_constants.py index 4f797b15..4b7473b2 100644 --- a/tfx_bsl/beam/bsl_constants.py +++ b/tfx_bsl/beam/bsl_constants.py @@ -1,5 +1,4 @@ _RECORDBATCH_COLUMN = '__RAW_RECORD__' -_KERAS_INPUT_SUFFIX = '_input' class DataType(object): EXAMPLE = 'EXAMPLE' diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index 3d2a7929..2a735487 100644 --- 
a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -18,17 +18,16 @@ # Standard __future__ imports from __future__ import print_function - import numpy as np import pyarrow as pa import pandas as pd import base64 import json import typing -from typing import Dict, List, Text, Any, Set, Optional +from typing import Dict, List, Text, Any, Set, Mapping, Optional from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN -from tfx_bsl.beam.bsl_constants import _KERAS_INPUT_SUFFIX - + +_KERAS_INPUT_SUFFIX = '_input' def ExtractSerializedExampleFromRecordBatch(elements: pa.RecordBatch) -> List[Text]: serialized_examples = None @@ -42,19 +41,23 @@ def ExtractSerializedExampleFromRecordBatch(elements: pa.RecordBatch) -> List[Te serialized_examples = column_array.flatten().to_pylist() break - if (serialized_examples is None): + if not serialized_examples: raise ValueError('Raw examples not found.') return serialized_examples -def RecordToJSON(record_batch: pa.RecordBatch, prepare_instances_serialized) -> List[Text]: - """Returns a JSON string translated from `record_batch`. +def RecordToJSON( + record_batch: pa.RecordBatch, prepare_instances_serialized) -> List[Mapping[Text, Any]]: + """Returns a list of JSON dictionaries translated from `record_batch`. The conversion will take in a recordbatch that contains features from a tf.train.Example and will return a list of dict like string (JSON) where each item is a JSON representation of an example. - - return format: [{ feature1: value1, ... }, ...] + + Return: + List of JSON dictionaries + - format: [{ feature1: value1, feature2: [value2_1, value2_2]... }, ...] Args: record_batch: input RecordBatch. diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index f3bdb5f4..87f44bea 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
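For reference, the RecordToJSON contract documented above behaves as in the following sketch (feature names and values are illustrative; the binary-column and use_serialization_config cases are exercised by the tests in this series):

    # Illustrative values only.
    import pyarrow as pa
    from tfx_bsl.beam import bsl_util

    record_batch = pa.RecordBatch.from_arrays(
        [pa.array([[1.0]], type=pa.list_(pa.float32())),
         pa.array([[2, 3]], type=pa.list_(pa.int64()))],
        ['x', 'y'])

    # Single-element lists are unwrapped and multi-element lists are kept,
    # giving [{'x': 1.0, 'y': [2, 3]}].
    instances = bsl_util.RecordToJSON(record_batch, False)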
-"""Run batch inference on saved model and private APIs of inference.""" +"""Run batch inference on saved model with private APIs of inference.""" from __future__ import absolute_import from __future__ import division @@ -130,24 +130,29 @@ def RunInferenceOnExamples( # pylint: disable=invalid-name """ data_type = DataType.EXAMPLE + operation_type = _get_operation_type(inference_spec_type) + proximity_descriptor = ( + _METRICS_DESCRIPTOR_IN_PROCESS + if _using_in_process_inference(inference_spec_type) + else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) converter = tf_example_record.TFExampleBeamRecord( physical_format="inmem", - telemetry_descriptors=[], + telemetry_descriptors=[ + _METRICS_DESCRIPTOR_INFERENCE, + operation_type, proximity_descriptor], schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = None if schema: - tfxio = test_util.InMemoryTFExampleRecord( - schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) + arrow_schema=converter.ArrowSchema(), + tensor_representations=converter.TensorRepresentations()) return (examples | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> RunInferenceOnRecordBatch( + | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( inference_spec_type, data_type, tensor_adapter_config=tensor_adapter_config)) @@ -180,24 +185,29 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name """ data_type = DataType.SEQUENCEEXAMPLE + operation_type = _get_operation_type(inference_spec_type) + proximity_descriptor = ( + _METRICS_DESCRIPTOR_IN_PROCESS + if _using_in_process_inference(inference_spec_type) + else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( physical_format="inmem", - telemetry_descriptors=[], + telemetry_descriptors=[ + _METRICS_DESCRIPTOR_INFERENCE, + operation_type, proximity_descriptor], schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = None if schema: - tfxio = test_util.InMemoryTFExampleRecord( - schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) + arrow_schema=converter.ArrowSchema(), + tensor_representations=converter.TensorRepresentations()) return (examples | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> RunInferenceOnRecordBatch( + | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( inference_spec_type, data_type, tensor_adapter_config=tensor_adapter_config)) @@ -205,7 +215,7 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name @beam.ptransform_fn @beam.typehints.with_input_types(pa.RecordBatch) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceOnRecordBatch( # pylint: disable=invalid-name +def _RunInferenceOnRecordBatch( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, inference_spec_type: model_spec_pb2.InferenceSpecType, data_type: Text, tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None @@ -217,7 +227,8 @@ def RunInferenceOnRecordBatch( # pylint: disable=invalid-name inference_spec_type: Model inference 
endpoint. tensor_adapter_config [Optional]: Tensor adapter config which specifies how to obtain tensors from the Arrow RecordBatch. - - Not required when running inference with remote model or 1 input + - Not required when running inference with remote model or + serialized example as the single input tensor Returns: A PCollection containing prediction logs. @@ -421,6 +432,15 @@ def finish_bundle(self): def run_inference( self, tensors: Mapping[Any, Any] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: + """ + Run inference with extracted model input. + + Parameters: + tensors: a dictionary consists of tensor names and tensors + in the form of ndArray, SparceTensorValues, etc. + - ex: { 'x': SparseTensorValue } + { 'y': [[1, 2, 3], [3, 4, 5] ...] } + """ raise NotImplementedError @abc.abstractmethod @@ -504,7 +524,8 @@ def setup(self): # user agent once custom header is supported in googleapiclient. self._api_client = discovery.build('ml', 'v1') - def _extract_from_recordBatch(self, elements: pa.RecordBatch): + def _extract_from_recordBatch( + self, elements: pa.RecordBatch) -> Tuple[List[Text], List[Mapping[Any, Any]]]: serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) prepare_instances_serialized = ( self.inference_spec_type.ai_platform_prediction_model_spec.use_serialization_config) @@ -648,7 +669,7 @@ def _pre_process(self) -> _IOTensorSpec: list(signature.signature_def.inputs.values())[0].dtype != tf.string.as_datatype_enum): raise ValueError( - 'With 1 input, dtype is expected to be %s, got %s' % + 'With 1 input, dtype is expected to be %s for serialized examples, got %s' % tf.string.as_datatype_enum, list(signature.signature_def.inputs.values())[0].dtype) io_tensor_specs.append(_signature_pre_process(signature.signature_def)) @@ -676,14 +697,15 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def _extract_from_recordBatch(self, elements: pa.RecordBatch): + def _extract_from_recordBatch( + self, elements: pa.RecordBatch) -> Tuple[List[Text], Mapping[Any, Any]]: serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) model_input = None if (len(self._io_tensor_spec.input_tensor_names) == 1): model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} else: - if (self._tensor_adapter_config is None): + if not self._tensor_adapter_config: raise ValueError('Tensor adaptor config is required with a multi-input model') input_tensor_names = self._io_tensor_spec.input_tensor_names diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index f28ff1bb..61f0d83c 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -683,25 +683,22 @@ def _build_multihead_model(self, model_path): def _run_inference_with_beam(self, example_path, inference_spec_type, prediction_log_path, include_config = False): - # test RunInferenceOnRecordBatch + # test _RunInferenceOnRecordBatch converter = tf_example_record.TFExampleBeamRecord( - physical_format="inmem", - telemetry_descriptors=[], - raw_record_column_name=_RECORDBATCH_COLUMN) + physical_format="inmem", telemetry_descriptors=[], + schema=self.schema, raw_record_column_name=_RECORDBATCH_COLUMN) if include_config: - tfxio = test_util.InMemoryTFExampleRecord( - schema=self.schema, raw_record_column_name=_RECORDBATCH_COLUMN) tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - 
arrow_schema=tfxio.ArrowSchema(), - tensor_representations=tfxio.TensorRepresentations()) + arrow_schema=converter.ArrowSchema(), + tensor_representations=converter.TensorRepresentations()) with beam.Pipeline() as pipeline: _ = ( pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + | 'RunInference' >> run_inference._RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE, tensor_adapter_config) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, @@ -712,7 +709,7 @@ def _run_inference_with_beam(self, example_path, inference_spec_type, pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + | 'RunInference' >> run_inference._RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, @@ -978,7 +975,7 @@ def testTelemetry(self): pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + | 'RunInference' >> run_inference._RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) run_result = pipeline.run() run_result.wait_until_finish() @@ -1043,7 +1040,7 @@ def _set_up_pipeline(self, inference_spec_type): self.pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(self._example_path) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + | 'RunInference' >> run_inference._RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) def _run_inference_with_beam(self): @@ -1161,7 +1158,7 @@ def test_can_format_requests(self): | 'CreateExamples' >> beam.Create([example]) | 'ParseExamples' >> beam.Map(lambda x: x.SerializeToString()) | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInference' >> run_inference.RunInferenceOnRecordBatch( + | 'RunInference' >> run_inference._RunInferenceOnRecordBatch( inference_spec_type, DataType.EXAMPLE)) self._run_inference_with_beam() From 1a12c5c8ea505e29aafa474c7dbe724fe92366a9 Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Wed, 12 Aug 2020 11:56:44 -0400 Subject: [PATCH 28/31] separate test for bsl-util --- tfx_bsl/beam/bsl_util.py | 11 +--- tfx_bsl/beam/bsl_util_test.py | 91 ++++++++++++++++++++++++++++ tfx_bsl/public/beam/run_inference.py | 4 +- 3 files changed, 96 insertions(+), 10 deletions(-) create mode 100644 tfx_bsl/beam/bsl_util_test.py diff --git a/tfx_bsl/beam/bsl_util.py b/tfx_bsl/beam/bsl_util.py index 2a735487..4c86c745 100644 --- a/tfx_bsl/beam/bsl_util.py +++ b/tfx_bsl/beam/bsl_util.py @@ -29,7 +29,7 @@ _KERAS_INPUT_SUFFIX = '_input' -def ExtractSerializedExampleFromRecordBatch(elements: pa.RecordBatch) -> List[Text]: +def ExtractSerializedExamplesFromRecordBatch(elements: pa.RecordBatch) -> List[Text]: serialized_examples = None for column_name, column_array in zip(elements.schema.names, elements.columns): if column_name == _RECORDBATCH_COLUMN: @@ -64,12 +64,6 @@ def RecordToJSON( """ # TODO (b/155912552): Handle this for sequence example. 
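The renamed extractor above simply pulls the raw-record column out of the Arrow batch. A small usage sketch with placeholder column contents, assuming the RecordBatch was produced with raw_record_column_name=_RECORDBATCH_COLUMN as elsewhere in this series:

    # Placeholder data: a RecordBatch carrying the raw serialized records.
    import pyarrow as pa
    from tfx_bsl.beam import bsl_util
    from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN

    record_batch = pa.RecordBatch.from_arrays(
        [pa.array([[1.0], [2.0]], type=pa.list_(pa.float32())),
         pa.array([[b'serialized-example-1'], [b'serialized-example-2']],
                  type=pa.list_(pa.binary()))],
        ['x', _RECORDBATCH_COLUMN])

    # Returns the flattened raw records:
    # [b'serialized-example-1', b'serialized-example-2']
    raw_records = bsl_util.ExtractSerializedExamplesFromRecordBatch(record_batch)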
- - def flatten(element: List[Any]): - if len(element) == 1: - return element[0] - return element - df = record_batch.to_pandas() if prepare_instances_serialized: return [{'b64': base64.b64encode(value).decode()} for value in df[_RECORDBATCH_COLUMN]] @@ -80,10 +74,11 @@ def flatten(element: List[Any]): if _RECORDBATCH_COLUMN in df.columns: df = df.drop(labels=_RECORDBATCH_COLUMN, axis=1) - df = df.applymap(lambda x: flatten(x)) + df = df.applymap(lambda values: values[0] if len(values) == 1 else values) return json.loads(df.to_json(orient='records')) +# TODO: Reuse these functions in TFMA. def _find_input_name_in_features(features: Set[Text], input_name: Text) -> Optional[Text]: """Maps input name to an entry in features. Returns None if not found.""" diff --git a/tfx_bsl/beam/bsl_util_test.py b/tfx_bsl/beam/bsl_util_test.py new file mode 100644 index 00000000..25f84687 --- /dev/null +++ b/tfx_bsl/beam/bsl_util_test.py @@ -0,0 +1,91 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for tfx_bsl.bsl_util.""" + +from __future__ import absolute_import +from __future__ import division +# Standard __future__ imports +from __future__ import print_function + +import base64 +import json +import os +try: + import unittest.mock as mock +except ImportError: + import mock + +import apache_beam as beam +import pyarrow as pa +import tensorflow as tf +from google.protobuf import text_format +from tfx_bsl.beam import bsl_util +from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN + + +class TestBslUtil(tf.test.TestCase): + def test_request_body_with_binary_data(self): + record_batch_remote = pa.RecordBatch.from_arrays( + [ + pa.array([["ASa8asdf", "ASa8asdf"]], type=pa.list_(pa.binary())), + pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), + pa.array([[1, 2]], type=pa.list_(pa.int32())), + pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())) + ], + ['x_bytes', 'x', 'y', 'z'] + ) + + result = list(bsl_util.RecordToJSON(record_batch_remote, False)) + self.assertEqual([ + { + 'x_bytes': [ + {'b64': 'QVNhOGFzZGY='}, + {'b64': 'QVNhOGFzZGY='} + ], + 'x': 'JLK7ljk3', + 'y': [1, 2], + 'z': [4.5, 5, 5.5] + }, + ], result) + + def test_request_serialized_example(self): + example = text_format.Parse( + """ + features { + feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} + feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} + feature { key: "y" value { int64_list { value: [1, 2] }}} + } + """, tf.train.Example()) + + serialized_example_remote = [example.SerializeToString()] + record_batch_remote = pa.RecordBatch.from_arrays( + [ + pa.array([["ASa8asdf"]], type=pa.list_(pa.binary())), + pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), + pa.array([[1, 2]], type=pa.list_(pa.int32())), + pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())), + serialized_example_remote + ], + ['x_bytes', 'x', 'y', 'z', _RECORDBATCH_COLUMN] + ) + + result = list(bsl_util.RecordToJSON(record_batch_remote, True)) + self.assertEqual(result, 
[{ + 'b64': base64.b64encode(example.SerializeToString()).decode() + }]) + + +if __name__ == '__main__': + tf.test.main() \ No newline at end of file diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index 8e173d5d..f8461b05 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -53,7 +53,7 @@ def RunInference( # pylint: disable=invalid-name Args: examples: A PCollection containing examples. inference_spec_type: Model inference endpoint. - Schema [optional]: required for models that requires + schema [optional]: required for predict models that requires multi-tensor inputs. Returns: @@ -85,7 +85,7 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name Args: examples: A PCollection containing sequence examples. inference_spec_type: Model inference endpoint. - Schema [optional]: required for models that requires + schema [optional]: required for predict models that requires multi-tensor inputs. Returns: From 2fa6720edffda6eb58bcbaa8338c91fd6db8dd6e Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Sat, 15 Aug 2020 12:07:23 -0400 Subject: [PATCH 29/31] checkpoint: address comments on post-process, and modified public api (WIP) --- tfx_bsl/beam/bsl_util_test.py | 2 +- tfx_bsl/beam/run_inference.py | 280 ++++++++++++++------------- tfx_bsl/beam/run_inference_test.py | 172 +++++++++------- tfx_bsl/public/beam/run_inference.py | 38 +--- 4 files changed, 255 insertions(+), 237 deletions(-) diff --git a/tfx_bsl/beam/bsl_util_test.py b/tfx_bsl/beam/bsl_util_test.py index 25f84687..c1a63b0d 100644 --- a/tfx_bsl/beam/bsl_util_test.py +++ b/tfx_bsl/beam/bsl_util_test.py @@ -88,4 +88,4 @@ def test_request_serialized_example(self): if __name__ == '__main__': - tf.test.main() \ No newline at end of file + tf.test.main() diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index 87f44bea..f93e8212 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -50,8 +50,8 @@ from tfx_bsl.tfxio import tensor_adapter from tfx_bsl.tfxio import tf_example_record from tfx_bsl.tfxio import tf_sequence_example_record -from typing import Any, Generator, Iterable, List, Mapping, Sequence, Text, \ - Tuple, Union, Optional +from typing import Any, Generator, Iterable, List, Mapping, Optional, \ + Sequence, Text, TypeVar, Tuple, Union from tfx_bsl.beam.bsl_constants import _RECORDBATCH_COLUMN from tfx_bsl.beam.bsl_constants import DataType @@ -88,6 +88,7 @@ _MetaGraphDef = Any _SavedModel = Any +MixedExample = TypeVar('MixedExample', tf.train.Example, tf.train.SequenceExample) # TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7 class OperationType(object): @@ -98,7 +99,7 @@ class OperationType(object): @beam.ptransform_fn -@beam.typehints.with_input_types(tf.train.Example) +@beam.typehints.with_input_types(MixedExample) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInferenceOnExamples( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, @@ -129,74 +130,50 @@ def RunInferenceOnExamples( # pylint: disable=invalid-name A PCollection containing prediction logs. 
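The schema argument on the public RunInference documented above is only needed when the serving signature takes multiple dense tensors; with a single serialized-example input it can be omitted. A rough sketch of the two call patterns (model paths and the two-feature schema are placeholders, not from this change):

    # Placeholders throughout; shows when `schema` is and is not required.
    import apache_beam as beam
    import tensorflow as tf
    from google.protobuf import text_format
    from tensorflow_metadata.proto.v0 import schema_pb2
    from tfx_bsl.public.beam.run_inference import RunInference
    from tfx_bsl.public.proto import model_spec_pb2

    schema = text_format.Parse(
        'feature { name: "x" type: FLOAT } feature { name: "y" type: FLOAT }',
        schema_pb2.Schema())

    with beam.Pipeline() as p:
      examples = p | beam.Create([tf.train.Example()])

      # Signature with a single serialized-example string input: no schema.
      _ = examples | 'Serialized' >> RunInference(
          model_spec_pb2.InferenceSpecType(
              saved_model_spec=model_spec_pb2.SavedModelSpec(
                  model_path='/tmp/serialized_input_model')))

      # Predict signature with several dense inputs: pass the schema so the
      # transform can build a TensorAdapterConfig internally.
      _ = examples | 'MultiTensor' >> RunInference(
          model_spec_pb2.InferenceSpecType(
              saved_model_spec=model_spec_pb2.SavedModelSpec(
                  model_path='/tmp/multi_tensor_model')),
          schema=schema)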
""" - data_type = DataType.EXAMPLE operation_type = _get_operation_type(inference_spec_type) proximity_descriptor = ( _METRICS_DESCRIPTOR_IN_PROCESS if _using_in_process_inference(inference_spec_type) else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) - converter = tf_example_record.TFExampleBeamRecord( - physical_format="inmem", - telemetry_descriptors=[ - _METRICS_DESCRIPTOR_INFERENCE, - operation_type, proximity_descriptor], - schema=schema, - raw_record_column_name=_RECORDBATCH_COLUMN) - tensor_adapter_config = None - if schema: - tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=converter.ArrowSchema(), - tensor_representations=converter.TensorRepresentations()) - - return (examples - | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) - | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( - inference_spec_type, data_type, - tensor_adapter_config=tensor_adapter_config)) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(tf.train.SequenceExample) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType, - schema: Optional[schema_pb2.Schema] = None -) -> beam.pvalue.PCollection: - """Run inference with a model. - - There are two types of inference you can perform using this PTransform: - 1. In-process inference from a SavedModel instance. Used when - `saved_model_spec` field is set in `inference_spec_type`. - 2. Remote inference by using a service endpoint. Used when - `ai_platform_prediction_model_spec` field is set in - `inference_spec_type`. - - Args: - examples: A PCollection containing sequence examples. - inference_spec_type: Model inference endpoint. - Schema [optional]: required for models that requires - multi-tensor inputs. - - Returns: - A PCollection containing prediction logs. 
- """ - - data_type = DataType.SEQUENCEEXAMPLE - operation_type = _get_operation_type(inference_spec_type) - proximity_descriptor = ( - _METRICS_DESCRIPTOR_IN_PROCESS - if _using_in_process_inference(inference_spec_type) - else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) - converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( - physical_format="inmem", - telemetry_descriptors=[ - _METRICS_DESCRIPTOR_INFERENCE, - operation_type, proximity_descriptor], - schema=schema, - raw_record_column_name=_RECORDBATCH_COLUMN) + # determine input dataType + beam_type = examples.element_type + if beam_type == tf.train.Example or beam_type == tf.train.SequenceExample: + data_type = _get_data_type(beam_type) + else: + tagged = (examples | "SortInput" >> beam.Map( + lambda example: beam.pvalue.TaggedOutput( + 'example' if isinstance(example, tf.train.Example) + else 'sequence', example)).with_outputs('example', 'sequence')) + + import ipdb; ipdb.set_trace() + + if tagged.example and tagged.sequence: + raise ValueError('A PCollection containing both tf.Example and ' + 'tf.SequenceExample is not supported') + if not tagged.example: + data_type = DataType.SEQUENCEEXAMPLE + else: + data_type = DataType.EXAMPLE + + if data_type == DataType.EXAMPLE: + converter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[ + _METRICS_DESCRIPTOR_INFERENCE, + operation_type, proximity_descriptor], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + elif data_type == DataType.SEQUENCEEXAMPLE: + converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[ + _METRICS_DESCRIPTOR_INFERENCE, + operation_type, proximity_descriptor], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + else: + raise ValueError('Unsupported data_type %s' % data_type) tensor_adapter_config = None if schema: @@ -205,11 +182,11 @@ def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name tensor_representations=converter.TensorRepresentations()) return (examples - | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) - | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( - inference_spec_type, data_type, - tensor_adapter_config=tensor_adapter_config)) + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( + inference_spec_type, data_type, + tensor_adapter_config=tensor_adapter_config)) @beam.ptransform_fn @@ -405,19 +382,26 @@ def __init__( def setup(self): self._clock = _ClockFactory.make_clock() + def _extract_serialized_from_recordBatch( + self, elements: pa.RecordBatch) -> List[Union[str, bytes]]: + """Function to extract serialized examples from the recordbatch""" + serialized_examples = bsl_util.ExtractSerializedExamplesFromRecordBatch(elements) + return serialized_examples + @abc.abstractmethod - def _extract_from_recordBatch(self, elements: pa.RecordBatch): - """ - Function to extract the compatible input with model signature + def _extract_inference_input_from_recordBatch( + self, elements: pa.RecordBatch) -> Union[Mapping[Any, Any], List[Mapping[Any, Any]]]: + """Function to extract the compatible input with model signature + return: - - serialized examples for metrics - model input for processing and post processing """ raise NotImplementedError def process(self, elements: pa.RecordBatch) -> 
Iterable[Any]: batch_start_time = self._clock.get_current_time_in_microseconds() - serialized_examples, model_input = self._extract_from_recordBatch(elements) + serialized_examples = self._extract_serialized_from_recordBatch(elements) + model_input = self._extract_inference_input_from_recordBatch(elements) outputs = self.run_inference(model_input) result = self._post_process(model_input, outputs) self._metrics_collector.update( @@ -430,10 +414,9 @@ def finish_bundle(self): @abc.abstractmethod def run_inference( - self, tensors: Mapping[Any, Any] + self, tensors: Mapping[Text, Any] ) -> Union[Mapping[Text, np.ndarray], Sequence[Mapping[Text, Any]]]: - """ - Run inference with extracted model input. + """Run inference with extracted model input. Parameters: tensors: a dictionary consists of tensor names and tensors @@ -524,13 +507,12 @@ def setup(self): # user agent once custom header is supported in googleapiclient. self._api_client = discovery.build('ml', 'v1') - def _extract_from_recordBatch( - self, elements: pa.RecordBatch) -> Tuple[List[Text], List[Mapping[Any, Any]]]: - serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) + def _extract_inference_input_from_recordBatch( + self, elements: pa.RecordBatch) -> List[Mapping[Any, Any]]: prepare_instances_serialized = ( self.inference_spec_type.ai_platform_prediction_model_spec.use_serialization_config) model_input = bsl_util.RecordToJSON(elements, prepare_instances_serialized) - return serialized_examples, model_input + return model_input # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff. @retry.with_exponential_backoff( @@ -550,7 +532,7 @@ def _make_request(self, body: Mapping[Text, List[Any]]) -> http.HttpRequest: @classmethod def _prepare_instances( - cls, elements: List[Union[str, bytes]] + cls, elements: List[Mapping[Any, Any]] ) -> Generator[Mapping[Text, Any], None, None]: for instance in elements: yield instance @@ -697,12 +679,11 @@ def _has_tpu_tag(self) -> bool: return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and tf.saved_model.TPU in self._tags) - def _extract_from_recordBatch( - self, elements: pa.RecordBatch) -> Tuple[List[Text], Mapping[Any, Any]]: - serialized_examples = bsl_util.ExtractSerializedExampleFromRecordBatch(elements) - + def _extract_inference_input_from_recordBatch( + self, elements: pa.RecordBatch) -> Mapping[Any, Any]: model_input = None if (len(self._io_tensor_spec.input_tensor_names) == 1): + serialized_examples = bsl_util.ExtractSerializedExamplesFromRecordBatch(elements) model_input = {self._io_tensor_spec.input_tensor_names[0]: serialized_examples} else: if not self._tensor_adapter_config: @@ -711,24 +692,26 @@ def _extract_from_recordBatch( input_tensor_names = self._io_tensor_spec.input_tensor_names input_tensor_alias = self._io_tensor_spec.input_tensor_alias _tensor_adapter = tensor_adapter.TensorAdapter(self._tensor_adapter_config) + # dict_of_tensors is a map from input_tensor_alias to tensor dict_of_tensors = _tensor_adapter.ToBatchTensors( elements, produce_eager_tensors = False) filtered_tensors = bsl_util.filter_tensors_by_input_names( dict_of_tensors, input_tensor_alias) model_input = {} - for feature, tensor_name in zip(input_tensor_alias, input_tensor_names): - model_input[tensor_name] = filtered_tensors[feature] - return serialized_examples, model_input + for tensor_alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + model_input[tensor_name] = filtered_tensors[tensor_alias] + return model_input def 
run_inference( - self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: + self, tensors: Mapping[Text, Any]) -> Mapping[Text, np.ndarray]: + # tensors: a dictionary consists of tensor alias and tensors self._check_elements() outputs = self._run_tf_operations(tensors) return outputs def _run_tf_operations( - self, tensors: Mapping[Any, Any]) -> Mapping[Text, np.ndarray]: + self, tensors: Mapping[Text, Any]) -> Mapping[Text, np.ndarray]: result = self._session.run( self._io_tensor_spec.output_alias_tensor_names, feed_dict=tensors) if len(result) != len(self._io_tensor_spec.output_alias_tensor_names): @@ -824,61 +807,87 @@ def _post_process( if len(input_tensor_alias) != len(input_tensor_names): raise ValueError('Expected to have one name and one alias per tensor') - include_request = True + result = [] + # Single tensor input if len(input_tensor_names) == 1: serialized_examples, = elements.values() batch_size = len(serialized_examples) - process_elements = serialized_examples + + predict_log_tmpl = prediction_log_pb2.PredictLog() + predict_log_tmpl.request.model_spec.signature_name = signature_name + predict_log_tmpl.response.model_spec.signature_name = signature_name + input_tensor_proto = predict_log_tmpl.request.inputs[input_tensor_alias[0]] + input_tensor_proto.dtype = tf.string.as_datatype_enum + input_tensor_proto.tensor_shape.dim.add().size = 1 + + for output_alias, output in outputs.items(): + if len(output.shape) < 1 or output.shape[0] != batch_size: + raise ValueError( + 'Expected output tensor %s to have at least one ' + 'dimension, with the first having a size equal to the input batch ' + 'size %s. Instead found %s' % + (output_alias, batch_size, output.shape)) + + for i in range(batch_size): + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) + predict_log.request.inputs[input_tensor_alias[0]].string_val.append( + serialized_examples[i]) + for output_alias, output in outputs.items(): + # Mimic tensor::Split + tensor_proto = tf.make_tensor_proto( + values=output[i], + dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, + shape=np.expand_dims(output[i], axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) else: + predict_log_tmpl = prediction_log_pb2.PredictLog() + predict_log_tmpl.request.model_spec.signature_name = signature_name + predict_log_tmpl.response.model_spec.signature_name = signature_name + + # we will only include tensor_proto in requests when all input tensors are dense + include_request = True for tensor_name, tensor in elements.items(): if not isinstance(tensor, np.ndarray): include_request = False break if include_request: + for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + input_tensor_proto = predict_log_tmpl.request.inputs[alias] + input_tensor_proto.dtype = tf.as_dtype(input_tensor_types[alias]).as_datatype_enum + input_tensor_proto.tensor_shape.dim.add().size = len(elements[tensor_name][0]) + batch_size = len(elements[input_tensor_names[0]]) + for i in range(batch_size): + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) + for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): + predict_log.request.inputs[alias].float_val.append( + elements[tensor_name][i]) else: batch_size = elements[input_tensor_names[0]].shape[0] + predict_log = prediction_log_pb2.PredictLog() + predict_log.CopyFrom(predict_log_tmpl) - for output_alias, output in outputs.items(): - if len(output.shape) < 1 or 
output.shape[0] != batch_size: - raise ValueError( - 'Expected output tensor %s to have at least one ' - 'dimension, with the first having a size equal to the input batch ' - 'size %s. Instead found %s' % - (output_alias, batch_size, output.shape)) - - if include_request: - predict_log_tmpl = prediction_log_pb2.PredictLog() - predict_log_tmpl.request.model_spec.signature_name = signature_name - predict_log_tmpl.response.model_spec.signature_name = signature_name - for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): - input_tensor_proto = predict_log_tmpl.request.inputs[alias] - input_tensor_proto.dtype = tf.as_dtype(input_tensor_types[alias]).as_datatype_enum - if len(input_tensor_alias) == 1: - input_tensor_proto.tensor_shape.dim.add().size = 1 - else: - input_tensor_proto.tensor_shape.dim.add().size = len(elements[tensor_name][0]) + for output_alias, output in outputs.items(): + if len(output.shape) < 1 or output.shape[0] != batch_size: + raise ValueError( + 'Expected output tensor %s to have at least one ' + 'dimension, with the first having a size equal to the input batch ' + 'size %s. Instead found %s' % + (output_alias, batch_size, output.shape)) - result = [] for i in range(batch_size): - predict_log = prediction_log_pb2.PredictLog() - predict_log.CopyFrom(predict_log_tmpl) - if len(input_tensor_alias) == 1: - alias = input_tensor_alias[0] - predict_log.request.inputs[alias].string_val.append(process_elements[i]) - else: - for alias, tensor_name in zip(input_tensor_alias, input_tensor_names): - predict_log.request.inputs[alias].float_val.append(elements[tensor_name][i]) - - for output_alias, output in outputs.items(): - # Mimic tensor::Split - tensor_proto = tf.make_tensor_proto( - values=output[i], - dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, - shape=np.expand_dims(output[i], axis=0).shape) - predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) - result.append(predict_log) + for output_alias, output in outputs.items(): + # Mimic tensor::Split + tensor_proto = tf.make_tensor_proto( + values=output[i], + dtype=tf.as_dtype(output[i].dtype).as_datatype_enum, + shape=np.expand_dims(output[i], axis=0).shape) + predict_log.response.outputs[output_alias].CopyFrom(tensor_proto) + result.append(predict_log) return result @@ -1237,6 +1246,15 @@ def _get_signatures(model_path: Text, signatures: Sequence[Text], return result +def _get_data_type( + data_type: Union[tf.train.Example, tf.train.SequenceExample]) -> Text: + if (data_type == tf.train.Example): + return DataType.EXAMPLE + elif (data_type == tf.train.SequenceExample): + return DataType.SequenceExample + else: + raise ValueError('Expected tf.Example or tf.SequenceExample, got %s' % data_type) + def _get_operation_type( inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: if _using_in_process_inference(inference_spec_type): diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index 61f0d83c..a32f5991 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -333,32 +333,32 @@ def setUp(self): self._predict_examples = [ text_format.Parse( """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), + context { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.SequenceExample()), text_format.Parse( """ - features { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, tf.train.Example()), + context { + feature { key: "input1" 
value { float_list { value: 1 }}} + } + """, tf.train.SequenceExample()), ] self._multihead_examples = [ text_format.Parse( """ - features { + context { feature {key: "x" value { float_list { value: 0.8 }}} feature {key: "y" value { float_list { value: 0.2 }}} } - """, tf.train.Example()), + """, tf.train.SequenceExample()), text_format.Parse( """ - features { + context { feature {key: "x" value { float_list { value: 0.6 }}} feature {key: "y" value { float_list { value: 0.1 }}} } - """, tf.train.Example()), + """, tf.train.SequenceExample()), ] self.schema = text_format.Parse( @@ -412,7 +412,7 @@ def _run_inference_with_beam(self, example_path, inference_spec_type, pipeline | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) - | 'RunInference' >> run_inference.RunInferenceOnSequenceExamples( + | 'RunInference' >> run_inference.RunInferenceOnExamples( inference_spec_type, schema=schema) | 'WritePredictions' >> beam.io.WriteToTFRecord( prediction_log_path, @@ -510,6 +510,94 @@ def testKerasModelPredictMultiTensor(self): self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) +class RunOfflineInferenceMixedExamplesTest(RunInferenceFixture): + + def setUp(self): + super(RunOfflineInferenceMixedExamplesTest, self).setUp() + self._predict_examples = [ + text_format.Parse( + """ + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), + text_format.Parse( + """ + context { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.SequenceExample()), + ] + + def _run_inference_with_beam(self, example_path, inference_spec_type, + prediction_log_path, include_schema = False): + with beam.Pipeline() as pipeline: + _ = ( + pipeline + | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) + | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) + | 'RunInference' >> run_inference.RunInferenceOnExamples( + inference_spec_type) + | 'WritePredictions' >> beam.io.WriteToTFRecord( + prediction_log_path, + coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) + + def testMixedExamples(self): + inputs = tf.keras.Input(shape=(1,), name='input1') + output1 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output1')( + inputs) + output2 = tf.keras.layers.Dense( + 1, activation=tf.nn.sigmoid, name='output2')( + inputs) + inference_model = tf.keras.models.Model(inputs, [output1, output2]) + + class TestKerasModel(tf.keras.Model): + + def __init__(self, inference_model): + super(TestKerasModel, self).__init__(name='test_keras_model') + self.inference_model = inference_model + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') + ]) + def call(self, serialized_example): + features = { + 'input1': + tf.compat.v1.io.FixedLenFeature([1], + dtype=tf.float32, + default_value=0) + } + input_tensor_dict = tf.io.parse_example(serialized_example, features) + return inference_model(input_tensor_dict['input1']) + + model = TestKerasModel(inference_model) + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=.001), + loss=tf.keras.losses.binary_crossentropy, + metrics=['accuracy']) + + model_path = self._get_output_data_dir('model') + tf.compat.v1.keras.experimental.export_saved_model( + model, model_path, serving_only=True) + + example_path = self._get_output_data_dir('examples') + self._prepare_predict_examples(example_path) + prediction_log_path = 
self._get_output_data_dir('predictions') + error_msg = 'Expected element of type' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path)), prediction_log_path) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + class RunOfflineInferenceArrowTest(RunInferenceFixture): def setUp(self): @@ -1163,64 +1251,6 @@ def test_can_format_requests(self): self._run_inference_with_beam() - def test_request_body_with_binary_data(self): - record_batch_remote = pa.RecordBatch.from_arrays( - [ - pa.array([["ASa8asdf", "ASa8asdf"]], type=pa.list_(pa.binary())), - pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), - pa.array([[1, 2]], type=pa.list_(pa.int32())), - pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())) - ], - ['x_bytes', 'x', 'y', 'z'] - ) - - result = list(bsl_util.RecordToJSON(record_batch_remote, False)) - self.assertEqual([ - { - 'x_bytes': [ - {'b64': 'QVNhOGFzZGY='}, - {'b64': 'QVNhOGFzZGY='} - ], - 'x': 'JLK7ljk3', - 'y': [1, 2], - 'z': [4.5, 5, 5.5] - }, - ], result) - - def test_request_serialized_example(self): - example = text_format.Parse( - """ - features { - feature { key: "x_bytes" value { bytes_list { value: ["ASa8asdf"] }}} - feature { key: "x" value { bytes_list { value: "JLK7ljk3" }}} - feature { key: "y" value { int64_list { value: [1, 2] }}} - } - """, tf.train.Example()) - inference_spec_type = model_spec_pb2.InferenceSpecType( - ai_platform_prediction_model_spec=model_spec_pb2 - .AIPlatformPredictionModelSpec( - project_id='test_project', - model_name='test_model', - version_name='test_version', - use_serialization_config=True)) - - serialized_example_remote = [example.SerializeToString()] - record_batch_remote = pa.RecordBatch.from_arrays( - [ - pa.array([["ASa8asdf"]], type=pa.list_(pa.binary())), - pa.array([["JLK7ljk3"]], type=pa.list_(pa.utf8())), - pa.array([[1, 2]], type=pa.list_(pa.int32())), - pa.array([[4.5, 5, 5.5]], type=pa.list_(pa.float32())), - serialized_example_remote - ], - ['x_bytes', 'x', 'y', 'z', _RECORDBATCH_COLUMN] - ) - - result = list(bsl_util.RecordToJSON(record_batch_remote, True)) - self.assertEqual(result, [{ - 'b64': base64.b64encode(example.SerializeToString()).decode() - }]) - if __name__ == '__main__': tf.test.main() diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py index f8461b05..9a8eb738 100644 --- a/tfx_bsl/public/beam/run_inference.py +++ b/tfx_bsl/public/beam/run_inference.py @@ -22,14 +22,16 @@ import apache_beam as beam import tensorflow as tf import pyarrow as pa -from typing import Text, Optional +from typing import Text, Optional, TypeVar from tfx_bsl.beam import run_inference from tfx_bsl.public.proto import model_spec_pb2 from tensorflow_serving.apis import prediction_log_pb2 from tensorflow_metadata.proto.v0 import schema_pb2 +MixedExample = TypeVar('MixedExample', tf.train.Example, tf.train.SequenceExample) + @beam.ptransform_fn -@beam.typehints.with_input_types(tf.train.Example) +@beam.typehints.with_input_types(MixedExample) @beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) def RunInference( # pylint: disable=invalid-name examples: beam.pvalue.PCollection, @@ -63,35 +65,3 @@ def RunInference( # pylint: disable=invalid-name return (examples | 'RunInferenceOnExamples' >> run_inference.RunInferenceOnExamples( 
inference_spec_type, schema=schema)) - - -@beam.ptransform_fn -@beam.typehints.with_input_types(tf.train.SequenceExample) -@beam.typehints.with_output_types(prediction_log_pb2.PredictionLog) -def RunInferenceOnSequenceExamples( # pylint: disable=invalid-name - examples: beam.pvalue.PCollection, - inference_spec_type: model_spec_pb2.InferenceSpecType, - schema: Optional[schema_pb2.Schema] = None -) -> beam.pvalue.PCollection: - """Run inference with a model. - - There are two types of inference you can perform using this PTransform: - 1. In-process inference from a SavedModel instance. Used when - `saved_model_spec` field is set in `inference_spec_type`. - 2. Remote inference by using a service endpoint. Used when - `ai_platform_prediction_model_spec` field is set in - `inference_spec_type`. - - Args: - examples: A PCollection containing sequence examples. - inference_spec_type: Model inference endpoint. - schema [optional]: required for predict models that requires - multi-tensor inputs. - - Returns: - A PCollection containing prediction logs. - """ - - return (examples - | 'RunInferenceOnSequenceExamples' >> run_inference.RunInferenceOnSequenceExamples( - inference_spec_type, schema=schema)) From 8c279ceb4faea1d863488aa6004eb59c16077f6c Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Sat, 15 Aug 2020 16:54:33 -0400 Subject: [PATCH 30/31] identify if example is empty --- tfx_bsl/beam/run_inference.py | 15 +++++++++++---- tfx_bsl/beam/run_inference_test.py | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index f93e8212..ef7476fb 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -146,13 +146,20 @@ def RunInferenceOnExamples( # pylint: disable=invalid-name 'example' if isinstance(example, tf.train.Example) else 'sequence', example)).with_outputs('example', 'sequence')) - import ipdb; ipdb.set_trace() + def check_empty(elements: beam.pvalue.PCollection) -> bool: + is_empty_beam = (elements + | "CountElement" >> beam.combiners.Count.Globally() + | "CheckEmpty" >> beam.Map(lambda n: n == 0)) + return is_empty_beam[0] - if tagged.example and tagged.sequence: + example_is_empty = tagged.example | "CheckExample" >> beam.CombineGlobally(check_empty) + sequence_is_empty = tagged.sequence | "CheckSequence" >> beam.CombineGlobally(check_empty) + + if not example_is_empty and not sequence_is_empty: raise ValueError('A PCollection containing both tf.Example and ' 'tf.SequenceExample is not supported') - if not tagged.example: - data_type = DataType.SEQUENCEEXAMPLE + if example_is_empty: + data_type = DataType.SEQUENCEEXAMPLE else: data_type = DataType.EXAMPLE diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index a32f5991..ee4191ac 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -584,7 +584,7 @@ def call(self, serialized_example): example_path = self._get_output_data_dir('examples') self._prepare_predict_examples(example_path) prediction_log_path = self._get_output_data_dir('predictions') - error_msg = 'Expected element of type' + error_msg = 'A PCollection containing both tf.Example' try: self._run_inference_with_beam( example_path, From ff40846050827175b419b7b12c16a3f5b276a20d Mon Sep 17 00:00:00 2001 From: Maxine Zhang Date: Thu, 20 Aug 2020 18:20:01 -0400 Subject: [PATCH 31/31] assert data type and add tests for sequence examples on classify regress and multihead --- tfx_bsl/beam/run_inference.py | 122 
++++++++------- tfx_bsl/beam/run_inference_test.py | 244 +++++++++++++++++------------ 2 files changed, 212 insertions(+), 154 deletions(-) diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py index ef7476fb..87bc2a66 100644 --- a/tfx_bsl/beam/run_inference.py +++ b/tfx_bsl/beam/run_inference.py @@ -129,41 +129,17 @@ def RunInferenceOnExamples( # pylint: disable=invalid-name Returns: A PCollection containing prediction logs. """ - + tensor_adapter_config = None operation_type = _get_operation_type(inference_spec_type) proximity_descriptor = ( _METRICS_DESCRIPTOR_IN_PROCESS if _using_in_process_inference(inference_spec_type) else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION) - # determine input dataType - beam_type = examples.element_type - if beam_type == tf.train.Example or beam_type == tf.train.SequenceExample: - data_type = _get_data_type(beam_type) - else: - tagged = (examples | "SortInput" >> beam.Map( - lambda example: beam.pvalue.TaggedOutput( - 'example' if isinstance(example, tf.train.Example) - else 'sequence', example)).with_outputs('example', 'sequence')) - - def check_empty(elements: beam.pvalue.PCollection) -> bool: - is_empty_beam = (elements - | "CountElement" >> beam.combiners.Count.Globally() - | "CheckEmpty" >> beam.Map(lambda n: n == 0)) - return is_empty_beam[0] - - example_is_empty = tagged.example | "CheckExample" >> beam.CombineGlobally(check_empty) - sequence_is_empty = tagged.sequence | "CheckSequence" >> beam.CombineGlobally(check_empty) - - if not example_is_empty and not sequence_is_empty: - raise ValueError('A PCollection containing both tf.Example and ' - 'tf.SequenceExample is not supported') - if example_is_empty: - data_type = DataType.SEQUENCEEXAMPLE - else: - data_type = DataType.EXAMPLE - - if data_type == DataType.EXAMPLE: + if (operation_type == OperationType.CLASSIFICATION or + operation_type == OperationType.REGRESSION or + operation_type == OperationType.MULTIHEAD): + typed_examples = examples | AssertType(tf.train.Example, operation_type) converter = tf_example_record.TFExampleBeamRecord( physical_format="inmem", telemetry_descriptors=[ @@ -171,29 +147,54 @@ def check_empty(elements: beam.pvalue.PCollection) -> bool: operation_type, proximity_descriptor], schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) - elif data_type == DataType.SEQUENCEEXAMPLE: - converter = tf_sequence_example_record.TFSequenceExampleBeamRecord( + + return (examples + | 'ParseExamples' >> beam.Map(lambda element: element.SerializeToString()) + | 'ConvertToRecordBatch' >> converter.BeamSource() + | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE, + tensor_adapter_config=tensor_adapter_config)) + else: + # TODO: check if there are two types of input data in PREDICT Operation + ExampleConverter = tf_example_record.TFExampleBeamRecord( + physical_format="inmem", + telemetry_descriptors=[ + _METRICS_DESCRIPTOR_INFERENCE, + operation_type, proximity_descriptor], + schema=schema, + raw_record_column_name=_RECORDBATCH_COLUMN) + SequenceConverter = tf_sequence_example_record.TFSequenceExampleBeamRecord( physical_format="inmem", telemetry_descriptors=[ _METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor], schema=schema, raw_record_column_name=_RECORDBATCH_COLUMN) - else: - raise ValueError('Unsupported data_type %s' % data_type) - tensor_adapter_config = None - if schema: - tensor_adapter_config = tensor_adapter.TensorAdapterConfig( - arrow_schema=converter.ArrowSchema(), - 
tensor_representations=converter.TensorRepresentations()) + tagged = (examples | "SortInput" >> beam.Map( + lambda example: beam.pvalue.TaggedOutput( + 'example' if isinstance(example, tf.train.Example) + else 'sequence', example)).with_outputs('example', 'sequence')) - return (examples - | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) - | 'ConvertToRecordBatch' >> converter.BeamSource() - | 'RunInferenceImpl' >> _RunInferenceOnRecordBatch( - inference_spec_type, data_type, - tensor_adapter_config=tensor_adapter_config)) + if schema: + tensor_adapter_config = tensor_adapter.TensorAdapterConfig( + arrow_schema=ExampleConverter.ArrowSchema(), + tensor_representations=ExampleConverter.TensorRepresentations()) + + return ([ + (tagged.example + | 'ParseExamples' >> beam.Map(lambda example: example.SerializeToString()) + | 'ConvertExampleToRecordBatch' >> ExampleConverter.BeamSource() + | 'RunInferenceImplExample' >> _RunInferenceOnRecordBatch( + inference_spec_type, DataType.EXAMPLE, + tensor_adapter_config=tensor_adapter_config)), + (tagged.sequence + | 'ParseSequenceExamples' >> beam.Map(lambda example: example.SerializeToString()) + | 'ConvertSequenceToRecordBatch' >> SequenceConverter.BeamSource() + | 'RunInferenceImplSequence' >> _RunInferenceOnRecordBatch( + inference_spec_type, DataType.SEQUENCEEXAMPLE, + tensor_adapter_config=tensor_adapter_config)) + ] | 'FlattenResult' >> beam.Flatten()) @beam.ptransform_fn @@ -1253,15 +1254,6 @@ def _get_signatures(model_path: Text, signatures: Sequence[Text], return result -def _get_data_type( - data_type: Union[tf.train.Example, tf.train.SequenceExample]) -> Text: - if (data_type == tf.train.Example): - return DataType.EXAMPLE - elif (data_type == tf.train.SequenceExample): - return DataType.SequenceExample - else: - raise ValueError('Expected tf.Example or tf.SequenceExample, got %s' % data_type) - def _get_operation_type( inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text: if _using_in_process_inference(inference_spec_type): @@ -1342,6 +1334,30 @@ def _is_cygwin() -> bool: return platform.system().startswith('CYGWIN_NT') +class AssertType(beam.PTransform): + """Check and cast a PCollection's elements to a given type.""" + def __init__(self, data_type: Any, operation_type: Text, label=None): + super().__init__(label) + self.data_type = data_type + self.operation_type = operation_type + self.first_data = False + + def expand(self, pcoll: beam.pvalue.PCollection): + @beam.typehints.with_output_types(Iterable[self.data_type]) + def _assert_fn(element: Any): + if not isinstance(element, self.data_type): + raise ValueError( + 'Operation type %s expected element of type %s, got: %s' % + (self.operation_type, self.data_type, type(element))) + yield element + + # Skip run-time type checking if the type already matches. 
+ if pcoll.element_type == self.data_type: + return pcoll + else: + return pcoll | beam.ParDo(_assert_fn) + + class _Clock(object): def get_current_time_in_microseconds(self) -> int: diff --git a/tfx_bsl/beam/run_inference_test.py b/tfx_bsl/beam/run_inference_test.py index ee4191ac..6d8b21ae 100644 --- a/tfx_bsl/beam/run_inference_test.py +++ b/tfx_bsl/beam/run_inference_test.py @@ -56,10 +56,10 @@ def setUp(self): self._predict_examples = [ text_format.Parse( """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), ] def _get_output_data_dir(self, sub_dir=None): @@ -86,16 +86,16 @@ def setUp(self): self._predict_examples = [ text_format.Parse( """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), + features { + feature { key: "input1" value { float_list { value: 0 }}} + } + """, tf.train.Example()), text_format.Parse( """ - features { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, tf.train.Example()), + features { + feature { key: "input1" value { float_list { value: 1 }}} + } + """, tf.train.Example()), ] self._multihead_examples = [ text_format.Parse( @@ -429,6 +429,137 @@ def _get_results(self, prediction_log_path): return results + def _build_regression_signature(self, input_tensor, output_tensor): + """Helper function for building a regression SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + output_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.REGRESS_OUTPUTS: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.REGRESS_METHOD_NAME) + + def _build_classification_signature(self, input_tensor, scores_tensor): + """Helper function for building a classification SignatureDef.""" + input_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + input_tensor) + signature_inputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS: + input_tensor_info + } + output_tensor_info = tf.compat.v1.saved_model.utils.build_tensor_info( + scores_tensor) + signature_outputs = { + tf.compat.v1.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES: + output_tensor_info + } + return tf.compat.v1.saved_model.signature_def_utils.build_signature_def( + signature_inputs, signature_outputs, + tf.compat.v1.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + + def _build_multihead_model(self, model_path): + with tf.compat.v1.Graph().as_default(): + input_example = tf.compat.v1.placeholder( + tf.string, name='input_examples_tensor') + config = { + 'x': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + 'y': tf.compat.v1.io.FixedLenFeature( + [1], dtype=tf.float32, default_value=0), + } + features = tf.compat.v1.parse_example(input_example, config) + x = features['x'] + y = features['y'] + sum_pred = x + y + diff_pred = tf.abs(x - y) + sess = tf.compat.v1.Session() + sess.run(tf.compat.v1.initializers.global_variables()) + signature_def_map = { + 'regress_diff': + self._build_regression_signature(input_example, diff_pred), + 
'classify_sum': + self._build_classification_signature(input_example, sum_pred), + tf.compat.v1.saved_model.signature_constants + .DEFAULT_SERVING_SIGNATURE_DEF_KEY: + self._build_regression_signature(input_example, sum_pred) + } + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(model_path) + builder.add_meta_graph_and_variables( + sess, [tf.compat.v1.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map) + builder.save() + + + def testClassifyModelError(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + error_msg = 'Operation type' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['classify_sum'])), + prediction_log_path) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def testRegressModelError(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + error_msg = 'Operation type' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, signature_name=['regress_diff'])), + prediction_log_path) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def testMultiInferenceModelError(self): + example_path = self._get_output_data_dir('examples') + self._prepare_multihead_examples(example_path) + model_path = self._get_output_data_dir('model') + self._build_multihead_model(model_path) + prediction_log_path = self._get_output_data_dir('predictions') + error_msg = 'Operation type' + try: + self._run_inference_with_beam( + example_path, + model_spec_pb2.InferenceSpecType( + saved_model_spec=model_spec_pb2.SavedModelSpec( + model_path=model_path, + signature_name=['regress_diff', 'classify_sum'])), + prediction_log_path) + except ValueError as exc: + actual_error_msg = str(exc) + self.assertTrue(actual_error_msg.startswith(error_msg)) + else: + self.fail('Test was expected to throw ValueError exception') + + def testKerasModelPredict(self): inputs = tf.keras.Input(shape=(1,), name='input1') output1 = tf.keras.layers.Dense( @@ -510,94 +641,6 @@ def testKerasModelPredictMultiTensor(self): self.assertAllInSet(list(result.predict_log.request.inputs), list(['x','y'])) -class RunOfflineInferenceMixedExamplesTest(RunInferenceFixture): - - def setUp(self): - super(RunOfflineInferenceMixedExamplesTest, self).setUp() - self._predict_examples = [ - text_format.Parse( - """ - features { - feature { key: "input1" value { float_list { value: 0 }}} - } - """, tf.train.Example()), - text_format.Parse( - """ - context { - feature { key: "input1" value { float_list { value: 1 }}} - } - """, tf.train.SequenceExample()), - ] - - def _run_inference_with_beam(self, example_path, inference_spec_type, - prediction_log_path, include_schema = False): - with 
beam.Pipeline() as pipeline: - _ = ( - pipeline - | 'ReadExamples' >> beam.io.ReadFromTFRecord(example_path) - | 'ParseExamples' >> beam.Map(tf.train.SequenceExample.FromString) - | 'RunInference' >> run_inference.RunInferenceOnExamples( - inference_spec_type) - | 'WritePredictions' >> beam.io.WriteToTFRecord( - prediction_log_path, - coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))) - - def testMixedExamples(self): - inputs = tf.keras.Input(shape=(1,), name='input1') - output1 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output1')( - inputs) - output2 = tf.keras.layers.Dense( - 1, activation=tf.nn.sigmoid, name='output2')( - inputs) - inference_model = tf.keras.models.Model(inputs, [output1, output2]) - - class TestKerasModel(tf.keras.Model): - - def __init__(self, inference_model): - super(TestKerasModel, self).__init__(name='test_keras_model') - self.inference_model = inference_model - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs') - ]) - def call(self, serialized_example): - features = { - 'input1': - tf.compat.v1.io.FixedLenFeature([1], - dtype=tf.float32, - default_value=0) - } - input_tensor_dict = tf.io.parse_example(serialized_example, features) - return inference_model(input_tensor_dict['input1']) - - model = TestKerasModel(inference_model) - model.compile( - optimizer=tf.keras.optimizers.Adam(lr=.001), - loss=tf.keras.losses.binary_crossentropy, - metrics=['accuracy']) - - model_path = self._get_output_data_dir('model') - tf.compat.v1.keras.experimental.export_saved_model( - model, model_path, serving_only=True) - - example_path = self._get_output_data_dir('examples') - self._prepare_predict_examples(example_path) - prediction_log_path = self._get_output_data_dir('predictions') - error_msg = 'A PCollection containing both tf.Example' - try: - self._run_inference_with_beam( - example_path, - model_spec_pb2.InferenceSpecType( - saved_model_spec=model_spec_pb2.SavedModelSpec( - model_path=model_path)), prediction_log_path) - except ValueError as exc: - actual_error_msg = str(exc) - self.assertTrue(actual_error_msg.startswith(error_msg)) - else: - self.fail('Test was expected to throw ValueError exception') - - class RunOfflineInferenceArrowTest(RunInferenceFixture): def setUp(self): @@ -674,7 +717,6 @@ def _prepare_multihead_examples(self, example_path): for example in self._multihead_examples: output_file.write(example.SerializeToString()) - def _build_predict_model(self, model_path): """Exports the dummy sum predict model."""
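Editor's note: the PREDICT branch added in PATCH 31 splits a mixed PCollection into tagged 'example' and 'sequence' outputs, runs each branch through its own TFXIO converter and _RunInferenceOnRecordBatch, and flattens the resulting PredictionLogs. The following standalone sketch (not part of the patch) illustrates that routing pattern; the _tag helper, the Create source, and the print sink are illustrative stand-ins, and plain serialization takes the place of the converter + inference stages.

import apache_beam as beam
import tensorflow as tf


def _tag(element):
  # Route each proto to the 'example' or 'sequence' tagged output,
  # mirroring the SortInput step in RunInferenceOnExamples.
  tag = 'example' if isinstance(element, tf.train.Example) else 'sequence'
  return beam.pvalue.TaggedOutput(tag, element)


def run():
  example = tf.train.Example()
  example.features.feature['x'].float_list.value.append(1.0)
  sequence = tf.train.SequenceExample()
  sequence.context.feature['x'].float_list.value.append(2.0)

  with beam.Pipeline() as p:
    mixed = p | 'Create' >> beam.Create([example, sequence])
    tagged = mixed | 'SortInput' >> beam.Map(_tag).with_outputs(
        'example', 'sequence')
    # Each branch would normally go through its own converter
    # (TFExampleBeamRecord vs. TFSequenceExampleBeamRecord) and inference;
    # here both branches just serialize their protos.
    examples_out = (
        tagged.example
        | 'SerializeExamples' >> beam.Map(lambda e: e.SerializeToString()))
    sequences_out = (
        tagged.sequence
        | 'SerializeSequences' >> beam.Map(lambda e: e.SerializeToString()))
    _ = ((examples_out, sequences_out)
         | 'FlattenResult' >> beam.Flatten()
         | 'Print' >> beam.Map(print))


if __name__ == '__main__':
  run()

The two-branch shape follows from the patch itself: the Example and SequenceExample converters produce different Arrow schemas, so each branch needs its own converter and DataType before the PredictionLogs are merged back into a single PCollection with Flatten.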