Commit 1df33ac (parent: 0550177)
Author: hgarrereyn

add tests for _BatchQueries and _RunInferenceCore
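For context, the new tests exercise tfx_bsl's internal query-based inference path: a query is a (InferenceSpecType, tf.train.Example) tuple, _BatchQueries groups queries into (spec, examples) batches, and _RunInferenceCore turns queries into PredictionLogs (the end-to-end test feeds it unbatched queries, so it evidently batches internally). Below is a minimal sketch of that flow, based only on the usage visible in this diff; the import paths and file paths are assumptions, and both transforms are private APIs, so treat this as illustrative rather than a supported interface:

import apache_beam as beam
import tensorflow as tf

from tfx_bsl.beam import run_inference  # assumed import path
from tfx_bsl.public.proto import model_spec_pb2  # assumed import path
from tensorflow_serving.apis import prediction_log_pb2  # assumed import path

# A query pairs an inference spec (which model to run) with one example.
spec = model_spec_pb2.InferenceSpecType(
    saved_model_spec=model_spec_pb2.SavedModelSpec(
        model_path='/example/model'))  # hypothetical model path
example = tf.train.Example()  # a real query would carry the model's features

with beam.Pipeline() as p:
  _ = (
      p
      | 'Queries' >> beam.Create([(spec, example)])
      | 'RunInference' >> run_inference._RunInferenceCore()
      | 'Write' >> beam.io.WriteToTFRecord(
          '/tmp/predictions',  # hypothetical output prefix
          coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))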

1 file changed: +134 -10 lines changed

tfx_bsl/beam/run_inference_test.py

@@ -27,6 +27,7 @@
 
 import apache_beam as beam
 from apache_beam.metrics.metric import MetricsFilter
+from apache_beam.options import pipeline_options
 from apache_beam.testing.util import assert_that
 from apache_beam.testing.util import equal_to
 from googleapiclient import discovery
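This import exists for the new test_inference_on_queries test at the bottom of the diff, which builds explicit options to pin the pipeline to batch mode:

options = pipeline_options.PipelineOptions(streaming=False)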
@@ -70,6 +71,16 @@ def _prepare_predict_examples(self, example_path):
       for example in self._predict_examples:
         output_file.write(example.SerializeToString())
 
+  def _get_results(self, prediction_log_path):
+    results = []
+    for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'):
+      record_iterator = tf.compat.v1.io.tf_record_iterator(path=f)
+      for record_string in record_iterator:
+        prediction_log = prediction_log_pb2.PredictionLog()
+        prediction_log.MergeFromString(record_string)
+        results.append(prediction_log)
+    return results
+
 
 class RunOfflineInferenceTest(RunInferenceFixture):

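Two notes on this hunk: the helper is a move rather than new logic (the identical method is deleted from RunOfflineInferenceTest in the next hunk, so both that class and the new RunInferenceCoreTest inherit it from RunInferenceFixture), and the '-?????-of-?????' glob matches Beam's default shard naming for WriteToTFRecord output, e.g. for a single-shard run (hypothetical prefix):

predictions-00000-of-00001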
@@ -219,16 +230,6 @@ def _run_inference_with_beam(self, example_path, inference_spec_type,
             prediction_log_path,
             coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog)))
 
-  def _get_results(self, prediction_log_path):
-    results = []
-    for f in tf.io.gfile.glob(prediction_log_path + '-?????-of-?????'):
-      record_iterator = tf.compat.v1.io.tf_record_iterator(path=f)
-      for record_string in record_iterator:
-        prediction_log = prediction_log_pb2.PredictionLog()
-        prediction_log.MergeFromString(record_string)
-        results.append(prediction_log)
-    return results
-
   def testModelPathInvalid(self):
     example_path = self._get_output_data_dir('examples')
     self._prepare_predict_examples(example_path)
@@ -616,5 +617,128 @@ def test_request_body_with_binary_data(self):
     ], result)
 
 
+class RunInferenceCoreTest(RunInferenceFixture):
+
+  def _build_keras_model(self, add):
+    """Builds a dummy keras model with one input and output."""
+    inp = tf.keras.layers.Input((1,), name='input')
+    out = tf.keras.layers.Lambda(lambda x: x + add)(inp)
+    m = tf.keras.models.Model(inp, out)
+    return m
+
+  def _new_model(self, model_path, add):
+    """Exports a keras model in the SavedModel format."""
+    class WrapKerasModel(tf.keras.Model):
+      """Wrapper class to apply a signature to a keras model."""
+      def __init__(self, model):
+        super().__init__()
+        self.model = model
+
+      @tf.function(input_signature=[
+          tf.TensorSpec(shape=[None], dtype=tf.string, name='inputs')
+      ])
+      def call(self, serialized_example):
+        features = {
+            'input': tf.compat.v1.io.FixedLenFeature(
+                [1],
+                dtype=tf.float32,
+                default_value=0
+            )
+        }
+        input_tensor_dict = tf.io.parse_example(serialized_example, features)
+        return self.model(input_tensor_dict)
+
+    model = self._build_keras_model(add)
+    wrapped_model = WrapKerasModel(model)
+    tf.compat.v1.keras.experimental.export_saved_model(
+        wrapped_model, model_path, serving_only=True
+    )
+    return self._get_saved_model_spec(model_path)
+
+  def _decode_value(self, pl):
+    """Returns output value from prediction log."""
+    out_tensor = pl.predict_log.response.outputs['output_1']
+    arr = tf.make_ndarray(out_tensor)
+    x = arr[0][0]
+    return x
+
+  def _make_example(self, x):
+    """Builds a TFExample object with a single value."""
+    feature = {}
+    feature['input'] = tf.train.Feature(
+        float_list=tf.train.FloatList(value=[x]))
+    ex = tf.train.Example(features=tf.train.Features(feature=feature))
+    return ex
+
+  def _get_saved_model_spec(self, model_path):
+    """Returns an InferenceSpecType object for a saved model path."""
+    return model_spec_pb2.InferenceSpecType(
+        saved_model_spec=model_spec_pb2.SavedModelSpec(
+            model_path=model_path))
+
+  def test_batch_queries_single_model(self):
+    spec = self._get_saved_model_spec('/example/model')
+    QUERIES = [(spec, self._make_example(i)) for i in range(100)]
+    CORRECT = {example.SerializeToString(): spec for spec, example in QUERIES}
+
+    def _check_batch(batch):
+      """Assert examples are grouped with the correct inference spec."""
+      spec, examples = batch
+      assert all([CORRECT[x.SerializeToString()] == spec for x in examples])
+
+    with beam.Pipeline() as p:
+      queries = p | 'Build queries' >> beam.Create(QUERIES)
+      batches = queries | '_BatchQueries' >> run_inference._BatchQueries()
+
+      _ = batches | 'Check' >> beam.Map(_check_batch)
+
+  # TODO(hgarrereyn): Switch _BatchElements to use GroupIntoBatches once
+  # BEAM-2717 is fixed so examples are grouped by inference spec key.
+  #
+  # def test_batch_queries_multiple_models(self):
+  #   spec1 = self._get_saved_model_spec('/example/model1')
+  #   spec2 = self._get_saved_model_spec('/example/model2')
+  #
+  #   QUERIES = []
+  #   for i in range(100):
+  #     QUERIES.append((spec1 if i % 2 == 0 else spec2, self._make_example(i)))
+  #
+  #   CORRECT = {example.SerializeToString(): spec for spec, example in QUERIES}
+  #
+  #   def _check_batch(batch):
+  #     """Assert examples are grouped with the correct inference spec."""
+  #     spec, examples = batch
+  #     assert all([CORRECT[x.SerializeToString()] == spec for x in examples])
+  #
+  #   with beam.Pipeline() as p:
+  #     queries = p | 'Build queries' >> beam.Create(QUERIES)
+  #     batches = queries | '_BatchQueries' >> run_inference._BatchQueries()
+  #
+  #     _ = batches | 'Check' >> beam.Map(_check_batch)
+
+  def test_inference_on_queries(self):
+    spec = self._new_model(self._get_output_data_dir('model1'), 100)
+    predictions_path = self._get_output_data_dir('predictions')
+    QUERIES = [(spec, self._make_example(i)) for i in range(10)]
+
+    options = pipeline_options.PipelineOptions(streaming=False)
+    with beam.Pipeline(options=options) as p:
+      _ = (
+          p
+          | 'Queries' >> beam.Create(QUERIES)
+          | '_RunInferenceCore' >> run_inference._RunInferenceCore()
+          | 'WritePredictions' >> beam.io.WriteToTFRecord(
+              predictions_path,
+              coder=beam.coders.ProtoCoder(prediction_log_pb2.PredictionLog))
+      )
+
+    results = self._get_results(predictions_path)
+    values = [int(self._decode_value(x)) for x in results]
+    self.assertEqual(
+        values,
+        [100, 101, 102, 103, 104, 105, 106, 107, 108, 109]
+    )
+
+
 if __name__ == '__main__':
   tf.test.main()
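The expected values in test_inference_on_queries follow directly from the model: the Lambda layer adds 100 to each input, so examples 0..9 come back as PredictionLogs whose 'output_1' tensor holds 100..109. Here is a standalone sketch of the decoding step that _decode_value performs, using a hand-built PredictionLog instead of a real inference run (the tensorflow_serving import path is an assumption):

import tensorflow as tf
from tensorflow_serving.apis import prediction_log_pb2  # assumed import path

# Build a fake PredictionLog shaped like the pipeline's output.
log = prediction_log_pb2.PredictionLog()
log.predict_log.response.outputs['output_1'].CopyFrom(
    tf.make_tensor_proto([[105.0]]))  # shape [1, 1], as the Lambda model emits

# Mirrors _decode_value: fetch the tensor, convert to an ndarray, index in.
value = tf.make_ndarray(log.predict_log.response.outputs['output_1'])[0][0]
assert int(value) == 105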
