fix humpback_whale (#1671)

xadupre · sdpython · web-flow · commit 2be4cf3c7c5b · 2021-08-13T11:38:53.000-07:00
Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

Co-authored-by: xavier dupré <xavier.dupre@gmail.com>
diff --git a/tests/tfhub/_tools.py b/tests/tfhub/_tools.py
@@ -213,7 +213,7 @@ def check_discrepencies(out1, out2, threshold=1e-3):
 
 def benchmark(url, dest, onnx_name, opset, imgs, verbose=True, threshold=1e-3,
               signature=None, tag=None, output_name=None, ort_name=None,
-              optimize=True, convert_tflite=None):
+              optimize=True, convert_tflite=None, custom_tf=None):
     """
     Runs a simple benchmark.
     Goes through every steps (download, convert).
@@ -290,18 +290,21 @@ def benchmark(url, dest, onnx_name, opset, imgs, verbose=True, threshold=1e-3,
         print("ORT", len(imgs), duration_ort)
 
     # tensorflow
-    import tensorflow_hub as hub
-    from tensorflow import convert_to_tensor
-    if isinstance(imgs[0], OrderedDict):
-        imgs_tf = [
-            OrderedDict((k, convert_to_tensor(v)) for k, v in img.items())
-            for img in imgs]
+    if custom_tf is None:
+        import tensorflow_hub as hub
+        from tensorflow import convert_to_tensor
+        if isinstance(imgs[0], OrderedDict):
+            imgs_tf = [
+                OrderedDict((k, convert_to_tensor(v)) for k, v in img.items())
+                for img in imgs]
+        else:
+            imgs_tf = [convert_to_tensor(img) for img in imgs]
+        model = hub.load(url.split("?")[0])
+        if signature is not None:
+            model = model.signatures[signature]
+        results_tf, duration_tf = measure_time(model, imgs_tf)
     else:
-        imgs_tf = [convert_to_tensor(img) for img in imgs]
-    model = hub.load(url.split("?")[0])
-    if signature is not None:
-        model = model.signatures[signature]
-    results_tf, duration_tf = measure_time(model, imgs_tf)
+        output, results_tf, duration_tf = custom_tf(tname)
 
     if verbose:
         print("TF", len(imgs), duration_tf)
@@ -310,7 +313,10 @@ def benchmark(url, dest, onnx_name, opset, imgs, verbose=True, threshold=1e-3,
         print("ratio ORT=%r / TF=%r = %r" % (mean_ort, mean_tf, mean_ort / mean_tf))
 
     # checks discrepencies
-    res = model(imgs_tf[0])
+    if custom_tf is None:
+        res = model(imgs_tf[0])
+    else:
+        res = output
     if isinstance(res, dict):
         if output_name is None:
             if len(res) != 1:
diff --git a/tests/tfhub/tfhub_humpback_whale.py b/tests/tfhub/tfhub_humpback_whale.py
@@ -1,81 +1,53 @@
 # SPDX-License-Identifier: Apache-2.0
 import os
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+import pickle
 import numpy
 from onnxruntime import InferenceSession
-from _tools import generate_random_images, benchmark
+from _tools import generate_random_images, benchmark, measure_time
+import tensorflow as tf
+import tensorflow_hub as hub
 
 
 def main(opset=13):
     url = "https://tfhub.dev/google/humpback_whale/1?tf-hub-format=compressed"
     dest = "tf-humpback-whale"
     name = "humpback-whale"
     onnx_name = os.path.join(dest, "%s-%d.onnx" % (name, opset))
-
-    kind = "function"
-    if kind == "function":
-        import tensorflow as tf
-        import tensorflow_hub as hub
-        import tf2onnx
-        model = hub.load('https://tfhub.dev/google/humpback_whale/1')
-        FILENAME = 'gs://bioacoustics-www1/sounds/Cross_02_060203_071428.d20_7.wav'
-        waveform, sample_rate = tf.audio.decode_wav(tf.io.read_file(FILENAME))
-        waveform = tf.expand_dims(waveform, 0)  # makes a batch of size 1
-        context_step_samples = tf.cast(sample_rate, tf.int64)
-        print(waveform.dtype, waveform.shape, sample_rate.dtype, sample_rate.shape, sample_rate)
-        
-        spec = (tf.TensorSpec((None, ) + waveform.shape[-2:], tf.float32, name="waveform"),
-                tf.TensorSpec((1, 1), tf.int64, name="context_step_samples"))
-        inputs = {'waveform': waveform.numpy(),
-                  'context_step_samples': context_step_samples.numpy()}
-                
-        tf2onnx.convert.from_function(
-            model.signatures['score'], input_signature=spec, opset=13, output_path=onnx_name)
-        # AttributeError: '_WrapperFunction' object has no attribute 'get_concrete_function'
-
-        sess = InferenceSession(onnx_name)
-        got = sess.run(None, inputs)
-        print(got)
-        
-        score_fn = model.signatures['score']
-        scores = score_fn(waveform=waveform, context_step_samples=context_step_samples)
-    
-    if kind == "keras":
-        import tensorflow as tf
-        import tensorflow_hub as hub
-        import tf2onnx
-        model = hub.load('https://tfhub.dev/google/humpback_whale/1').model
-        FILENAME = 'gs://bioacoustics-www1/sounds/Cross_02_060203_071428.d20_7.wav'
-        waveform, sample_rate = tf.audio.decode_wav(tf.io.read_file(FILENAME))
-        waveform = tf.expand_dims(waveform, 0)  # makes a batch of size 1
-        context_step_samples = tf.cast(sample_rate, tf.int64)
-        print(waveform.dtype, waveform.shape, sample_rate.dtype, sample_rate.shape, sample_rate)
-
-        spec = (tf.TensorSpec((None, ) + waveform.shape[-2:], tf.float32, name="waveform"),
-                tf.TensorSpec((1, 1), tf.int64, name="context_step_samples"))
-        inputs = {'waveform': waveform.numpy(),
-                  'context_step_samples': context_step_samples.numpy()}
-                
-        tf2onnx.convert.from_keras(model, input_signature=spec, opset=13, output_path=onnx_name)
-        # AttributeError: '_UserObject' object has no attribute 'output_names'
-
-        sess = InferenceSession(onnx_name)
-        got = sess.run(None, inputs)
-        print(got)
-        
+    print("[download data]")
+    FILENAME = 'gs://bioacoustics-www1/sounds/Cross_02_060203_071428.d20_7.wav'
+    pkl_name = os.path.join(dest, "data.pkl")
+    if not os.path.exists(pkl_name):
+        with open(pkl_name, "wb") as f:
+            waveform, sample_rate = tf.audio.decode_wav(tf.io.read_file(FILENAME))
+            waveform = tf.expand_dims(waveform, 0)  # makes a batch of size 1
+            context_step_samples = tf.cast(sample_rate, tf.int64)
+            data = dict(waveform=waveform, context_step_samples=context_step_samples)
+            pickle.dump(data, f)
+    else:
+        with open(pkl_name, "rb") as f:
+            data = pickle.load(f)
+        waveform = data["waveform"]
+        context_step_samples = data["context_step_samples"]
+    print("[data] done. context_step_samples=", context_step_samples.numpy())
+
+    def benchmark_custom(local_name):
+        model = hub.load(local_name)
         score_fn = model.signatures['score']
         scores = score_fn(waveform=waveform, context_step_samples=context_step_samples)
+        imgs_tf = [dict(waveform=waveform, context_step_samples=context_step_samples)]
+        results_tf, duration_tf = measure_time(
+            lambda inputs: score_fn(**inputs), imgs_tf)
+        return scores, results_tf, duration_tf
 
-    if kind == 'cmd':
-        imgs = generate_random_images(shape=(1, 10000, 1), scale=1.)
-        inputs = [dict(waveform=img,
-                       context_step_samples=numpy.array(512, dtype=numpy.int64))
-                  for img in imgs]
-        benchmark(url, dest, onnx_name, opset, inputs, optimize=False,
-                  signature='score')
-        # onnxruntime.capi.onnxruntime_pybind11_state.RuntimeException: 
-        # [ONNXRuntimeError] : 6 : RUNTIME_EXCEPTION : Non-zero status code returned while running Reshape node. Name:'StatefulPartitionedCall/Reshape_1' Status Message: C:\xadupre\microsoft_xadupre\onnxruntime\onnxruntime\core\providers\cpu\tensor\reshape_helper.h:42 onnxruntime::ReshapeHelper::ReshapeHelper gsl::narrow_cast<int64_t>(input_shape.Size()) == size was false. The input tensor cannot be reshaped to the requested shape. 
-        # Input shape:{0,1}, requested shape:{1,1,1}
+    imgs = generate_random_images(shape=(1, 750000, 1), scale=1., n=2)
+    inputs = [dict(waveform=waveform.numpy(),
+                   context_step_samples=numpy.array(
+                    context_step_samples.numpy(), dtype=numpy.int64))]
+    benchmark(url, dest, onnx_name, opset, inputs, optimize=False,
+              signature='score', custom_tf=benchmark_custom)
 
 
 if __name__ == "__main__":
-    main()
+    with tf.device('/CPU:0'):
+        main()