Runner / Args Refactoring and Improvements

DEKHTIARJonathan · DEKHTIARJonathan · commit ea8f0381c4a6 · 2021-11-11T16:24:11.000-08:00
diff --git a/tftrt/examples/benchmark_args.py b/tftrt/examples/benchmark_args.py
@@ -72,6 +72,18 @@ def __init__(self):
                                     default_sign_key
                                   ))
 
+        self._parser.add_argument('--output_tensor_names', type=str,
+                                  default=None,
+                                  help='Output tensors\' name, defaults to all '
+                                       'tensors available if not set. Will '
+                                       'only work with `--use_tftrt`.')
+
+        self._parser.add_argument('--output_tensor_indices', type=str,
+                                  default=None,
+                                  help='Output tensors\' index, defaults to '
+                                       'all tensors available if not set. Will '
+                                       'only work without `--use_tftrt`.')
+
         self._parser.add_argument('--num_iterations', type=int, default=None,
                                   help='How many iterations(batches) to '
                                        'evaluate. If not supplied, the whole '
@@ -157,6 +169,15 @@ def __init__(self):
             help='Whether to use implicit batch mode or dynamic shape mode.'
         )
 
+        # =========================== DEBUG Flags ========================== #
+
+        self._add_bool_argument(
+            name="debug",
+            default=False,
+            required=False,
+            help='If set to True, will print additional information.'
+        )
+
     def _add_bool_argument(self, name=None, default=False, required=False, help=None):
             if not isinstance(default, bool):
                 raise ValueError()
@@ -205,7 +226,7 @@ def _validate_args(self, args):
         else:
             if args.use_xla:
                 raise ValueError("--use_xla flag is not supported with TF-TRT.")
-                
+
             if args.precision not in self.ALLOWED_TFTRT_PRECISION_MODES:
                 raise ValueError("The received --precision={} is not supported."
                                  " Allowed: {}".format(
@@ -250,5 +271,6 @@ def parse_args(self):
 
         print('\nBenchmark arguments:')
         _print_dict(vars(args))
+        print()
 
         return args
diff --git a/tftrt/examples/benchmark_runner.py b/tftrt/examples/benchmark_runner.py
@@ -9,8 +9,10 @@
 import logging
 import time
 
+from collections import defaultdict
 from contextlib import contextmanager
 from functools import partial
+from operator import itemgetter
 
 import numpy as np
 import tensorflow as tf
@@ -64,11 +66,11 @@ def before_benchmark(self, **kwargs):
         pass
 
     @abc.abstractmethod
-    def compute_accuracy_metric(self, batch_size, steps_executed, **kwargs):
+    def compute_accuracy_metric(self, predictions, expected, **kwargs):
         raise NotImplementedError()
 
     @abc.abstractmethod
-    def process_model_output(self, outputs, batch_y, **kwargs):
+    def process_model_output(self, outputs, **kwargs):
         raise NotImplementedError()
 
     ############################################################################
@@ -81,21 +83,26 @@ def __init__(
         output_saved_model_dir,
         allow_build_at_runtime=False,
         calibration_input_fn=None,
+        debug=False,
         gpu_mem_cap=None,
         input_signature_key=DEFAULT_SERVING_SIGNATURE_DEF_KEY,
         max_workspace_size_bytes=DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
         minimum_segment_size=5,
         num_calib_inputs=None,
         optimize_offline=False,
         optimize_offline_input_fn=None,
+        output_tensor_indices=None,
+        output_tensor_names=None,
         precision_mode=None,
         use_dynamic_shape=False,
-        use_tftrt=False
+        use_tftrt=False,
     ):
 
         logging.getLogger("tensorflow").setLevel(logging.INFO)
         logging.disable(logging.WARNING)
 
+        self._debug = debug
+
         # TensorFlow can execute operations synchronously or asynchronously.
         # If asynchronous execution is enabled, operations may return
         # "non-ready" handles.
@@ -131,15 +138,17 @@ def __init__(
             use_tftrt=use_tftrt
         )
 
+        self._set_output_tensor_name(output_tensor_indices, output_tensor_names)
+
     def _config_gpu_memory(self, gpu_mem_cap):
         gpus = tf.config.experimental.list_physical_devices('GPU')
 
         if not gpus:
             raise RuntimeError("No GPUs has been found.")
 
-        print('Found the following GPUs:')
+        self.debug_print('Found the following GPUs:')
         for gpu in gpus:
-            print(' ', gpu)
+            self.debug_print(f"\t- {gpu}")
 
         for gpu in gpus:
             try:
@@ -153,6 +162,42 @@ def _config_gpu_memory(self, gpu_mem_cap):
             except RuntimeError as e:
                 print('Can not set GPU memory config', e)
 
+    def _set_output_tensor_name(
+        self, output_tensor_indices, output_tensor_names
+    ):
+        """Select which outputs of `self._graph_func` the benchmark keeps.
+
+        The selection is stored in `self._output_tensors`: a list of integer
+        positions when the structured outputs are a list/tuple, or a list of
+        names when they are a dict.
+
+        Args:
+            output_tensor_indices: comma-separated positions (e.g. "0,2"), or
+                None to keep every output. Only read for list/tuple outputs.
+            output_tensor_names: comma-separated names, or None to keep every
+                output in sorted name order. Only read for dict outputs.
+
+        Raises:
+            ValueError: if an index is not an integer or is out of range, or
+                if a name is not one of the structured outputs.
+            RuntimeError: if the structured outputs are neither a list/tuple
+                nor a dict.
+        """
+        structured_outputs = self._graph_func.structured_outputs
+
+        if isinstance(structured_outputs, (list, tuple)):
+            n_outputs = len(structured_outputs)
+            if output_tensor_indices is None:
+                selected = list(range(n_outputs))
+            else:
+                selected = []
+                for raw_idx in output_tensor_indices.split(","):
+                    try:
+                        idx = int(raw_idx)
+                    except ValueError as err:
+                        raise ValueError(
+                            f"Invalid output_tensor_indices received: "
+                            f"{raw_idx!r}. Expected comma-separated integers."
+                        ) from err
+                    # Fail here rather than later, when the index is first
+                    # used to pick outputs. Negative positions stay allowed.
+                    if not -n_outputs <= idx < n_outputs:
+                        raise ValueError(
+                            f"Unknown output_tensor_indices received: {idx}. "
+                            f"Authorized: {list(range(n_outputs))}"
+                        )
+                    selected.append(idx)
+
+        elif isinstance(structured_outputs, dict):
+            structured_outputs = dict(sorted(structured_outputs.items()))
+            if output_tensor_names is None:
+                selected = list(structured_outputs)
+            else:
+                # Tolerate spaces around the commas ("a, b").
+                selected = [n.strip() for n in output_tensor_names.split(",")]
+                for name in selected:
+                    if name not in structured_outputs:
+                        raise ValueError(
+                            f"Unknown output_tensor_names received: {name}. "
+                            f"Authorized: {list(structured_outputs)}"
+                        )
+
+        else:
+            # Build one message string; passing the type as a second
+            # positional argument renders the error as a tuple.
+            raise RuntimeError(
+                "Unknown structured_outputs format received: "
+                f"{type(structured_outputs)}"
+            )
+
+        self._output_tensors = selected
+        self.debug_print(f"Available Output Tensors: {structured_outputs}")
+        self.debug_print(f"Chosen Output Tensor: {self._output_tensors}")
+
     def _get_graph_func(
         self,
         input_saved_model_dir,
@@ -288,6 +333,10 @@ def _check_input_fn(func, name):
 
         return graph_func
 
+    def debug_print(self, msg):
+        """Print `msg` prefixed with "[DEBUG] " when `self._debug` is truthy."""
+        if not self._debug:
+            return
+        print(f"[DEBUG] {msg}")
+
     def execute_benchmark(
         self,
         batch_size,
@@ -317,7 +366,22 @@ def execute_benchmark(
         @_force_gpu_resync
         @tf.function(jit_compile=use_xla)
         def infer_step(_batch_x):
-          return self._graph_func(_batch_x)
+          output = self._graph_func(_batch_x)
+          return itemgetter(*self._output_tensors)(output)
+
+        predicted_dict = defaultdict(lambda: [])
+        expected_arr = []
+
+        def get_debug_output_shape_str(output):
+            """Summarize `output` by shape, mirroring its container type.
+
+            A list/tuple yields a list of shapes, a dict yields a
+            {key: shape} dict, and anything else yields its own `.shape`.
+            """
+            if isinstance(output, dict):
+                return {name: item.shape for name, item in output.items()}
+            if isinstance(output, (tuple, list)):
+                return [item.shape for item in output]
+            return output.shape
+
 
         print("\nStart inference ...")
         for i, data_batch in enumerate(dataset):
@@ -348,19 +412,62 @@ def infer_step(_batch_x):
                 ))
 
             if not skip_accuracy_testing:
-                self.process_model_output(
-                    outputs=batch_preds,
-                    batch_y=batch_y,
-                    **kwargs
-                )
+                if i == 0:
+                    self.debug_print("=========== BEFORE PROCESSING ==========")
+                    debug_batch_preds = get_debug_output_shape_str(batch_preds)
+                    self.debug_print(f"`batch_preds`: {debug_batch_preds}")
+                    if batch_y is not None:
+                        self.debug_print(f"`batch_y` shape: {batch_y.shape}")
+
+                batch_preds = self.process_model_output(batch_preds, **kwargs)
+
+                if not isinstance(batch_preds, dict):
+                    raise ValueError(
+                        f"`self.process_model_output` did not return a dict. " \
+                        f"Received: {type(batch_preds)}"
+                    )
+
+                if batch_y is not None:
+                    batch_y = batch_y.numpy()
+                    if batch_y.shape[-1] == 1:
+                        batch_y = np.squeeze(batch_y, axis=-1)
+
+                if i == 0:
+                    self.debug_print("=========== AFTER PROCESSING ===========")
+                    debug_batch_preds = get_debug_output_shape_str(batch_preds)
+                    self.debug_print(f"`batch_preds`: {debug_batch_preds}")
+                    if batch_y is not None:
+                        self.debug_print(f"`batch_y` shape: {batch_y.shape}")
+                    self.debug_print("========================================")
+
+                for key, value in batch_preds.items():
+                    predicted_dict[key].append(value)
+
+                if batch_y is not None:
+                    expected_arr.append(batch_y)
 
             if (i + 1) >= num_iterations:
                 break
 
         if not skip_accuracy_testing:
+            predicted_dict = {
+                k: np.concatenate(v, axis=0)
+                for k, v in predicted_dict.items()
+            }
+            if expected_arr:
+                expected_arr = np.concatenate(expected_arr, axis=0)
+            else:
+                expected_arr = np.array(expected_arr)
+
+            self.debug_print("=========== BEFORE METRIC COMPUTATION ==========")
+            debug_predicted_dict = get_debug_output_shape_str(predicted_dict)
+            self.debug_print(f"`predicted_dict`: {debug_predicted_dict}")
+            self.debug_print(f"`expected_arr` shape: {expected_arr.shape}")
+            self.debug_print("========================================")
+
             results['accuracy_metric'] = self.compute_accuracy_metric(
-                batch_size=batch_size,
-                steps_executed=steps_executed,
+                predictions=predicted_dict,
+                expected=expected_arr,
                 **kwargs
             )
 
diff --git a/tftrt/examples/image_classification/image_classification.py b/tftrt/examples/image_classification/image_classification.py
@@ -65,33 +65,16 @@ class BenchmarkRunner(BaseBenchmarkRunner):
     ACCURACY_METRIC_NAME = "accuracy"
 
     def before_benchmark(self, **kwargs):
-        self._adjust = 1 if kwargs["num_classes"] == 1001 else 0
-        self._corrects = 0
-
-        try:
-            self._output_tensorname = list(
-                self._graph_func.structured_outputs.keys()
-            )[0]
-        except AttributeError:
-            # Output tensor doesn't have a name, index 0
-            self._output_tensorname = 0
-
-    def compute_accuracy_metric(self, batch_size, steps_executed, **kwargs):
-        return self._corrects / (batch_size * steps_executed)
-
-    def _eval_fn(self, preds, labels, adjust):
-        """Measures number of correct predicted labels in a batch.
-        Assumes preds and labels are numpy arrays.
-        """
-        preds = np.argmax(preds, axis=1).reshape(-1) - adjust
-        return np.sum((labels.reshape(-1) == preds).astype(np.float32))
-
-    def process_model_output(self, outputs, batch_y, **kwargs):
-        self._corrects += self._eval_fn(
-            preds=outputs[self._output_tensorname].numpy(),
-            labels=batch_y.numpy(),
-            adjust=self._adjust
-        )
+        self._labels_shift = 1 if kwargs["num_classes"] == 1001 else 0
+
+    def compute_accuracy_metric(self, predictions, expected, **kwargs):
+        """Return the fraction of `predictions["outputs"]` equal to `expected`."""
+        matches = np.equal(predictions["outputs"], expected)
+        return np.mean(matches)
+
+    def process_model_output(self, outputs, **kwargs):
+        """Turn the model output into a 1-D array shifted by `_labels_shift`.
+
+        `outputs` is converted with `.numpy()`. Anything that is not already
+        1-D is reduced with an argmax over axis 1 and flattened. The result,
+        minus `self._labels_shift`, is returned under the key "outputs".
+        """
+        predictions = outputs.numpy()
+        if predictions.ndim != 1:
+            predictions = np.argmax(predictions, axis=1)
+            predictions = predictions.reshape(-1)
+        return {"outputs": predictions - self._labels_shift}
 
 
 def get_dataset(data_files, batch_size, use_synthetic_data, preprocess_method, input_size):
@@ -162,15 +145,13 @@ def preprocess_sample_fn(record):
         input_size=input_size
     )
 
-    dataset = dataset.apply(
-        tf.data.experimental.map_and_batch(
-            map_func=preprocess_fn,
-            batch_size=batch_size,
-            num_parallel_calls=min(8, multiprocessing.cpu_count()),
-            drop_remainder=True
-        )
+    dataset = dataset.map(
+        map_func=preprocess_fn,
+        num_parallel_calls=min(8, multiprocessing.cpu_count())
     )
 
+    dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)
+
     if use_synthetic_data:
         dataset = dataset.take(count=1)  # loop over 1 batch
         dataset = dataset.cache()
@@ -246,16 +227,20 @@ def _input_fn(input_files, build_steps, model_phase):
         output_saved_model_dir=args.output_saved_model_dir,
         allow_build_at_runtime=args.allow_build_at_runtime,
         calibration_input_fn=calibration_input_fn,
+        debug=args.debug,
         gpu_mem_cap=args.gpu_mem_cap,
         input_signature_key=args.input_signature_key,
         max_workspace_size_bytes=args.max_workspace_size,
         minimum_segment_size=args.minimum_segment_size,
         num_calib_inputs=args.num_calib_inputs,
         optimize_offline=args.optimize_offline,
         optimize_offline_input_fn=optimize_offline_input_fn,
+        output_tensor_indices=args.output_tensor_indices,
+        output_tensor_names=args.output_tensor_names,
         precision_mode=args.precision,
         use_dynamic_shape=args.use_dynamic_shape,
-        use_tftrt=args.use_tftrt)
+        use_tftrt=args.use_tftrt
+    )
 
     get_benchmark_input_fn = partial(
         get_dataset,
diff --git a/tftrt/examples/image_classification/scripts/base_script.sh b/tftrt/examples/image_classification/scripts/base_script.sh
@@ -32,6 +32,12 @@ do
         MODEL_DIR="${arg#*=}"
         shift # Remove --input_saved_model_dir= from processing
         ;;
+        --output_tensor_names=*)
+        shift # Remove --output_tensor_names= from processing
+        ;;
+        --output_tensor_indices=*)
+        shift # Remove --output_tensor_indices= from processing
+        ;;
         *)
         BYPASS_ARGUMENTS=" ${BYPASS_ARGUMENTS} ${arg}"
         ;;
@@ -43,6 +49,8 @@ done
 INPUT_SIZE=224
 PREPROCESS_METHOD="vgg"
 NUM_CLASSES=1001
+OUTPUT_TENSOR_NAME_FLAG=""
+OUTPUT_TENSOR_IDX_FLAG=""
 
 case ${MODEL_NAME} in
   "inception_v3" | "inception_v4")
@@ -86,6 +94,8 @@ echo ""
 echo "[*] INPUT_SIZE: ${INPUT_SIZE}"
 echo "[*] PREPROCESS_METHOD: ${PREPROCESS_METHOD}"
 echo "[*] NUM_CLASSES: ${NUM_CLASSES}"
+echo "[*] OUTPUT_TENSOR_IDX_FLAG: ${OUTPUT_TENSOR_IDX_FLAG}"
+echo "[*] OUTPUT_TENSOR_NAME_FLAG: ${OUTPUT_TENSOR_NAME_FLAG}"
 echo ""
 echo "[*] BYPASS_ARGUMENTS: $(echo \"${BYPASS_ARGUMENTS}\" | tr -s ' ')"
 echo -e "********************************************************************\n"
@@ -140,6 +150,8 @@ COMMAND="${PREPEND_COMMAND} python image_classification.py \
     --input_size ${INPUT_SIZE} \
     --preprocess_method ${PREPROCESS_METHOD} \
     --num_classes ${NUM_CLASSES} \
+    ${OUTPUT_TENSOR_IDX_FLAG} \
+    ${OUTPUT_TENSOR_NAME_FLAG} \
     ${BYPASS_ARGUMENTS}"
 
 COMMAND=$(echo "${COMMAND}" | tr -s " ")
diff --git a/tftrt/examples/object_detection/object_detection.py b/tftrt/examples/object_detection/object_detection.py
diff --git a/tftrt/examples/transformers/transformers.py b/tftrt/examples/transformers/transformers.py