BERT & BART refactored

DEKHTIARJonathan · DEKHTIARJonathan · commit f9f566b64a4d · 2022-03-07T17:43:14.000-08:00
diff --git a/tftrt/examples/transformers/scripts/base_script.sh b/tftrt/examples/transformers/scripts/base_script.sh
@@ -14,6 +14,8 @@ DATA_DIR="/tmp"
 
 BYPASS_ARGUMENTS=""
 TF_AUTO_JIT_XLA_FLAG=""
+BATCH_SIZE=32
+SEQ_LEN=128
 
 # Loop through arguments and process them
 for arg in "$@"
@@ -27,14 +29,18 @@ do
         NVIDIA_TF32_OVERRIDE="NVIDIA_TF32_OVERRIDE=0"
         shift # Remove --no_tf32 from processing
         ;;
+        --batch_size=*)
+        BATCH_SIZE="${arg#*=}"
+        shift # Remove --batch_size= from processing
+        ;;
         --data_dir=*)
         shift # Remove --data_dir= from processing
         ;;
-        --vocab_size=*)
-        shift # Remove --vocab_size= from processing
+        --total_max_samples=*)
+        shift # Remove --total_max_samples= from processing
         ;;
-        --minimum_segment_size=*)
-        shift # Remove --minimum_segment_size= from processing
+        --output_tensors_name=*)
+        shift # Remove --output_tensors_name= from processing
         ;;
         --input_saved_model_dir=*)
         MODEL_DIR="${arg#*=}"
@@ -44,6 +50,13 @@ do
         TF_AUTO_JIT_XLA_FLAG="TF_XLA_FLAGS=--tf_xla_auto_jit=2"
         shift # Remove --use_xla_auto_jit from processing
         ;;
+        --vocab_size=*)
+        shift # Remove --vocab_size= from processing
+        ;;
+        --sequence_length=*)
+        SEQ_LEN="${arg#*=}"
+        shift # Remove --sequence_length= from processing
+        ;;
         *)
         BYPASS_ARGUMENTS=" ${BYPASS_ARGUMENTS} ${arg}"
         ;;
@@ -54,6 +67,9 @@ done
 
 MIN_SEGMENT_SIZE=5
 VOCAB_SIZE=-1
+MAX_WORKSPACE_SIZE=$((2 ** (32 + 1)))  # + 1 necessary compared to python
+MAX_SAMPLES=1
+OUTPUT_TENSORS_NAME="prediction_logits,seq_relationship_logits"
 
 case ${MODEL_NAME} in
   "bert_base_uncased" | "bert_large_uncased")
@@ -67,6 +83,7 @@ case ${MODEL_NAME} in
   "bart_base" | "bart_large")
     VOCAB_SIZE=50265
     MIN_SEGMENT_SIZE=90
+    OUTPUT_TENSORS_NAME="encoder_last_hidden_state,logits"
     ;;
 esac
 
@@ -80,9 +97,12 @@ echo "[*] MODEL_DIR: ${MODEL_DIR}"
 echo ""
 echo "[*] NVIDIA_TF32_OVERRIDE: ${NVIDIA_TF32_OVERRIDE}"
 echo ""
-# Custom Transormers Task Flags
-echo "[*] MIN_SEGMENT_SIZE: ${MIN_SEGMENT_SIZE}"
+# Custom Transormer Task Flags
 echo "[*] VOCAB_SIZE: ${VOCAB_SIZE}"
+echo "[*] SEQ_LEN: ${SEQ_LEN}"
+echo "[*] MAX_WORKSPACE_SIZE: ${MAX_WORKSPACE_SIZE}"
+echo "[*] MAX_SAMPLES: ${MAX_SAMPLES}"
+echo "[*] OUTPUT_TENSORS_NAME: ${OUTPUT_TENSORS_NAME}"
 echo ""
 echo "[*] TF_AUTO_JIT_XLA_FLAG: ${TF_AUTO_JIT_XLA_FLAG}"
 echo "[*] BYPASS_ARGUMENTS: $(echo \"${BYPASS_ARGUMENTS}\" | tr -s ' ')"
@@ -132,10 +152,16 @@ cd ${BENCH_DIR}
 PREPEND_COMMAND="${TF_AUTO_JIT_XLA_FLAG} ${NVIDIA_TF32_OVERRIDE}"
 
 COMMAND="${PREPEND_COMMAND} python transformers.py \
-    --input_saved_model_dir ${INPUT_SAVED_MODEL_DIR} \
     --data_dir ${DATA_DIR} \
+    --calib_data_dir ${DATA_DIR} \
+    --input_saved_model_dir ${INPUT_SAVED_MODEL_DIR} \
+    --output_saved_model_dir /tmp/$RANDOM \
+    --batch_size ${BATCH_SIZE} \
     --vocab_size ${VOCAB_SIZE} \
-    --minimum_segment_size ${MIN_SEGMENT_SIZE} \
+    --sequence_length=${SEQ_LEN} \
+    --max_workspace_size ${MAX_WORKSPACE_SIZE} \
+    --total_max_samples=${MAX_SAMPLES} \
+    --output_tensors_name=${OUTPUT_TENSORS_NAME} \
     ${BYPASS_ARGUMENTS}"
 
 COMMAND=$(echo ${COMMAND} | sed 's/ *$//g')  # Trimming whitespaces
diff --git a/tftrt/examples/transformers/transformers.py b/tftrt/examples/transformers/transformers.py
@@ -18,16 +18,9 @@
 import os
 import sys
 
-import logging
-import multiprocessing
-import time
-
-from functools import partial
-
 import numpy as np
-import tensorflow as tf
 
-from statistics import mean
+import tensorflow as tf
 
 # Allow import of top level python files
 import inspect
@@ -54,34 +47,33 @@ def __init__(self):
         super(CommandLineAPI, self).__init__()
 
         self._parser.add_argument(
-            '--sequence_length',
+            "--sequence_length",
             type=int,
             default=128,
-            help='Directory containing the input saved model.'
+            help="Input data sequence length."
         )
 
         self._parser.add_argument(
-            '--vocab_size',
+            "--vocab_size",
             type=int,
             required=True,
             choices=self.ALLOWED_VOCAB_SIZES,
-            help='Size of the vocabulory used for '
-            'training. Refer to huggingface '
-            'documentation.'
+            help="Size of the vocabulory used for training. Refer to "
+            "huggingface documentation."
         )
 
-        self._parser.add_argument(
-            '--validate_output',
-            action='store_true',
-            help='Validates that the model returns the correct '
-            'value. This only works with batch_size =32.'
-        )
+        # self._parser.add_argument(
+        #     "--validate_output",
+        #     action="store_true",
+        #     help="Validates that the model returns the correct value. This "
+        #     "only works with batch_size =32."
+        # )
 
     def _validate_args(self, args):
         super(CommandLineAPI, self)._validate_args(args)
 
-        if args.validate_output and args.batch_size != 32:
-            raise ValueError("Output validation only supports batch size 32.")
+        # if args.validate_output and args.batch_size != 32:
+        #     raise ValueError("Output validation only supports batch size 32.")
 
         # TODO: Remove when proper dataloading is implemented
         if args.num_iterations is None:
@@ -90,145 +82,94 @@ def _validate_args(self, args):
                 "--num_iterations=None"
             )
 
-    # TODO: Remove when proper dataloading is implemented
     def _post_process_args(self, args):
+        args = super(CommandLineAPI, self)._post_process_args(args)
+
         return args
 
 
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+# %%%%%%%%%%%%%%%%% IMPLEMENT MODEL-SPECIFIC FUNCTIONS HERE %%%%%%%%%%%%%%%%%% #
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+
+
 class BenchmarkRunner(BaseBenchmarkRunner):
 
-    ACCURACY_METRIC_NAME = "mAP"
+    def get_dataset_batches(self):
+        """Returns a list of batches of input samples.
 
-    def before_benchmark(self, **kwargs):
-        pass
+        Each batch should be in the form [x, y], where
+        x is a numpy array of the input samples for the batch, and
+        y is a numpy array of the expected model outputs for the batch
 
-    def compute_accuracy_metric(self, predictions, expected, **kwargs):
-        pass
+        Returns:
+        - dataset: a TF Dataset object
+        - bypass_data_to_eval: any object type that will be passed unmodified to
+                            `evaluate_result()`. If not necessary: `None`
 
-    def process_model_output(self, outputs, **kwargs):
-        pass
+        Note: script arguments can be accessed using `self._args.attr`
+        """
 
+        if not self._args.use_synthetic_data:
+            raise NotImplementedError()
 
-# def validate_model_artifacts(infer_func, model_dir, use_tftrt, precision):
-#     numpy_asset_dir = os.path.join(model_dir, "numpy_assets")
-#
-#     input_data = np.load(os.path.join(numpy_asset_dir, 'input_data.npy'))
-#     input_data = tf.constant(input_data, dtype=tf.int32)
-#
-#     output = infer_func(input_ids=input_data)
-#
-#     if use_tftrt:
-#         if precision == "fp16":
-#             rtol=1e-2
-#             atol=2e-1
-#         else:
-#             rtol=1e-2
-#             atol=5e-2
-#     else:
-#         rtol=1e-5
-#         atol=1e-8
-#
-#     for key in output.keys():
-#         target = np.load(os.path.join(numpy_asset_dir, '%s.npy' % key))
-#         np.testing.assert_allclose(
-#             target, output[key].numpy(), rtol=rtol, atol=atol
-#         )
-#     print("\n*****************************************************************")
-#     print("Model was validated with success ...")
-#     print("*****************************************************************\n")
+        tf.random.set_seed(10)
+
+        input_data = tf.random.uniform(
+            shape=(1, self._args.sequence_length),
+            maxval=self._args.vocab_size,
+            dtype=tf.int32
+        )
+
+        dataset = tf.data.Dataset.from_tensor_slices(input_data)
+        dataset = dataset.repeat()
+        dataset = dataset.batch(self._args.batch_size)
+        dataset = dataset.take(count=1)  # loop over 1 batch
+        dataset = dataset.cache()
+        dataset = dataset.repeat()
+        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
+
+        return dataset, None
+
+    def preprocess_model_inputs(self, data_batch):
+        """This function prepare the `data_batch` generated from the dataset.
+        Returns:
+            x: input of the model
+            y: data to be used for model evaluation
 
+        Note: script arguments can be accessed using `self._args.attr`
+        """
 
-def get_dataset(batch_size, seq_len, vocab_size, use_synthetic_data):
+        x = data_batch
+        return x, None
 
-    if not use_synthetic_data:
-        raise NotImplementedError()
+    def postprocess_model_outputs(self, predictions, expected):
+        """Post process if needed the predictions and expected tensors. At the
+        minimum, this function transforms all TF Tensors into a numpy arrays.
+        Most models will not need to modify this function.
 
-    tf.random.set_seed(10)
-    input_data = tf.random.uniform(
-        shape=(1, seq_len), maxval=vocab_size, dtype=tf.int32
-    )
+        Note: script arguments can be accessed using `self._args.attr`
+        """
 
-    dataset = tf.data.Dataset.from_tensor_slices(input_data)
-    dataset = dataset.repeat()
-    dataset = dataset.batch(batch_size)
-    dataset = dataset.take(count=1)  # loop over 1 batch
-    dataset = dataset.cache()
-    dataset = dataset.repeat()
-    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
+        return predictions.numpy(), expected.numpy()
 
-    return dataset
+    def evaluate_model(self, predictions, expected, bypass_data_to_eval):
+        """Evaluate result predictions for entire dataset.
+
+        This computes overall accuracy, mAP,  etc.  Returns the
+        metric value and a metric_units string naming the metric.
+
+        Note: script arguments can be accessed using `args.attr`
+        """
+
+        return None, "Top-1 Accuracy %"
 
 
 if __name__ == '__main__':
+
     cmdline_api = CommandLineAPI()
     args = cmdline_api.parse_args()
 
-    def _input_fn(build_steps, model_phase):
-
-        dataset = get_dataset(
-            batch_size=args.batch_size,
-            seq_len=args.sequence_length,
-            vocab_size=args.vocab_size,
-            use_synthetic_data=args.use_synthetic_data
-        )
+    runner = BenchmarkRunner(args)
 
-        for i, (input_batch) in enumerate(dataset):
-            if i >= build_steps:
-                break
-
-            print("* [%s] - step %04d/%04d" % (model_phase, i + 1, build_steps))
-            yield input_batch,
-
-    calibration_input_fn = partial(
-        _input_fn,
-        build_steps=args.num_calib_batches // args.batch_size,
-        model_phase="Calibration"
-    )
-    optimize_offline_input_fn = partial(
-        _input_fn, build_steps=1, model_phase="Building"
-    )
-
-    runner = BenchmarkRunner(
-        input_saved_model_dir=args.input_saved_model_dir,
-        output_saved_model_dir=args.output_saved_model_dir,
-        allow_build_at_runtime=args.allow_build_at_runtime,
-        calibration_input_fn=calibration_input_fn,
-        debug=args.debug,
-        gpu_mem_cap=args.gpu_mem_cap,
-        input_signature_key=args.input_signature_key,
-        max_workspace_size_bytes=args.max_workspace_size,
-        minimum_segment_size=args.minimum_segment_size,
-        num_calib_batches=args.num_calib_batches,
-        optimize_offline=args.optimize_offline,
-        optimize_offline_input_fn=optimize_offline_input_fn,
-        output_tensor_names=args.output_tensor_names,
-        precision_mode=args.precision,
-        use_dynamic_shape=args.use_dynamic_shape,
-        use_tftrt=args.use_tftrt
-    )
-
-    # if args.validate_output:
-    #     # artifacts only generated for BS == 32
-    #     validate_model_artifacts(
-    #         graph_func,
-    #         args.input_saved_model_dir,
-    #         args.use_tftrt,
-    #         args.precision.lower()
-    #     )
-
-    get_benchmark_input_fn = partial(
-        get_dataset, seq_len=args.sequence_length, vocab_size=args.vocab_size
-    )
-
-    runner.execute_benchmark(
-        batch_size=args.batch_size,
-        display_every=args.display_every,
-        get_benchmark_input_fn=get_benchmark_input_fn,
-        num_iterations=args.num_iterations,
-        num_warmup_iterations=args.num_warmup_iterations,
-        skip_accuracy_testing=(
-            args.use_synthetic_data or args.skip_accuracy_testing
-        ),
-        use_synthetic_data=args.use_synthetic_data,
-        use_xla=args.use_xla,
-    )
+    runner.execute_benchmark()