
Commit 7861ab3

Merge pull request #540 from NVIDIA/fallback_trtorchc
feat: Support fallback options in trtorchc
2 parents: 7cdd9f5 + 01ffb5a

3 files changed: +54, -8 lines

cpp/trtorchc/README.md

Lines changed: 11 additions & 3 deletions
@@ -36,15 +36,22 @@ trtorchc [input_file_path] [output_file_path]
     --allow-gpu-fallback              (Only used when targeting DLA
                                       (device-type)) Lets engine run layers on
                                       GPU if they are not supported on DLA
+
+    --allow-torch-fallback            Enable layers to run in torch
+                                      if they are not supported in TensorRT
+
     --disable-tf32                    Prevent Float32 layers from using the
                                       TF32 data format
     -p[precision...],
     --enabled-precison=[precision...] (Repeatable) Enabling an operating
                                       precision for kernels to use when
-                                      building the engine (Int8 requires a
-                                      calibration-cache argument) [ float |
+                                      building the engine [ float |
                                       float32 | f32 | half | float16 | f16 |
                                       int8 | i8 ] (default: float)
+
+    --ffo,
+    --forced-fallback-ops             List of operators in the graph that
+                                      should be forced to fallback to Pytorch for execution
     -d[type], --device-type=[type]    The type of device the engine should be
                                       built for [ gpu | dla ] (default: gpu)
     --gpu-id=[gpu_id]                 GPU id if running on multi-GPU platform
@@ -96,6 +103,7 @@ trtorchc [input_file_path] [output_file_path]
 ```
 
 e.g.
+
 ```
 trtorchc tests/modules/ssd_traced.jit.pt ssd_trt.ts "[(1,3,300,300); (1,3,512,512); (1, 3, 1024, 1024)]@fp16%contiguous" -p f16
-```
+```
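
For reference, a command exercising both new flags, in the style of the example above (the module path and operator name here are illustrative, not taken from this commit):

```
trtorchc tests/modules/module.jit.pt module_trt.ts "(1,3,300,300)" --allow-torch-fallback --ffo aten::max_pool2d -p f16
```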

cpp/trtorchc/main.cpp

Lines changed: 34 additions & 4 deletions
@@ -237,14 +237,20 @@ int main(int argc, char** argv) {
       "(Only used when targeting DLA (device-type)) Lets engine run layers on GPU if they are not supported on DLA",
       {"allow-gpu-fallback"});
 
+  args::Flag allow_torch_fallback(
+      parser,
+      "allow-torch-fallback",
+      "Enable layers to run in torch if they are not supported in TensorRT",
+      {"allow-torch-fallback"});
+
   args::Flag disable_tf32(
       parser, "disable-tf32", "Prevent Float32 layers from using the TF32 data format", {"disable-tf32"});
 
   args::ValueFlagList<std::string> enabled_precision(
       parser,
       "precision",
       "(Repeatable) Enabling an operating precision for kernels to use when building the engine (Int8 requires a calibration-cache argument) [ float | float32 | f32 | fp32 | half | float16 | f16 | fp16 | int8 | i8 | char ] (default: float)",
-      {'p', "enabled-precison"});
+      {'p', "enabled-precision"});
   args::ValueFlag<std::string> device_type(
       parser,
       "type",
@@ -267,6 +273,12 @@ int main(int argc, char** argv) {
       "Path to calibration cache file to use for post training quantization",
       {"calibration-cache-file"});
 
+  args::ValueFlagList<std::string> forced_fallback_ops(
+      parser,
+      "forced_fallback_ops",
+      "(Repeatable) List of operators in the graph that should be forced to fallback to Pytorch for execution.",
+      {"ffo", "forced-fallback-ops"});
+
   args::Flag embed_engine(
       parser,
       "embed-engine",
@@ -442,6 +454,10 @@ int main(int argc, char** argv) {
     compile_settings.device.allow_gpu_fallback = true;
   }
 
+  if (allow_torch_fallback) {
+    compile_settings.torch_fallback = trtorch::CompileSpec::TorchFallback(true);
+  }
+
   if (disable_tf32) {
     compile_settings.disable_tf32 = true;
   }
@@ -453,6 +469,18 @@ int main(int argc, char** argv) {
 
   auto calibrator = trtorch::ptq::make_int8_cache_calibrator(calibration_cache_file_path);
 
+  if (forced_fallback_ops) {
+    if (!allow_torch_fallback) {
+      trtorch::logging::log(
+          trtorch::logging::Level::kERROR,
+          "Forced fallback ops provided but allow_torch_fallback is False. Please use --allow-torch-fallback to enable automatic fallback of operators.");
+    }
+
+    for (const auto fallback_op : args::get(forced_fallback_ops)) {
+      compile_settings.torch_fallback.forced_fallback_ops.push_back(fallback_op);
+    }
+  }
+
   if (enabled_precision) {
     for (const auto precision : args::get(enabled_precision)) {
       auto dtype = parseDataType(precision);
@@ -563,9 +591,11 @@ int main(int argc, char** argv) {
     return 1;
   }
 
-  if (!trtorch::CheckMethodOperatorSupport(mod, "forward")) {
-    trtorch::logging::log(trtorch::logging::Level::kERROR, "Module is not currently supported by TRTorch");
-    return 1;
+  if (!allow_torch_fallback) {
+    if (!trtorch::CheckMethodOperatorSupport(mod, "forward")) {
+      trtorch::logging::log(trtorch::logging::Level::kERROR, "Module is not currently supported by TRTorch");
+      return 1;
+    }
   }
 
   if (save_engine) {
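
trtorchc drives the TRTorch C++ API, so the new flags map directly onto the `torch_fallback` fields set above. A minimal sketch of the programmatic equivalent, assuming the fixed-shape `trtorch::CompileSpec` constructor and the `trtorch::CompileGraph` entry point from that API (module path and operator name are illustrative):

```
#include <torch/script.h>
#include "trtorch/trtorch.h"

int main() {
  // Load a TorchScript module (path is illustrative).
  auto mod = torch::jit::load("tests/modules/module.jit.pt");

  // One fixed input shape, as trtorchc would build from its positional spec.
  trtorch::CompileSpec compile_settings({{1, 3, 300, 300}});

  // Equivalent of --allow-torch-fallback: let unsupported layers run in torch.
  compile_settings.torch_fallback = trtorch::CompileSpec::TorchFallback(true);

  // Equivalent of --ffo/--forced-fallback-ops (operator name is illustrative).
  compile_settings.torch_fallback.forced_fallback_ops.push_back("aten::max_pool2d");

  // Compile: supported subgraphs go to TensorRT, the rest stays in torch.
  auto trt_mod = trtorch::CompileGraph(mod, compile_settings);
  trt_mod.save("module_trt.ts");
  return 0;
}
```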

docsrc/tutorials/trtorchc.rst

Lines changed: 9 additions & 1 deletion
@@ -39,7 +39,14 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
     --allow-gpu-fallback              (Only used when targeting DLA
                                       (device-type)) Lets engine run layers on
                                       GPU if they are not supported on DLA
-    --disable-tf32                    Prevent Float32 layers from using the
+
+    --allow-torch-fallback            Enable layers to run in torch
+                                      if they are not supported in TensorRT
+    --ffo,
+    --forced-fallback-ops             List of operators in the graph that
+                                      should be forced to fallback to Pytorch for execution
+
+    --disable-tf32                    Prevent Float32 layers from using the
                                       TF32 data format
     -p[precision...],
     --enabled-precison=[precision...] (Repeatable) Enabling an operating
@@ -48,6 +55,7 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
                                       calibration-cache argument) [ float |
                                       float32 | f32 | half | float16 | f16 |
                                       int8 | i8 ] (default: float)
+
     -d[type], --device-type=[type]    The type of device the engine should be
                                       built for [ gpu | dla ] (default: gpu)
     --gpu-id=[gpu_id]                 GPU id if running on multi-GPU platform
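
As the surrounding tutorial text notes, the output is standard TorchScript even with fallback enabled, so it loads and runs like any other module: ``torch.jit.load()`` in Python, or ``torch::jit::load`` in C++. A small sketch of the latter (path and shape are illustrative):

```
#include <torch/script.h>

int main() {
  // A fallback-enabled module is plain TorchScript; forced-fallback ops
  // simply execute in PyTorch at runtime.
  auto mod = torch::jit::load("module_trt.ts");

  std::vector<torch::jit::IValue> inputs;
  inputs.push_back(torch::randn({1, 3, 300, 300}, torch::kCUDA));

  auto out = mod.forward(inputs);
  return 0;
}
```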
