Skip to content

Commit 8db194f

Browse files
committed
Merge branch 'master' into arvind/loop_fallback
2 parents cc10876 + a38f0c7 commit 8db194f

File tree

360 files changed

+98501
-1014
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

360 files changed

+98501
-1014
lines changed

.github/pr-labels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
- cpp/trtorchc/**/*
33

44
"component: api [C++]":
5-
- cpp/api/**/*
5+
- cpp/**/*
66

77
"component: api [Python]":
88
- py/**/*

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,4 +568,12 @@ Signed-off-by: Naren Dasan <[email protected]>
568568
Signed-off-by: Naren Dasan <[email protected]>
569569

570570

571+
# 0.4.1 (2021-10-06)
572+
573+
### Bug Fixes
574+
575+
* **//core/lowering:** Fixes module level fallback recursion ([2fc612d](https://github.com/NVIDIA/TRTorch/commit/2fc612d))
576+
* Move some lowering passes to graph level logging ([0266f41](https://github.com/NVIDIA/TRTorch/commit/0266f41))
577+
* **//py:** Fix trtorch.Device alternate constructor options ([ac26841](https://github.com/NVIDIA/TRTorch/commit/ac26841))
578+
571579

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,10 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts")
7878
These are the following dependencies used to verify the testcases. TRTorch can work with other versions, but the tests are not guaranteed to pass.
7979

8080
- Bazel 4.0.0
81-
- Libtorch 1.9.0 (built with CUDA 11.1)
81+
- Libtorch 1.9.1 (built with CUDA 11.1)
8282
- CUDA 11.1 (10.2 on Jetson)
8383
- cuDNN 8.2
84-
- TensorRT 8.0.1.6
84+
- TensorRT 8.0.3.4 (TensorRT 8.0.1.6 on Jetson)
8585

8686
## Prebuilt Binaries and Wheel files
8787

WORKSPACE

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,17 @@ new_local_repository(
5050
http_archive(
5151
name = "libtorch",
5252
build_file = "@//third_party/libtorch:BUILD",
53-
sha256 = "edc12091193ba772db77a6ec14e05cef6da881288fca0dfc89a031f631601f60",
53+
sha256 = "db57b1023fb33768286a98ba22c44cfe03d6ed158bc2dc0ca1d4928ee5f19f60",
5454
strip_prefix = "libtorch",
55-
urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-cxx11-abi-shared-with-deps-1.9.0%2Bcu111.zip"],
55+
urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-cxx11-abi-shared-with-deps-1.9.1%2Bcu111.zip"],
5656
)
5757

5858
http_archive(
5959
name = "libtorch_pre_cxx11_abi",
6060
build_file = "@//third_party/libtorch:BUILD",
61-
sha256 = "af9435fa4b44bb395c1a7645391c00228a72af4305f43a61e9300c0abdbe0819",
61+
sha256 = "5563ca53b2b5342f1ab7eef9baf308f197673663ad5b1458a031c46dd802f413",
6262
strip_prefix = "libtorch",
63-
urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.9.0%2Bcu111.zip"],
63+
urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.9.1%2Bcu111.zip"],
6464
)
6565

6666
# Download these tarballs manually from the NVIDIA website
@@ -70,20 +70,20 @@ http_archive(
7070
http_archive(
7171
name = "cudnn",
7272
build_file = "@//third_party/cudnn/archive:BUILD",
73-
sha256 = "39412acd9ef5dd27954b6b9f5df75bd381c5d7ceb7979af6c743a7f4521f9c77",
73+
sha256 = "0e5d2df890b9967efa6619da421310d97323565a79f05a1a8cb9b7165baad0d7",
7474
strip_prefix = "cuda",
7575
urls = [
76-
"https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.2.1.32/11.3_06072021/cudnn-11.3-linux-x64-v8.2.1.32.tgz",
76+
"https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.2.4/11.4_20210831/cudnn-11.4-linux-x64-v8.2.4.15.tgz",
7777
],
7878
)
7979

8080
http_archive(
8181
name = "tensorrt",
8282
build_file = "@//third_party/tensorrt/archive:BUILD",
83-
sha256 = "def6a5ee50bed25a68a9c9e22ec671a8f29ee5414bde47c5767bd279e5596f88",
84-
strip_prefix = "TensorRT-8.0.1.6",
83+
sha256 = "3177435024ff4aa5a6dba8c1ed06ab11cc0e1bf3bb712dfa63a43422f41313f3",
84+
strip_prefix = "TensorRT-8.0.3.4",
8585
urls = [
86-
"https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.0.1/tars/tensorrt-8.0.1.6.linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz",
86+
"https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.0.3/tars/tensorrt-8.0.3.4.linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz",
8787
],
8888
)
8989

core/lowering/lowering.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,9 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
3737
torch::jit::EliminateCommonSubexpression(g);
3838
}
3939
torch::jit::EliminateDeadCode(g);
40-
passes::MarkNodesForFallback(g, true);
40+
if (lower_info.forced_fallback_modules.size() > 0) {
41+
passes::MarkNodesForFallback(g, true);
42+
}
4143
passes::UnpackHardSwish(g);
4244
passes::EliminateExceptionOrPassPattern(g);
4345
passes::ReduceToOperation(g);
@@ -60,12 +62,13 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
6062
LOG_GRAPH(*g);
6163
}
6264

63-
torch::jit::Module LowerModule(
64-
const torch::jit::Module& mod,
65-
std::string method_name,
66-
std::unordered_set<std::string> forced_fallback_modules) {
67-
passes::NotateModuleForFallback(mod, "", method_name, forced_fallback_modules);
68-
LOG_GRAPH("After MLF notation pass: " << *mod.get_method(method_name).graph());
65+
torch::jit::Module LowerModule(const torch::jit::Module& mod, std::string method_name, const LowerInfo& lower_info) {
66+
std::unordered_set<std::string> forced_fallback_modules(
67+
lower_info.forced_fallback_modules.begin(), lower_info.forced_fallback_modules.end());
68+
if (forced_fallback_modules.size() > 0) {
69+
passes::NotateModuleForFallback(mod, "", method_name, forced_fallback_modules);
70+
LOG_GRAPH("After MLF notation pass: " << *mod.get_method(method_name).graph());
71+
}
6972
auto mod_ = torch::jit::freeze_module(mod);
7073
LOG_GRAPH("After freeze: " << *mod_.get_method(method_name).graph());
7174
return mod_;
@@ -77,9 +80,7 @@ std::pair<std::shared_ptr<torch::jit::Graph>, std::vector<torch::jit::IValue>> L
7780
const LowerInfo& lower_info) {
7881
LOG_DEBUG(lower_info);
7982
LOG_GRAPH("Before lowering: " << *mod.get_method(method_name).graph());
80-
std::unordered_set<std::string> forced_fallback_modules(
81-
lower_info.forced_fallback_modules.begin(), lower_info.forced_fallback_modules.end());
82-
auto lowered_mod = lower_info.unfreeze_module ? mod : LowerModule(mod, method_name, forced_fallback_modules);
83+
auto lowered_mod = lower_info.unfreeze_module ? mod : LowerModule(mod, method_name, lower_info);
8384
auto g = lowered_mod.get_method(method_name).graph();
8485

8586
LOG_GRAPH("LibTorch Lowering");

core/lowering/passes/module_fallback.cpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,29 @@ void NotateModuleForFallback(
6161
LOG_GRAPH("Notated graph: " << *g);
6262
}
6363

64-
for (const auto sub_mod : mod.named_children()) {
65-
NotateModuleForFallback(sub_mod.value, sub_mod.name, method_name, forced_fallback_modules);
64+
if (mod.named_children().size() > 0) {
65+
for (const auto n : nodes) {
66+
std::string sub_method_name = "";
67+
if (n->kind() == torch::jit::prim::CallMethod) {
68+
sub_method_name = n->s(c10::Symbol::attr("name"));
69+
auto sub_mod_val = n->input(0);
70+
auto sub_mod_src_n = sub_mod_val->node();
71+
if (!sub_mod_src_n->hasAttributeS("name")) {
72+
LOG_GRAPH("Node: " << util::node_info(sub_mod_src_n) << " manages a module with no name, skipping");
73+
break;
74+
}
75+
auto sub_mod_name = sub_mod_src_n->s(c10::Symbol::attr("name"));
76+
for (const auto sub_mod : mod.named_children()) {
77+
// Theres probably a way to directly access the module we care about
78+
if (sub_mod.name == sub_mod_name) {
79+
LOG_GRAPH(
80+
"Looking at <module>.<method>() next: " << sub_mod_name << "." << sub_method_name
81+
<< "() (lowering.passes.NotateModuleForFallback)");
82+
NotateModuleForFallback(sub_mod.value, sub_mod.name, sub_method_name, forced_fallback_modules);
83+
}
84+
}
85+
}
86+
}
6687
}
6788
}
6889

@@ -74,23 +95,23 @@ void MarkNodesForFallback(std::shared_ptr<torch::jit::Graph>& g, bool delete_del
7495
auto n = *it;
7596
if (!mark.top() && n->kind() == torch::jit::prim::Enter && n->hasAttributeS("compilation_edge")) {
7697
if (n->s(c10::Symbol::attr("compilation_edge")) == "start") {
77-
LOG_DEBUG("Starting to mark new segmented block targeted for torch");
98+
LOG_GRAPH("Starting to mark new segmented block targeted for torch");
7899
mark.push(true);
79100
if (delete_delims) {
80101
it.destroyCurrent();
81102
}
82103
}
83104
} else if (mark.top() && n->kind() == torch::jit::prim::Enter && n->hasAttributeS("compilation_edge")) {
84105
if (n->s(c10::Symbol::attr("compilation_edge")) == "start") {
85-
LOG_DEBUG("Found the start of another segmented block targeted for torch while actively marking a block");
106+
LOG_GRAPH("Found the start of another segmented block targeted for torch while actively marking a block");
86107
mark.push(true);
87108
if (delete_delims) {
88109
it.destroyCurrent();
89110
}
90111
}
91112
} else if (mark.top() && n->kind() == torch::jit::prim::Exit && n->hasAttributeS("compilation_edge")) {
92113
if (n->s(c10::Symbol::attr("compilation_edge")) == "end") {
93-
LOG_DEBUG("Found the end of segmented block targeted for torch while actively marking a block");
114+
LOG_GRAPH("Found the end of segmented block targeted for torch while actively marking a block");
94115
mark.pop();
95116
if (delete_delims) {
96117
it.destroyCurrent();

cpp/include/trtorch/trtorch.h

Lines changed: 13 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ struct TRTORCH_API CompileSpec {
427427
Input(c10::ArrayRef<int64_t> shape, DataType dtype, TensorFormat format = TensorFormat::kContiguous);
428428

429429
/**
430-
* @brief Construct a new Input Range object dynamic input size from
430+
* @brief Construct a new Input spec object dynamic input size from
431431
* c10::ArrayRef (the type produced by tensor.sizes()) for min, opt, and max
432432
* supported sizes. dtype (Expected data type for the input) defaults to PyTorch
433433
* / traditional TRT convection (FP32 for FP32 only, FP16 for FP32 and FP16, FP32 for Int8)
@@ -462,7 +462,7 @@ struct TRTORCH_API CompileSpec {
462462
TensorFormat format = TensorFormat::kContiguous);
463463

464464
/**
465-
* @brief Construct a new Input Range object dynamic input size from
465+
* @brief Construct a new Input spec object dynamic input size from
466466
* c10::ArrayRef (the type produced by tensor.sizes()) for min, opt, and max
467467
* supported sizes. dtype (Expected data type for the input) defaults to PyTorch
468468
* / traditional TRT convection (FP32 for FP32 only, FP16 for FP32 and FP16, FP32 for Int8)
@@ -479,7 +479,7 @@ struct TRTORCH_API CompileSpec {
479479
TensorFormat format = TensorFormat::kContiguous);
480480

481481
/**
482-
* @brief Construct a new Input Range object dynamic input size from
482+
* @brief Construct a new Input spec object dynamic input size from
483483
* c10::ArrayRef (the type produced by tensor.sizes()) for min, opt, and max
484484
* supported sizes
485485
*
@@ -496,6 +496,16 @@ struct TRTORCH_API CompileSpec {
496496
DataType dtype,
497497
TensorFormat format = TensorFormat::kContiguous);
498498

499+
/**
500+
* @brief Construct a new Input spec object using a torch tensor as an example
501+
* The tensor's shape, type and layout inform the spec's values
502+
*
503+
* Note: You cannot set dynamic shape through this method, you must use an alternative constructor
504+
*
505+
* @param tensor Reference tensor to set shape, type and layout
506+
*/
507+
Input(at::Tensor tensor);
508+
499509
bool get_explicit_set_dtype() {
500510
return explicit_set_dtype;
501511
}
@@ -506,64 +516,6 @@ struct TRTORCH_API CompileSpec {
506516
bool explicit_set_dtype;
507517
};
508518

509-
/**
510-
* @brief A struct to hold an input range (used by TensorRT Optimization
511-
* profile)
512-
*
513-
* This struct can either hold a single vector representing an input shape,
514-
* signifying a static input shape or a set of three input shapes representing
515-
* the min, optiminal and max input shapes allowed for the engine.
516-
*/
517-
struct TRTORCH_API InputRange {
518-
/// Minimum acceptable input size into the engine
519-
std::vector<int64_t> min;
520-
/// Optimal input size into the engine (gets best performace)
521-
std::vector<int64_t> opt;
522-
/// Maximum acceptable input size into the engine
523-
std::vector<int64_t> max;
524-
/**
525-
* @brief Construct a new Input Range object for static input size from
526-
* vector
527-
*
528-
* @param opt
529-
*/
530-
[[deprecated("trtorch::CompileSpec::InputRange is being deprecated in favor of trtorch::CompileSpec::Input. trtorch::CompileSpec::InputRange will be removed in TRTorch v0.5.0")]] InputRange(
531-
std::vector<int64_t> opt);
532-
/**
533-
* @brief Construct a new Input Range object static input size from
534-
* c10::ArrayRef (the type produced by tensor.sizes())
535-
*
536-
* @param opt
537-
*/
538-
[[deprecated("trtorch::CompileSpec::InputRange is being deprecated in favor of trtorch::CompileSpec::Input. trtorch::CompileSpec::InputRange will be removed in TRTorch v0.5.0")]] InputRange(
539-
c10::ArrayRef<int64_t> opt);
540-
/**
541-
* @brief Construct a new Input Range object dynamic input size from vectors
542-
* for min, opt, and max supported sizes
543-
*
544-
* @param min
545-
* @param opt
546-
* @param max
547-
*/
548-
[[deprecated("trtorch::CompileSpec::InputRange is being deprecated in favor of trtorch::CompileSpec::Input. trtorch::CompileSpec::InputRange will be removed in TRTorch v0.5.0")]] InputRange(
549-
std::vector<int64_t> min,
550-
std::vector<int64_t> opt,
551-
std::vector<int64_t> max);
552-
/**
553-
* @brief Construct a new Input Range object dynamic input size from
554-
* c10::ArrayRef (the type produced by tensor.sizes()) for min, opt, and max
555-
* supported sizes
556-
*
557-
* @param min
558-
* @param opt
559-
* @param max
560-
*/
561-
[[deprecated("trtorch::CompileSpec::InputRange is being deprecated in favor of trtorch::CompileSpec::Input. trtorch::CompileSpec::InputRange will be removed in TRTorch v0.5.0")]] InputRange(
562-
c10::ArrayRef<int64_t> min,
563-
c10::ArrayRef<int64_t> opt,
564-
c10::ArrayRef<int64_t> max);
565-
};
566-
567519
/**
568520
* @brief A struct to hold fallback info
569521
*/
@@ -596,18 +548,6 @@ struct TRTORCH_API CompileSpec {
596548
TorchFallback(bool enabled, uint64_t min_size) : enabled(enabled), min_block_size(min_size) {}
597549
};
598550

599-
/**
600-
* @brief Construct a new Extra Info object from input ranges.
601-
* Each entry in the vector represents a input and should be provided in call
602-
* order.
603-
*
604-
* Use this constructor if you want to use dynamic shape
605-
*
606-
* @param input_ranges
607-
*/
608-
[[deprecated("trtorch::CompileSpec::CompileSpec(std::vector<InputRange> input_ranges) is being deprecated in favor of trtorch::CompileSpec::CompileSpec(std::vector<Input> inputs). Please use CompileSpec(std::vector<Input> inputs). trtorch::CompileSpec::CompileSpec(std::vector<InputRange> input_ranges) will be removed in TRTorch v0.5.0")]] CompileSpec(
609-
std::vector<InputRange> input_ranges)
610-
: input_ranges(std::move(input_ranges)) {}
611551
/**
612552
* @brief Construct a new Extra Info object
613553
* Convienence constructor to set fixed input size from vectors describing
@@ -657,24 +597,6 @@ struct TRTORCH_API CompileSpec {
657597
*/
658598
std::vector<Input> inputs;
659599

660-
/**
661-
* Sizes for inputs to engine, can either be a single size or a range
662-
* defined by Min, Optimal, Max sizes
663-
*
664-
* Order is should match call order
665-
*/
666-
[[deprecated(
667-
"trtorch::CompileSpec::input_ranges is being deprecated in favor of trtorch::CompileSpec::inputs. trtorch::CompileSpec::input_ranges will be removed in TRTorch v0.5.0")]] std::
668-
vector<InputRange>
669-
input_ranges;
670-
671-
/**
672-
* Default operating precision for the engine
673-
*/
674-
[[deprecated(
675-
"trtorch::CompileSpec::op_precision is being deprecated in favor of trtorch::CompileSpec::enabled_precisions, a set of all enabled precisions to use during compilation, trtorch::CompileSpec::op_precision will be removed in TRTorch v0.5.0")]] DataType
676-
op_precision = DataType::kFloat;
677-
678600
/**
679601
* @brief The set of precisions TensorRT is allowed to use for kernels during compilation
680602
*

0 commit comments

Comments
 (0)