
Commit c4b1ce5

Merge pull request #558 from NVIDIA/arvind/module_fallback
Module-Level Fallback
2 parents c759675 + a473bcf commit c4b1ce5

24 files changed (+501, -35 lines)

core/lowering/BUILD  (1 addition, 0 deletions)

@@ -13,6 +13,7 @@ cc_library(
         "drop_unused_nodes.cpp",
         "lowering.cpp",
         "register_trt_placeholder_ops.cpp",
+        "LowerInfo.cpp"
     ],
     hdrs = [
         "lowering.h",

core/lowering/LowerInfo.cpp  (new file, 23 additions)

@@ -0,0 +1,23 @@
+#include <iostream>
+#include <sstream>
+#include <utility>
+
+#include "core/lowering/lowering.h"
+
+namespace trtorch {
+namespace core {
+namespace lowering {
+
+std::ostream& operator<<(std::ostream& os, const LowerInfo& l) {
+  os << "Settings requested for Lowering:" << std::endl;
+  os << "  Forced Fallback Modules: [" << std::endl;
+  for (auto i : l.forced_fallback_modules) {
+    os << "    " << i << std::endl;
+  }
+  os << "  ]";
+  return os;
+}
+
+} // namespace lowering
+} // namespace core
+} // namespace trtorch

core/lowering/lowering.cpp  (21 additions, 12 deletions)

@@ -25,16 +25,21 @@ void LowerBlock(torch::jit::Block* b) {
 }
 
 void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
-  passes::UnpackHardSwish(g);
   torch::jit::EliminateRedundantGuards(g);
   torch::jit::RemoveListMutation(g);
   torch::jit::RemoveTensorMutation(g);
   torch::jit::CreateFunctionalGraphs(g);
   torch::jit::InlineFunctionalGraphs(g);
   torch::jit::PeepholeOptimize(g, false);
-  passes::EliminateExceptionOrPassPattern(g);
   torch::jit::FuseLinear(g);
   torch::jit::LowerAllTuples(g);
+  if (!lower_info.disable_cse) {
+    torch::jit::EliminateCommonSubexpression(g);
+  }
+  torch::jit::EliminateDeadCode(g);
+  passes::MarkNodesForFallback(g, true);
+  passes::UnpackHardSwish(g);
+  passes::EliminateExceptionOrPassPattern(g);
   passes::ReduceToOperation(g);
   passes::RemoveContiguous(g);
   passes::RemoveDropout(g);
@@ -43,9 +48,6 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
   passes::Conv3DToConvolution(g);
   passes::FuseAddMMBranches(g);
   passes::RemoveBNDimCheck(g);
-  if (!lower_info.disable_cse) {
-    torch::jit::EliminateCommonSubexpression(g);
-  }
   // torch::jit::UnrollLoops(g);
   passes::UnpackAddMM(g);
   // passes::UnpackBatchNorm(g);
@@ -55,23 +57,30 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
   passes::RemoveNOPs(g);
   passes::AliasOperators(g);
   passes::SiluToSigmoidMultipication(g);
-  torch::jit::EliminateDeadCode(g);
   LOG_GRAPH(*g);
 }
 
-torch::jit::Module LowerModule(const torch::jit::script::Module& mod) {
-  LOG_DEBUG("Input module is being frozen by torch::jit::freeze_module");
+torch::jit::Module LowerModule(
+    const torch::jit::Module& mod,
+    std::string method_name,
+    std::unordered_set<std::string> forced_fallback_modules) {
+  passes::NotateModuleForFallback(mod, "", method_name, forced_fallback_modules);
+  LOG_GRAPH("After MLF notation pass: " << *mod.get_method(method_name).graph());
   auto mod_ = torch::jit::freeze_module(mod);
+  LOG_GRAPH("After freeze: " << *mod_.get_method(method_name).graph());
   return mod_;
 }
 
 std::pair<std::shared_ptr<torch::jit::Graph>, std::vector<torch::jit::IValue>> Lower(
-    const torch::jit::script::Module& mod,
+    const torch::jit::Module& mod,
     std::string method_name,
-    LowerInfo lower_info) {
-  auto lowered_mod = lower_info.unfreeze_module ? mod : LowerModule(mod);
+    const LowerInfo& lower_info) {
+  LOG_DEBUG(lower_info);
+  LOG_GRAPH("Before lowering: " << *mod.get_method(method_name).graph());
+  std::unordered_set<std::string> forced_fallback_modules(
+      lower_info.forced_fallback_modules.begin(), lower_info.forced_fallback_modules.end());
+  auto lowered_mod = lower_info.unfreeze_module ? mod : LowerModule(mod, method_name, forced_fallback_modules);
  auto g = lowered_mod.get_method(method_name).graph();
-  LOG_GRAPH(*g);
 
   LOG_GRAPH("LibTorch Lowering");
   auto graph_and_ivalues = torch::jit::LowerGraph(*g, lowered_mod._ivalue());
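To make the reworked entry point concrete, here is a hypothetical standalone driver (not part of this change) that loads a TorchScript module and runs the new Lower() path; the model path, method name, and fallback class name are all illustrative.

    #include "core/lowering/lowering.h"
    #include "torch/csrc/jit/serialization/import.h"

    // Sketch only: lowers the "forward" method of a saved TorchScript module with one
    // module class forced to run in PyTorch.
    void lower_with_module_fallback() {
      auto mod = torch::jit::load("model.ts"); // hypothetical path
      trtorch::core::lowering::LowerInfo lower_info;
      lower_info.forced_fallback_modules.push_back("torch.nn.modules.activation.ReLU"); // illustrative class name
      // Lower() notates the module, freezes it, runs the lowering passes (including
      // MarkNodesForFallback), and returns the lowered graph plus the associated IValues
      // (the frozen parameters).
      auto graph_and_params = trtorch::core::lowering::Lower(mod, "forward", lower_info);
      auto g = graph_and_params.first;
      auto params = graph_and_params.second;
      (void)g;
      (void)params;
    }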

core/lowering/lowering.h  (8 additions, 3 deletions)

@@ -15,15 +15,20 @@ struct LowerInfo {
   // Since these QDQ nodes will be identical as they share same input, one of them is eliminated due to CSE lowering
   // pass. Disable this in order to not disturb TensorRT's QAT optimizations.
   bool disable_cse = false;
+  std::vector<std::string> forced_fallback_modules;
+  friend std::ostream& operator<<(std::ostream& os, const LowerInfo& l);
 };
 
 void LowerBlock(torch::jit::Block* b);
 void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info);
-torch::jit::Module LowerModule(const torch::jit::script::Module& mod);
+torch::jit::Module LowerModule(
+    const torch::jit::Module& mod,
+    std::string method_name,
+    std::unordered_set<std::string> forced_fallback_modules);
 std::pair<std::shared_ptr<torch::jit::Graph>, std::vector<torch::jit::IValue>> Lower(
-    const torch::jit::script::Module& mod,
+    const torch::jit::Module& mod,
     std::string method_name,
-    LowerInfo lower_info);
+    const LowerInfo& lower_info);
 
 } // namespace lowering
 } // namespace core

core/lowering/passes/BUILD  (1 addition, 0 deletions)

@@ -15,6 +15,7 @@ cc_library(
         "exception_elimination.cpp",
         "fuse_addmm_branches.cpp",
         "linear_to_addmm.cpp",
+        "module_fallback.cpp",
         "op_aliasing.cpp",
         "reduce_to.cpp",
         "remove_bn_dim_check.cpp",
core/lowering/passes/module_fallback.cpp  (new file, 115 additions)

@@ -0,0 +1,115 @@
+#include <stack>
+#include <unordered_set>
+
+#include "core/lowering/passes/passes.h"
+#include "core/util/prelude.h"
+
+namespace trtorch {
+namespace core {
+namespace lowering {
+namespace passes {
+
+std::string unmangle_cls_name(const std::string& name) {
+  auto unmangled = name;
+
+  std::size_t torch_prefix = unmangled.find("__torch__");
+  if (torch_prefix != std::string::npos) {
+    unmangled.erase(torch_prefix, 10);
+  }
+
+  std::size_t mangle_pos = unmangled.find("___torch_mangle_");
+  if (mangle_pos != std::string::npos) {
+    unmangled.erase(mangle_pos, 21);
+  }
+
+  return unmangled;
+}
+
+void NotateModuleForFallback(
+    const torch::jit::Module& mod,
+    std::string mod_name,
+    std::string method_name,
+    std::unordered_set<std::string> forced_fallback_modules) {
+  auto cls_name = unmangle_cls_name(mod.type()->name()->qualifiedName());
+
+  auto g = mod.get_method(method_name).graph();
+  auto nodes = g->block()->nodes();
+  bool changed_mod = false;
+  for (const auto n : nodes) {
+    if (n->kind() == torch::jit::prim::GetAttr) {
+      auto out_type = unmangle_cls_name(c10::toString(n->output(0)->type()));
+      if (forced_fallback_modules.find(out_type) != forced_fallback_modules.end()) {
+        LOG_DEBUG(
+            "Notating module for fallback: " << n->s(c10::attr::name) << " (" << out_type << ") [owner: " << mod_name
+                                             << " (" << cls_name << ")]");
+        auto uses = n->output(0)->uses();
+        for (const auto u : uses) {
+          auto user = u.user;
+          auto delim_start_n = g->create(torch::jit::prim::Enter, 0);
+          delim_start_n->s_(c10::Symbol::attr("compilation_edge"), "start");
+          auto delim_end_n = g->create(torch::jit::prim::Exit, 0);
+          delim_end_n->s_(c10::Symbol::attr("compilation_edge"), "end");
+          delim_start_n->insertBefore(user);
+          delim_end_n->insertAfter(user);
+        }
+        changed_mod = true;
+      }
+    }
+  }
+
+  if (changed_mod) {
+    LOG_DEBUG("Notated graph: " << *g);
+  }
+
+  for (const auto sub_mod : mod.named_children()) {
+    NotateModuleForFallback(sub_mod.value, sub_mod.name, method_name, forced_fallback_modules);
+  }
+}
+
+void MarkNodesForFallback(std::shared_ptr<torch::jit::Graph>& g, bool delete_delims) {
+  auto b = g->block();
+
+  std::stack<bool> mark = std::stack<bool>({false});
+  for (auto it = b->nodes().begin(); it != b->nodes().end(); it++) {
+    auto n = *it;
+    if (!mark.top() && n->kind() == torch::jit::prim::Enter && n->hasAttributeS("compilation_edge")) {
+      if (n->s(c10::Symbol::attr("compilation_edge")) == "start") {
+        LOG_DEBUG("Starting to mark new segmented block targeted for torch");
+        mark.push(true);
+        if (delete_delims) {
+          it.destroyCurrent();
+        }
+      }
+    } else if (mark.top() && n->kind() == torch::jit::prim::Enter && n->hasAttributeS("compilation_edge")) {
+      if (n->s(c10::Symbol::attr("compilation_edge")) == "start") {
+        LOG_DEBUG("Found the start of another segmented block targeted for torch while actively marking a block");
+        mark.push(true);
+        if (delete_delims) {
+          it.destroyCurrent();
+        }
+      }
+    } else if (mark.top() && n->kind() == torch::jit::prim::Exit && n->hasAttributeS("compilation_edge")) {
+      if (n->s(c10::Symbol::attr("compilation_edge")) == "end") {
+        LOG_DEBUG("Found the end of segmented block targeted for torch while actively marking a block");
+        mark.pop();
+        if (delete_delims) {
+          it.destroyCurrent();
+        }
+      }
+    } else if (!mark.top() && n->kind() == torch::jit::prim::Exit && n->hasAttributeS("compilation_edge")) {
+      if (n->s(c10::Symbol::attr("compilation_edge")) == "end") {
+        LOG_WARNING("Found the end of segmented block targeted for torch while not actively marking a block");
+      }
+    } else if (mark.top()) {
+      LOG_GRAPH("Marking " << util::node_info(n) << " to run in PyTorch");
+      n->i_(c10::Symbol::attr("to_compile"), (int64_t) false);
+    }
+  }
+
+  LOG_DEBUG("After marking operations for torch fallback: " << *g);
+}
+
+} // namespace passes
+} // namespace lowering
+} // namespace core
+} // namespace trtorch
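The two passes are meant to run on either side of freezing: NotateModuleForFallback brackets every use of a listed submodule with prim::Enter/prim::Exit delimiter nodes carrying a compilation_edge attribute, freezing then inlines the submodule's ops between those delimiters, and MarkNodesForFallback tags everything inside a start/end pair with to_compile = false, optionally deleting the delimiters. Below is a minimal sketch of that sequence, mirroring what LowerModule and LowerGraph do in this change; the method name and fallback class name are illustrative.

    #include <unordered_set>

    #include "core/lowering/passes/passes.h"
    #include "torch/csrc/jit/api/module.h"
    #include "torch/csrc/jit/passes/freeze_module.h"

    namespace lowering = trtorch::core::lowering;

    torch::jit::Module notate_freeze_and_mark(const torch::jit::Module& mod) {
      std::unordered_set<std::string> fallback_mods = {"torch.nn.modules.pooling.MaxPool2d"}; // illustrative class name
      // Bracket every use of the listed submodule classes with compilation_edge delimiters.
      lowering::passes::NotateModuleForFallback(mod, "", "forward", fallback_mods);
      // Freezing inlines the submodule calls, so the delimiters now surround the inlined ops.
      auto frozen = torch::jit::freeze_module(mod);
      auto g = frozen.get_method("forward").graph();
      // Tag everything between a start/end delimiter pair with to_compile = false and drop the delimiters.
      lowering::passes::MarkNodesForFallback(g, /*delete_delims=*/true);
      return frozen;
    }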

core/lowering/passes/passes.h  (6 additions, 0 deletions)

@@ -7,12 +7,18 @@ namespace core {
 namespace lowering {
 namespace passes {
 
+void NotateModuleForFallback(
+    const torch::jit::Module& mod,
+    std::string mod_name,
+    std::string method_name,
+    std::unordered_set<std::string> forced_fallback_modules);
 void Conv2DToConvolution(std::shared_ptr<torch::jit::Graph>& graph);
 void Conv3DToConvolution(std::shared_ptr<torch::jit::Graph>& graph);
 void FuseAddMMBranches(std::shared_ptr<torch::jit::Graph> graph);
 void LinearToAddMM(std::shared_ptr<torch::jit::Graph>& graph);
 void EliminateExceptionOrPassPattern(std::shared_ptr<torch::jit::Graph> graph);
 void ReduceToOperation(std::shared_ptr<torch::jit::Graph>& graph);
+void MarkNodesForFallback(std::shared_ptr<torch::jit::Graph>& g, bool delete_delims);
 void RemoveBNDimCheck(std::shared_ptr<torch::jit::Graph> graph);
 void RemoveContiguous(std::shared_ptr<torch::jit::Graph>& graph);
 void RemoveDropout(std::shared_ptr<torch::jit::Graph>& graph);

core/partitioning/partitioning.cpp  (3 additions, 1 deletion)

@@ -274,7 +274,9 @@ std::vector<SegmentedBlock> segment_graph(torch::jit::Block* block, const Partit
   }
 
   std::string node_string(n->kind().toQualString());
-  if (conversion::OpSupported(n) && !forced_fallback_operators.count(node_string)) {
+  auto has_compile_attribute = n->hasAttribute(c10::Symbol::attr("to_compile"));
+  if (conversion::OpSupported(n) && !forced_fallback_operators.count(node_string) &&
+      (!has_compile_attribute || n->i(c10::Symbol::attr("to_compile")) == (int64_t) true)) {
     tensorrt_nodes.push_back(n);
     if (tensorrt_nodes.size() >= min_block_size && !pytorch_nodes.empty()) {
       segmented_blocks.emplace_back(SegmentedBlock::kTorch, pytorch_nodes);
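Restated as a standalone predicate, the updated segmentation rule is: a node goes to TensorRT only if conversion supports it, its operator is not in the forced-fallback operator list, and the lowering passes did not tag it with to_compile = false. The helper below is a sketch and does not exist in the codebase; it assumes the forced-fallback operator list is a set-like container, as the .count() call in the diff suggests, and that OpSupported is declared in core/conversion/conversion.h.

    #include <string>
    #include <unordered_set>

    #include "core/conversion/conversion.h"
    #include "torch/csrc/jit/ir/ir.h"

    // Sketch of the eligibility check applied to each node while segmenting the graph.
    bool eligible_for_tensorrt(torch::jit::Node* n, const std::unordered_set<std::string>& forced_fallback_operators) {
      std::string node_string(n->kind().toQualString());
      bool has_compile_attribute = n->hasAttribute(c10::Symbol::attr("to_compile"));
      return trtorch::core::conversion::OpSupported(n) && !forced_fallback_operators.count(node_string) &&
          (!has_compile_attribute || n->i(c10::Symbol::attr("to_compile")) == (int64_t) true);
    }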

cpp/api/include/trtorch/trtorch.h  (3 additions, 0 deletions)

@@ -577,6 +577,9 @@ struct TRTORCH_API CompileSpec {
   /// A list of names of operations that will explicitly run in PyTorch
   std::vector<std::string> forced_fallback_ops;
 
+  /// A list of names of modules that will explicitly run in PyTorch
+  std::vector<std::string> forced_fallback_modules;
+
   /**
    * @brief Construct a default Torch Fallback object, fallback will be off
   */

cpp/api/src/compile_spec.cpp  (1 addition, 0 deletions)

@@ -375,6 +375,7 @@ core::CompileSpec to_internal_compile_spec(CompileSpec external) {
   internal.partition_info.enabled = external.torch_fallback.enabled;
   internal.partition_info.min_block_size = external.torch_fallback.min_block_size;
   internal.partition_info.forced_fallback_operators = external.torch_fallback.forced_fallback_ops;
+  internal.lower_info.forced_fallback_modules = external.torch_fallback.forced_fallback_modules;
 
   switch (external.device.device_type) {
     case CompileSpec::Device::DeviceType::kDLA:
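From the user's side, the feature is driven entirely through CompileSpec::torch_fallback. The sketch below shows the intended C++ usage; the class name pushed into forced_fallback_modules is illustrative, and the spec is assumed to have already been constructed with this release's input-spec API.

    #include "torch/script.h"
    #include "trtorch/trtorch.h"

    // Sketch: every submodule whose class matches an entry in forced_fallback_modules
    // is kept in PyTorch; everything else remains eligible for TensorRT conversion.
    torch::jit::Module compile_with_module_fallback(torch::jit::Module& mod, trtorch::CompileSpec spec) {
      spec.torch_fallback.enabled = true;
      spec.torch_fallback.min_block_size = 1;
      spec.torch_fallback.forced_fallback_modules.push_back("torchvision.models.resnet.BasicBlock"); // illustrative
      return trtorch::CompileGraph(mod, spec);
    }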
