
Commit 299cd24

Merge pull request #617 from NVIDIA/arvind/loop_fallback

Loop Fallback

2 parents a1180ce + 0b3cf89

8 files changed: +152 -9 lines

core/conversion/conversion.cpp

Lines changed: 1 addition & 1 deletion
@@ -326,7 +326,7 @@ void EvaluateLoopBlock(ConversionCtx* ctx, const torch::jit::Node* n) {
   MapIValues(ctx, n->outputs(), n->blocks()[0]->inputs(), 0, 1);
   for (auto bn : n->blocks()[0]->nodes()) {
     if (bn->kind() == torch::jit::prim::Loop) {
-      EvaluateLoopBlock(ctx, n);
+      EvaluateLoopBlock(ctx, bn);
     } else if (bn->kind() == torch::jit::prim::If) {
       EvaluateConditionalBlock(ctx, bn, true);
     } else {
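This one-character fix matters: the old call recursed on the outer node n, so any nested prim::Loop re-entered EvaluateLoopBlock on the same loop and never made progress; passing the child node bn descends into the nested loop instead. A toy illustration of the bug pattern, using stand-in types rather than TRTorch's ConversionCtx and torch::jit::Node:

#include <vector>

struct Block { std::vector<Block*> nested; };

void evaluate(Block* b) {
  for (auto* child : b->nested) {
    // evaluate(b);   // old bug: re-enters the same block, infinite recursion
    evaluate(child);  // fixed: descend into the child block
  }
}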

core/lowering/passes/module_fallback.cpp

Lines changed: 1 addition & 1 deletion
@@ -133,4 +133,4 @@ void MarkNodesForFallback(std::shared_ptr<torch::jit::Graph>& g, bool delete_del
 } // namespace passes
 } // namespace lowering
 } // namespace core
-} // namespace trtorch
+} // namespace trtorch

The removed and added lines are textually identical; the change evidently adds a missing trailing newline at the end of the file.

core/lowering/passes/unpack_var.cpp

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ void UnpackVar(std::shared_ptr<torch::jit::Graph>& graph) {
   torch::jit::SubgraphRewriter var_rewriter;
   var_rewriter.RegisterRewritePattern(var_pattern, unpacked_pattern);
   var_rewriter.runOnGraph(graph);
-  LOG_DEBUG("Post unpack var: " << *graph);
+  LOG_GRAPH("Post unpack var: " << *graph);
 }
 
 } // namespace passes
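LOG_DEBUG is swapped for LOG_GRAPH so the full graph dump only appears at the dedicated graph log level instead of cluttering ordinary debug output. A minimal sketch of how such a two-level macro pair can be structured, assuming a simple severity enum; this mirrors the macro names but is not TRTorch's actual core/util logging implementation:

#include <iostream>
#include <sstream>

enum class LogLevel { kERROR, kWARNING, kINFO, kDEBUG, kGRAPH };

// Messages above this threshold are dropped; at kDEBUG, LOG_DEBUG prints
// but the noisier LOG_GRAPH dumps stay silent.
static LogLevel reportable_level = LogLevel::kDEBUG;

#define LOG_AT_LEVEL(lvl, msg)               \
  do {                                       \
    if ((lvl) <= reportable_level) {         \
      std::ostringstream ss;                 \
      ss << msg; /* msg may be a << chain */ \
      std::cerr << ss.str() << std::endl;    \
    }                                        \
  } while (0)

#define LOG_DEBUG(msg) LOG_AT_LEVEL(LogLevel::kDEBUG, msg)
#define LOG_GRAPH(msg) LOG_AT_LEVEL(LogLevel::kGRAPH, msg)

int main() {
  LOG_DEBUG("visible at the default threshold");
  LOG_GRAPH("suppressed until reportable_level is raised to kGRAPH");
}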

core/partitioning/partitioning.cpp

Lines changed: 32 additions & 4 deletions
@@ -2,6 +2,7 @@
 
 #include <queue>
 #include "core/conversion/conversion.h"
+#include "core/conversion/evaluators/evaluators.h"
 #include "core/partitioning/shape_analysis.h"
 #include "torch/csrc/jit/passes/constant_pooling.h"
 #include "torch/csrc/jit/passes/dead_code_elimination.h"
@@ -114,7 +115,7 @@ std::vector<SegmentedBlock> segmentBlocksWithNonTensorInputs(SegmentedBlock& seg
       pytorch_nodes.push_back(n);
       prev_non_tensor_outputs = containNonTensorOutputs(n);
     } else {
-      // If pytorch_nodes is not empty, the previous nodes were all tensorrt_nodes. Construct a
+      // If pytorch_nodes is not empty, the previous nodes were all pytorch_nodes. Construct a
       // Pytorch segmented_block and clear the pytorch_nodes list to be later used for new Pytorch segments.
       if (!pytorch_nodes.empty()) {
         new_seg_blocks.emplace_back(SegmentedBlock::kTorch, pytorch_nodes);
@@ -131,6 +132,7 @@ std::vector<SegmentedBlock> segmentBlocksWithNonTensorInputs(SegmentedBlock& seg
       new_seg_blocks.emplace_back(SegmentedBlock::kTorch, pytorch_nodes);
     }
   }
+
   return std::move(new_seg_blocks);
 }
 

@@ -158,6 +160,8 @@ void resolveNonTensorInputs(PartitionedGraph& segmented_blocks) { // , std::shar
       }
     }
 
+    // For each non-tensor value in the usage_counts map, keep updating the produce_id to the earliest segmented block
+    // that has/produces it.
     for (auto& use : usage_counts) {
       // Set the produce_id to the segmented block index that contains/produces this non-tensor torch::jit::Value
       if (segmented_blocks[i].contain_raw_value(use.first)) {
@@ -177,9 +181,8 @@ void resolveNonTensorInputs(PartitionedGraph& segmented_blocks) { // , std::shar
       // Segmented Blocks with non-tensor inputs will have to be re-segmented as
       // TRTorch doesn't support non-tensor inputs for a module.
       auto to_inject_blocks = segmentBlocksWithNonTensorInputs(segmented_blocks[first_torch_id]);
-      segmented_blocks.erase(segmented_blocks.begin() + first_torch_id);
-      segmented_blocks.insert(
-          segmented_blocks.begin() + first_torch_id, to_inject_blocks.begin(), to_inject_blocks.end());
+      auto next_iter = segmented_blocks_list.erase(idx_to_iter[first_torch_id]);
+      segmented_blocks_list.insert(next_iter, to_inject_blocks.begin(), to_inject_blocks.end());
       updated_segments.insert(first_torch_id);
     }
   }
@@ -258,6 +261,20 @@ void registerSegmentsOutputs(PartitionedGraph& segmented_blocks, torch::jit::Blo
     return;
   }
 
+bool checkLoopEvaluatable(torch::jit::Node* n) {
+  bool compile_to_trt = true;
+  for (auto bn : n->blocks()[0]->nodes()) {
+    if (bn->kind() == torch::jit::prim::Loop) {
+      compile_to_trt = compile_to_trt && checkLoopEvaluatable(bn);
+    } else if (bn->kind() == torch::jit::prim::If) {
+      compile_to_trt = compile_to_trt && containNonTensorOutputs(bn);
+    } else {
+      compile_to_trt = compile_to_trt && core::conversion::evaluators::shouldEvalAtConversionTime(bn);
+    }
+  }
+  return compile_to_trt;
+}
+
 std::vector<SegmentedBlock> segment_graph(torch::jit::Block* block, const PartitionInfo& partition_info) {
   auto min_block_size = partition_info.min_block_size;
   std::unordered_set<std::string> forced_fallback_operators(
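checkLoopEvaluatable encodes the heart of this PR: a prim::Loop may join a TensorRT segment only if every node in its body, recursing through nested loops, can be evaluated at conversion time, in which case the evaluator effectively unrolls the loop away. A toy restatement of the predicate with simplified stand-in types (the real function additionally special-cases prim::If via containNonTensorOutputs; the toy folds everything into a single evaluatable flag):

#include <string>
#include <vector>

// Stand-in for a JIT node: its kind, the nodes of its first block (for loops),
// and whether the evaluator library could run it at conversion time.
struct ToyNode {
  std::string kind;
  std::vector<ToyNode> body;
  bool evaluatable;
};

bool loopIsEvaluatable(const ToyNode& loop) {
  for (const auto& n : loop.body) {
    if (n.kind == "prim::Loop") {
      if (!loopIsEvaluatable(n)) return false; // nested loops must also qualify
    } else if (!n.evaluatable) {
      return false; // one TRT-only op forces the whole loop to fall back
    }
  }
  return true;
}

int main() {
  ToyNode trt_only{"aten::add", {}, false};
  ToyNode loop{"prim::Loop", {trt_only}, false};
  return loopIsEvaluatable(loop) ? 0 : 1; // returns 1: this loop falls back to Torch
}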
@@ -298,6 +315,17 @@ std::vector<SegmentedBlock> segment_graph(torch::jit::Block* block, const Partit
       }
       segmented_blocks.emplace_back(SegmentedBlock::kTorch, std::vector<torch::jit::Node*>{n});
       continue;
+    } else if (n->kind() == torch::jit::prim::Loop) {
+      if (!pytorch_nodes.empty()) {
+        segmented_blocks.emplace_back(SegmentedBlock::kTorch, pytorch_nodes);
+        pytorch_nodes.clear();
+      }
+      if (checkLoopEvaluatable(n)) {
+        tensorrt_nodes.push_back(n);
+      } else {
+        segmented_blocks.emplace_back(SegmentedBlock::kTorch, std::vector<torch::jit::Node*>{n});
+      }
+      continue;
     }
     pytorch_nodes.push_back(n);
   }

core/partitioning/shape_analysis.cpp

Lines changed: 3 additions & 1 deletion
@@ -56,7 +56,9 @@ void getSegmentsOutputByRunning(
   for (auto& input : seg_block.raw_inputs()) {
     TRTORCH_CHECK(
         ivalues_maps.count(input),
-        "Could not find torch::jit::Value* " << input->debugName() << " in lowering graph for mini graph input.\n");
+        "Could not find torch::jit::Value* " << input->debugName() << " produced from "
+            << util::node_info(input->node())
+            << " in lowering graph for mini graph input.\n");
     if (input->node()->kind() == torch::jit::prim::Param) {
       jit_inputs_ivalues.push_back(ivalues_maps[input]);
     } else if (input->type()->isSubtypeOf(torch::jit::TensorType::get())) {
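The multi-part message works because TRTORCH_CHECK takes its message as a streamed expression: the << chain is pasted after an internal stream object by the macro. A generic sketch of that common idiom, assuming a simple throwing check (this is not TRTorch's exact macro body):

#include <sstream>
#include <stdexcept>

// EXAMPLE_CHECK(cond, "value " << v << " missing"): the msg argument is a
// << chain that the macro splices after an internal ostringstream.
#define EXAMPLE_CHECK(cond, msg)          \
  do {                                    \
    if (!(cond)) {                        \
      std::ostringstream ss;              \
      ss << msg;                          \
      throw std::runtime_error(ss.str()); \
    }                                     \
  } while (0)

int main() {
  int found = 0;
  EXAMPLE_CHECK(found == 0, "Could not find value " << found << " in map\n");
}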

tests/core/partitioning/BUILD

Lines changed: 20 additions & 1 deletion
@@ -11,7 +11,9 @@ filegroup(
     name = "jit_models",
     srcs = ["//tests/modules:resnet50_traced.jit.pt",
             "//tests/modules:mobilenet_v2_traced.jit.pt",
-            "//tests/modules:conditional_scripted.jit.pt"]
+            "//tests/modules:conditional_scripted.jit.pt",
+            "//tests/modules:loop_fallback_eval_scripted.jit.pt",
+            "//tests/modules:loop_fallback_no_eval_scripted.jit.pt"]
 )
 
 partitioning_test(
@@ -46,6 +48,22 @@ cc_test(
     ]
 )
 
+cc_test(
+    name = "test_loop_fallback",
+    srcs = ["test_loop_fallback.cpp"],
+    deps = [
+        "//tests/util",
+        "//core",
+        "@googletest//:gtest_main",
+    ] + select({
+        ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
+        "//conditions:default": ["@libtorch//:libtorch"],
+    }),
+    data = [
+        ":jit_models"
+    ]
+)
+
 cc_test(
     name = "test_conditionals",
     srcs = ["test_conditionals.cpp"],
@@ -70,6 +88,7 @@ test_suite(
         ":test_tensorrt_conversion",
         ":test_stitched_graph",
        ":test_fallback_graph_output",
+        ":test_loop_fallback",
         ":test_conditionals"
     ]
 )
tests/core/partitioning/test_loop_fallback.cpp

Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+#include <string>
+#include <unordered_set>
+#include "core/compiler.h"
+#include "gtest/gtest.h"
+#include "tests/util/util.h"
+#include "torch/script.h"
+
+TEST(Partitioning, CheckLoopFallbackEvalCompilesCorrectly) {
+  torch::jit::script::Module mod;
+  try {
+    mod = torch::jit::load("tests/modules/loop_fallback_eval_scripted.jit.pt");
+  } catch (const c10::Error& e) {
+    std::cerr << "error loading the model\n";
+    return;
+  }
+
+  const std::vector<std::vector<int64_t>> input_shapes = {{1, 10}};
+  std::vector<torch::jit::IValue> jit_inputs_ivalues;
+  std::vector<torch::jit::IValue> trt_inputs_ivalues;
+  for (auto in_shape : input_shapes) {
+    auto in = at::randint(5, in_shape, {at::kCUDA});
+    jit_inputs_ivalues.push_back(in.clone());
+    trt_inputs_ivalues.push_back(in.clone());
+  }
+
+  std::vector<trtorch::core::ir::Input> input_ranges{trtorch::core::ir::Input({1, 10})};
+  trtorch::core::CompileSpec cfg(input_ranges);
+  cfg.partition_info.enabled = true;
+
+  auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
+  auto trt_mod = trtorch::core::CompileGraph(mod, cfg);
+  auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
+  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results, trt_results, 2e-6));
+}
+
+TEST(Partitioning, CheckLoopFallbackNoEvalCompilesCorrectly) {
+  torch::jit::script::Module mod;
+  try {
+    mod = torch::jit::load("tests/modules/loop_fallback_no_eval_scripted.jit.pt");
+  } catch (const c10::Error& e) {
+    std::cerr << "error loading the model\n";
+    return;
+  }
+
+  const std::vector<std::vector<int64_t>> input_shapes = {{1, 10}};
+  std::vector<torch::jit::IValue> jit_inputs_ivalues;
+  std::vector<torch::jit::IValue> trt_inputs_ivalues;
+  for (auto in_shape : input_shapes) {
+    auto in = at::randint(5, in_shape, {at::kCUDA});
+    jit_inputs_ivalues.push_back(in.clone());
+    trt_inputs_ivalues.push_back(in.clone());
+  }
+
+  std::vector<trtorch::core::ir::Input> input_ranges{trtorch::core::ir::Input({1, 10})};
+  trtorch::core::CompileSpec cfg(input_ranges);
+  cfg.partition_info.enabled = true;
+
+  auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
+  auto trt_mod = trtorch::core::CompileGraph(mod, cfg);
+  auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
+  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results, trt_results, 2e-6));
+}
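With the cc_test target added above, this test should be runnable on its own via something like bazel test //tests/core/partitioning:test_loop_fallback (target path inferred from the BUILD file; the exact invocation depends on the repo's configured build flags).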

tests/modules/hub.py

Lines changed: 32 additions & 0 deletions
@@ -129,6 +129,38 @@ def forward(self, x):
 torch.jit.save(module_fallback_script_model, "module_fallback_scripted.jit.pt")
 
 
+# Sample Looping Modules (for loop fallback testing)
+class LoopFallbackEval(nn.Module):
+
+    def __init__(self):
+        super(LoopFallbackEval, self).__init__()
+
+    def forward(self, x):
+        add_list = torch.empty(0).to(x.device)
+        for i in range(x.shape[1]):
+            add_list = torch.cat((add_list, torch.tensor([x.shape[1]]).to(x.device)), 0)
+        return x + add_list
+
+
+class LoopFallbackNoEval(nn.Module):
+
+    def __init__(self):
+        super(LoopFallbackNoEval, self).__init__()
+
+    def forward(self, x):
+        for _ in range(x.shape[1]):
+            x = x + torch.ones_like(x)
+        return x
+
+
+loop_fallback_eval_model = LoopFallbackEval().eval().cuda()
+loop_fallback_eval_script_model = torch.jit.script(loop_fallback_eval_model)
+torch.jit.save(loop_fallback_eval_script_model, "loop_fallback_eval_scripted.jit.pt")
+loop_fallback_no_eval_model = LoopFallbackNoEval().eval().cuda()
+loop_fallback_no_eval_script_model = torch.jit.script(loop_fallback_no_eval_model)
+torch.jit.save(loop_fallback_no_eval_script_model, "loop_fallback_no_eval_scripted.jit.pt")
+
+
 # Sample Conditional Model (for testing partitioning and fallback in conditionals)
 class FallbackIf(torch.nn.Module):
 
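These two modules pin down the boundary the partitioner now draws: LoopFallbackEval's loop body only manipulates shape-derived values, so it is intended to satisfy checkLoopEvaluatable and be unrolled at conversion time, while LoopFallbackNoEval adds to the runtime tensor x each iteration, so its loop falls back to Torch. A hedged C++ snippet for inspecting the scripted loop, using only public libtorch API; the file path assumes the module was saved by tests/modules/hub.py and that you run from the repo root:

#include <iostream>
#include "torch/script.h"

int main() {
  // Load the saved TorchScript module and dump its IR: the forward graph
  // contains a prim::Loop node whose body is what checkLoopEvaluatable walks.
  auto mod = torch::jit::load("tests/modules/loop_fallback_no_eval_scripted.jit.pt");
  std::cout << *mod.get_method("forward").graph() << "\n";
  return 0;
}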