Skip to content

Commit 4095999

Browse files
authored
Merge pull request #354 from NVIDIA/fix_zeros
Verify that the zeros evaluator works correctly and detect programs that would result in an empty TensorRT engine
2 parents c9baca5 + 0f783da commit 4095999

File tree

3 files changed

+84
-5
lines changed

3 files changed

+84
-5
lines changed

core/conversion/conversion.cpp

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,30 @@ std::set<std::string> GetUnsupportedOpsInBlock(const torch::jit::Block* b) {
433433
return unsupported_ops;
434434
}
435435

436+
// Recursively collects the schemas of every node in block `b` (including the
// bodies of nested prim::Loop / prim::If sub-blocks) that TRTorch's converter
// registry can map to TensorRT layers.
//
// @param b  JIT block to scan (not modified; may contain nested blocks).
// @return   Set of schema strings, one per convertable node. An empty set
//           means compiling this block would produce an empty TensorRT engine.
std::set<std::string> ConvertableOpsInBlock(const torch::jit::Block* b) {
  std::set<std::string> convertable_ops;
  for (const auto n : b->nodes()) {
    // Hoisted: the original evaluated node_is_convertable(n) twice per node.
    const bool is_convertable = converters::node_is_convertable(n);
    if (n->kind() == torch::jit::prim::Loop || n->kind() == torch::jit::prim::If || is_convertable) {
      // Recurse into sub-blocks (loop/if bodies) so nested convertable ops are
      // counted too. A node with no sub-blocks simply skips this loop.
      for (const auto sub_b : n->blocks()) {
        auto sub_b_convertable_ops = ConvertableOpsInBlock(sub_b);
        convertable_ops.insert(sub_b_convertable_ops.begin(), sub_b_convertable_ops.end());
      }
      if (is_convertable) {
        auto schema = n->maybeSchema();
        TRTORCH_CHECK(
            schema, "Unable to get schema for Node " << util::node_info(n) << " (conversion.CheckForConvertableOps)");
        std::stringstream ss;
        ss << *schema;
        convertable_ops.insert(ss.str());
      }
    }
  }
  return convertable_ops;
}
459+
436460
bool VerifyConverterSupportForBlock(const torch::jit::Block* b) {
437461
auto unsupported_ops = GetUnsupportedOpsInBlock(b);
438462

@@ -448,7 +472,21 @@ bool VerifyConverterSupportForBlock(const torch::jit::Block* b) {
448472
unsupported_msg << "https://www.github.com/nvidia/TRTorch/issues" << std::endl;
449473
LOG_ERROR(unsupported_msg.str());
450474
return false;
451-
} else {
475+
}
476+
477+
if (ConvertableOpsInBlock(b).size() == 0) {
478+
std::stringstream unsupported_msg;
479+
unsupported_msg
480+
<< "Method requested cannot be compiled by TRTorch.\nThere is no work to be done since the resulting compiled program will contain an engine that is empty."
481+
<< std::endl;
482+
unsupported_msg
483+
<< "This may be because there are no operators that can be added to the TensorRT graph or all operators have a resolved compile time value."
484+
<< std::endl;
485+
LOG_ERROR(unsupported_msg.str());
486+
return false;
487+
}
488+
489+
else {
452490
return true;
453491
}
454492
}

core/conversion/evaluators/aten.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,12 @@ auto aten_registrations TRTORCH_UNUSED =
118118
// aten::zeros(int[] size, *, int? dtype=None, int? layout=None,
119119
// Device? device=None, bool? pin_memory=None) -> (Tensor)
120120
[](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
121-
auto options = torch::TensorOptions()
122-
.dtype(c10::ScalarType(args.at(n->output(1)).unwrapToInt()))
123-
.layout(torch::kStrided)
124-
.device(torch::kCUDA);
121+
auto options = torch::TensorOptions().layout(torch::kStrided).device(torch::kCUDA);
122+
123+
// Input 1 here is the dtype
124+
if (!args.at(n->input(1)).isNone() && !args.at(n->input(1)).IValue()->isNone()) {
125+
options = options.dtype(c10::ScalarType(args.at(n->input(1)).unwrapToInt()));
126+
}
125127

126128
auto out_tensor = torch::zeros(args.at(n->input(0)).unwrapToIntList().vec(), options);
127129
return out_tensor;

tests/core/conversion/evaluators/test_aten_evaluators.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,43 @@ TEST(Evaluators, DivFloatEvaluatesCorrectly) {
3636
auto trt_results = trtorch::tests::util::EvaluateGraph(g->block(), {});
3737

3838
ASSERT_TRUE(jit_results[0] == trt_results[0]);
39+
}
40+
41+
// Checks that the aten::zeros evaluator matches the JIT interpreter when every
// optional argument (dtype, layout, device, pin_memory) is None (%2).
TEST(Evaluators, ZerosEvaluatesCorrectly) {
42+
const auto graph = R"IR(
43+
graph(%x.1 : Tensor):
44+
%2 : None = prim::Constant() # :0:0
45+
%3 : int[] = aten::size(%x.1) # <string>:7:9
46+
%z.1 : Tensor = aten::zeros(%3, %2, %2, %2, %2) # experiments/test_zeros.py:8:12
47+
return (%z.1))IR";
48+
49+
// The input only supplies the shape that aten::size feeds into aten::zeros;
// its random values are never read.
auto in = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA});
50+
51+
// Parse the textual IR above into a JIT graph.
auto g = std::make_shared<torch::jit::Graph>();
52+
torch::jit::parseIR(graph, &*g);
53+
54+
// Run the graph through the JIT interpreter and through the TRTorch
// evaluator path, then compare the resulting tensors element-wise.
auto jit_results = trtorch::tests::util::EvaluateGraphJIT(g, {in});
55+
auto trt_results = trtorch::tests::util::EvaluateGraph(g->block(), {in});
56+
57+
ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
58+
}
59+
60+
// Checks that the aten::zeros evaluator honors an explicit dtype argument
// (%2, constant 5 — Float16 per the inline IR comment) while the remaining
// optional arguments stay None (%3).
TEST(Evaluators, ZerosDataTypeEvaluatesCorrectly) {
61+
const auto graph = R"IR(
62+
graph(%x.1 : Tensor):
63+
%2 : int = prim::Constant[value=5]() # :0:0 (Float16)
64+
%3 : None = prim::Constant() # :0:0
65+
%4 : int[] = aten::size(%x.1) # <string>:7:9
66+
%z.1 : Tensor = aten::zeros(%4, %2, %3, %3, %3) # experiments/test_zeros.py:8:12
67+
return (%z.1))IR";
68+
69+
// The input only supplies the shape that aten::size feeds into aten::zeros;
// its random values are never read.
auto in = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA});
70+
71+
// Parse the textual IR above into a JIT graph.
auto g = std::make_shared<torch::jit::Graph>();
72+
torch::jit::parseIR(graph, &*g);
73+
74+
// Run the graph through the JIT interpreter and through the TRTorch
// evaluator path, then compare the resulting tensors element-wise.
auto jit_results = trtorch::tests::util::EvaluateGraphJIT(g, {in});
75+
auto trt_results = trtorch::tests::util::EvaluateGraph(g->block(), {in});
76+
77+
ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
3978
}

0 commit comments

Comments
 (0)