Skip to content

Commit 375bdfc

Browse files
committed
chore: Clean up and refactor code
Signed-off-by: Dheeraj Peri <[email protected]>
1 parent 86982e1 commit 375bdfc

File tree

8 files changed

+95
-48
lines changed

8 files changed

+95
-48
lines changed

core/compiler.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,6 @@ partitioning::GraphAndMapping BuildHybridGraph(
137137
auto partitioning_info = cfg.partitioning_info;
138138

139139
auto partitioning_ctx = partitioning::PartitioningCtx(block, partitioning_info);
140-
// auto collection_input_ivalues_map =
141-
// partitioning::generateRandomInputs(partitioning_info.collection_input_spec_map, first_use_types);
142140
partitioning_ctx.input_types_map = first_use_types;
143141
partitioning::partition(&partitioning_ctx);
144142

@@ -151,16 +149,7 @@ partitioning::GraphAndMapping BuildHybridGraph(
151149
trt_engine_id << reinterpret_cast<const int*>(&seg_block);
152150

153151
if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) {
154-
// auto shapes = seg_block.in_shapes();
155-
// auto types = seg_block.in_types();
156-
// std::vector<ir::Input> inputs;
157-
// for (size_t i = 0; i < shapes.size(); i++) {
158-
// auto in = ir::Input(shapes[i]);
159-
// in.dtype = util::ScalarTypeToTRTDataType(types[i]);
160-
// inputs.push_back(in);
161-
// }
162152
auto inputs = seg_block.construct_inputs_spec();
163-
LOG_DEBUG("============ INPUTS: " << inputs);
164153
// update the input ranges for each segments
165154
convert_info.inputs = ir::associate_specs_with_inputs(seg_block.g(), inputs, static_params);
166155

core/partitioning/partitioning.cpp

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,20 @@ void segmentGraph(PartitioningCtx* ctx, torch::jit::Block* block) {
436436
return;
437437
}
438438

439+
bool isInputDynamic(PartitioningCtx* ctx) {
440+
// Check if inputs have dynamic shapes
441+
bool input_is_dynamic = true;
442+
auto inputs_map = ctx->settings.collection_input_spec_map;
443+
for (auto inputs : inputs_map) {
444+
for (auto input : inputs.second) {
445+
if (!input.input_is_dynamic) {
446+
input_is_dynamic = false;
447+
}
448+
}
449+
}
450+
return input_is_dynamic;
451+
}
452+
439453
void partition(PartitioningCtx* ctx) {
440454
LOG_DEBUG(ctx->settings);
441455

@@ -446,24 +460,33 @@ void partition(PartitioningCtx* ctx) {
446460

447461
// It's possible that some TensorRT blocks have nonTensor inputs/outputs because they are interleaved by Torch blocks
448462
// resolve nonTensor inputs/outputs
463+
LOG_DEBUG("Resolving non-tensor inputs for segmented blocks");
449464
resolveTRTNonTensorInputs(ctx, block);
450465

451466
// register input/output torch::jit::Value for segmented graphs
452467
LOG_DEBUG("Registering input/output torch::jit::Value for segmented graphs");
453468
registerSegmentsOutputs(ctx, block);
454469

455-
// run shape analysis on each segmented block
456-
auto min_input_ivalues_map =
457-
partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "min");
458-
auto opt_input_ivalues_map =
459-
partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "opt");
460-
auto max_input_ivalues_map =
461-
partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "max");
462-
463-
runShapeAnalysis(ctx, block, min_input_ivalues_map, "min");
464-
runShapeAnalysis(ctx, block, opt_input_ivalues_map, "opt");
465-
runShapeAnalysis(ctx, block, max_input_ivalues_map, "max");
466-
470+
// In case of dynamic shape inputs, run shape analysis on each segmented block for min/opt/max ranges and register
471+
// output shapes for each block accordingly
472+
if (isInputDynamic(ctx)) {
473+
LOG_DEBUG("Performing shape analysis for segmented blocks using min/opt/max shapes for inputs");
474+
auto min_input_ivalues_map =
475+
partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "min");
476+
auto opt_input_ivalues_map =
477+
partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "opt");
478+
auto max_input_ivalues_map =
479+
partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "max");
480+
481+
runShapeAnalysis(ctx, block, min_input_ivalues_map, "min");
482+
runShapeAnalysis(ctx, block, opt_input_ivalues_map, "opt");
483+
runShapeAnalysis(ctx, block, max_input_ivalues_map, "max");
484+
} else {
485+
LOG_DEBUG("Performing shape analysis for segmented blocks using static shapes for inputs");
486+
auto opt_input_ivalues_map =
487+
partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "opt");
488+
runShapeAnalysis(ctx, block, opt_input_ivalues_map, "opt");
489+
}
467490
}
468491
}
469492

core/partitioning/partitioning.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,16 @@ typedef std::unordered_map<const torch::jit::Value*, c10::IValue> ExampleIValues
1818
typedef std::pair<std::shared_ptr<torch::jit::Graph>, std::unordered_map<torch::jit::Value*, torch::jit::Value*>>
1919
GraphAndMapping;
2020

21-
ExampleIValues generateRandomInputs(ir::CollectionInputSpecMap& input_ranges, ir::CollectionTypeMap& input_types, const std::string& shape_mode = std::string("opt"));
22-
23-
void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& ivalues_maps, const std::string& shape_mode);
21+
ExampleIValues generateRandomInputs(
22+
ir::CollectionInputSpecMap& input_ranges,
23+
ir::CollectionTypeMap& input_types,
24+
const std::string& shape_mode = std::string("opt"));
25+
26+
void runShapeAnalysis(
27+
PartitioningCtx* ctx,
28+
torch::jit::Block* block,
29+
ExampleIValues& ivalues_maps,
30+
const std::string& shape_mode);
2431

2532
void segmentGraph(PartitioningCtx* ctx, torch::jit::Block* block);
2633

core/partitioning/segmentedblock/SegmentedBlock.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,14 @@ torch::jit::Value* SegmentedBlock::getOrAddInputForValue(torch::jit::Value* old_
5959

6060
std::vector<ir::Input> SegmentedBlock::construct_inputs_spec() const {
6161
std::vector<ir::Input> inputs;
62-
if (min_shapes_.size() == opt_shapes_.size() && opt_shapes_.size() == max_shapes_.size()){
63-
LOG_DEBUG("====== IS DYNAMIC ====");
64-
for (uint64_t i=0; i < opt_shapes_.size(); i++){
62+
if (min_shapes_.size() == opt_shapes_.size() && opt_shapes_.size() == max_shapes_.size()) {
63+
for (uint64_t i = 0; i < opt_shapes_.size(); i++) {
6564
auto in = ir::Input(min_shapes_[i], opt_shapes_[i], max_shapes_[i]);
6665
in.dtype = util::ScalarTypeToTRTDataType(in_types_[i]);
6766
inputs.push_back(in);
6867
}
6968
} else {
70-
LOG_DEBUG("====== IS STATIC ====");
71-
for (uint64_t i=0; i < opt_shapes_.size(); i++){
69+
for (uint64_t i = 0; i < opt_shapes_.size(); i++) {
7270
auto in = ir::Input(opt_shapes_[i]);
7371
in.dtype = util::ScalarTypeToTRTDataType(in_types_[i]);
7472
inputs.push_back(in);

core/partitioning/segmentedblock/SegmentedBlock.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,11 @@ struct SegmentedBlock {
7474
return old_to_new_.count(input);
7575
}
7676
void register_inshapes(std::vector<std::vector<int64_t>>& in_shapes, const std::string& shape_mode) {
77-
if (shape_mode.compare("min") == 0){
77+
if (shape_mode.compare("min") == 0) {
7878
min_shapes_ = in_shapes;
79-
} else if(shape_mode.compare("opt") == 0){
79+
} else if (shape_mode.compare("opt") == 0) {
8080
opt_shapes_ = in_shapes;
81-
} else{
81+
} else {
8282
max_shapes_ = in_shapes;
8383
}
8484
}

core/partitioning/shape_analysis.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@ namespace torch_tensorrt {
99
namespace core {
1010
namespace partitioning {
1111

12-
at::Tensor generateSingleInput(ir::Input& input, c10::optional<at::ScalarType>& type_opt, const std::string& shape_mode) {
12+
at::Tensor generateSingleInput(
13+
ir::Input& input,
14+
c10::optional<at::ScalarType>& type_opt,
15+
const std::string& shape_mode) {
1316
nvinfer1::Dims input_shape = input.input_shape;
14-
if (input.input_is_dynamic){
15-
if (shape_mode.compare("min") == 0){
17+
if (input.input_is_dynamic) {
18+
if (shape_mode.compare("min") == 0) {
1619
input_shape = input.min;
17-
} else if(shape_mode.compare("opt") == 0){
20+
} else if (shape_mode.compare("opt") == 0) {
1821
input_shape = input.opt;
1922
} else {
2023
input_shape = input.max;
@@ -188,7 +191,11 @@ void getSegmentsOutputByRunning(
188191
seg_block.register_intypes(input_types);
189192
}
190193

191-
void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& example_tensor_map, const std::string& shape_mode) {
194+
void runShapeAnalysis(
195+
PartitioningCtx* ctx,
196+
torch::jit::Block* block,
197+
ExampleIValues& example_tensor_map,
198+
const std::string& shape_mode) {
192199
// register every segment's input shape, and it's running output IValues
193200
for (auto& seg_block : ctx->partitioned_blocks[block]) {
194201
torch::jit::ConstantPooling(seg_block.g());

tests/cpp/BUILD

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ test_suite(
1515
":test_collections",
1616
":test_compiled_modules",
1717
":test_default_input_types",
18+
":test_dynamic_fallback",
1819
":test_example_tensors",
1920
":test_module_fallback",
20-
":test_dynamic_fallback",
2121
":test_modules_as_engines",
2222
":test_multiple_registered_engines",
2323
":test_runtime_thread_safety",
@@ -31,9 +31,9 @@ test_suite(
3131
":test_collections",
3232
":test_compiled_modules",
3333
":test_default_input_types",
34+
":test_dynamic_fallback",
3435
":test_example_tensors",
3536
":test_module_fallback",
36-
":test_dynamic_fallback",
3737
":test_modules_as_engines",
3838
":test_multiple_registered_engines",
3939
":test_runtime_thread_safety",

tests/cpp/test_dynamic_fallback.cpp

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include "torch/script.h"
55
#include "torch_tensorrt/torch_tensorrt.h"
66

7-
TEST(CppAPITest, ResNet50DynamicFallbackGraphCorrectly) {
7+
TEST(CppAPITest, ResNet18DynamicBatchFallbackCorrectly) {
88
torch::jit::script::Module mod;
99
try {
1010
mod = torch::jit::load("tests/modules/resnet18_scripted.jit.pt");
@@ -16,17 +16,40 @@ TEST(CppAPITest, ResNet50DynamicFallbackGraphCorrectly) {
1616
const std::vector<std::vector<int64_t>> input_shapes = {{1, 3, 224, 224}, {4, 3, 224, 224}, {8, 3, 224, 224}};
1717
std::vector<torch::jit::IValue> jit_inputs_ivalues;
1818
std::vector<torch::jit::IValue> trt_inputs_ivalues;
19-
auto in = at::randint(5, input_shapes[0], {at::kCUDA});
20-
jit_inputs_ivalues.push_back(in.clone());
21-
trt_inputs_ivalues.push_back(in.clone());
19+
auto in_bs1 = at::randint(5, input_shapes[0], {at::kCUDA});
20+
jit_inputs_ivalues.push_back(in_bs1.clone());
21+
trt_inputs_ivalues.push_back(in_bs1.clone());
2222

2323
std::vector<torch_tensorrt::Input> inputs;
2424
inputs.push_back(torch_tensorrt::Input(input_shapes[0], input_shapes[1], input_shapes[2]));
2525
torch_tensorrt::ts::CompileSpec cfg(inputs);
2626
cfg.torch_executed_ops.push_back("aten::add");
2727

28-
auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
28+
auto jit_results_bs1 = mod.forward(jit_inputs_ivalues).toTensor();
29+
// Compile and build the hybrid graph with dynamic shapes
2930
auto trt_mod = torch_tensorrt::ts::compile(mod, cfg);
30-
auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
31-
ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
31+
auto trt_results_bs1 = trt_mod.forward(trt_inputs_ivalues).toTensor();
32+
ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs1, trt_results_bs1));
33+
jit_inputs_ivalues.clear();
34+
trt_inputs_ivalues.clear();
35+
36+
// Run with batch size of 4
37+
auto in_bs4 = at::randint(5, input_shapes[1], {at::kCUDA});
38+
jit_inputs_ivalues.push_back(in_bs4.clone());
39+
trt_inputs_ivalues.push_back(in_bs4.clone());
40+
41+
auto jit_results_bs4 = mod.forward(jit_inputs_ivalues).toTensor();
42+
auto trt_results_bs4 = trt_mod.forward(trt_inputs_ivalues).toTensor();
43+
ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs4, trt_results_bs4));
44+
jit_inputs_ivalues.clear();
45+
trt_inputs_ivalues.clear();
46+
47+
// Run with batch size of 8
48+
auto in_bs8 = at::randint(5, input_shapes[2], {at::kCUDA});
49+
jit_inputs_ivalues.push_back(in_bs8.clone());
50+
trt_inputs_ivalues.push_back(in_bs8.clone());
51+
52+
auto jit_results_bs8 = mod.forward(jit_inputs_ivalues).toTensor();
53+
auto trt_results_bs8 = trt_mod.forward(trt_inputs_ivalues).toTensor();
54+
ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results_bs8, trt_results_bs8));
3255
}

0 commit comments

Comments
 (0)