
Commit 8e60a54
Parent: a558e2a

feat: Initial implementation of dynamic shapes + fallback

Signed-off-by: Dheeraj Peri <[email protected]>

8 files changed: +110 additions, -40 deletions

core/compiler.cpp
Lines changed: 14 additions & 12 deletions

@@ -137,10 +137,10 @@ partitioning::GraphAndMapping BuildHybridGraph(
   auto partitioning_info = cfg.partitioning_info;

   auto partitioning_ctx = partitioning::PartitioningCtx(block, partitioning_info);
-  auto collection_input_ivalues_map =
-      partitioning::generateRandomInputs(partitioning_info.collection_input_spec_map, first_use_types);
-
-  partitioning::partition(&partitioning_ctx, collection_input_ivalues_map);
+  // auto collection_input_ivalues_map =
+  //     partitioning::generateRandomInputs(partitioning_info.collection_input_spec_map, first_use_types);
+  partitioning_ctx.input_types_map = first_use_types;
+  partitioning::partition(&partitioning_ctx);

   for (auto& partitioned_block : partitioning_ctx.partitioned_blocks) {
     partitioning::PartitionedGraph& segmented_blocks = partitioned_block.second;

@@ -151,14 +151,16 @@ partitioning::GraphAndMapping BuildHybridGraph(
       trt_engine_id << reinterpret_cast<const int*>(&seg_block);

       if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) {
-        auto shapes = seg_block.in_shapes();
-        auto types = seg_block.in_types();
-        std::vector<ir::Input> inputs;
-        for (size_t i = 0; i < shapes.size(); i++) {
-          auto in = ir::Input(shapes[i]);
-          in.dtype = util::ScalarTypeToTRTDataType(types[i]);
-          inputs.push_back(in);
-        }
+        // auto shapes = seg_block.in_shapes();
+        // auto types = seg_block.in_types();
+        // std::vector<ir::Input> inputs;
+        // for (size_t i = 0; i < shapes.size(); i++) {
+        //   auto in = ir::Input(shapes[i]);
+        //   in.dtype = util::ScalarTypeToTRTDataType(types[i]);
+        //   inputs.push_back(in);
+        // }
+        auto inputs = seg_block.construct_inputs_spec();
+        LOG_DEBUG("============ INPUTS: " << inputs);
         // update the input ranges for each segments
         convert_info.inputs = ir::associate_specs_with_inputs(seg_block.g(), inputs, static_params);
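For orientation, construct_inputs_spec() folds the per-segment min/opt/max shapes recorded during shape analysis into ir::Input specs. A minimal sketch of the dynamic-range constructor it relies on, assuming the core/ir and core/util headers are in scope (the concrete shape values are made up):

// Sketch only: one dynamic-range input spec built by hand, mirroring what
// SegmentedBlock::construct_inputs_spec() emits per segment input.
std::vector<int64_t> min_shape = {1, 3, 224, 224};   // hypothetical minimum shape
std::vector<int64_t> opt_shape = {8, 3, 224, 224};   // hypothetical optimal shape
std::vector<int64_t> max_shape = {32, 3, 224, 224};  // hypothetical maximum shape
auto in = ir::Input(min_shape, opt_shape, max_shape);
in.dtype = util::ScalarTypeToTRTDataType(at::kFloat);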

core/partitioning/partitioning.cpp
Lines changed: 12 additions & 2 deletions

@@ -436,7 +436,7 @@ void segmentGraph(PartitioningCtx* ctx, torch::jit::Block* block) {
   return;
 }

-void partition(PartitioningCtx* ctx, ExampleIValues& example_tensor_map) {
+void partition(PartitioningCtx* ctx) {
   LOG_DEBUG(ctx->settings);

   // Go through all the blocks to do the partitioning

@@ -453,7 +453,17 @@ void partition(PartitioningCtx* ctx, ExampleIValues& example_tensor_map) {
     registerSegmentsOutputs(ctx, block);

     // run shape analysis on each segmented block
-    runShapeAnalysis(ctx, block, example_tensor_map);
+    auto min_input_ivalues_map =
+        partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "min");
+    auto opt_input_ivalues_map =
+        partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "opt");
+    auto max_input_ivalues_map =
+        partitioning::generateRandomInputs(ctx->settings.collection_input_spec_map, ctx->input_types_map, "max");
+
+    runShapeAnalysis(ctx, block, min_input_ivalues_map, "min");
+    runShapeAnalysis(ctx, block, opt_input_ivalues_map, "opt");
+    runShapeAnalysis(ctx, block, max_input_ivalues_map, "max");
+
   }
 }
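Read as a loop, the new shape-analysis flow inside partition() amounts to the following. This condensed form is illustrative only and assumes the same ctx and block variables as the hunk above:

// Illustrative condensation of the three min/opt/max passes.
for (const std::string shape_mode : {"min", "opt", "max"}) {
  auto ivalues_map = partitioning::generateRandomInputs(
      ctx->settings.collection_input_spec_map, ctx->input_types_map, shape_mode);
  runShapeAnalysis(ctx, block, ivalues_map, shape_mode);
}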

core/partitioning/partitioning.h
Lines changed: 4 additions & 4 deletions

@@ -13,20 +13,20 @@ namespace torch_tensorrt {
 namespace core {
 namespace partitioning {

-typedef std::unordered_map<const torch::jit::Value*, torch::jit::IValue> ExampleIValues;
+typedef std::unordered_map<const torch::jit::Value*, c10::IValue> ExampleIValues;

 typedef std::pair<std::shared_ptr<torch::jit::Graph>, std::unordered_map<torch::jit::Value*, torch::jit::Value*>>
     GraphAndMapping;

-ExampleIValues generateRandomInputs(ir::CollectionInputSpecMap& input_ranges, ir::CollectionTypeMap& input_types);
+ExampleIValues generateRandomInputs(ir::CollectionInputSpecMap& input_ranges, ir::CollectionTypeMap& input_types, const std::string& shape_mode = std::string("opt"));

-void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& ivalues_maps);
+void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& ivalues_maps, const std::string& shape_mode);

 void segmentGraph(PartitioningCtx* ctx, torch::jit::Block* block);

 GraphAndMapping stitch(PartitioningCtx* ctx, torch::jit::Block* block);

-void partition(PartitioningCtx* ctx, ExampleIValues& example_tensor_map);
+void partition(PartitioningCtx* ctx);

 } // namespace partitioning
 } // namespace core
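Because shape_mode defaults to "opt", call sites that only care about a single representative shape can keep the old two-argument form. A small sketch, assuming input_ranges and input_types are already-populated collection maps:

// Equivalent under the new default argument (sketch).
auto ivalues_default = generateRandomInputs(input_ranges, input_types);        // implicitly "opt"
auto ivalues_opt     = generateRandomInputs(input_ranges, input_types, "opt"); // explicitly "opt"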

core/partitioning/partitioningctx/PartitioningCtx.h
Lines changed: 1 addition & 0 deletions

@@ -60,6 +60,7 @@ struct PartitioningCtx {
   bool shouldNodeRunInTorch(torch::jit::Node* n);
   bool shouldNodeRunInTensorRT(torch::jit::Node* n);
   std::vector<torch::jit::Node*> getNodesRunInTorch();
+  std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>> input_types_map;

  private:
   void _load_nodes_into_decision_map(torch::jit::Block* b);
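The new member lets the caller hand first-use type information to the partitioner instead of pre-building example tensors. A hedged sketch of the intended usage, with block and partitioning_info as in BuildHybridGraph and a single graph input assumed:

// Sketch: seed the context with input types, then let partition() generate
// min/opt/max example tensors internally.
partitioning::PartitioningCtx ctx(block, partitioning_info);
const torch::jit::Value* graph_input = block->inputs()[0];  // assumes one input
ctx.input_types_map[graph_input] = {c10::optional<at::ScalarType>(at::kFloat)};
partitioning::partition(&ctx);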

core/partitioning/segmentedblock/SegmentedBlock.cpp
Lines changed: 21 additions & 0 deletions

@@ -1,4 +1,5 @@
 #include "SegmentedBlock.h"
+#include "core/util/prelude.h"

 namespace torch_tensorrt {
 namespace core {

@@ -56,6 +57,26 @@ torch::jit::Value* SegmentedBlock::getOrAddInputForValue(torch::jit::Value* old_
   }
 }

+std::vector<ir::Input> SegmentedBlock::construct_inputs_spec() const {
+  std::vector<ir::Input> inputs;
+  if (min_shapes_.size() == opt_shapes_.size() && opt_shapes_.size() == max_shapes_.size()){
+    LOG_DEBUG("====== IS DYNAMIC ====");
+    for (uint64_t i=0; i < opt_shapes_.size(); i++){
+      auto in = ir::Input(min_shapes_[i], opt_shapes_[i], max_shapes_[i]);
+      in.dtype = util::ScalarTypeToTRTDataType(in_types_[i]);
+      inputs.push_back(in);
+    }
+  } else {
+    LOG_DEBUG("====== IS STATIC ====");
+    for (uint64_t i=0; i < opt_shapes_.size(); i++){
+      auto in = ir::Input(opt_shapes_[i]);
+      in.dtype = util::ScalarTypeToTRTDataType(in_types_[i]);
+      inputs.push_back(in);
+    }
+  }
+  return inputs;
+}
+
 torch::jit::Node* SegmentedBlock::cloneNode(torch::jit::Node* node) {
   auto* block = g_->block();
   auto env = [&](torch::jit::Value* v) { return getOrAddInputForValue(v); };
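An end-to-end sketch of the new SegmentedBlock flow: register shapes for all three modes plus the input types, then ask for the spec. Here seg_block stands for an existing SegmentedBlock and all shape values are invented for illustration:

// Sketch: with min/opt/max registered at matching arity, construct_inputs_spec()
// takes the dynamic branch and emits one ranged ir::Input per segment input.
std::vector<std::vector<int64_t>> min_shapes = {{1, 3, 224, 224}};
std::vector<std::vector<int64_t>> opt_shapes = {{8, 3, 224, 224}};
std::vector<std::vector<int64_t>> max_shapes = {{32, 3, 224, 224}};
std::vector<at::ScalarType> types = {at::kFloat};

seg_block.register_inshapes(min_shapes, "min");
seg_block.register_inshapes(opt_shapes, "opt");
seg_block.register_inshapes(max_shapes, "max");
seg_block.register_intypes(types);

auto specs = seg_block.construct_inputs_spec();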

core/partitioning/segmentedblock/SegmentedBlock.h
Lines changed: 16 additions & 6 deletions

@@ -35,6 +35,7 @@ struct SegmentedBlock {
   SegmentedBlock(BlockID id, SegmentedBlockTarget blk_target, const std::vector<torch::jit::Node*>& nodes);

   torch::jit::Value* getOrAddInputForValue(torch::jit::Value* v);
+  std::vector<ir::Input> construct_inputs_spec() const;
   torch::jit::Node* cloneNode(torch::jit::Node* node);
   void appendNode(torch::jit::Node* n) {
     cloneNode(n);

@@ -72,18 +73,25 @@ struct SegmentedBlock {
   bool contain_raw_value(torch::jit::Value* input) const {
     return old_to_new_.count(input);
   }
-  void register_inshapes(std::vector<ir::Input>& in_shapes) {
-    in_shapes_ = in_shapes;
-  }
-  const std::vector<ir::Input>& in_shapes() const {
-    return in_shapes_;
+  void register_inshapes(std::vector<std::vector<int64_t>>& in_shapes, const std::string& shape_mode) {
+    if (shape_mode.compare("min") == 0){
+      min_shapes_ = in_shapes;
+    } else if(shape_mode.compare("opt") == 0){
+      opt_shapes_ = in_shapes;
+    } else{
+      max_shapes_ = in_shapes;
+    }
   }
+  // const std::vector<ir::Input>& in_shapes() const {
+  //   return in_shapes_;
+  // }
   void register_intypes(std::vector<at::ScalarType>& in_types) {
     in_types_ = in_types;
   }
   const std::vector<at::ScalarType>& in_types() const {
     return in_types_;
   }
+
   void update_id(BlockID new_id) {
     id_ = new_id;
   }

@@ -99,7 +107,9 @@ struct SegmentedBlock {
  private:
   BlockID id_;
   SegmentedBlockTarget target_;
-  std::vector<ir::Input> in_shapes_;
+  std::vector<std::vector<int64_t>> min_shapes_;
+  std::vector<std::vector<int64_t>> opt_shapes_;
+  std::vector<std::vector<int64_t>> max_shapes_;
   std::vector<at::ScalarType> in_types_;
   std::vector<torch::jit::Value*> inputs_;
   std::vector<torch::jit::Value*> outputs_;
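If the three shape vectors do not line up (for example, only "opt" was ever registered), construct_inputs_spec() falls back to the static branch and builds plain ir::Input(opt_shapes_[i]) specs. A brief sketch with the same hypothetical seg_block as above:

// Sketch: only "opt" registered, so min/max stay empty, the size check fails,
// and static single-shape specs are produced.
std::vector<std::vector<int64_t>> opt_only = {{4, 3, 224, 224}};
std::vector<at::ScalarType> types = {at::kFloat};
seg_block.register_inshapes(opt_only, "opt");
seg_block.register_intypes(types);
auto static_specs = seg_block.construct_inputs_spec();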

core/partitioning/shape_analysis.cpp
Lines changed: 25 additions & 16 deletions

@@ -9,25 +9,33 @@ namespace torch_tensorrt {
 namespace core {
 namespace partitioning {

-at::Tensor generateSingleInput(ir::Input& input, c10::optional<at::ScalarType>& type_opt) {
-  auto cur_shape = input.input_shape;
-  std::vector<int64_t> shape;
-  shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims);
-  // auto type_opt = types[input.first][i];
+at::Tensor generateSingleInput(ir::Input& input, c10::optional<at::ScalarType>& type_opt, const std::string& shape_mode) {
+  nvinfer1::Dims input_shape = input.input_shape;
+  if (input.input_is_dynamic){
+    if (shape_mode.compare("min") == 0){
+      input_shape = input.min;
+    } else if(shape_mode.compare("opt") == 0){
+      input_shape = input.opt;
+    } else {
+      input_shape = input.max;
+    }
+  }
+
   auto type = at::kFloat;
   if (type_opt) {
     type = type_opt.value();
   } else {
     LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32");
   }
-  auto in = at::randint(5, shape, {at::kCUDA}).to(type);
-  // ivalue_map[input.first] = in.clone();
+  auto in = at::randint(5, util::toVec(input_shape), {at::kCUDA}).to(type);
+
   return in;
 }

 std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
     std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>>& inputs,
-    std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& types) {
+    std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& types,
+    const std::string& shape_mode) {
   // generate random inputs for running pytorch segments
   std::unordered_map<const torch::jit::Value*, torch::jit::IValue> ivalue_map;

@@ -36,21 +44,21 @@ std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomI
       c10::TypePtr elementType = c10::TensorType::get();
       auto generic_list = c10::impl::GenericList(elementType);
       for (size_t i = 0; i < input.second.size(); i++) {
-        auto in = generateSingleInput(input.second[i], types[input.first][i]);
+        auto in = generateSingleInput(input.second[i], types[input.first][i], shape_mode);
         generic_list.push_back(in.clone());
       }
       ivalue_map[input.first] = c10::IValue(generic_list);
     } else if (input.first->type()->kind() == torch::jit::TypeKind::TupleType) {
       // create tuple
       std::vector<torch::jit::IValue> list;
       for (size_t i = 0; i < input.second.size(); i++) {
-        auto in = generateSingleInput(input.second[i], types[input.first][i]);
+        auto in = generateSingleInput(input.second[i], types[input.first][i], shape_mode);
         list.push_back(in.clone());
       }
       auto tuple = c10::ivalue::Tuple::create(list); // create tuple ptr
       ivalue_map[input.first] = c10::IValue(tuple);
     } else {
-      auto in = generateSingleInput(input.second[0], types[input.first][0]);
+      auto in = generateSingleInput(input.second[0], types[input.first][0], shape_mode);
       ivalue_map[input.first] = in.clone();
     }
   }

@@ -60,7 +68,8 @@ std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomI
 void getSegmentsOutputByRunning(
     SegmentedBlock& seg_block,
     std::unordered_map<const torch::jit::Value*, torch::jit::IValue>& ivalues_maps,
-    const PartitioningInfo& partitioning_info) {
+    const PartitioningInfo& partitioning_info,
+    const std::string& shape_mode) {
   // create a module to run the graph
   auto g = seg_block.g();
   auto copy_g = g->copy();

@@ -141,7 +150,7 @@ void getSegmentsOutputByRunning(
   }

   // set input shape for each segmented block so we wil use it in conversion process
-  std::vector<ir::Input> input_shapes;
+  std::vector<std::vector<int64_t>> input_shapes;
   std::vector<at::ScalarType> input_types;
   for (auto& i : seg_block.raw_inputs()) {
     if (ivalues_maps[i].isTensor()) {

@@ -175,15 +184,15 @@ void getSegmentsOutputByRunning(
     // TODO: tuple and list inputs in subgraph
   }

-  seg_block.register_inshapes(input_shapes);
+  seg_block.register_inshapes(input_shapes, shape_mode);
   seg_block.register_intypes(input_types);
 }

-void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& example_tensor_map) {
+void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& example_tensor_map, const std::string& shape_mode) {
   // register every segment's input shape, and it's running output IValues
   for (auto& seg_block : ctx->partitioned_blocks[block]) {
     torch::jit::ConstantPooling(seg_block.g());
-    getSegmentsOutputByRunning(seg_block, example_tensor_map, ctx->settings);
+    getSegmentsOutputByRunning(seg_block, example_tensor_map, ctx->settings, shape_mode);
   }
   return;
 }
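In miniature, the tensor generation performed by generateSingleInput for the selected shape mode reduces to a single ATen call. A standalone libtorch sketch (the 1x3x8x8 shape is hypothetical and a CUDA device is assumed to be available):

#include <ATen/ATen.h>
#include <vector>

int main() {
  // Random integer tensor at the chosen (min/opt/max) shape, cast to the
  // recorded input type, as generateSingleInput now does via util::toVec.
  std::vector<int64_t> shape = {1, 3, 8, 8};
  auto t = at::randint(5, shape, {at::kCUDA}).to(at::kFloat);
  return 0;
}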

tests/cpp/BUILD
Lines changed: 17 additions & 0 deletions

@@ -17,6 +17,7 @@ test_suite(
         ":test_default_input_types",
         ":test_example_tensors",
         ":test_module_fallback",
+        ":test_dynamic_fallback",
         ":test_modules_as_engines",
         ":test_multiple_registered_engines",
         ":test_runtime_thread_safety",

@@ -32,6 +33,7 @@ test_suite(
         ":test_default_input_types",
         ":test_example_tensors",
         ":test_module_fallback",
+        ":test_dynamic_fallback",
         ":test_modules_as_engines",
         ":test_multiple_registered_engines",
         ":test_runtime_thread_safety",

@@ -125,6 +127,21 @@ cc_test(
     }),
 )

+cc_test(
+    name = "test_dynamic_fallback",
+    srcs = ["test_dynamic_fallback.cpp"],
+    data = [
+        "//tests/modules:jit_models",
+    ],
+    deps = [
+        "//tests/util",
+        "@googletest//:gtest_main",
+    ] + select({
+        ":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
+        "//conditions:default": ["@libtorch//:libtorch"],
+    }),
+)
+
 cc_test(
     name = "test_collections",
     srcs = ["test_collections.cpp"],
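The new cc_test target references tests/cpp/test_dynamic_fallback.cpp, which is not shown in this diff. Assuming a standard Bazel invocation for this repository, the target can be run on its own with bazel test //tests/cpp:test_dynamic_fallback, or as part of the two test suites updated above.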
