Commit 824b555

Merge branch 'master' of https://github.com/NVIDIA/TRTorch into bowa_fallback

2 parents: 3a72dc3 + 3589730
File tree: 11 files changed, +473 −5 lines

core/conversion/converters/BUILD

Lines changed: 1 addition & 0 deletions

@@ -44,6 +44,7 @@ cc_library(
         "impl/matrix_multiply.cpp",
         "impl/pooling.cpp",
         "impl/reduce.cpp",
+        "impl/replication_pad.cpp",
         "impl/shuffle.cpp",
         "impl/softmax.cpp",
         "impl/unary.cpp",
core/conversion/converters/impl/replication_pad.cpp

Lines changed: 117 additions & 0 deletions

@@ -0,0 +1,117 @@
+#include <ATen/ATen.h>
+#include <vector>
+#include "NvInfer.h"
+#include "core/conversion/converters/converters.h"
+#include "core/util/prelude.h"
+#include "torch/torch.h"
+
+namespace trtorch {
+namespace core {
+namespace conversion {
+namespace converters {
+namespace impl {
+namespace {
+
+bool replication_padXd(ConversionCtx* ctx, const torch::jit::Node* n, args& args, int x_dim) {
+  auto in = args[0].ITensor();
+  auto inDims = in->getDimensions();
+  int64_t inRank = inDims.nbDims;
+  auto padding = args[1].unwrapToIntList().vec();
+  if (padding.size() == 1) {
+    for (int64_t i = 0; i < x_dim * 2 - 1; i++)
+      padding.push_back(padding[0]);
+  }
+  if (inRank == 3) {
+    TRTORCH_CHECK(padding.size() == 2, "3D tensors expect 2 values for padding");
+  } else if (inRank == 4) {
+    TRTORCH_CHECK(padding.size() == 4, "4D tensors expect 4 values for padding");
+  } else if (inRank == 5) {
+    TRTORCH_CHECK(padding.size() == 6, "5D tensors expect 6 values for padding");
+  } else {
+    TRTORCH_THROW_ERROR("Only 3D, 4D, 5D padding with non-constant padding are supported for now");
+  }
+
+  std::vector<nvinfer1::ITensor*> tensors_vec;
+  // input: (N, C, D_in, H_in, W_in).
+  // padding: (padding_left, padding_right, padding_top, padding_bottom, padding_front, padding_back)
+  // When axis is inRank - 1, making W_out = W_in + padding_left + padding_right.
+  // When axis is inRank - 2, making H_out = H_in + padding_top + padding_bottom.
+  // When axis is inRank - 3, making D_out = D_in + padding_front + padding_back.
+  for (int64_t i = 0; i < int(padding.size() / 2); i++) {
+    int64_t axis = inRank - (i + 1); // axis = {inRank - 1, inRank - 2, inRank - 3}
+    int64_t padding_index = i * 2;
+
+    if (padding[padding_index] > 0) { // left/top/front padding value
+      tensors_vec.clear();
+      at::Tensor left_indices = torch::tensor({0}, torch::kInt32);
+      auto indicesTensor = tensor_to_const(ctx, left_indices);
+      auto left_gather_layer = ctx->net->addGather(*in, *indicesTensor, axis);
+      auto left_gather_out = left_gather_layer->getOutput(0);
+      for (int i = 0; i < padding[padding_index]; i++) {
+        tensors_vec.push_back(left_gather_out);
+      }
+      tensors_vec.push_back(in);
+      auto concat_layer = ctx->net->addConcatenation(tensors_vec.data(), tensors_vec.size());
+      concat_layer->setAxis(axis);
+      in = concat_layer->getOutput(0);
+      inDims = in->getDimensions();
+    }
+
+    if (padding[padding_index + 1] > 0) { // right/bottom/back padding value
+      tensors_vec.clear();
+      tensors_vec.push_back(in);
+
+      nvinfer1::ITensor* indicesTensor = NULL;
+      if (inDims.d[axis] == -1) {
+        // Dynamic shape: compute the last valid index (size - 1) along axis at runtime.
+        auto shapeTensor = ctx->net->addShape(*in)->getOutput(0);
+        at::Tensor dimValue = torch::tensor({axis}, torch::kInt32);
+        auto dimTensor = tensor_to_const(ctx, dimValue);
+        auto sizeTensor = ctx->net->addGather(*shapeTensor, *dimTensor, 0)->getOutput(0);
+        auto oneTensor = tensor_to_const(ctx, torch::tensor({1}, torch::kInt32));
+        indicesTensor =
+            ctx->net->addElementWise(*sizeTensor, *oneTensor, nvinfer1::ElementWiseOperation::kSUB)->getOutput(0);
+      } else {
+        auto indices = torch::tensor({inDims.d[axis] - 1}, torch::kInt32);
+        indicesTensor = tensor_to_const(ctx, indices);
+      }
+      auto right_gather_layer = ctx->net->addGather(*in, *indicesTensor, axis);
+      auto right_gather_out = right_gather_layer->getOutput(0);
+
+      for (int i = 0; i < padding[padding_index + 1]; i++) {
+        tensors_vec.push_back(right_gather_out);
+      }
+
+      auto concat_layer = ctx->net->addConcatenation(tensors_vec.data(), tensors_vec.size());
+      concat_layer->setAxis(axis);
+      in = concat_layer->getOutput(0);
+      inDims = in->getDimensions();
+    }
+  }
+
+  auto out = ctx->AssociateValueAndTensor(n->outputs()[0], in);
+  LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+
+  return true;
+}
+
+auto replication_pad_registrations TRTORCH_UNUSED =
+    RegisterNodeConversionPatterns()
+        .pattern({"aten::replication_pad1d(Tensor self, int[2] padding) -> (Tensor)",
+                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+                    replication_padXd(ctx, n, args, 1);
+                    return true;
+                  }})
+        .pattern({"aten::replication_pad2d(Tensor self, int[4] padding) -> (Tensor)",
+                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+                    replication_padXd(ctx, n, args, 2);
+                    return true;
+                  }})
+        .pattern({"aten::replication_pad3d(Tensor self, int[6] padding) -> (Tensor)",
+                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+                    replication_padXd(ctx, n, args, 3);
+                    return true;
+                  }});
+
+} // namespace
+} // namespace impl
+} // namespace converters
+} // namespace conversion
+} // namespace core
+} // namespace trtorch
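
For reference, aten::replication_padXd pads by repeating the input's edge values, which is what the gather-and-concatenate loop above builds in TensorRT. A minimal libtorch sketch (illustrative only, not part of this commit) showing the expected semantics:

#include <torch/torch.h>
#include <iostream>

int main() {
  // 3D input (N, C, W): replication padding repeats edge values.
  auto x = torch::tensor({{{1.0, 2.0, 3.0}}});
  // padding = {left, right}; the converter emulates this by gathering the
  // edge slice along the axis and concatenating copies of it.
  auto y = torch::replication_pad1d(x, {2, 1});
  std::cout << y << std::endl; // expect [[[1, 1, 1, 2, 3, 3]]]
  return 0;
}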

core/conversion/converters/impl/shuffle.cpp

Lines changed: 75 additions & 0 deletions

@@ -125,6 +125,81 @@ static auto shuffle_registrations TRTORCH_UNUSED =
                     auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle->getOutput(0));
                     LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());

+                    return true;
+                  }})
+        .pattern({"aten::pixel_shuffle(Tensor self, int upscale_factor) -> (Tensor)",
+                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+                    auto self = args[0].ITensorOrFreeze(ctx);
+                    auto in_shape = util::toVec(self->getDimensions());
+                    int64_t irank = in_shape.size();
+                    TRTORCH_CHECK(
+                        irank >= 3,
+                        "pixel_shuffle expects input to have at least 3 dimensions, but got input with "
+                            << irank << " dimension(s)");
+                    int64_t upscale_factor = args[1].unwrapToInt();
+                    TRTORCH_CHECK(
+                        upscale_factor > 0,
+                        "pixel_shuffle expects a positive upscale_factor, but got " << upscale_factor);
+                    int64_t upscale_factor_squared = upscale_factor * upscale_factor;
+
+                    const auto NUM_NON_BATCH_DIMS = 3;
+                    const auto self_sizes_batch_end = in_shape.end() - NUM_NON_BATCH_DIMS;
+
+                    int64_t ic = in_shape[irank - 3];
+                    int64_t ih = in_shape[irank - 2];
+                    int64_t iw = in_shape[irank - 1];
+
+                    TRTORCH_CHECK(
+                        ic % upscale_factor_squared == 0,
+                        "pixel_shuffle expects its input's 'channel' dimension to be divisible by the square of "
+                            << "upscale_factor, but input.size(-3)=" << ic << " is not divisible by "
+                            << upscale_factor_squared);
+
+                    int64_t oc = ic / upscale_factor_squared;
+                    int64_t oh = ih * upscale_factor;
+                    int64_t ow = iw * upscale_factor;
+
+                    // First, reshape to split the channels dim from c into 3 separate dims: (oc,
+                    // upscale_factor, upscale_factor). This allows shuffling to be done next by
+                    // permuting dims.
+                    std::vector<int64_t> added_dims_shape(in_shape.begin(), self_sizes_batch_end);
+                    added_dims_shape.insert(added_dims_shape.end(), {oc, upscale_factor, upscale_factor, ih, iw});
+                    auto view_layer = ctx->net->addShuffle(*self);
+                    TRTORCH_CHECK(view_layer, "Unable to create shuffle layer from node: " << *n);
+                    view_layer->setReshapeDimensions(util::toDims(added_dims_shape));
+                    int64_t view_rank = added_dims_shape.size();
+
+                    // Next, shuffle by permuting the new upscale_factor dims alongside the height and width dims.
+                    auto permutation_layer = ctx->net->addShuffle(*view_layer->getOutput(0));
+                    TRTORCH_CHECK(permutation_layer, "Unable to create shuffle layer from node: " << *n);
+                    // std::iota is used to maintain the batch dims within the permutation.
+                    // E.g.: if added_dims_shape is {n1, n2, c, r, r, h, w}, then the new_order is
+                    // {view_rank - 7, view_rank - 6, view_rank - 5, view_rank - 2, view_rank - 4,
+                    //  view_rank - 1, view_rank - 3}
+                    std::vector<int64_t> new_order(in_shape.begin(), self_sizes_batch_end);
+                    std::iota(new_order.begin(), new_order.end(), 0);
+                    new_order.insert(
+                        new_order.end(),
+                        {view_rank - 5 /* oc */,
+                         view_rank - 2 /* ih */,
+                         view_rank - 4 /* 1st upscale_factor */,
+                         view_rank - 1 /* iw */,
+                         view_rank - 3 /* 2nd upscale_factor */});
+                    nvinfer1::Permutation permute;
+                    std::copy(new_order.begin(), new_order.end(), permute.order);
+                    permutation_layer->setSecondTranspose(permute);
+
+                    // Finally, upscale by collapsing (ih, upscale_factor) -> a single dim (oh)
+                    // and (iw, upscale_factor) -> a single dim (ow).
+                    std::vector<int64_t> final_shape(in_shape.begin(), self_sizes_batch_end);
+                    final_shape.insert(final_shape.end(), {oc, oh, ow});
+                    auto last_view_layer = ctx->net->addShuffle(*permutation_layer->getOutput(0));
+                    TRTORCH_CHECK(last_view_layer, "Unable to create shuffle layer from node: " << *n);
+                    last_view_layer->setReshapeDimensions(util::toDims(final_shape));
+                    last_view_layer->setName(util::node_info(n).c_str());
+
+                    auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], last_view_layer->getOutput(0));
+                    LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
+
                     return true;
                   }});
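
The three shuffle layers above implement the standard pixel_shuffle decomposition: split the channel dim into (oc, r, r), permute so each r lands next to its spatial dim, then collapse. A small libtorch sketch (illustrative, not part of the commit) checking that decomposition for a single-batch-dim input:

#include <torch/torch.h>
#include <iostream>

int main() {
  // pixel_shuffle rearranges (N, C*r^2, H, W) -> (N, C, H*r, W*r).
  int64_t r = 2;
  auto x = torch::arange(16, torch::kFloat).reshape({1, 4, 2, 2}); // ic = oc * r^2 = 4
  auto y = torch::pixel_shuffle(x, r);

  // The converter's three steps, expressed as tensor ops:
  auto v = x.reshape({1, 1, r, r, 2, 2});   // view: split channels into (oc, r, r)
  auto p = v.permute({0, 1, 4, 2, 5, 3});   // transpose: (N, oc, ih, r, iw, r)
  auto z = p.reshape({1, 1, 2 * r, 2 * r}); // final view: (N, oc, oh, ow)
  std::cout << torch::allclose(y, z) << std::endl; // expect 1
  return 0;
}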

docs/_notebooks/Resnet50-example.html

Lines changed: 1 addition & 1 deletion

@@ -690,7 +690,7 @@
   </div>
  </div>
  <p>
-  <img alt="f03b9ffb995e463a9ece48dbc03b19a6" src="http://developer.download.nvidia.com/compute/machine-learning/frameworks/nvidia_logo.png"/>
+  <img alt="2f9e31058b32421bb811779059a6af66" src="http://developer.download.nvidia.com/compute/machine-learning/frameworks/nvidia_logo.png"/>
  </p>
  <h1 id="notebooks-resnet50-example--page-root">
   TRTorch Getting Started - ResNet 50

docs/_notebooks/lenet-getting-started.html

Lines changed: 1 addition & 1 deletion

@@ -784,7 +784,7 @@
   </div>
  </div>
  <p>
-  <img alt="cd6d780b542849ab939e734e03646533" src="http://developer.download.nvidia.com/compute/machine-learning/frameworks/nvidia_logo.png"/>
+  <img alt="ab920573d9c6495cbe539fd91862cc7e" src="http://developer.download.nvidia.com/compute/machine-learning/frameworks/nvidia_logo.png"/>
  </p>
  <h1 id="notebooks-lenet-getting-started--page-root">
   TRTorch Getting Started - LeNet

docs/_notebooks/ssd-object-detection-demo.html

Lines changed: 1 addition & 1 deletion

@@ -804,7 +804,7 @@
   </div>
  </div>
  <p>
-  <img alt="01bd2524df734a8aabc6c06d402cfd04" src="http://developer.download.nvidia.com/compute/machine-learning/frameworks/nvidia_logo.png"/>
+  <img alt="d216ad9778614725a54e3da2ff4c800a" src="http://developer.download.nvidia.com/compute/machine-learning/frameworks/nvidia_logo.png"/>
  </p>
  <h1 id="notebooks-ssd-object-detection-demo--page-root">
   Object Detection with TRTorch (SSD)

docs/py_api/trtorch.html

Lines changed: 1 addition & 1 deletion

@@ -1011,7 +1011,7 @@ <h2 id="functions">
  <span class="sig-paren">
  )
  </span>
- → &lt;torch._C.ScriptClass object at 0x7fc84234b370&gt;
+ → &lt;torch._C.ScriptClass object at 0x7fb1218be8f0&gt;
  <a class="headerlink" href="#trtorch.TensorRTCompileSpec" title="Permalink to this definition">

  </a>

docs/searchindex.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.

tests/core/conversion/converters/BUILD

Lines changed: 5 additions & 0 deletions

@@ -47,6 +47,10 @@ converter_test(
     name = "test_reduce"
 )

+converter_test(
+    name = "test_replication_pad"
+)
+
 converter_test(
     name = "test_shuffle"
 )

@@ -99,6 +103,7 @@ test_suite(
         ":test_matrix_multiply",
         ":test_pooling",
         ":test_reduce",
+        ":test_replication_pad",
         ":test_shuffle",
         ":test_softmax",
         ":test_unary",
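
The new test_replication_pad target presumably follows the suite's usual shape: parse a small TorchScript graph, run it through both the JIT interpreter and a converted TensorRT engine, and compare outputs. A hedged sketch of one such case; the helpers (RunGraph, RunGraphEngine, almostEqual, get_named_params) mirror the pattern of the other converter tests and are assumptions here, not quoted from the commit:

#include "core/compiler.h"
#include "gtest/gtest.h"
#include "tests/util/util.h"
#include "torch/csrc/jit/ir/irparser.h"

TEST(Converters, ATenReplicationPad1dConvertsCorrectly) {
  // Small graph exercising the converter registered above (assumed-style test).
  const auto graph = R"IR(
      graph(%0 : Tensor):
        %1 : int[] = prim::Constant[value=[2, 3]]()
        %2 : Tensor = aten::replication_pad1d(%0, %1)
        return (%2))IR";

  auto g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, g.get());

  auto in = at::randint(1, 10, {1, 3, 4}, {at::kCUDA});
  auto params = trtorch::core::conversion::get_named_params(g->inputs(), {});

  // Compare the TorchScript result against the TensorRT engine result.
  auto jit_results = trtorch::tests::util::RunGraph(g, params, {in});
  auto trt_results = trtorch::tests::util::RunGraphEngine(g, params, {in});

  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}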
