
Commit 0aea728

Merge pull request #421 from guoruoqian/pixel_shuffle
Support pixel_shuffle converter
2 parents c302041 + b784638 commit 0aea728
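
For context, aten::pixel_shuffle rearranges elements from the channel dimension into spatial blocks: an input of shape (N, C * r^2, H, W) becomes (N, C, H * r, W * r) for an upscale factor r, and inputs only need at least 3 dimensions (any extra leading dims act as batch dims). A minimal libtorch sketch of that shape contract, with illustrative sizes that are not taken from this commit:

#include <torch/torch.h>
#include <iostream>

int main() {
  // Illustrative input: one batch dim, 9 channels (= 1 * 3 * 3), height 4, width 5.
  auto x = torch::randn({1, 9, 4, 5});
  // pixel_shuffle with upscale_factor 3 moves each group of 9 channels into a 3x3 spatial block.
  auto y = at::pixel_shuffle(x, /*upscale_factor=*/3);
  std::cout << y.sizes() << std::endl;  // prints [1, 1, 12, 15]
  return 0;
}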

2 files changed: 150 additions, 0 deletions

core/conversion/converters/impl/shuffle.cpp

Lines changed: 75 additions & 0 deletions
@@ -125,6 +125,81 @@ static auto shuffle_registrations TRTORCH_UNUSED =
               auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle->getOutput(0));
               LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());

               return true;
             }})
        .pattern({"aten::pixel_shuffle(Tensor self, int upscale_factor) -> (Tensor)",
                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
                    auto self = args[0].ITensorOrFreeze(ctx);
                    auto in_shape = util::toVec(self->getDimensions());
                    int64_t irank = in_shape.size();
                    TRTORCH_CHECK(
                        irank >= 3,
                        "pixel_shuffle expects input to have at least 3 dimensions, but got input with "
                            << irank << " dimension(s)");
                    int64_t upscale_factor = args[1].unwrapToInt();
                    TRTORCH_CHECK(
                        upscale_factor > 0,
                        "pixel_shuffle expects a positive upscale_factor, but got " << upscale_factor);
                    int64_t upscale_factor_squared = upscale_factor * upscale_factor;

                    const auto NUM_NON_BATCH_DIMS = 3;
                    const auto self_sizes_batch_end = in_shape.end() - NUM_NON_BATCH_DIMS;

                    int64_t ic = in_shape[irank - 3];
                    int64_t ih = in_shape[irank - 2];
                    int64_t iw = in_shape[irank - 1];

                    TRTORCH_CHECK(
                        ic % upscale_factor_squared == 0,
                        "pixel_shuffle expects its input's 'channel' dimension to be divisible by the square of "
                            << "upscale_factor, but input.size(-3)=" << ic << " is not divisible by "
                            << upscale_factor_squared);

                    int64_t oc = ic / upscale_factor_squared;
                    int64_t oh = ih * upscale_factor;
                    int64_t ow = iw * upscale_factor;

                    // First, reshape to split the channels dim from c into 3 separate dims: (oc,
                    // upscale_factor, upscale_factor). This allows shuffling to be done next by
                    // permuting dims.
                    std::vector<int64_t> added_dims_shape(in_shape.begin(), self_sizes_batch_end);
                    added_dims_shape.insert(added_dims_shape.end(), {oc, upscale_factor, upscale_factor, ih, iw});
                    auto view_layer = ctx->net->addShuffle(*self);
                    TRTORCH_CHECK(view_layer, "Unable to create shuffle layer from node: " << *n);
                    view_layer->setReshapeDimensions(util::toDims(added_dims_shape));
                    int64_t view_rank = added_dims_shape.size();

                    // Next, shuffle by permuting the new upscale_factor dims alongside the height and width dims.
                    auto permutation_layer = ctx->net->addShuffle(*view_layer->getOutput(0));
                    TRTORCH_CHECK(permutation_layer, "Unable to create shuffle layer from node: " << *n);
                    // std::iota is used to maintain the batch dims within the permutation.
                    // Eg: if added_dims_shape is {n1, n2, c, r, r, h, w}, then the new_order is {view_rank-7,
                    // view_rank-6, view_rank-5, view_rank-2, view_rank-4, view_rank-1, view_rank-3}
                    std::vector<int64_t> new_order(in_shape.begin(), self_sizes_batch_end);
                    std::iota(new_order.begin(), new_order.end(), 0);
                    new_order.insert(
                        new_order.end(),
                        {view_rank - 5 /* oc */,
                         view_rank - 2 /* ih */,
                         view_rank - 4 /* 1st upscale_factor */,
                         view_rank - 1 /* iw */,
                         view_rank - 3 /* 2nd upscale_factor */});
                    nvinfer1::Permutation permute;
                    std::copy(new_order.begin(), new_order.end(), permute.order);
                    permutation_layer->setSecondTranspose(permute);

                    // Finally, upscale by collapsing (ih, upscale_factor) -> a single dim (oh)
                    // and (iw, upscale_factor) -> a single dim (ow).
                    std::vector<int64_t> final_shape(in_shape.begin(), self_sizes_batch_end);
                    final_shape.insert(final_shape.end(), {oc, oh, ow});
                    auto last_view_layer = ctx->net->addShuffle(*permutation_layer->getOutput(0));
                    TRTORCH_CHECK(last_view_layer, "Unable to create shuffle layer from node: " << *n);
                    last_view_layer->setReshapeDimensions(util::toDims(final_shape));
                    last_view_layer->setName(util::node_info(n).c_str());

                    auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], last_view_layer->getOutput(0));
                    LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());

                    return true;
                  }});
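
The converter above lowers pixel_shuffle onto three TensorRT shuffle layers: a reshape that splits the channel dim into (oc, r, r), a transpose that interleaves the two r dims with height and width, and a final reshape that collapses (ih, r) and (iw, r) into (oh, ow). A hedged libtorch sketch of that same decomposition, checked against at::pixel_shuffle; the concrete sizes here are illustrative and not part of the commit:

#include <torch/torch.h>
#include <iostream>

int main() {
  const int64_t r = 3;
  auto x = torch::randn({1, 9, 4, 5});  // (N, oc * r * r, ih, iw) with oc = 1, ih = 4, iw = 5

  // 1) Split channels: (N, oc*r*r, ih, iw) -> (N, oc, r, r, ih, iw)   [first shuffle layer's reshape]
  auto split = x.view({1, 1, r, r, 4, 5});
  // 2) Interleave each r dim with the spatial dim it upscales:        [setSecondTranspose permutation]
  //    (N, oc, r, r, ih, iw) -> (N, oc, ih, r, iw, r)
  auto perm = split.permute({0, 1, 4, 2, 5, 3});
  // 3) Collapse (ih, r) -> oh and (iw, r) -> ow:                      [last shuffle layer's reshape]
  auto manual = perm.reshape({1, 1, 4 * r, 5 * r});

  std::cout << torch::allclose(manual, at::pixel_shuffle(x, r)) << std::endl;  // prints 1
  return 0;
}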

tests/core/conversion/converters/test_shuffle.cpp

Lines changed: 75 additions & 0 deletions
@@ -266,3 +266,78 @@ TEST(Converters, ATenTransposeNegativeConvertsCorrectly) {

  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt, 2e-6));
}

TEST(Converters, ATenPixelShuffleConvertsCorrectly) {
  const auto graph = R"IR(
      graph(%x.1 : Tensor):
        %2 : int = prim::Constant[value=3]()
        %3 : Tensor = aten::pixel_shuffle(%x.1, %2)
        return (%3))IR";

  auto g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, g.get());

  auto in = at::randint(0, 5, {1, 9, 4, 5}, {at::kCUDA});
  auto params = trtorch::core::conversion::get_named_params(g->inputs(), {});

  std::cout << "Running JIT" << std::endl;
  auto jit_results = trtorch::tests::util::RunGraph(g, params, {in});

  std::cout << "Running TRT" << std::endl;
  in = at::clone(in);
  params = trtorch::core::conversion::get_named_params(g->inputs(), {});
  auto trt_results = trtorch::tests::util::RunGraphEngine(g, params, {in});
  // auto trt = trt_results[0].reshape_as(jit_results[0]);

  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenPixelShuffle3DConvertsCorrectly) {
  const auto graph = R"IR(
      graph(%x.1 : Tensor):
        %2 : int = prim::Constant[value=3]()
        %3 : Tensor = aten::pixel_shuffle(%x.1, %2)
        return (%3))IR";

  auto g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, g.get());

  auto in = at::randint(0, 5, {9, 4, 5}, {at::kCUDA});
  auto params = trtorch::core::conversion::get_named_params(g->inputs(), {});

  std::cout << "Running JIT" << std::endl;
  auto jit_results = trtorch::tests::util::RunGraph(g, params, {in});

  std::cout << "Running TRT" << std::endl;
  in = at::clone(in);
  params = trtorch::core::conversion::get_named_params(g->inputs(), {});
  auto trt_results = trtorch::tests::util::RunGraphEngine(g, params, {in});
  // auto trt = trt_results[0].reshape_as(jit_results[0]);

  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}

TEST(Converters, ATenPixelShuffle5DConvertsCorrectly) {
  const auto graph = R"IR(
      graph(%x.1 : Tensor):
        %2 : int = prim::Constant[value=3]()
        %3 : Tensor = aten::pixel_shuffle(%x.1, %2)
        return (%3))IR";

  auto g = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(graph, g.get());

  auto in = at::randint(0, 5, {2, 3, 9, 4, 5}, {at::kCUDA});
  auto params = trtorch::core::conversion::get_named_params(g->inputs(), {});

  std::cout << "Running JIT" << std::endl;
  auto jit_results = trtorch::tests::util::RunGraph(g, params, {in});

  std::cout << "Running TRT" << std::endl;
  in = at::clone(in);
  params = trtorch::core::conversion::get_named_params(g->inputs(), {});
  auto trt_results = trtorch::tests::util::RunGraphEngine(g, params, {in});
  // auto trt = trt_results[0].reshape_as(jit_results[0]);

  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
}
