
Commit 4198e19

ruoqianguo authored and peri044 committed
adding support for gelu converter
Signed-off-by: Ruoqian Guo <[email protected]>
1 parent c53ccac commit 4198e19

File tree: 5 files changed, +101 −41 lines

core/conversion/conversionctx/BUILD

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@ cc_library(
     ],
     deps = [
         "@tensorrt//:nvinfer",
+        "@tensorrt//:nvinferplugin",
         "//core/util:prelude",
         #"//core/plugins:trtorch_plugins",
     ] + select({

core/conversion/conversionctx/ConversionCtx.cpp

Lines changed: 4 additions & 0 deletions
@@ -1,5 +1,9 @@
 #include <iostream>
 #include <sstream>
+
+#include "NvInferPlugin.h"
+#include "NvInferPluginUtils.h"
+
 #include <utility>
 // #include "core/plugins/plugin_prelude.h"
 #include "core/conversion/conversionctx/ConversionCtx.h"

core/conversion/conversionctx/ConversionCtx.h

Lines changed: 3 additions & 0 deletions
@@ -68,6 +68,9 @@ struct ConversionCtx {
   // copy of the values
   std::vector<void*> builder_resources;
 
+  // Registry of official tensorrt plugin layers.
+  std::unordered_map<std::string, nvinfer1::IPluginCreator*> mPluginRegistry;
+
   std::unordered_map<const torch::jit::Value*, nvinfer1::ITensor*> value_tensor_map;
   std::unordered_map<const torch::jit::Value*, torch::jit::IValue> evaluated_value_map;
 };
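
The hunk above only declares mPluginRegistry; where and how it is populated is not shown in this diff. A minimal sketch of one way it could be filled from TensorRT's global plugin registry, assuming it runs in the ConversionCtx constructor and that ctx->logger can be passed as the nvinfer1::ILogger (the helper name RegisterOfficialPlugins is hypothetical, not part of this commit):

// Hypothetical sketch, not part of this commit: fill mPluginRegistry from
// TensorRT's global plugin registry after loading the stock plugin library.
#include "NvInferPlugin.h"

void RegisterOfficialPlugins(ConversionCtx* ctx) {
  // Register the official TensorRT plugins (including CustomGeluPluginDynamic)
  // into the global registry under the default namespace "".
  initLibNvInferPlugins(&ctx->logger, "");

  int num_creators = 0;
  auto creators = getPluginRegistry()->getPluginCreatorList(&num_creators);
  for (int i = 0; i < num_creators; i++) {
    // Index creators by plugin name so converters can do
    // ctx->mPluginRegistry.at("CustomGeluPluginDynamic").
    ctx->mPluginRegistry[creators[i]->getPluginName()] = creators[i];
  }
}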

core/conversion/converters/impl/activation.cpp

Lines changed: 68 additions & 40 deletions
@@ -42,24 +42,25 @@ convert(tanh, kTANH);
 
 auto acthardtanh TRTORCH_UNUSED =
     RegisterNodeConversionPatterns()
-        .pattern({"aten::hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> (Tensor)",
-                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
-                    auto in = args[0].ITensorOrFreeze(ctx);
-                    auto min = args[1].unwrapToDouble();
-                    auto max = args[2].unwrapToDouble();
+        .pattern(
+            {"aten::hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto in = args[0].ITensorOrFreeze(ctx);
+               auto min = args[1].unwrapToDouble();
+               auto max = args[2].unwrapToDouble();
 
-                    auto new_layer = ctx->net->addActivation(*in, nvinfer1::ActivationType::kCLIP);
-                    TRTORCH_CHECK(new_layer, "Unable to create layer for aten::hardtanh");
+               auto new_layer = ctx->net->addActivation(*in, nvinfer1::ActivationType::kCLIP);
+               TRTORCH_CHECK(new_layer, "Unable to create layer for aten::hardtanh");
 
-                    new_layer->setAlpha(min);
-                    new_layer->setBeta(max);
+               new_layer->setAlpha(min);
+               new_layer->setBeta(max);
 
-                    new_layer->setName(util::node_info(n).c_str());
-                    auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
+               new_layer->setName(util::node_info(n).c_str());
+               auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
 
-                    LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
-                    return true;
-                  }})
+               LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
+               return true;
+             }})
         .pattern({// TODO: Remove after functionalization
                   "aten::hardtanh_(Tensor(a!) self, Scalar min_val=-1, Scalar max_val=1) -> (Tensor(a!))",
                   [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -125,34 +126,35 @@ auto acthardtanh TRTORCH_UNUSED =
                     LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
                     return true;
                   }})
-        .pattern({"aten::leaky_relu(Tensor self, Scalar negative_slope=0.01) -> (Tensor)",
-                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
-                    auto self = args[0].ITensorOrFreeze(ctx);
-                    auto negative_slopeScalar = args[1].unwrapToScalar().to<float>();
+        .pattern(
+            {"aten::leaky_relu(Tensor self, Scalar negative_slope=0.01) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto self = args[0].ITensorOrFreeze(ctx);
+               auto negative_slopeScalar = args[1].unwrapToScalar().to<float>();
 
-                    auto new_layer = ctx->net->addActivation(*self, nvinfer1::ActivationType::kLEAKY_RELU);
-                    new_layer->setAlpha(negative_slopeScalar);
+               auto new_layer = ctx->net->addActivation(*self, nvinfer1::ActivationType::kLEAKY_RELU);
+               new_layer->setAlpha(negative_slopeScalar);
 
-                    new_layer->setName(util::node_info(n).c_str());
-                    auto out_tensor = new_layer->getOutput(0);
-                    out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out_tensor);
-                    LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
-                    return true;
-                  }})
-        .pattern({"aten::leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!)",
-                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
-                    auto self = args[0].ITensorOrFreeze(ctx);
-                    auto negative_slopeScalar = args[1].unwrapToScalar().to<float>();
-
-                    auto new_layer = ctx->net->addActivation(*self, nvinfer1::ActivationType::kLEAKY_RELU);
-                    new_layer->setAlpha(negative_slopeScalar);
+               new_layer->setName(util::node_info(n).c_str());
+               auto out_tensor = new_layer->getOutput(0);
+               out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out_tensor);
+               LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
+               return true;
+             }})
+        .pattern(
+            {"aten::leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto self = args[0].ITensorOrFreeze(ctx);
+               auto negative_slopeScalar = args[1].unwrapToScalar().to<float>();
 
-                    new_layer->setName(util::node_info(n).c_str());
-                    auto out_tensor = new_layer->getOutput(0);
-                    out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out_tensor);
-                    LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
-                    return true;
-                  }})
+               auto new_layer = ctx->net->addActivation(*self, nvinfer1::ActivationType::kLEAKY_RELU);
+               new_layer->setAlpha(negative_slopeScalar);
+               new_layer->setName(util::node_info(n).c_str());
+               auto out_tensor = new_layer->getOutput(0);
+               out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out_tensor);
+               LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
+               return true;
+             }})
         .pattern({"aten::elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> (Tensor)",
                   [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
                     auto in = args[0].ITensorOrFreeze(ctx);
@@ -167,7 +169,33 @@ auto acthardtanh TRTORCH_UNUSED =
                     auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
                     LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
                     return true;
-                  }});
+                  }})
+        .pattern(
+            {"aten::gelu(Tensor self) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto in = args[0].ITensorOrFreeze(ctx);
+               nvinfer1::DataType type = in->getType();
+               TRTORCH_CHECK(
+                   type == nvinfer1::DataType::kFLOAT || type == nvinfer1::DataType::kHALF,
+                   "gelu only supports kFLOAT and kHALF");
+               std::string pluginName = "CustomGeluPluginDynamic";
+               nvinfer1::PluginFieldCollection fc;
+               std::vector<nvinfer1::PluginField> f;
+               int type_id = 0; // Integer encoding the DataType (0: FP32, 1: FP16)
+               if (type == nvinfer1::DataType::kHALF)
+                 type_id = 1;
+               f.emplace_back(nvinfer1::PluginField("type_id", &type_id, nvinfer1::PluginFieldType::kINT32, 1));
+               fc.nbFields = f.size();
+               fc.fields = f.data();
+               nvinfer1::IPluginV2* pluginV2 = ctx->mPluginRegistry.at(pluginName)->createPlugin("gelu", &fc);
+               TRTORCH_CHECK(pluginV2, "Unable to create gelu plugin from TensorRT plugin registry" << *n);
+               auto new_layer = ctx->net->addPluginV2(reinterpret_cast<nvinfer1::ITensor* const*>(&in), 1, *pluginV2);
+               new_layer->setName("gelu");
+               auto out_tensor = new_layer->getOutput(0);
+               out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out_tensor);
+               LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
+               return true;
+             }});
 
 } // namespace
 } // namespace impl
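
The new gelu converter packs a single type_id field (0 for FP32, 1 for FP16) into a PluginFieldCollection, asks the CustomGeluPluginDynamic creator for a plugin instance, and adds it to the network with addPluginV2. The same creator lookup can be expressed against TensorRT's global plugin registry; a rough sketch for comparison (the helper name MakeGeluPlugin and the plugin version string "1" are assumptions, not part of this commit):

#include <vector>

#include "NvInfer.h"
#include "NvInferPlugin.h"

// Hypothetical helper mirroring the converter body, but resolving the creator
// through the global plugin registry instead of ctx->mPluginRegistry.
nvinfer1::IPluginV2* MakeGeluPlugin(bool fp16) {
  auto creator = getPluginRegistry()->getPluginCreator("CustomGeluPluginDynamic", "1");
  if (!creator) {
    return nullptr; // plugin library not loaded; see initLibNvInferPlugins
  }

  int type_id = fp16 ? 1 : 0; // same encoding as the converter: 0 FP32, 1 FP16
  std::vector<nvinfer1::PluginField> fields;
  fields.emplace_back(nvinfer1::PluginField("type_id", &type_id, nvinfer1::PluginFieldType::kINT32, 1));

  nvinfer1::PluginFieldCollection fc;
  fc.nbFields = static_cast<int>(fields.size());
  fc.fields = fields.data();
  return creator->createPlugin("gelu", &fc);
}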

tests/core/conversion/converters/test_activation.cpp

Lines changed: 25 additions & 1 deletion
@@ -198,4 +198,28 @@ TEST(Converters, ATenEluConvertsCorrectly) {
   auto trt_results = trtorch::tests::util::RunGraphEngine(g, params, {in});
 
   ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
-}
+}
+
+TEST(Converters, ATenGELUConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%0 : Tensor):
+        %3 : Tensor = aten::gelu(%0)
+        return (%3))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, &*g);
+
+  auto in = at::randint(-5, 5, {5}, {at::kCUDA});
+
+  auto params = trtorch::core::conversion::get_named_params(g->inputs(), {});
+  auto jit_results = trtorch::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  params = trtorch::core::conversion::get_named_params(g->inputs(), {});
+  auto trt_results = trtorch::tests::util::RunGraphEngine(g, params, {in});
+
+  // The official TensorRT plugin applies the GELU activation x * Phi(x), where Phi is the Gaussian CDF, approximated
+  // by 0.5 * (1 + tanh(sqrt(2 / M_PI) * (x + 0.044715 * x^3))), while PyTorch uses c10::cuda::compat::normcdf to
+  // compute Phi(x), so the results differ slightly; hence the looser tolerance.
+  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-4));
+}
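
For reference, the two formulations behind that tolerance gap, written out (the tanh approximation is the one quoted in the test comment; the erf form is the exact definition that PyTorch's normcdf path computes):

\mathrm{GELU}(x) = x\,\Phi(x) = \frac{x}{2}\left(1 + \operatorname{erf}\!\left(\frac{x}{\sqrt{2}}\right)\right)

\mathrm{GELU}(x) \approx \frac{x}{2}\left(1 + \tanh\!\left(\sqrt{\tfrac{2}{\pi}}\left(x + 0.044715\,x^{3}\right)\right)\right)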
