Skip to content

Commit c055c3c

Browse files
authored
Merge branch 'master' into 810
2 parents 9bb2684 + b652045 commit c055c3c

File tree

182 files changed

+39365
-26006
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

182 files changed

+39365
-26006
lines changed

.bazelrc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ build:pre_cxx11_abi --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0"
3535
build:pre_cxx11_abi --linkopt="-D_GLIBCXX_USE_CXX11_ABI=0"
3636
build:pre_cxx11_abi --define=abi=pre_cxx11_abi
3737

38-
build:ci_testing --define=torchtrt_src=pre_built --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0"
39-
build:use_precompiled_torchtrt --define=torchtrt_src=pre_built
38+
build:ci_testing --define=torchtrt_src=prebuilt --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0"
39+
build:use_precompiled_torchtrt --define=torchtrt_src=prebuilt
4040

41-
test:ci_testing --define=torchtrt_src=pre_built --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0"
42-
test:use_precompiled_torchtrt --define=torchtrt_src=pre_built
41+
test:ci_testing --define=torchtrt_src=prebuilt --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0"
42+
test:use_precompiled_torchtrt --define=torchtrt_src=prebuilt

.github/workflows/docgen.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ jobs:
1212
build-docs:
1313
runs-on: ubuntu-18.04
1414
container:
15-
image: docker.pkg.github.com/nvidia/torch-tensorrt/docgen:latest
15+
image: ghcr.io/nvidia/torch-tensorrt/docgen:latest
1616
credentials:
17-
username: $GITHUB_ACTOR
18-
password: ${{secrets.GITHUB_TOKEN}}
17+
username: ${{ github.actor }}
18+
password: ${{ secrets.GITHUB_TOKEN }}
1919
steps:
2020
- name: Reclaim space
2121
run: |
@@ -36,6 +36,7 @@ jobs:
3636
- name: Generate New Docs
3737
run: |
3838
cd docsrc
39+
pip3 install -r requirements.txt
3940
python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)"
4041
make html
4142
- uses: stefanzweifel/git-auto-commit-action@v4

.github/workflows/linter.yml

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,32 @@ jobs:
88
cpp-linting:
99
name: C++ Linting
1010
runs-on: ubuntu-latest
11+
permissions:
12+
actions: write
13+
checks: write
14+
contents: write
15+
deployments: none
16+
id-token: write
17+
issues: write
18+
discussions: write
19+
packages: write
20+
pull-requests: write
21+
repository-projects: none
22+
security-events: none
23+
statuses: write
1124
steps:
1225
- uses: actions/checkout@v2
1326
with:
1427
ref: ${{ github.event.pull_request.head.sha }}
15-
- name: Docker Login
16-
run: docker login docker.pkg.github.com -u $GITHUB_ACTOR -p $GITHUB_TOKEN
17-
env:
18-
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
28+
- name: Docker login
29+
uses: docker/login-action@v1
30+
with:
31+
registry: ghcr.io
32+
username: ${{ github.actor }}
33+
password: ${{ secrets.GITHUB_TOKEN }}
1934
- name: Run image
2035
run: |
21-
docker run -it -d --name cpplinter -e GITHUB_TOKEN=$GITHUB_TOKEN -v $GITHUB_WORKSPACE:/workspace -v $GITHUB_EVENT_PATH:/GITHUB_EVENT.json -w /workspace docker.pkg.github.com/nvidia/torch-tensorrt/docgen:latest
36+
docker run -it -d --name cpplinter -e GITHUB_TOKEN=$GITHUB_TOKEN -v $GITHUB_WORKSPACE:/workspace -v $GITHUB_EVENT_PATH:/GITHUB_EVENT.json -w /workspace ghcr.io/nvidia/torch-tensorrt/docgen:latest
2237
docker exec cpplinter bash -c "cp /workspace/docker/WORKSPACE.docs /workspace/WORKSPACE"
2338
env:
2439
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
@@ -30,17 +45,32 @@ jobs:
3045
py-linting:
3146
name: Python Linting
3247
runs-on: ubuntu-latest
48+
permissions:
49+
actions: write
50+
checks: write
51+
contents: write
52+
deployments: none
53+
id-token: write
54+
issues: write
55+
discussions: write
56+
packages: write
57+
pull-requests: write
58+
repository-projects: none
59+
security-events: none
60+
statuses: write
3361
steps:
3462
- uses: actions/checkout@v2
3563
with:
3664
ref: ${{ github.event.pull_request.head.sha }}
37-
- name: Docker Login
38-
run: docker login docker.pkg.github.com -u $GITHUB_ACTOR -p $GITHUB_TOKEN
39-
env:
40-
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
65+
- name: Docker login
66+
uses: docker/login-action@v1
67+
with:
68+
registry: ghcr.io
69+
username: ${{ github.actor }}
70+
password: ${{ secrets.GITHUB_TOKEN }}
4171
- name: Run image
4272
run: |
43-
docker run -it -d --name pylinter -e GITHUB_TOKEN=$GITHUB_TOKEN -v $GITHUB_WORKSPACE:/workspace -v $GITHUB_EVENT_PATH:/GITHUB_EVENT.json -w /workspace docker.pkg.github.com/nvidia/torch-tensorrt/docgen:latest
73+
docker run -it -d --name pylinter -e GITHUB_TOKEN=$GITHUB_TOKEN -v $GITHUB_WORKSPACE:/workspace -v $GITHUB_EVENT_PATH:/GITHUB_EVENT.json -w /workspace ghcr.io/nvidia/torch-tensorrt/docgen:latest
4474
docker exec pylinter bash -c "cp /workspace/docker/WORKSPACE.docs /workspace/WORKSPACE"
4575
env:
4676
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
77
Torch-TensorRT is a compiler for PyTorch/TorchScript, targeting NVIDIA GPUs via NVIDIA's TensorRT Deep Learning Optimizer and Runtime. Unlike PyTorch's Just-In-Time (JIT) compiler, Torch-TensorRT is an Ahead-of-Time (AOT) compiler, meaning that before you deploy your TorchScript code, you go through an explicit compile step to convert a standard TorchScript program into a module targeting a TensorRT engine. Torch-TensorRT operates as a PyTorch extension and compiles modules that integrate into the JIT runtime seamlessly. After compilation, using the optimized graph should feel no different than running a TorchScript module. You also have access to TensorRT's suite of configurations at compile time, so you are able to specify operating precision (FP32/FP16/INT8) and other settings for your module.
88

9-
More Information / System Architecture:
10-
11-
- [GTC 2020 Talk](https://developer.nvidia.com/gtc/2020/video/s21671)
12-
9+
Resources:
10+
- [Documentation](https://nvidia.github.io/Torch-TensorRT/)
11+
- [Torch-TensorRT Explained in 2 minutes!](https://www.youtube.com/watch?v=TU5BMU6iYZ0&ab_channel=NVIDIADeveloper)
12+
- [Comprehensive Discussion (GTC Event)](https://www.nvidia.com/en-us/on-demand/session/gtcfall21-a31107/)
13+
- [Pre-built Docker Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). To use this container, make an NGC account and sign in to NVIDIA's registry with an API key. Refer to [this guide](https://docs.nvidia.com/ngc/ngc-catalog-user-guide/index.html#registering-activating-ngc-account) for instructions.
1314

1415

1516
## Building a docker container for Torch-TensorRT

core/conversion/converters/impl/activation.cpp

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -166,39 +166,7 @@ auto acthardtanh TORCHTRT_UNUSED =
166166
auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
167167
LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
168168
return true;
169-
}})
170-
.pattern({"aten::gelu(Tensor self) -> (Tensor)",
171-
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
172-
auto in = args[0].ITensorOrFreeze(ctx);
173-
nvinfer1::DataType type = in->getType();
174-
TORCHTRT_CHECK(
175-
type == nvinfer1::DataType::kFLOAT || type == nvinfer1::DataType::kHALF,
176-
"gelu only supports kFLOAT and kHALF");
177-
std::string pluginName = "CustomGeluPluginDynamic";
178-
nvinfer1::PluginFieldCollection fc;
179-
std::vector<nvinfer1::PluginField> f;
180-
// REVIEW is this right?
181-
int type_id = ctx->settings.enabled_precisions.find(nvinfer1::DataType::kHALF) ==
182-
ctx->settings.enabled_precisions.end()
183-
? 0
184-
: 1; // Integer encoding the DataType (0: FP32, 1: FP16)
185-
f.emplace_back(nvinfer1::PluginField("type_id", &type_id, nvinfer1::PluginFieldType::kINT32, 1));
186-
fc.nbFields = f.size();
187-
fc.fields = f.data();
188-
189-
auto creator = getPluginRegistry()->getPluginCreator("CustomGeluPluginDynamic", "1", "");
190-
auto gelu_plugin = creator->createPlugin("gelu", &fc);
191-
192-
TORCHTRT_CHECK(gelu_plugin, "Unable to create gelu plugin from TensorRT plugin registry" << *n);
193-
auto new_layer =
194-
ctx->net->addPluginV2(reinterpret_cast<nvinfer1::ITensor* const*>(&in), 1, *gelu_plugin);
195-
new_layer->setName(util::node_info(n).c_str());
196-
auto out_tensor = new_layer->getOutput(0);
197-
out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out_tensor);
198-
LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
199-
return true;
200169
}});
201-
202170
} // namespace
203171
} // namespace impl
204172
} // namespace converters

core/conversion/converters/impl/batch_norm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ auto batch_norm_registrations TORCHTRT_UNUSED =
7171
LOG_DEBUG("momentum disregarded");
7272
LOG_DEBUG("training disregarded");
7373
LOG_DEBUG("cudnn disregarded");
74-
TORCHTRT_CHECK(orig_shape.nbDims > 2, "Unable to create batch normalization layer from node: " << *n);
74+
TORCHTRT_CHECK(orig_shape.nbDims >= 2, "Unable to create batch normalization layer from node: " << *n);
7575

7676
// Expand spatial dims from 1D to 2D if needed
7777
bool expandDims = (orig_shape.nbDims < 4);

core/conversion/evaluators/aten.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,23 @@ auto aten_registrations TORCHTRT_UNUSED =
706706
},
707707
EvalOptions().validSchemas({
708708
R"SIG(aten::copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> (Tensor(a!)))SIG",
709-
})});
709+
})})
710+
.evaluator({c10::Symbol::fromQualString("aten::format"),
711+
[](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
712+
int64_t input_num = n->inputs().size();
713+
std::vector<torch::jit::IValue> stack;
714+
for (auto v : n->inputs()) {
715+
stack.push_back(*args.at(v).IValue());
716+
}
717+
stack.push_back(input_num);
718+
auto& ops = torch::jit::getAllOperatorsFor(c10::Symbol::fromQualString("aten::format"));
719+
auto& aten_format = ops.front();
720+
aten_format->getOperation()(stack);
721+
std::string output;
722+
torch::jit::pop(stack, output);
723+
return output;
724+
},
725+
EvalOptions().validSchemas({"aten::format(str self, ...) -> (str)"})});
710726
} // namespace
711727
} // namespace evaluators
712728
} // namespace conversion

core/lowering/lowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
4343
passes::UnpackHardSwish(g);
4444
passes::EliminateExceptionOrPassPattern(g);
4545
passes::ReduceToOperation(g);
46+
passes::ReduceGelu(g);
4647
passes::RemoveContiguous(g);
48+
passes::ViewToReshape(g);
4749
passes::RemoveDropout(g);
4850
passes::LinearToAddMM(g);
4951
passes::Conv1DToConvolution(g);

core/lowering/passes/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ cc_library(
1717
"module_fallback.cpp",
1818
"op_aliasing.cpp",
1919
"reduce_to.cpp",
20+
"reduce_gelu.cpp",
2021
"remove_bn_dim_check.cpp",
2122
"remove_contiguous.cpp",
23+
"view_to_reshape.cpp",
2224
"remove_dropout.cpp",
2325
"remove_nops.cpp",
2426
"silu_to_sigmoid_multiplication.cpp",

core/lowering/passes/fuse_addmm_branches.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct AddMMBranchFusion {
4949
if ((*arm1_start)->kind().toQualString() == std::string("aten::addmm") &&
5050
(*(++arm1_start))->kind() == prim::Return &&
5151
(*arm2_start)->kind().toQualString() == std::string("aten::matmul") &&
52-
(*(++arm2_start))->kind().toQualString() != std::string("aten::add") &&
52+
(*(++arm2_start))->kind().toQualString() == std::string("aten::add") &&
5353
(*(++arm2_start))->kind() == prim::Return) {
5454
// Make sure that block0 is solely just the aten::addmm op and block1 is matmul + add
5555
return true;

0 commit comments

Comments
 (0)