Commit 98c797d

Merge remote-tracking branch 'upstream/master'
2 parents 205ab99 + ac4ac5e

185 files changed (+10684 / -1354 lines)


.bazelversion

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+3.2.0

.github/pr-labels.yml

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+"component: api [C++]":
+- cpp/api/**/*
+
+"component: api [Python]":
+- py/api/**/*
+
+"component: conversion":
+- core/conversion/**/*
+
+"component: converters":
+- core/conversion/converters/**/*
+
+"component: evaluators":
+- core/conversion/evaluators/**/*
+
+"component: execution":
+- core/execution/**/*
+
+"component: lowering":
+- core/lowering/**/*

.github/workflows/label.yml

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+# This workflow will triage pull requests and apply a label based on the
+# paths that are modified in the pull request.
+#
+# To use this workflow, you will need to set up a .github/labeler.yml
+# file with configuration. For more information, see:
+# https://github.com/actions/labeler/blob/master/README.md
+
+name: Labeler
+on: [pull_request]
+
+jobs:
+  label:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/labeler@v2
+      with:
+        repo-token: "${{ secrets.GITHUB_TOKEN }}"
+        configuration-path: .github/pr-labels.yml

.github/workflows/stale.yml

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+name: Mark stale issues and pull requests
+
+on:
+  schedule:
+  - cron: "0 0 * * *"
+
+jobs:
+  stale:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/stale@v1
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        stale-issue-message: 'This issue has not seen activity for 30 days, Remove stale label or comment or this will be closed in 5 days'
+        stale-pr-message: 'This PR has not seen activity for 30 days, Remove stale label or comment or this will be closed in 5 days'
+        stale-issue-label: 'No Activity'
+        exempt-issue-labels: 'WIP'
+        stale-pr-label: 'No Activity'
+        exempt-pr-labels: 'WIP'
+        days-before-stale: 30
+        days-before-close: 5

BUILD

Lines changed: 12 additions & 0 deletions
@@ -8,6 +8,8 @@ pkg_tar(
         "//core/conversion:include",
         "//core/conversion/conversionctx:include",
         "//core/conversion/converters:include",
+        "//core/conversion/var:include",
+        "//core/conversion/tensorcontainer:include",
         "//core/conversion/evaluators:include",
         "//core/conversion/converters/impl/plugins:include",
         "//core/execution:include",
@@ -36,6 +38,15 @@ pkg_tar(
 )
 
 
+pkg_tar(
+    name = "bin",
+    package_dir = "bin/",
+    srcs = [
+        "//cpp/trtorchc:trtorchc",
+    ],
+    mode = "0755",
+)
+
 
 pkg_tar(
     name = "libtrtorch",
@@ -46,6 +57,7 @@ pkg_tar(
     ],
     deps = [
         ":lib",
+        ":bin",
         ":include",
         ":include_core",
     ],

README.md

Lines changed: 24 additions & 3 deletions
@@ -23,6 +23,8 @@ compile_settings.op_precision = torch::kFloat;
 auto trt_mod = trtorch::CompileGraph(ts_mod, compile_settings);
 // Run like normal
 auto results = trt_mod.forward({in_tensor});
+// Save module for later
+trt_mod.save("trt_torchscript_module.ts");
 ...
 ```
 
@@ -46,6 +48,7 @@ trt_ts_module = trtorch.compile(torch_script_module, compile_settings)
 
 input_data = input_data.half()
 result = trt_ts_module(input_data)
+torch.jit.save(trt_ts_module, "trt_torchscript_module.ts")
 ```
 
 > Notes on running in lower precisions:
@@ -65,6 +68,7 @@ result = trt_ts_module(input_data)
 
 ### Dependencies
 
+- Bazel 3.2.0
 - Libtorch 1.5.0
 - CUDA 10.2
 - cuDNN 7.6.5
@@ -78,7 +82,24 @@ Releases: https://github.com/NVIDIA/TRTorch/releases
 
 ### Installing Dependencies
 
-You need to start by having CUDA installed on the system, Libtorch will automatically be pulled for you by bazel,
+#### 0. Install Bazel
+
+If you don't have bazel installed, the easiest way is to install bazelisk using the method of your choosing: https://github.com/bazelbuild/bazelisk
+
+Otherwise you can use the following instructions to install binaries: https://docs.bazel.build/versions/master/install.html
+
+Finally, if you need to compile from source (e.g. aarch64, until bazel distributes binaries for the architecture) you can use these instructions:
+
+```sh
+export BAZEL_VERSION=<VERSION>
+mkdir bazel
+cd bazel
+curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip
+unzip bazel-$BAZEL_VERSION-dist.zip
+bash ./compile.sh
+```
+
+You need to start by having CUDA installed on the system; LibTorch will automatically be pulled for you by bazel,
 then you have two options.
 
 #### 1. Building using cuDNN & TensorRT tarball distributions
@@ -90,10 +111,10 @@ then you have two options.
 1. You need to download the tarball distributions of TensorRT and cuDNN from the NVIDIA website.
     - https://developer.nvidia.com/cudnn
     - https://developer.nvidia.com/tensorrt
-2. Place these files in a directory (the directories `thrid_party/distdir/[x86_64-linux-gnu | aarch64-linux-gnu]` exist for this purpose)
+2. Place these files in a directory (the directories `third_party/distdir/[x86_64-linux-gnu | aarch64-linux-gnu]` exist for this purpose)
 3. Compile using:
 ``` shell
-bazel build //:libtrtorch --compilation_mode opt --distdir thrid_party/distdir/[x86_64-linux-gnu | aarch64-linux-gnu]
+bazel build //:libtrtorch --compilation_mode opt --distdir third_party/distdir/[x86_64-linux-gnu | aarch64-linux-gnu]
 ```
 
 #### 2. Building using locally installed cuDNN & TensorRT
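
As a side note on the save/load round trip added in the first two hunks above: the compiled artifact is an ordinary TorchScript file, so it loads back through the standard `torch::jit::load` API. A minimal sketch (not part of this commit; the file name matches the README snippets, the input shape is a made-up placeholder, and libtrtorch must be linked so the `trt::execute_engine` op deserializes correctly):

```cpp
#include <vector>

#include "torch/script.h"

int main() {
  // Deserialize the module saved by trt_mod.save(...) / torch.jit.save(...).
  torch::jit::script::Module mod = torch::jit::load("trt_torchscript_module.ts");

  // Placeholder input; use the shape the engine was actually compiled for.
  std::vector<torch::jit::IValue> inputs;
  inputs.push_back(torch::randn({1, 3, 224, 224}, torch::kCUDA));

  auto out = mod.forward(inputs);
  return 0;
}
```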

core/compiler.cpp

Lines changed: 63 additions & 21 deletions
@@ -6,7 +6,9 @@
 #include "NvInfer.h"
 
 #include "ATen/core/function_schema.h"
+#include "ATen/core/jit_type.h"
 
+#include "torch/custom_class.h"
 #include "torch/csrc/jit/frontend/function_schema_parser.h"
 #include "torch/csrc/jit/ir/ir.h"
 #include "torch/csrc/jit/passes/pass_manager.h"
@@ -40,32 +42,69 @@ c10::FunctionSchema GenerateGraphSchema(torch::jit::script::Module mod, std::str
 
 
 void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr<torch::jit::Graph>& g, std::string& serialized_engine) {
-  execution::EngineID uid = execution::RegisterEngineFromSerializedEngine(serialized_engine);
-  auto num_io = execution::GetEngineIO(uid);
-
-  auto self = g->addInput("self.1");
+  auto engine_ptr = c10::make_intrusive<execution::TRTEngine>(mod._ivalue()->name(), serialized_engine);
+  // Get required metadata about the engine out
+  auto num_io = engine_ptr->num_io;
+  auto name = engine_ptr->name;
+
+  // Add the engine as an attribute of the module, this will let the engine be serialized and deserialized
+  mod.register_attribute(
+    name,
+    c10::getCustomClassType<c10::intrusive_ptr<execution::TRTEngine>>(),
+    c10::IValue(std::move(engine_ptr)),
+    false
+  );
+
+  // Add the module as an input into the graph
+  auto self = g->addInput("self_1");
   self->setType(mod.type());
 
-  auto id_val = g->insertConstant(uid);
+  // Start by retrieving the engine from the module attribute list
+  auto engine_node = g->createGetAttr(self, name);
+  g->block()->appendNode(engine_node);
 
+  // Add inputs to the graph corresponding to the number of input tensors expected by the engine
+  // Also store those inputs in a vector so that they can be coalesced into a single list at runtime
   std::vector<torch::jit::Value*> engine_inputs;
-  engine_inputs.push_back(id_val);
-
   for (uint64_t i = 0; i < num_io.first; i++) {
-    auto in_val = g->addInput("");
+    auto in_val = g->addInput(std::string("input_") + std::to_string(i));
     in_val->setType(c10::TensorType::get());
     engine_inputs.push_back(in_val);
   }
 
-  auto engine_node = g->create(c10::Symbol::fromQualString("trt::execute_engine"), torch::jit::ArrayRef<torch::jit::Value*>(engine_inputs), num_io.second);
-  g->block()->appendNode(engine_node);
-
-  if (engine_node->outputs().size() > 1) {
-    auto return_tuple_node = g->createTuple(engine_node->outputs());
+  // Create a node that will merge all of the input tensors into a single list argument to the trt::execute_engine op
+  // Creates: prim::ListConstruct(<input tensors>)
+  auto input_list_node = g->createList(c10::TensorType::get(), torch::jit::ArrayRef<torch::jit::Value*>(engine_inputs));
+  g->block()->appendNode(input_list_node);
+
+  // Make a list of inputs to the actual trt::execute_engine op
+  // Note: Ordering of list and then engine is because we can pop off the engine first, which contains all the metadata
+  // needed for execution
+  std::vector<torch::jit::Value*> execute_node_inputs;
+  execute_node_inputs.push_back(input_list_node->outputs()[0]);
+  execute_node_inputs.push_back(engine_node->outputs()[0]);
+
+  // Create the actual execution node trt::execute_engine using the assembled inputs
+  auto execute_node = g->create(c10::Symbol::fromQualString("trt::execute_engine"), torch::jit::ArrayRef<torch::jit::Value*>(execute_node_inputs), 1);
+  g->block()->appendNode(execute_node);
+  execute_node->outputs()[0]->setType(c10::ListType::ofTensors());
+
+  // Create a node to unpack the list into separate tensors; in the case of there being only one tensor, the tensor will be returned,
+  // otherwise they are returned as a tuple of tensors.
+  // Creates: prim::ListUnpack(<engine output>)
+  auto unpack_node = g->createListUnpack(execute_node->outputs()[0], num_io.second);
+  g->block()->appendNode(unpack_node);
+
+  // If there are multiple output tensors from TensorRT we wrap them in a tuple to return
+  if (unpack_node->outputs().size() > 1) {
+    // Creates prim::TupleConstruct(<output tensors>) using outputs of the unpack node
+    auto return_tuple_node = g->createTuple(unpack_node->outputs());
     g->block()->appendNode(return_tuple_node);
+    // Set the output as the produced tuple
     g->registerOutput(return_tuple_node->outputs()[0]);
   } else {
-    g->registerOutput(engine_node->outputs()[0]);
+    // Set the output as the sole output tensor
+    g->registerOutput(unpack_node->outputs()[0]);
   }
 
   LOG_DEBUG(*g << "(AddEngineToGraph)\n");
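
The rewritten AddEngineToGraph above leans on TorchScript's custom-class machinery: `c10::getCustomClassType<c10::intrusive_ptr<T>>()` only resolves for a type `T` that derives from `torch::CustomClassHolder` and has been registered with `torch::class_`, which is also what makes the engine attribute serializable. A self-contained sketch of that registration pattern (`DemoEngine` and its namespace/name are hypothetical stand-ins; TRTorch's real TRTEngine registration lives in core/execution, outside this excerpt):

```cpp
#include <string>
#include <utility>

#include "torch/custom_class.h"

// Stand-in for execution::TRTEngine: custom classes held in IValues must
// derive from torch::CustomClassHolder so they can be reference counted.
struct DemoEngine : torch::CustomClassHolder {
  std::string serialized;
  explicit DemoEngine(std::string s) : serialized(std::move(s)) {}
};

// A one-time static registration like this is what makes
// c10::getCustomClassType<c10::intrusive_ptr<DemoEngine>>() resolvable and
// lets modules carrying such attributes be saved and loaded.
static auto demo_engine_registration =
    torch::class_<DemoEngine>("demo", "DemoEngine")
        .def(torch::init<std::string>());
```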
@@ -110,13 +149,16 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod,
   torch::jit::script::Module new_mod(mod._ivalue()->name() + "_trt");
   std::vector<std::shared_ptr<torch::jit::Graph>> graphs;
   for (const torch::jit::script::Method& method : mod.get_methods()) {
-    auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
-    auto new_g = std::make_shared<torch::jit::Graph>();
-    AddEngineToGraph(new_mod, new_g, engine);
-    auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
-    auto schema = GenerateGraphSchema(new_mod, new_method->name(), new_g);
-    new_mod.type()->addMethod(new_method);
-    new_method->setSchema(schema);
+    // Don't convert hidden methods
+    if (method.name().rfind("_", 0)) {
+      auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
+      auto new_g = std::make_shared<torch::jit::Graph>();
+      AddEngineToGraph(new_mod, new_g, engine);
+      auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
+      auto schema = GenerateGraphSchema(new_mod, new_method->name(), new_g);
+      new_mod.type()->addMethod(new_method);
+      new_method->setSchema(schema);
+    }
   }
 
   return new_mod;
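
One detail worth spelling out in the new guard: `std::string::rfind(prefix, 0)` can only match at index 0, so it returns 0 (falsy) when the name starts with the prefix and `std::string::npos` (truthy) otherwise, meaning the if body runs only for methods whose names do not begin with an underscore. A standalone check (method names here are illustrative):

```cpp
#include <cassert>
#include <string>

int main() {
  std::string hidden = "_conv_params";  // would be skipped by the guard
  std::string visible = "forward";      // would be compiled

  // rfind(prefix, 0) searches backwards starting at index 0, so the only
  // possible hit is a match at the very start of the string.
  assert(hidden.rfind("_", 0) == 0);                   // prefix match: 0, falsy
  assert(visible.rfind("_", 0) == std::string::npos);  // no match: npos, truthy
  return 0;
}
```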

core/conversion/BUILD

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@ cc_library(
     ],
     deps = [
         "@tensorrt//:nvinfer",
+        "//core/conversion/var",
         "//core/conversion/conversionctx",
         "//core/conversion/converters",
         "//core/conversion/evaluators",

core/conversion/InterfaceTypes.cpp

Lines changed: 2 additions & 1 deletion
@@ -34,7 +34,7 @@ InputRange::InputRange(std::vector<int64_t> d) {
   min = util::toDims(d);
   max = util::toDims(d);
   input_shape = util::toDims(d);
-
+  input_is_dynamic = false;
 }
 
 
@@ -67,6 +67,7 @@ InputRange::InputRange(std::vector<int64_t> min_shape, std::vector<int64_t> opt_
     dim.insert(max_shape[i]);
     if (dim.size() != 1) {
       dyn_shape.push_back(-1);
+      input_is_dynamic = true;
     } else {
       dyn_shape.push_back(opt_shape[i]);
     }
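
To make the second hunk concrete: for each dimension the constructor inserts min/opt/max into a set, and a set size other than one means the three profiles disagree, so the dimension becomes -1 (dynamic) and `input_is_dynamic` flips to true. A standalone sketch of just that rule, using made-up shapes where only the batch dimension varies:

```cpp
#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

int main() {
  // Hypothetical optimization profile: only the batch dimension varies.
  std::vector<int64_t> min_shape = {1, 3, 224, 224};
  std::vector<int64_t> opt_shape = {8, 3, 224, 224};
  std::vector<int64_t> max_shape = {32, 3, 224, 224};

  bool input_is_dynamic = false;
  std::vector<int64_t> dyn_shape;
  for (size_t i = 0; i < opt_shape.size(); i++) {
    // Mirrors the constructor: a singleton set means min == opt == max.
    std::set<int64_t> dim = {min_shape[i], opt_shape[i], max_shape[i]};
    if (dim.size() != 1) {
      dyn_shape.push_back(-1);  // profiles disagree: dimension is dynamic
      input_is_dynamic = true;
    } else {
      dyn_shape.push_back(opt_shape[i]);
    }
  }

  // Prints: dyn_shape = [ -1 3 224 224 ], input_is_dynamic = 1
  std::cout << "dyn_shape = [";
  for (int64_t d : dyn_shape) std::cout << " " << d;
  std::cout << " ], input_is_dynamic = " << input_is_dynamic << "\n";
  return 0;
}
```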
