Commit 98c797d

Merge remote-tracking branch 'upstream/master'
2 parents 205ab99 + ac4ac5e

185 files changed (+10684 / -1354 lines)


.bazelversion

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+3.2.0

.github/pr-labels.yml

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+"component: api [C++]":
+- cpp/api/**/*
+
+"component: api [Python]":
+- py/api/**/*
+
+"component: conversion":
+- core/conversion/**/*
+
+"component: converters":
+- core/conversion/converters/**/*
+
+"component: evaluators":
+- core/conversion/evaluators/**/*
+
+"component: execution":
+- core/execution/**/*
+
+"component: lowering":
+- core/lowering/**/*

.github/workflows/label.yml

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+# This workflow will triage pull requests and apply a label based on the
+# paths that are modified in the pull request.
+#
+# To use this workflow, you will need to set up a .github/labeler.yml
+# file with configuration. For more information, see:
+# https://github.com/actions/labeler/blob/master/README.md
+
+name: Labeler
+on: [pull_request]
+
+jobs:
+  label:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/labeler@v2
+      with:
+        repo-token: "${{ secrets.GITHUB_TOKEN }}"
+        configuration-path: .github/pr-labels.yml

.github/workflows/stale.yml

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+name: Mark stale issues and pull requests
+
+on:
+  schedule:
+  - cron: "0 0 * * *"
+
+jobs:
+  stale:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/stale@v1
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        stale-issue-message: 'This issue has not seen activity for 30 days, Remove stale label or comment or this will be closed in 5 days'
+        stale-pr-message: 'This PR has not seen activity for 30 days, Remove stale label or comment or this will be closed in 5 days'
+        stale-issue-label: 'No Activity'
+        exempt-issue-labels: 'WIP'
+        stale-pr-label: 'No Activity'
+        exempt-pr-labels: 'WIP'
+        days-before-stale: 30
+        days-before-close: 5

BUILD

Lines changed: 12 additions & 0 deletions
@@ -8,6 +8,8 @@ pkg_tar(
         "//core/conversion:include",
         "//core/conversion/conversionctx:include",
         "//core/conversion/converters:include",
+        "//core/conversion/var:include",
+        "//core/conversion/tensorcontainer:include",
         "//core/conversion/evaluators:include",
         "//core/conversion/converters/impl/plugins:include",
         "//core/execution:include",
@@ -36,6 +38,15 @@ pkg_tar(
 )
 
 
+pkg_tar(
+    name = "bin",
+    package_dir = "bin/",
+    srcs = [
+        "//cpp/trtorchc:trtorchc",
+    ],
+    mode = "0755",
+)
+
 
 pkg_tar(
     name = "libtrtorch",
@@ -46,6 +57,7 @@ pkg_tar(
     ],
     deps = [
         ":lib",
+        ":bin",
         ":include",
         ":include_core",
     ],

README.md

Lines changed: 24 additions & 3 deletions
@@ -23,6 +23,8 @@ compile_settings.op_precision = torch::kFloat;
 auto trt_mod = trtorch::CompileGraph(ts_mod, compile_settings);
 // Run like normal
 auto results = trt_mod.forward({in_tensor});
+// Save module for later
+trt_mod.save("trt_torchscript_module.ts");
 ...
 ```
 
@@ -46,6 +48,7 @@ trt_ts_module = trtorch.compile(torch_script_module, compile_settings)
 
 input_data = input_data.half()
 result = trt_ts_module(input_data)
+torch.jit.save(trt_ts_module, "trt_torchscript_module.ts")
 ```
 
 > Notes on running in lower precisions:
@@ -65,6 +68,7 @@ result = trt_ts_module(input_data)
 
 ### Dependencies
 
+- Bazel 3.2.0
 - Libtorch 1.5.0
 - CUDA 10.2
 - cuDNN 7.6.5
@@ -78,7 +82,24 @@ Releases: https://github.com/NVIDIA/TRTorch/releases
 
 ### Installing Dependencies
 
-You need to start by having CUDA installed on the system, Libtorch will automatically be pulled for you by bazel,
+#### 0. Install Bazel
+
+If you don't have bazel installed, the easiest way is to install bazelisk using the method of your choosing: https://github.com/bazelbuild/bazelisk
+
+Otherwise you can use the following instructions to install binaries: https://docs.bazel.build/versions/master/install.html
+
+Finally, if you need to compile from source (e.g. aarch64, until bazel distributes binaries for the architecture) you can use these instructions:
+
+```sh
+export BAZEL_VERSION=<VERSION>
+mkdir bazel
+cd bazel
+curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip
+unzip bazel-$BAZEL_VERSION-dist.zip
+bash ./compile.sh
+```
+
+You need to start by having CUDA installed on the system; LibTorch will automatically be pulled for you by bazel,
 then you have two options.
 
 #### 1. Building using cuDNN & TensorRT tarball distributions
@@ -90,10 +111,10 @@ then you have two options.
 1. You need to download the tarball distributions of TensorRT and cuDNN from the NVIDIA website.
     - https://developer.nvidia.com/cudnn
     - https://developer.nvidia.com/tensorrt
-2. Place these files in a directory (the directories `thrid_party/distdir/[x86_64-linux-gnu | aarch64-linux-gnu]` exist for this purpose)
+2. Place these files in a directory (the directories `third_party/distdir/[x86_64-linux-gnu | aarch64-linux-gnu]` exist for this purpose)
 3. Compile using:
 ``` shell
-bazel build //:libtrtorch --compilation_mode opt --distdir thrid_party/distdir/[x86_64-linux-gnu | aarch64-linux-gnu]
+bazel build //:libtrtorch --compilation_mode opt --distdir third_party/distdir/[x86_64-linux-gnu | aarch64-linux-gnu]
 ```
 
 #### 2. Building using locally installed cuDNN & TensorRT
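
As a side note on the save/load round trip added in the first two hunks above: the compiled artifact is an ordinary TorchScript file, so it loads back through the standard `torch::jit::load` API. A minimal sketch (not part of this commit; the file name matches the README snippets, the input shape is a made-up placeholder, and libtrtorch must be linked so the `trt::execute_engine` op deserializes correctly):

```cpp
#include <vector>

#include "torch/script.h"

int main() {
  // Deserialize the module saved by trt_mod.save(...) / torch.jit.save(...).
  torch::jit::script::Module mod = torch::jit::load("trt_torchscript_module.ts");

  // Placeholder input; use the shape the engine was actually compiled for.
  std::vector<torch::jit::IValue> inputs;
  inputs.push_back(torch::randn({1, 3, 224, 224}, torch::kCUDA));

  auto out = mod.forward(inputs);
  return 0;
}
```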

core/compiler.cpp

Lines changed: 63 additions & 21 deletions
@@ -6,7 +6,9 @@
 #include "NvInfer.h"
 
 #include "ATen/core/function_schema.h"
+#include "ATen/core/jit_type.h"
 
+#include "torch/custom_class.h"
 #include "torch/csrc/jit/frontend/function_schema_parser.h"
 #include "torch/csrc/jit/ir/ir.h"
 #include "torch/csrc/jit/passes/pass_manager.h"
@@ -40,32 +42,69 @@ c10::FunctionSchema GenerateGraphSchema(torch::jit::script::Module mod, std::str
 
 
 void AddEngineToGraph(torch::jit::script::Module mod, std::shared_ptr<torch::jit::Graph>& g, std::string& serialized_engine) {
-  execution::EngineID uid = execution::RegisterEngineFromSerializedEngine(serialized_engine);
-  auto num_io = execution::GetEngineIO(uid);
-
-  auto self = g->addInput("self.1");
+  auto engine_ptr = c10::make_intrusive<execution::TRTEngine>(mod._ivalue()->name(), serialized_engine);
+  // Get required metadata about the engine out
+  auto num_io = engine_ptr->num_io;
+  auto name = engine_ptr->name;
+
+  // Add the engine as an attribute of the module, this will let the engine be serialized and deserialized
+  mod.register_attribute(
+    name,
+    c10::getCustomClassType<c10::intrusive_ptr<execution::TRTEngine>>(),
+    c10::IValue(std::move(engine_ptr)),
+    false
+  );
+
+  // Add the module as an input into the graph
+  auto self = g->addInput("self_1");
   self->setType(mod.type());
 
-  auto id_val = g->insertConstant(uid);
+  // Start by retrieving the engine from the module attribute list
+  auto engine_node = g->createGetAttr(self, name);
+  g->block()->appendNode(engine_node);
 
+  // Add inputs to the graph corresponding to the number of input tensors expected by the engine
+  // Also store those inputs in a vector so that they can be coalesced into a single list at runtime
   std::vector<torch::jit::Value*> engine_inputs;
-  engine_inputs.push_back(id_val);
-
   for (uint64_t i = 0; i < num_io.first; i++) {
-    auto in_val = g->addInput("");
+    auto in_val = g->addInput(std::string("input_") + std::to_string(i));
     in_val->setType(c10::TensorType::get());
     engine_inputs.push_back(in_val);
   }
 
-  auto engine_node = g->create(c10::Symbol::fromQualString("trt::execute_engine"), torch::jit::ArrayRef<torch::jit::Value*>(engine_inputs), num_io.second);
-  g->block()->appendNode(engine_node);
-
-  if (engine_node->outputs().size() > 1) {
-    auto return_tuple_node = g->createTuple(engine_node->outputs());
+  // Create a node that will merge all of the input tensors into a single list argument to the trt::execute_engine op
+  // Creates: prim::ListConstruct(<input tensors>)
+  auto input_list_node = g->createList(c10::TensorType::get(), torch::jit::ArrayRef<torch::jit::Value*>(engine_inputs));
+  g->block()->appendNode(input_list_node);
+
+  // Make a list of inputs to the actual trt::execute_engine op
+  // Note: Ordering of list and then engine is because we can pop off the engine first, which contains all the metadata
+  // needed for execution
+  std::vector<torch::jit::Value*> execute_node_inputs;
+  execute_node_inputs.push_back(input_list_node->outputs()[0]);
+  execute_node_inputs.push_back(engine_node->outputs()[0]);
+
+  // Create the actual execution node trt::execute_engine using the assembled inputs
+  auto execute_node = g->create(c10::Symbol::fromQualString("trt::execute_engine"), torch::jit::ArrayRef<torch::jit::Value*>(execute_node_inputs), 1);
+  g->block()->appendNode(execute_node);
+  execute_node->outputs()[0]->setType(c10::ListType::ofTensors());
+
+  // Create a node to unpack the list into separate tensors; in the case of there being only one tensor, the tensor will be returned,
+  // otherwise they are returned as a tuple of tensors.
+  // Creates: prim::ListUnpack(<engine output>)
+  auto unpack_node = g->createListUnpack(execute_node->outputs()[0], num_io.second);
+  g->block()->appendNode(unpack_node);
+
+  // If there are multiple output tensors from TensorRT we wrap them in a tuple to return
+  if (unpack_node->outputs().size() > 1) {
+    // Creates prim::TupleConstruct(<output tensors>) using outputs of the unpack node
+    auto return_tuple_node = g->createTuple(unpack_node->outputs());
     g->block()->appendNode(return_tuple_node);
+    // Set the output as the produced tuple
     g->registerOutput(return_tuple_node->outputs()[0]);
   } else {
-    g->registerOutput(engine_node->outputs()[0]);
+    // Set the output as the sole output tensor
+    g->registerOutput(unpack_node->outputs()[0]);
   }
 
   LOG_DEBUG(*g << "(AddEngineToGraph)\n");
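
The rewritten AddEngineToGraph above leans on TorchScript's custom-class machinery: `c10::getCustomClassType<c10::intrusive_ptr<T>>()` only resolves for a type `T` that derives from `torch::CustomClassHolder` and has been registered with `torch::class_`, which is also what makes the engine attribute serializable. A self-contained sketch of that registration pattern (`DemoEngine` and its namespace/name are hypothetical stand-ins; TRTorch's real TRTEngine registration lives in core/execution, outside this excerpt):

```cpp
#include <string>
#include <utility>

#include "torch/custom_class.h"

// Stand-in for execution::TRTEngine: custom classes held in IValues must
// derive from torch::CustomClassHolder so they can be reference counted.
struct DemoEngine : torch::CustomClassHolder {
  std::string serialized;
  explicit DemoEngine(std::string s) : serialized(std::move(s)) {}
};

// A one-time static registration like this is what makes
// c10::getCustomClassType<c10::intrusive_ptr<DemoEngine>>() resolvable and
// lets modules carrying such attributes be saved and loaded.
static auto demo_engine_registration =
    torch::class_<DemoEngine>("demo", "DemoEngine")
        .def(torch::init<std::string>());
```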
@@ -110,13 +149,16 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod,
   torch::jit::script::Module new_mod(mod._ivalue()->name() + "_trt");
   std::vector<std::shared_ptr<torch::jit::Graph>> graphs;
   for (const torch::jit::script::Method& method : mod.get_methods()) {
-    auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
-    auto new_g = std::make_shared<torch::jit::Graph>();
-    AddEngineToGraph(new_mod, new_g, engine);
-    auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
-    auto schema = GenerateGraphSchema(new_mod, new_method->name(), new_g);
-    new_mod.type()->addMethod(new_method);
-    new_method->setSchema(schema);
+    // Don't convert hidden methods
+    if (method.name().rfind("_", 0)) {
+      auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
+      auto new_g = std::make_shared<torch::jit::Graph>();
+      AddEngineToGraph(new_mod, new_g, engine);
+      auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
+      auto schema = GenerateGraphSchema(new_mod, new_method->name(), new_g);
+      new_mod.type()->addMethod(new_method);
+      new_method->setSchema(schema);
+    }
   }
 
   return new_mod;
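
One detail worth spelling out in the new guard: `std::string::rfind(prefix, 0)` can only match at index 0, so it returns 0 (falsy) when the name starts with the prefix and `std::string::npos` (truthy) otherwise, meaning the if body runs only for methods whose names do not begin with an underscore. A standalone check (method names here are illustrative):

```cpp
#include <cassert>
#include <string>

int main() {
  std::string hidden = "_conv_params";  // would be skipped by the guard
  std::string visible = "forward";      // would be compiled

  // rfind(prefix, 0) searches backwards starting at index 0, so the only
  // possible hit is a match at the very start of the string.
  assert(hidden.rfind("_", 0) == 0);                   // prefix match: 0, falsy
  assert(visible.rfind("_", 0) == std::string::npos);  // no match: npos, truthy
  return 0;
}
```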

core/conversion/BUILD

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@ cc_library(
     ],
     deps = [
         "@tensorrt//:nvinfer",
+        "//core/conversion/var",
         "//core/conversion/conversionctx",
         "//core/conversion/converters",
         "//core/conversion/evaluators",

core/conversion/InterfaceTypes.cpp

Lines changed: 2 additions & 1 deletion
@@ -34,7 +34,7 @@ InputRange::InputRange(std::vector<int64_t> d) {
   min = util::toDims(d);
   max = util::toDims(d);
   input_shape = util::toDims(d);
-
+  input_is_dynamic = false;
 }
 
 
@@ -67,6 +67,7 @@ InputRange::InputRange(std::vector<int64_t> min_shape, std::vector<int64_t> opt_
     dim.insert(max_shape[i]);
     if (dim.size() != 1) {
       dyn_shape.push_back(-1);
+      input_is_dynamic = true;
     } else {
       dyn_shape.push_back(opt_shape[i]);
     }
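
To make the second hunk concrete: for each dimension the constructor inserts min/opt/max into a set, and a set size other than one means the three profiles disagree, so the dimension becomes -1 (dynamic) and `input_is_dynamic` flips to true. A standalone sketch of just that rule, using made-up shapes where only the batch dimension varies:

```cpp
#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

int main() {
  // Hypothetical optimization profile: only the batch dimension varies.
  std::vector<int64_t> min_shape = {1, 3, 224, 224};
  std::vector<int64_t> opt_shape = {8, 3, 224, 224};
  std::vector<int64_t> max_shape = {32, 3, 224, 224};

  bool input_is_dynamic = false;
  std::vector<int64_t> dyn_shape;
  for (size_t i = 0; i < opt_shape.size(); i++) {
    // Mirrors the constructor: a singleton set means min == opt == max.
    std::set<int64_t> dim = {min_shape[i], opt_shape[i], max_shape[i]};
    if (dim.size() != 1) {
      dyn_shape.push_back(-1);  // profiles disagree: dimension is dynamic
      input_is_dynamic = true;
    } else {
      dyn_shape.push_back(opt_shape[i]);
    }
  }

  // Prints: dyn_shape = [ -1 3 224 224 ], input_is_dynamic = 1
  std::cout << "dyn_shape = [";
  for (int64_t d : dyn_shape) std::cout << " " << d;
  std::cout << " ], input_is_dynamic = " << input_is_dynamic << "\n";
  return 0;
}
```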
