Skip to content

Commit d85c327

Browse files
committed
Merge branch 'master' into trt_8.4ga
2 parents a64956e + 5b03083 commit d85c327

File tree

266 files changed

+3057
-989
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

266 files changed

+3057
-989
lines changed

.circleci/config.yml

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Use the latest 2.1 version of CircleCI pipeline process engine.
2+
# See: https://circleci.com/docs/2.0/configuration-reference
3+
version: 2.1
4+
5+
# Define a job to be invoked later in a workflow.
6+
# See: https://circleci.com/docs/2.0/configuration-reference/#jobs
7+
jobs:
8+
build:
9+
machine:
10+
# Primary container image where all steps run.
11+
# image: nvcr.io/nvidia/tensorrt:22.01-py3 # does not work with customized image
12+
# https://circleci.com/docs/2.0/configuration-reference#available-linux-gpu-images
13+
image: ubuntu-2004-cuda-11.4:202110-01
14+
resource_class: gpu.nvidia.large
15+
steps:
16+
- checkout
17+
- run:
18+
name: install cudnn + tensorrt + bazel
19+
command: |
20+
cd ~
21+
OS=ubuntu2004
22+
CUDNN_VERSION=8.2.1.*-1+cuda11.3
23+
TRT_VERSION=8.2.4-1+cuda11.4
24+
BAZEL_VERSION=5.1.1
25+
26+
wget https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
27+
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
28+
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/7fa2af80.pub
29+
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 536F8F1DE80F6A35
30+
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC
31+
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/ /"
32+
sudo apt-get update
33+
sudo apt-get install libcudnn8=${CUDNN_VERSION}
34+
sudo apt-get install libcudnn8-dev=${CUDNN_VERSION}
35+
36+
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/3bf863cc.pub
37+
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/ /"
38+
sudo apt-get update
39+
40+
sudo apt-get install libnvinfer8=${TRT_VERSION} libnvonnxparsers8=${TRT_VERSION} libnvparsers8=${TRT_VERSION} libnvinfer-plugin8=${TRT_VERSION} libnvinfer-dev=${TRT_VERSION} libnvonnxparsers-dev=${TRT_VERSION} libnvparsers-dev=${TRT_VERSION} libnvinfer-plugin-dev=${TRT_VERSION} python3-libnvinfer=${TRT_VERSION}
41+
# check available version, apt list libnvinfer8 -a
42+
sudo wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-x86_64 -O /usr/bin/bazel
43+
sudo chmod a+x /usr/bin/bazel
44+
45+
- run:
46+
name: set up python environment
47+
command: |
48+
pip3 install nvidia-pyindex
49+
pip3 install nvidia-tensorrt==8.2.4.2
50+
pip3 install --pre torch==1.13.0.dev20220621 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu113
51+
pip3 install pytest parameterized expecttest
52+
# install torch_tensorrt
53+
mv WORKSPACE.ci WORKSPACE
54+
cd py
55+
python3 setup.py install
56+
57+
# install fx2trt
58+
# cd py/torch_tensorrt/fx/setup
59+
# python3 setup.py install
60+
- run:
61+
name: run fx2trt tests
62+
command: |
63+
# one fix pending to enable below
64+
# cd py/torch_tensorrt/fx/test
65+
# pytest $(find . -name '*.py' | grep -v test_dispatch* | grep -v test_setitem*)
66+
67+
cd py/torch_tensorrt/fx/test
68+
pushd converters/acc_op
69+
pytest
70+
popd
71+
pushd passes
72+
list_passes=$(ls | grep -v test_setitem*)
73+
pytest $list_passes
74+
popd
75+
pushd core
76+
pytest
77+
popd
78+
# pushd quant
79+
# pytest
80+
# popd
81+
pushd tools
82+
pytest
83+
popd
84+
pushd trt_lower
85+
pytest
86+
popd
87+
pushd tracer
88+
list_tracer=$(ls | grep -v test_dispatch_*)
89+
pytest $list_tracer
90+
popd
91+
# Invoke jobs via workflows
92+
# See: https://circleci.com/docs/2.0/configuration-reference/#workflows
93+
workflows:
94+
build_run:
95+
jobs:
96+
- build

.github/code-owners.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,5 +121,6 @@
121121

122122
"component: fx":
123123
- "frank-wei"
124+
- "yinghai"
124125
- "842974287"
125126
- "wushirong"

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
### Developing Torch-TensorRT
44

5-
Do try to fill an issue with your feature or bug before filling a PR (op support is generally an exception as long as you provide tests to prove functionality). There is also a backlog (https://github.com/NVIDIA/Torch-TensorRT/issues) of issues which are tagged with the area of focus, a coarse priority level and whether the issue may be accessible to new contributors. Let us know if you are interested in working on a issue. We are happy to provide guidance and mentorship for new contributors. Though note, there is no claiming of issues, we prefer getting working code quickly vs. addressing concerns about "wasted work".
5+
Do try to file an issue with your feature or bug before filing a PR (op support is generally an exception as long as you provide tests to prove functionality). There is also a backlog (https://github.com/pytorch/TensorRT/issues) of issues which are tagged with the area of focus, a coarse priority level and whether the issue may be accessible to new contributors. Let us know if you are interested in working on an issue. We are happy to provide guidance and mentorship for new contributors. Though note, there is no claiming of issues, we prefer getting working code quickly vs. addressing concerns about "wasted work".
66

77
#### Communication
88

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ These are the following dependencies used to verify the testcases. Torch-TensorR
118118

119119
## Prebuilt Binaries and Wheel files
120120

121-
Releases: https://github.com/NVIDIA/Torch-TensorRT/releases
121+
Releases: https://github.com/pytorch/TensorRT/releases
122122

123123
## Compiling Torch-TensorRT
124124

@@ -212,6 +212,12 @@ new_local_repository(
212212
bazel build //:libtorchtrt --compilation_mode opt
213213
```
214214

215+
### FX path (Python only) installation
216+
If the user plans to try the FX path (Python only) and would like to avoid the bazel build, please follow the steps below.
217+
``` shell
218+
cd py && python3 setup.py install --fx-only
219+
```
220+
215221
### Debug build
216222

217223
``` shell
@@ -291,7 +297,7 @@ Supported Python versions:
291297

292298
### In Torch-TensorRT?
293299

294-
Thanks for wanting to contribute! There are two main ways to handle supporting a new op. Either you can write a converter for the op from scratch and register it in the NodeConverterRegistry or if you can map the op to a set of ops that already have converters you can write a graph rewrite pass which will replace your new op with an equivalent subgraph of supported ops. Its preferred to use graph rewriting because then we do not need to maintain a large library of op converters. Also do look at the various op support trackers in the [issues](https://github.com/NVIDIA/Torch-TensorRT/issues) for information on the support status of various operators.
300+
Thanks for wanting to contribute! There are two main ways to handle supporting a new op. Either you can write a converter for the op from scratch and register it in the NodeConverterRegistry or if you can map the op to a set of ops that already have converters you can write a graph rewrite pass which will replace your new op with an equivalent subgraph of supported ops. It's preferred to use graph rewriting because then we do not need to maintain a large library of op converters. Also do look at the various op support trackers in the [issues](https://github.com/pytorch/TensorRT/issues) for information on the support status of various operators.
295301

296302
### In my application?
297303

WORKSPACE.ci

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
workspace(name = "Torch-TensorRT")
2+
3+
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
4+
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
5+
6+
http_archive(
7+
name = "rules_python",
8+
sha256 = "778197e26c5fbeb07ac2a2c5ae405b30f6cb7ad1f5510ea6fdac03bded96cc6f",
9+
url = "https://github.com/bazelbuild/rules_python/releases/download/0.2.0/rules_python-0.2.0.tar.gz",
10+
)
11+
12+
load("@rules_python//python:pip.bzl", "pip_install")
13+
14+
http_archive(
15+
name = "rules_pkg",
16+
sha256 = "038f1caa773a7e35b3663865ffb003169c6a71dc995e39bf4815792f385d837d",
17+
urls = [
18+
"https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz",
19+
"https://github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz",
20+
],
21+
)
22+
23+
load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies")
24+
25+
rules_pkg_dependencies()
26+
27+
git_repository(
28+
name = "googletest",
29+
commit = "703bd9caab50b139428cea1aaff9974ebee5742e",
30+
remote = "https://github.com/google/googletest",
31+
shallow_since = "1570114335 -0400",
32+
)
33+
34+
# External dependency for torch_tensorrt if you already have precompiled binaries.
35+
local_repository(
36+
name = "torch_tensorrt",
37+
path = "/opt/conda/lib/python3.8/site-packages/torch_tensorrt"
38+
)
39+
40+
# CUDA should be installed on the system locally
41+
new_local_repository(
42+
name = "cuda",
43+
build_file = "@//third_party/cuda:BUILD",
44+
path = "/usr/local/cuda/",
45+
)
46+
47+
new_local_repository(
48+
name = "cublas",
49+
build_file = "@//third_party/cublas:BUILD",
50+
path = "/usr",
51+
)
52+
#############################################################################################################
53+
# Tarballs and fetched dependencies (default - use in cases when building from precompiled bin and tarballs)
54+
#############################################################################################################
55+
56+
#http_archive(
57+
# name = "libtorch",
58+
# build_file = "@//third_party/libtorch:BUILD",
59+
# sha256 = "8d9e829ce9478db4f35bdb7943308cf02e8a2f58cf9bb10f742462c1d57bf287",
60+
# strip_prefix = "libtorch",
61+
# urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.11.0%2Bcu113.zip"],
62+
#)
63+
#
64+
#http_archive(
65+
# name = "libtorch_pre_cxx11_abi",
66+
# build_file = "@//third_party/libtorch:BUILD",
67+
# sha256 = "90159ecce3ff451f3ef3f657493b6c7c96759c3b74bbd70c1695f2ea2f81e1ad",
68+
# strip_prefix = "libtorch",
69+
# urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.11.0%2Bcu113.zip"],
70+
#)
71+
72+
# Download these tarballs manually from the NVIDIA website
73+
# Either place them in the distdir directory in third_party and use the --distdir flag
74+
# or modify the urls to "file:///<PATH TO TARBALL>/<TARBALL NAME>.tar.gz
75+
76+
#http_archive(
77+
# name = "cudnn",
78+
# build_file = "@//third_party/cudnn/archive:BUILD",
79+
# sha256 = "0e5d2df890b9967efa6619da421310d97323565a79f05a1a8cb9b7165baad0d7",
80+
# strip_prefix = "cuda",
81+
# urls = [
82+
# "https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.2.4/11.4_20210831/cudnn-11.4-linux-x64-v8.2.4.15.tgz",
83+
# ],
84+
#)
85+
#
86+
#http_archive(
87+
# name = "tensorrt",
88+
# build_file = "@//third_party/tensorrt/archive:BUILD",
89+
# sha256 = "826180eaaecdf9a7e76116855b9f1f3400ea9b06e66b06a3f6a0747ba6f863ad",
90+
# strip_prefix = "TensorRT-8.2.4.2",
91+
# urls = [
92+
# "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.2.4/tars/tensorrt-8.2.4.2.linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz",
93+
# ],
94+
#)
95+
96+
####################################################################################
97+
# Locally installed dependencies (use in cases of custom dependencies or aarch64)
98+
####################################################################################
99+
100+
# NOTE: In the case you are using just the pre-cxx11-abi path or just the cxx11 abi path
101+
# with your local libtorch, just point deps at the same path to satisfy bazel.
102+
103+
# NOTE: NVIDIA's aarch64 PyTorch (python) wheel file uses the CXX11 ABI unlike PyTorch's standard
104+
# x86_64 python distribution. If using NVIDIA's version just point to the root of the package
105+
# for both versions here and do not use --config=pre-cxx11-abi
106+
107+
new_local_repository(
108+
name = "libtorch",
109+
path = "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch",
110+
build_file = "third_party/libtorch/BUILD"
111+
)
112+
113+
new_local_repository(
114+
name = "libtorch_pre_cxx11_abi",
115+
path = "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch",
116+
build_file = "third_party/libtorch/BUILD"
117+
)
118+
119+
new_local_repository(
120+
name = "cudnn",
121+
path = "/usr/",
122+
build_file = "@//third_party/cudnn/local:BUILD"
123+
)
124+
125+
new_local_repository(
126+
name = "tensorrt",
127+
path = "/usr/",
128+
build_file = "@//third_party/tensorrt/local:BUILD"
129+
)
130+
131+
# #########################################################################
132+
# # Testing Dependencies (optional - comment out on aarch64)
133+
# #########################################################################
134+
# pip_install(
135+
# name = "torch_tensorrt_py_deps",
136+
# requirements = "//py:requirements.txt",
137+
# )
138+
139+
# pip_install(
140+
# name = "py_test_deps",
141+
# requirements = "//tests/py:requirements.txt",
142+
# )
143+
144+
pip_install(
145+
name = "pylinter_deps",
146+
requirements = "//tools/linter:requirements.txt",
147+
)

core/compiler.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,8 @@ void AddIfBlockToGraph(
198198

199199
auto env = [&](torch::jit::Value* v) { return util::getOrAddInputForValue(v, new_g, block_graph_to_new_g); };
200200
new_if_block->cloneFrom(cur_block_graph->block(), env);
201-
if (cur_block_graph->inputs()[0]->type()->str().find("__torch__") != std::string::npos) {
201+
if (cur_block_graph->inputs().size() &&
202+
cur_block_graph->inputs()[0]->type()->str().find("__torch__") != std::string::npos) {
202203
if (new_g->inputs()[0]->type()->str().find("__torch__") == std::string::npos) {
203204
auto self = new_g->insertInput(0, "self_1");
204205
self->setType(cur_block_graph->inputs()[0]->type());
@@ -223,13 +224,14 @@ GraphAndMapping ConstructFallbackGraph(
223224
torch::jit::Block* block,
224225
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> example_tensor_map,
225226
CompileSpec cfg,
226-
ir::StaticParams static_params) {
227+
ir::StaticParams static_params,
228+
std::unordered_map<torch::jit::Node*, int>& fallback_nodes) {
227229
auto convert_cfg = cfg.convert_info;
228230
auto partition_info = cfg.partition_info;
229231

230232
auto new_g = std::make_shared<torch::jit::Graph>();
231233

232-
auto segmented_blocks = partitioning::Partition(block, example_tensor_map, partition_info);
234+
auto segmented_blocks = partitioning::Partition(block, example_tensor_map, partition_info, fallback_nodes);
233235

234236
// the mapping from lowering graph => fallback global graph
235237
std::unordered_map<torch::jit::Value*, torch::jit::Value*> old_to_new_g;
@@ -270,7 +272,7 @@ GraphAndMapping ConstructFallbackGraph(
270272
std::vector<GraphAndMapping> graph_and_mappings;
271273
for (auto cur_block : if_node->blocks()) {
272274
graph_and_mappings.push_back(
273-
ConstructFallbackGraph(new_mod, cur_block, example_tensor_map, cfg, static_params));
275+
ConstructFallbackGraph(new_mod, cur_block, example_tensor_map, cfg, static_params, fallback_nodes));
274276
}
275277
AddIfBlockToGraph(new_g, if_node, graph_and_mappings, old_to_new_g);
276278

@@ -293,7 +295,7 @@ GraphAndMapping ConstructFallbackGraph(
293295
// Set the output as the produced tuple
294296
new_g->registerOutput(return_tuple_node->outputs()[0]);
295297
} else {
296-
if (old_to_new_g.count(block->outputs()[0])) {
298+
if (block->outputs().size() && old_to_new_g.count(block->outputs()[0])) {
297299
new_g->registerOutput(old_to_new_g[block->outputs()[0]]);
298300
}
299301
}
@@ -430,7 +432,9 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
430432
!(cfg.lower_info.forced_fallback_modules.size() == 0 &&
431433
cfg.partition_info.forced_fallback_operators.size() == 0 && isBlockConvertible)) {
432434
auto input_ivalues_map = partitioning::generateRandomInputs(cfg.convert_info.inputs, first_use_types);
433-
auto graph_and_mapping = ConstructFallbackGraph(new_mod, g->block(), input_ivalues_map, cfg, static_params);
435+
std::unordered_map<torch::jit::Node*, int> fallback_nodes;
436+
auto graph_and_mapping =
437+
ConstructFallbackGraph(new_mod, g->block(), input_ivalues_map, cfg, static_params, fallback_nodes);
434438
new_g = graph_and_mapping.first;
435439
LOG_INFO("Segmented Graph: " << *new_g);
436440

core/conversion/converters/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ cc_library(
5454
"NodeConverterRegistry.cpp",
5555
"impl/activation.cpp",
5656
"impl/batch_norm.cpp",
57+
"impl/bitwise.cpp",
5758
"impl/cast.cpp",
5859
"impl/concat.cpp",
5960
"impl/constant.cpp",

0 commit comments

Comments
 (0)