Skip to content

Commit c055c3c

Browse files
authored
Merge branch 'master' into 810
2 parents 9bb2684 + b652045 commit c055c3c

File tree

182 files changed

+39365
-26006
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

182 files changed

+39365
-26006
lines changed

.bazelrc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ build:pre_cxx11_abi --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0"
3535
build:pre_cxx11_abi --linkopt="-D_GLIBCXX_USE_CXX11_ABI=0"
3636
build:pre_cxx11_abi --define=abi=pre_cxx11_abi
3737

38-
build:ci_testing --define=torchtrt_src=pre_built --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0"
39-
build:use_precompiled_torchtrt --define=torchtrt_src=pre_built
38+
build:ci_testing --define=torchtrt_src=prebuilt --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0"
39+
build:use_precompiled_torchtrt --define=torchtrt_src=prebuilt
4040

41-
test:ci_testing --define=torchtrt_src=pre_built --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0"
42-
test:use_precompiled_torchtrt --define=torchtrt_src=pre_built
41+
test:ci_testing --define=torchtrt_src=prebuilt --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0"
42+
test:use_precompiled_torchtrt --define=torchtrt_src=prebuilt

.github/workflows/docgen.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ jobs:
1212
build-docs:
1313
runs-on: ubuntu-18.04
1414
container:
15-
image: docker.pkg.github.com/nvidia/torch-tensorrt/docgen:latest
15+
image: ghcr.io/nvidia/torch-tensorrt/docgen:latest
1616
credentials:
17-
username: $GITHUB_ACTOR
18-
password: ${{secrets.GITHUB_TOKEN}}
17+
username: ${{ github.actor }}
18+
password: ${{ secrets.GITHUB_TOKEN }}
1919
steps:
2020
- name: Reclaim space
2121
run: |
@@ -36,6 +36,7 @@ jobs:
3636
- name: Generate New Docs
3737
run: |
3838
cd docsrc
39+
pip3 install -r requirements.txt
3940
python3 -c "import torch_tensorrt; print(torch_tensorrt.__version__)"
4041
make html
4142
- uses: stefanzweifel/git-auto-commit-action@v4

.github/workflows/linter.yml

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,32 @@ jobs:
88
cpp-linting:
99
name: C++ Linting
1010
runs-on: ubuntu-latest
11+
permissions:
12+
actions: write
13+
checks: write
14+
contents: write
15+
deployments: none
16+
id-token: write
17+
issues: write
18+
discussions: write
19+
packages: write
20+
pull-requests: write
21+
repository-projects: none
22+
security-events: none
23+
statuses: write
1124
steps:
1225
- uses: actions/checkout@v2
1326
with:
1427
ref: ${{ github.event.pull_request.head.sha }}
15-
- name: Docker Login
16-
run: docker login docker.pkg.github.com -u $GITHUB_ACTOR -p $GITHUB_TOKEN
17-
env:
18-
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
28+
- name: Docker login
29+
uses: docker/login-action@v1
30+
with:
31+
registry: ghcr.io
32+
username: ${{ github.actor }}
33+
password: ${{ secrets.GITHUB_TOKEN }}
1934
- name: Run image
2035
run: |
21-
docker run -it -d --name cpplinter -e GITHUB_TOKEN=$GITHUB_TOKEN -v $GITHUB_WORKSPACE:/workspace -v $GITHUB_EVENT_PATH:/GITHUB_EVENT.json -w /workspace docker.pkg.github.com/nvidia/torch-tensorrt/docgen:latest
36+
docker run -it -d --name cpplinter -e GITHUB_TOKEN=$GITHUB_TOKEN -v $GITHUB_WORKSPACE:/workspace -v $GITHUB_EVENT_PATH:/GITHUB_EVENT.json -w /workspace ghcr.io/nvidia/torch-tensorrt/docgen:latest
2237
docker exec cpplinter bash -c "cp /workspace/docker/WORKSPACE.docs /workspace/WORKSPACE"
2338
env:
2439
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
@@ -30,17 +45,32 @@ jobs:
3045
py-linting:
3146
name: Python Linting
3247
runs-on: ubuntu-latest
48+
permissions:
49+
actions: write
50+
checks: write
51+
contents: write
52+
deployments: none
53+
id-token: write
54+
issues: write
55+
discussions: write
56+
packages: write
57+
pull-requests: write
58+
repository-projects: none
59+
security-events: none
60+
statuses: write
3361
steps:
3462
- uses: actions/checkout@v2
3563
with:
3664
ref: ${{ github.event.pull_request.head.sha }}
37-
- name: Docker Login
38-
run: docker login docker.pkg.github.com -u $GITHUB_ACTOR -p $GITHUB_TOKEN
39-
env:
40-
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
65+
- name: Docker login
66+
uses: docker/login-action@v1
67+
with:
68+
registry: ghcr.io
69+
username: ${{ github.actor }}
70+
password: ${{ secrets.GITHUB_TOKEN }}
4171
- name: Run image
4272
run: |
43-
docker run -it -d --name pylinter -e GITHUB_TOKEN=$GITHUB_TOKEN -v $GITHUB_WORKSPACE:/workspace -v $GITHUB_EVENT_PATH:/GITHUB_EVENT.json -w /workspace docker.pkg.github.com/nvidia/torch-tensorrt/docgen:latest
73+
docker run -it -d --name pylinter -e GITHUB_TOKEN=$GITHUB_TOKEN -v $GITHUB_WORKSPACE:/workspace -v $GITHUB_EVENT_PATH:/GITHUB_EVENT.json -w /workspace ghcr.io/nvidia/torch-tensorrt/docgen:latest
4474
docker exec pylinter bash -c "cp /workspace/docker/WORKSPACE.docs /workspace/WORKSPACE"
4575
env:
4676
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
77
Torch-TensorRT is a compiler for PyTorch/TorchScript, targeting NVIDIA GPUs via NVIDIA's TensorRT Deep Learning Optimizer and Runtime. Unlike PyTorch's Just-In-Time (JIT) compiler, Torch-TensorRT is an Ahead-of-Time (AOT) compiler, meaning that before you deploy your TorchScript code, you go through an explicit compile step to convert a standard TorchScript program into a module targeting a TensorRT engine. Torch-TensorRT operates as a PyTorch extension and compiles modules that integrate into the JIT runtime seamlessly. After compilation, using the optimized graph should feel no different than running a TorchScript module. You also have access to TensorRT's suite of configurations at compile time, so you are able to specify operating precision (FP32/FP16/INT8) and other settings for your module.
88

9-
More Information / System Architecture:
10-
11-
- [GTC 2020 Talk](https://developer.nvidia.com/gtc/2020/video/s21671)
12-
9+
Resources:
10+
- [Documentation](https://nvidia.github.io/Torch-TensorRT/)
11+
- [Torch-TensorRT Explained in 2 minutes!](https://www.youtube.com/watch?v=TU5BMU6iYZ0&ab_channel=NVIDIADeveloper)
12+
- [Comprehensive Discussion (GTC Event)](https://www.nvidia.com/en-us/on-demand/session/gtcfall21-a31107/)
13+
- [Pre-built Docker Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). To use this container, make an NGC account and sign in to NVIDIA's registry with an API key. Refer to [this guide](https://docs.nvidia.com/ngc/ngc-catalog-user-guide/index.html#registering-activating-ngc-account) for instructions.
1314

1415

1516
## Building a docker container for Torch-TensorRT

core/conversion/converters/impl/activation.cpp

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -166,39 +166,7 @@ auto acthardtanh TORCHTRT_UNUSED =
166166
auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
167167
LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
168168
return true;
169-
}})
170-
.pattern({"aten::gelu(Tensor self) -> (Tensor)",
171-
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
172-
auto in = args[0].ITensorOrFreeze(ctx);
173-
nvinfer1::DataType type = in->getType();
174-
TORCHTRT_CHECK(
175-
type == nvinfer1::DataType::kFLOAT || type == nvinfer1::DataType::kHALF,
176-
"gelu only supports kFLOAT and kHALF");
177-
std::string pluginName = "CustomGeluPluginDynamic";
178-
nvinfer1::PluginFieldCollection fc;
179-
std::vector<nvinfer1::PluginField> f;
180-
// REVIEW is this right?
181-
int type_id = ctx->settings.enabled_precisions.find(nvinfer1::DataType::kHALF) ==
182-
ctx->settings.enabled_precisions.end()
183-
? 0
184-
: 1; // Integer encoding the DataType (0: FP32, 1: FP16)
185-
f.emplace_back(nvinfer1::PluginField("type_id", &type_id, nvinfer1::PluginFieldType::kINT32, 1));
186-
fc.nbFields = f.size();
187-
fc.fields = f.data();
188-
189-
auto creator = getPluginRegistry()->getPluginCreator("CustomGeluPluginDynamic", "1", "");
190-
auto gelu_plugin = creator->createPlugin("gelu", &fc);
191-
192-
TORCHTRT_CHECK(gelu_plugin, "Unable to create gelu plugin from TensorRT plugin registry" << *n);
193-
auto new_layer =
194-
ctx->net->addPluginV2(reinterpret_cast<nvinfer1::ITensor* const*>(&in), 1, *gelu_plugin);
195-
new_layer->setName(util::node_info(n).c_str());
196-
auto out_tensor = new_layer->getOutput(0);
197-
out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out_tensor);
198-
LOG_DEBUG("Output shape: " << out_tensor->getDimensions());
199-
return true;
200169
}});
201-
202170
} // namespace
203171
} // namespace impl
204172
} // namespace converters

core/conversion/converters/impl/batch_norm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ auto batch_norm_registrations TORCHTRT_UNUSED =
7171
LOG_DEBUG("momentum disregarded");
7272
LOG_DEBUG("training disregarded");
7373
LOG_DEBUG("cudnn disregarded");
74-
TORCHTRT_CHECK(orig_shape.nbDims > 2, "Unable to create batch normalization layer from node: " << *n);
74+
TORCHTRT_CHECK(orig_shape.nbDims >= 2, "Unable to create batch normalization layer from node: " << *n);
7575

7676
// Expand spatial dims from 1D to 2D if needed
7777
bool expandDims = (orig_shape.nbDims < 4);

core/conversion/evaluators/aten.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,23 @@ auto aten_registrations TORCHTRT_UNUSED =
706706
},
707707
EvalOptions().validSchemas({
708708
R"SIG(aten::copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> (Tensor(a!)))SIG",
709-
})});
709+
})})
710+
.evaluator({c10::Symbol::fromQualString("aten::format"),
711+
[](const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
712+
int64_t input_num = n->inputs().size();
713+
std::vector<torch::jit::IValue> stack;
714+
for (auto v : n->inputs()) {
715+
stack.push_back(*args.at(v).IValue());
716+
}
717+
stack.push_back(input_num);
718+
auto& ops = torch::jit::getAllOperatorsFor(c10::Symbol::fromQualString("aten::format"));
719+
auto& aten_format = ops.front();
720+
aten_format->getOperation()(stack);
721+
std::string output;
722+
torch::jit::pop(stack, output);
723+
return output;
724+
},
725+
EvalOptions().validSchemas({"aten::format(str self, ...) -> (str)"})});
710726
} // namespace
711727
} // namespace evaluators
712728
} // namespace conversion

core/lowering/lowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
4343
passes::UnpackHardSwish(g);
4444
passes::EliminateExceptionOrPassPattern(g);
4545
passes::ReduceToOperation(g);
46+
passes::ReduceGelu(g);
4647
passes::RemoveContiguous(g);
48+
passes::ViewToReshape(g);
4749
passes::RemoveDropout(g);
4850
passes::LinearToAddMM(g);
4951
passes::Conv1DToConvolution(g);

core/lowering/passes/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ cc_library(
1717
"module_fallback.cpp",
1818
"op_aliasing.cpp",
1919
"reduce_to.cpp",
20+
"reduce_gelu.cpp",
2021
"remove_bn_dim_check.cpp",
2122
"remove_contiguous.cpp",
23+
"view_to_reshape.cpp",
2224
"remove_dropout.cpp",
2325
"remove_nops.cpp",
2426
"silu_to_sigmoid_multiplication.cpp",

core/lowering/passes/fuse_addmm_branches.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct AddMMBranchFusion {
4949
if ((*arm1_start)->kind().toQualString() == std::string("aten::addmm") &&
5050
(*(++arm1_start))->kind() == prim::Return &&
5151
(*arm2_start)->kind().toQualString() == std::string("aten::matmul") &&
52-
(*(++arm2_start))->kind().toQualString() != std::string("aten::add") &&
52+
(*(++arm2_start))->kind().toQualString() == std::string("aten::add") &&
5353
(*(++arm2_start))->kind() == prim::Return) {
5454
// Make sure that block0 is solely just the aten::addmm op and block1 is matmul + add
5555
return true;

0 commit comments

Comments
 (0)