Skip to content

Commit 8db194f

Browse files
committed
Merge branch 'master' into arvind/loop_fallback
2 parents cc10876 + a38f0c7 commit 8db194f

File tree

360 files changed

+98501
-1014
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

360 files changed

+98501
-1014
lines changed

.github/pr-labels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
- cpp/trtorchc/**/*
33

44
"component: api [C++]":
5-
- cpp/api/**/*
5+
- cpp/**/*
66

77
"component: api [Python]":
88
- py/**/*

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,4 +568,12 @@ Signed-off-by: Naren Dasan <[email protected]>
568568
Signed-off-by: Naren Dasan <[email protected]>
569569

570570

571+
# 0.4.1 (2021-10-06)
572+
573+
### Bug Fixes
574+
575+
* **//core/lowering:** Fixes module level fallback recursion ([2fc612d](https://github.com/NVIDIA/TRTorch/commit/2fc612d))
576+
* Move some lowering passes to graph level logging ([0266f41](https://github.com/NVIDIA/TRTorch/commit/0266f41))
577+
* **//py:** Fix trtorch.Device alternate constructor options ([ac26841](https://github.com/NVIDIA/TRTorch/commit/ac26841))
578+
571579

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,10 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts")
7878
These are the following dependencies used to verify the testcases. TRTorch can work with other versions, but the tests are not guaranteed to pass.
7979

8080
- Bazel 4.0.0
81-
- Libtorch 1.9.0 (built with CUDA 11.1)
81+
- Libtorch 1.9.1 (built with CUDA 11.1)
8282
- CUDA 11.1 (10.2 on Jetson)
8383
- cuDNN 8.2
84-
- TensorRT 8.0.1.6
84+
- TensorRT 8.0.3.4 (TensorRT 8.0.1.6 on Jetson)
8585

8686
## Prebuilt Binaries and Wheel files
8787

WORKSPACE

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,17 @@ new_local_repository(
5050
http_archive(
5151
name = "libtorch",
5252
build_file = "@//third_party/libtorch:BUILD",
53-
sha256 = "edc12091193ba772db77a6ec14e05cef6da881288fca0dfc89a031f631601f60",
53+
sha256 = "db57b1023fb33768286a98ba22c44cfe03d6ed158bc2dc0ca1d4928ee5f19f60",
5454
strip_prefix = "libtorch",
55-
urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-cxx11-abi-shared-with-deps-1.9.0%2Bcu111.zip"],
55+
urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-cxx11-abi-shared-with-deps-1.9.1%2Bcu111.zip"],
5656
)
5757

5858
http_archive(
5959
name = "libtorch_pre_cxx11_abi",
6060
build_file = "@//third_party/libtorch:BUILD",
61-
sha256 = "af9435fa4b44bb395c1a7645391c00228a72af4305f43a61e9300c0abdbe0819",
61+
sha256 = "5563ca53b2b5342f1ab7eef9baf308f197673663ad5b1458a031c46dd802f413",
6262
strip_prefix = "libtorch",
63-
urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.9.0%2Bcu111.zip"],
63+
urls = ["https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.9.1%2Bcu111.zip"],
6464
)
6565

6666
# Download these tarballs manually from the NVIDIA website
@@ -70,20 +70,20 @@ http_archive(
7070
http_archive(
7171
name = "cudnn",
7272
build_file = "@//third_party/cudnn/archive:BUILD",
73-
sha256 = "39412acd9ef5dd27954b6b9f5df75bd381c5d7ceb7979af6c743a7f4521f9c77",
73+
sha256 = "0e5d2df890b9967efa6619da421310d97323565a79f05a1a8cb9b7165baad0d7",
7474
strip_prefix = "cuda",
7575
urls = [
76-
"https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.2.1.32/11.3_06072021/cudnn-11.3-linux-x64-v8.2.1.32.tgz",
76+
"https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.2.4/11.4_20210831/cudnn-11.4-linux-x64-v8.2.4.15.tgz",
7777
],
7878
)
7979

8080
http_archive(
8181
name = "tensorrt",
8282
build_file = "@//third_party/tensorrt/archive:BUILD",
83-
sha256 = "def6a5ee50bed25a68a9c9e22ec671a8f29ee5414bde47c5767bd279e5596f88",
84-
strip_prefix = "TensorRT-8.0.1.6",
83+
sha256 = "3177435024ff4aa5a6dba8c1ed06ab11cc0e1bf3bb712dfa63a43422f41313f3",
84+
strip_prefix = "TensorRT-8.0.3.4",
8585
urls = [
86-
"https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.0.1/tars/tensorrt-8.0.1.6.linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz",
86+
"https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.0.3/tars/tensorrt-8.0.3.4.linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz",
8787
],
8888
)
8989

core/lowering/lowering.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,9 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
3737
torch::jit::EliminateCommonSubexpression(g);
3838
}
3939
torch::jit::EliminateDeadCode(g);
40-
passes::MarkNodesForFallback(g, true);
40+
if (lower_info.forced_fallback_modules.size() > 0) {
41+
passes::MarkNodesForFallback(g, true);
42+
}
4143
passes::UnpackHardSwish(g);
4244
passes::EliminateExceptionOrPassPattern(g);
4345
passes::ReduceToOperation(g);
@@ -60,12 +62,13 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
6062
LOG_GRAPH(*g);
6163
}
6264

63-
torch::jit::Module LowerModule(
64-
const torch::jit::Module& mod,
65-
std::string method_name,
66-
std::unordered_set<std::string> forced_fallback_modules) {
67-
passes::NotateModuleForFallback(mod, "", method_name, forced_fallback_modules);
68-
LOG_GRAPH("After MLF notation pass: " << *mod.get_method(method_name).graph());
65+
torch::jit::Module LowerModule(const torch::jit::Module& mod, std::string method_name, const LowerInfo& lower_info) {
66+
std::unordered_set<std::string> forced_fallback_modules(
67+
lower_info.forced_fallback_modules.begin(), lower_info.forced_fallback_modules.end());
68+
if (forced_fallback_modules.size() > 0) {
69+
passes::NotateModuleForFallback(mod, "", method_name, forced_fallback_modules);
70+
LOG_GRAPH("After MLF notation pass: " << *mod.get_method(method_name).graph());
71+
}
6972
auto mod_ = torch::jit::freeze_module(mod);
7073
LOG_GRAPH("After freeze: " << *mod_.get_method(method_name).graph());
7174
return mod_;
@@ -77,9 +80,7 @@ std::pair<std::shared_ptr<torch::jit::Graph>, std::vector<torch::jit::IValue>> L
7780
const LowerInfo& lower_info) {
7881
LOG_DEBUG(lower_info);
7982
LOG_GRAPH("Before lowering: " << *mod.get_method(method_name).graph());
80-
std::unordered_set<std::string> forced_fallback_modules(
81-
lower_info.forced_fallback_modules.begin(), lower_info.forced_fallback_modules.end());
82-
auto lowered_mod = lower_info.unfreeze_module ? mod : LowerModule(mod, method_name, forced_fallback_modules);
83+
auto lowered_mod = lower_info.unfreeze_module ? mod : LowerModule(mod, method_name, lower_info);
8384
auto g = lowered_mod.get_method(method_name).graph();
8485

8586
LOG_GRAPH("LibTorch Lowering");

core/lowering/passes/module_fallback.cpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,29 @@ void NotateModuleForFallback(
6161
LOG_GRAPH("Notated graph: " << *g);
6262
}
6363

64-
for (const auto sub_mod : mod.named_children()) {
65-
NotateModuleForFallback(sub_mod.value, sub_mod.name, method_name, forced_fallback_modules);
64+
if (mod.named_children().size() > 0) {
65+
for (const auto n : nodes) {
66+
std::string sub_method_name = "";
67+
if (n->kind() == torch::jit::prim::CallMethod) {
68+
sub_method_name = n->s(c10::Symbol::attr("name"));
69+
auto sub_mod_val = n->input(0);
70+
auto sub_mod_src_n = sub_mod_val->node();
71+
if (!sub_mod_src_n->hasAttributeS("name")) {
72+
LOG_GRAPH("Node: " << util::node_info(sub_mod_src_n) << " manages a module with no name, skipping");
73+
break;
74+
}
75+
auto sub_mod_name = sub_mod_src_n->s(c10::Symbol::attr("name"));
76+
for (const auto sub_mod : mod.named_children()) {
77+
// Theres probably a way to directly access the module we care about
78+
if (sub_mod.name == sub_mod_name) {
79+
LOG_GRAPH(
80+
"Looking at <module>.<method>() next: " << sub_mod_name << "." << sub_method_name
81+
<< "() (lowering.passes.NotateModuleForFallback)");
82+
NotateModuleForFallback(sub_mod.value, sub_mod.name, sub_method_name, forced_fallback_modules);
83+
}
84+
}
85+
}
86+
}
6687
}
6788
}
6889

@@ -74,23 +95,23 @@ void MarkNodesForFallback(std::shared_ptr<torch::jit::Graph>& g, bool delete_del
7495
auto n = *it;
7596
if (!mark.top() && n->kind() == torch::jit::prim::Enter && n->hasAttributeS("compilation_edge")) {
7697
if (n->s(c10::Symbol::attr("compilation_edge")) == "start") {
77-
LOG_DEBUG("Starting to mark new segmented block targeted for torch");
98+
LOG_GRAPH("Starting to mark new segmented block targeted for torch");
7899
mark.push(true);
79100
if (delete_delims) {
80101
it.destroyCurrent();
81102
}
82103
}
83104
} else if (mark.top() && n->kind() == torch::jit::prim::Enter && n->hasAttributeS("compilation_edge")) {
84105
if (n->s(c10::Symbol::attr("compilation_edge")) == "start") {
85-
LOG_DEBUG("Found the start of another segmented block targeted for torch while actively marking a block");
106+
LOG_GRAPH("Found the start of another segmented block targeted for torch while actively marking a block");
86107
mark.push(true);
87108
if (delete_delims) {
88109
it.destroyCurrent();
89110
}
90111
}
91112
} else if (mark.top() && n->kind() == torch::jit::prim::Exit && n->hasAttributeS("compilation_edge")) {
92113
if (n->s(c10::Symbol::attr("compilation_edge")) == "end") {
93-
LOG_DEBUG("Found the end of segmented block targeted for torch while actively marking a block");
114+
LOG_GRAPH("Found the end of segmented block targeted for torch while actively marking a block");
94115
mark.pop();
95116
if (delete_delims) {
96117
it.destroyCurrent();

cpp/include/trtorch/trtorch.h

Lines changed: 13 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ struct TRTORCH_API CompileSpec {
427427
Input(c10::ArrayRef<int64_t> shape, DataType dtype, TensorFormat format = TensorFormat::kContiguous);
428428

429429
/**
430-
* @brief Construct a new Input Range object dynamic input size from
430+
* @brief Construct a new Input spec object dynamic input size from
431431
* c10::ArrayRef (the type produced by tensor.sizes()) for min, opt, and max
432432
* supported sizes. dtype (Expected data type for the input) defaults to PyTorch
433433
* / traditional TRT convection (FP32 for FP32 only, FP16 for FP32 and FP16, FP32 for Int8)
@@ -462,7 +462,7 @@ struct TRTORCH_API CompileSpec {
462462
TensorFormat format = TensorFormat::kContiguous);
463463

464464
/**
465-
* @brief Construct a new Input Range object dynamic input size from
465+
* @brief Construct a new Input spec object dynamic input size from
466466
* c10::ArrayRef (the type produced by tensor.sizes()) for min, opt, and max
467467
* supported sizes. dtype (Expected data type for the input) defaults to PyTorch
468468
* / traditional TRT convection (FP32 for FP32 only, FP16 for FP32 and FP16, FP32 for Int8)
@@ -479,7 +479,7 @@ struct TRTORCH_API CompileSpec {
479479
TensorFormat format = TensorFormat::kContiguous);
480480

481481
/**
482-
* @brief Construct a new Input Range object dynamic input size from
482+
* @brief Construct a new Input spec object dynamic input size from
483483
* c10::ArrayRef (the type produced by tensor.sizes()) for min, opt, and max
484484
* supported sizes
485485
*
@@ -496,6 +496,16 @@ struct TRTORCH_API CompileSpec {
496496
DataType dtype,
497497
TensorFormat format = TensorFormat::kContiguous);
498498

499+
/**
500+
* @brief Construct a new Input spec object using a torch tensor as an example
501+
* The tensor's shape, type and layout inform the spec's values
502+
*
503+
* Note: You cannot set dynamic shape through this method, you must use an alternative constructor
504+
*
505+
* @param tensor Reference tensor to set shape, type and layout
506+
*/
507+
Input(at::Tensor tensor);
508+
499509
bool get_explicit_set_dtype() {
500510
return explicit_set_dtype;
501511
}
@@ -506,64 +516,6 @@ struct TRTORCH_API CompileSpec {
506516
bool explicit_set_dtype;
507517
};
508518

509-
/**
510-
* @brief A struct to hold an input range (used by TensorRT Optimization
511-
* profile)
512-
*
513-
* This struct can either hold a single vector representing an input shape,
514-
* signifying a static input shape or a set of three input shapes representing
515-
* the min, optiminal and max input shapes allowed for the engine.
516-
*/
517-
struct TRTORCH_API InputRange {
518-
/// Minimum acceptable input size into the engine
519-
std::vector<int64_t> min;
520-
/// Optimal input size into the engine (gets best performace)
521-
std::vector<int64_t> opt;
522-
/// Maximum acceptable input size into the engine
523-
std::vector<int64_t> max;
524-
/**
525-
* @brief Construct a new Input Range object for static input size from
526-
* vector
527-
*
528-
* @param opt
529-
*/
530-
[[deprecated("trtorch::CompileSpec::InputRange is being deprecated in favor of trtorch::CompileSpec::Input. trtorch::CompileSpec::InputRange will be removed in TRTorch v0.5.0")]] InputRange(
531-
std::vector<int64_t> opt);
532-
/**
533-
* @brief Construct a new Input Range object static input size from
534-
* c10::ArrayRef (the type produced by tensor.sizes())
535-
*
536-
* @param opt
537-
*/
538-
[[deprecated("trtorch::CompileSpec::InputRange is being deprecated in favor of trtorch::CompileSpec::Input. trtorch::CompileSpec::InputRange will be removed in TRTorch v0.5.0")]] InputRange(
539-
c10::ArrayRef<int64_t> opt);
540-
/**
541-
* @brief Construct a new Input Range object dynamic input size from vectors
542-
* for min, opt, and max supported sizes
543-
*
544-
* @param min
545-
* @param opt
546-
* @param max
547-
*/
548-
[[deprecated("trtorch::CompileSpec::InputRange is being deprecated in favor of trtorch::CompileSpec::Input. trtorch::CompileSpec::InputRange will be removed in TRTorch v0.5.0")]] InputRange(
549-
std::vector<int64_t> min,
550-
std::vector<int64_t> opt,
551-
std::vector<int64_t> max);
552-
/**
553-
* @brief Construct a new Input Range object dynamic input size from
554-
* c10::ArrayRef (the type produced by tensor.sizes()) for min, opt, and max
555-
* supported sizes
556-
*
557-
* @param min
558-
* @param opt
559-
* @param max
560-
*/
561-
[[deprecated("trtorch::CompileSpec::InputRange is being deprecated in favor of trtorch::CompileSpec::Input. trtorch::CompileSpec::InputRange will be removed in TRTorch v0.5.0")]] InputRange(
562-
c10::ArrayRef<int64_t> min,
563-
c10::ArrayRef<int64_t> opt,
564-
c10::ArrayRef<int64_t> max);
565-
};
566-
567519
/**
568520
* @brief A struct to hold fallback info
569521
*/
@@ -596,18 +548,6 @@ struct TRTORCH_API CompileSpec {
596548
TorchFallback(bool enabled, uint64_t min_size) : enabled(enabled), min_block_size(min_size) {}
597549
};
598550

599-
/**
600-
* @brief Construct a new Extra Info object from input ranges.
601-
* Each entry in the vector represents a input and should be provided in call
602-
* order.
603-
*
604-
* Use this constructor if you want to use dynamic shape
605-
*
606-
* @param input_ranges
607-
*/
608-
[[deprecated("trtorch::CompileSpec::CompileSpec(std::vector<InputRange> input_ranges) is being deprecated in favor of trtorch::CompileSpec::CompileSpec(std::vector<Input> inputs). Please use CompileSpec(std::vector<Input> inputs). trtorch::CompileSpec::CompileSpec(std::vector<InputRange> input_ranges) will be removed in TRTorch v0.5.0")]] CompileSpec(
609-
std::vector<InputRange> input_ranges)
610-
: input_ranges(std::move(input_ranges)) {}
611551
/**
612552
* @brief Construct a new Extra Info object
613553
* Convienence constructor to set fixed input size from vectors describing
@@ -657,24 +597,6 @@ struct TRTORCH_API CompileSpec {
657597
*/
658598
std::vector<Input> inputs;
659599

660-
/**
661-
* Sizes for inputs to engine, can either be a single size or a range
662-
* defined by Min, Optimal, Max sizes
663-
*
664-
* Order is should match call order
665-
*/
666-
[[deprecated(
667-
"trtorch::CompileSpec::input_ranges is being deprecated in favor of trtorch::CompileSpec::inputs. trtorch::CompileSpec::input_ranges will be removed in TRTorch v0.5.0")]] std::
668-
vector<InputRange>
669-
input_ranges;
670-
671-
/**
672-
* Default operating precision for the engine
673-
*/
674-
[[deprecated(
675-
"trtorch::CompileSpec::op_precision is being deprecated in favor of trtorch::CompileSpec::enabled_precisions, a set of all enabled precisions to use during compilation, trtorch::CompileSpec::op_precision will be removed in TRTorch v0.5.0")]] DataType
676-
op_precision = DataType::kFloat;
677-
678600
/**
679601
* @brief The set of precisions TensorRT is allowed to use for kernels during compilation
680602
*

0 commit comments

Comments
 (0)