Skip to content

Commit b30cbd9

Browse files
committed
refactor: removing the strict_types and max_batch_size apis
BREAKING CHANGE: This commit removes the strict types and max_batch_size apis. We are doing this because the functionality of these APIs in TRT is convoluted and likely to be ignored during building. A replacement for strict types with actual guarantees will be added at a later date. Signed-off-by: Dheeraj Peri <[email protected]>
1 parent 733a4b1 commit b30cbd9

File tree

21 files changed

+3
-91
lines changed

21 files changed

+3
-91
lines changed

WORKSPACE

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ http_archive(
8686
http_archive(
8787
name = "tensorrt",
8888
build_file = "@//third_party/tensorrt/archive:BUILD",
89-
sha256 = "3177435024ff4aa5a6dba8c1ed06ab11cc0e1bf3bb712dfa63a43422f41313f3",
90-
strip_prefix = "TensorRT-8.0.3.4",
89+
sha256 = "da130296ac6636437ff8465812eb55dbab0621747d82dc4fe9b9376f00d214af",
90+
strip_prefix = "TensorRT-8.2.2.1",
9191
urls = [
92-
"https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.0.3/tars/tensorrt-8.0.3.4.linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz",
92+
"https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.2.2.1/tars/tensorrt-8.2.2.1.linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz",
9393
],
9494
)
9595

core/conversion/conversionctx/ConversionCtx.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,12 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
1818
<< "\n Truncate Long and Double: " << s.truncate_long_and_double \
1919
<< "\n Make Refittable Engine: " << s.refit \
2020
<< "\n Debuggable Engine: " << s.debug \
21-
<< "\n Strict Types: " << s.strict_types \
2221
<< "\n GPU ID: " << s.device.gpu_id \
2322
<< "\n Allow GPU Fallback (if running on DLA): " << s.device.allow_gpu_fallback \
2423
<< "\n Min Timing Iterations: " << s.num_min_timing_iters \
2524
<< "\n Avg Timing Iterations: " << s.num_avg_timing_iters \
2625
<< "\n Max Workspace Size: " << s.workspace_size;
2726

28-
if (s.max_batch_size != 0) {
29-
os << "\n Max Batch Size: " << s.max_batch_size;
30-
} else {
31-
os << "\n Max Batch Size: Not set";
32-
}
33-
3427
os << "\n Device Type: " << s.device.device_type \
3528
<< "\n GPU ID: " << s.device.gpu_id;
3629
if (s.device.device_type == nvinfer1::DeviceType::kDLA) {
@@ -107,18 +100,10 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
107100
cfg->setFlag(nvinfer1::BuilderFlag::kDEBUG);
108101
}
109102

110-
if (settings.strict_types) {
111-
cfg->setFlag(nvinfer1::BuilderFlag::kSTRICT_TYPES);
112-
}
113-
114103
if (settings.device.allow_gpu_fallback) {
115104
cfg->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
116105
}
117106

118-
if (settings.max_batch_size != 0) {
119-
builder->setMaxBatchSize(settings.max_batch_size);
120-
}
121-
122107
cfg->setMinTimingIterations(settings.num_min_timing_iters);
123108
cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
124109
cfg->setMaxWorkspaceSize(settings.workspace_size);

core/conversion/conversionctx/ConversionCtx.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,13 @@ struct BuilderSettings {
2929
bool disable_tf32 = false;
3030
bool refit = false;
3131
bool debug = false;
32-
bool strict_types = false;
3332
bool truncate_long_and_double = false;
3433
Device device;
3534
nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
3635
nvinfer1::IInt8Calibrator* calibrator = nullptr;
3736
uint64_t num_min_timing_iters = 2;
3837
uint64_t num_avg_timing_iters = 1;
3938
uint64_t workspace_size = 0;
40-
uint64_t max_batch_size = 0;
4139

4240
BuilderSettings() = default;
4341
BuilderSettings(const BuilderSettings& other) = default;

cpp/bin/torchtrtc/README.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@ OPTIONS:
3131
--i, --info Dumps info messages generated during
3232
compilation onto the console
3333
--build-debuggable-engine Creates a debuggable engine
34-
--use-strict-types Restrict operating type to only use set
35-
operation precision
3634
--allow-gpu-fallback (Only used when targeting DLA
3735
(device-type)) Lets engine run layers on
3836
GPU if they are not supported on DLA
@@ -90,8 +88,6 @@ OPTIONS:
9088
used to select kernels
9189
--workspace-size=[workspace_size] Maximum size of workspace given to
9290
TensorRT
93-
--max-batch-size=[max_batch_size] Maximum batch size (must be >= 1 to be
94-
set, 0 means not set)
9591
-t[threshold],
9692
--threshold=[threshold] Maximum acceptable numerical deviation
9793
from standard torchscript output

cpp/include/torch_tensorrt/torch_tensorrt.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -626,12 +626,6 @@ struct TORCHTRT_API CompileSpec {
626626
*/
627627
bool truncate_long_and_double = false;
628628

629-
/**
630-
* Restrict operating type to only the lowest enabled operation precision
631-
* (enabled_precisions)
632-
*/
633-
bool strict_types = false;
634-
635629
/**
636630
* Target Device
637631
*/
@@ -656,11 +650,6 @@ struct TORCHTRT_API CompileSpec {
656650
*/
657651
uint64_t workspace_size = 0;
658652

659-
/**
660-
* Maximum batch size (must be >= 1 to be set, 0 means not set)
661-
*/
662-
uint64_t max_batch_size = 0;
663-
664653
/**
665654
* Calibration dataloaders for each input for post training quantization
666655
*/

cpp/src/compile_spec.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) {
4040
internal.convert_info.engine_settings.refit = external.refit;
4141
internal.convert_info.engine_settings.debug = external.debug;
4242
internal.convert_info.engine_settings.truncate_long_and_double = external.truncate_long_and_double;
43-
internal.convert_info.engine_settings.strict_types = external.strict_types;
4443
internal.convert_info.engine_settings.device.allow_gpu_fallback = external.device.allow_gpu_fallback;
45-
internal.convert_info.engine_settings.max_batch_size = external.max_batch_size;
4644

4745
TORCHTRT_CHECK(
4846
!(external.require_full_compilation && (external.torch_executed_ops.size() > 0)),

docsrc/tutorials/ptq.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,6 @@ to use ``CacheCalibrator`` to use in INT8 mode.
194194
"inputs": [torch_tensorrt.Input([1, 3, 32, 32])],
195195
"enabled_precisions": {torch.float, torch.half, torch.int8},
196196
"calibrator": calibrator,
197-
"max_batch_size": 32,
198197
}
199198
200199
trt_mod = torch_tensorrt.compile(model, compile_settings)

docsrc/tutorials/torchtrtc.rst

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
3434
--i, --info Dumps info messages generated during
3535
compilation onto the console
3636
--build-debuggable-engine Creates a debuggable engine
37-
--use-strict-types Restrict operating type to only use set
38-
operation precision
3937
--allow-gpu-fallback (Only used when targeting DLA
4038
(device-type)) Lets engine run layers on
4139
GPU if they are not supported on DLA
@@ -93,8 +91,6 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
9391
used to select kernels
9492
--workspace-size=[workspace_size] Maximum size of workspace given to
9593
TensorRT
96-
--max-batch-size=[max_batch_size] Maximum batch size (must be >= 1 to be
97-
set, 0 means not set)
9894
-t[threshold],
9995
--threshold=[threshold] Maximum acceptable numerical deviation
10096
from standard torchscript output

docsrc/tutorials/use_from_pytorch.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ at the documentation for the Torch-TensorRT ``TensorRTCompileSpec`` API.
3838
"enabled_precisions": {torch.float, torch.half},
3939
"refit": False,
4040
"debug": False,
41-
"strict_types": False,
4241
"device": {
4342
"device_type": torch_tensorrt.DeviceType.GPU,
4443
"gpu_id": 0,
@@ -48,7 +47,6 @@ at the documentation for the Torch-TensorRT ``TensorRTCompileSpec`` API.
4847
"capability": torch_tensorrt.EngineCapability.default,
4948
"num_min_timing_iters": 2,
5049
"num_avg_timing_iters": 1,
51-
"max_batch_size": 0,
5250
})
5351
}
5452

examples/int8/ptq/main.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,6 @@ torch::jit::Module compile_int8_model(const std::string& data_dir, torch::jit::M
4949
compile_spec.enabled_precisions.insert(torch::kI8);
5050
/// Use the TensorRT Entropy Calibrator
5151
compile_spec.ptq_calibrator = calibrator;
52-
/// Set max batch size for the engine
53-
compile_spec.max_batch_size = 32;
5452
/// Set a larger workspace
5553
compile_spec.workspace_size = 1 << 28;
5654

0 commit comments

Comments
 (0)