
Commit a2f6b2c

Breaking Change: Remove the deprecated int8 calibrator related APIs (#3759)
Parent commit: 0d90e6b


68 files changed: +115 −1494 lines

core/conversion/conversion.cpp

Lines changed: 0 additions & 6 deletions
@@ -202,13 +202,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef<const torch::jit::Value*> input
   TORCHTRT_CHECK(
       profile->isValid(),
       "Optimization profile is invalid, please check the input range provided (conversion.AddInputs)");
-
   ctx->cfg->addOptimizationProfile(profile);
-#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)
-  if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) {
-    ctx->cfg->setCalibrationProfile(profile);
-  }
-#endif
 }
 
 void MarkOutputs(ConversionCtx* ctx, at::ArrayRef<const torch::jit::Value*> outputs) {
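
For context, the surviving code path is TensorRT's standard dynamic-shape setup: build an optimization profile, validate it, and register it with the builder config. A minimal standalone sketch of that pattern follows; the input name and shapes are hypothetical, and only standard nvinfer1 calls are used:

#include <cassert>
#include "NvInfer.h"

// Sketch: register a single optimization profile, as the surviving code
// above does. No separate calibration profile is attached for INT8 anymore.
void AddProfile(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* cfg) {
  auto* profile = builder->createOptimizationProfile();
  // "input_0" and the min/opt/max shapes are placeholders for illustration.
  profile->setDimensions("input_0", nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4{1, 3, 224, 224});
  profile->setDimensions("input_0", nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4{8, 3, 224, 224});
  profile->setDimensions("input_0", nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4{32, 3, 224, 224});
  assert(profile->isValid());  // mirrors the TORCHTRT_CHECK above
  cfg->addOptimizationProfile(profile);
}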

core/conversion/conversionctx/ConversionCtx.cpp

Lines changed: 2 additions & 11 deletions
@@ -31,8 +31,7 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
   if (s.device.device_type == nvinfer1::DeviceType::kDLA) {
     os << "\n DLACore: " << s.device.dla_core;
   }
-  os << "\n Engine Capability: " << s.capability \
-     << "\n Calibrator Created: " << (s.calibrator != nullptr);
+  os << "\n Engine Capability: " << s.capability;
   return os;
 }
 // clang-format on
@@ -64,15 +63,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
       cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
       break;
     case nvinfer1::DataType::kINT8:
-      TORCHTRT_CHECK(
-          builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
-      cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
-      if (!settings.calibrator) {
-        LOG_INFO(
-            "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
-      } else {
-        cfg->setInt8Calibrator(settings.calibrator);
-      }
+      LOG_DEBUG("INT8 precision has been enabled, we assume the network has Q/DQ nodes obtained from modelopt");
       break;
     case nvinfer1::DataType::kFLOAT:
       break;
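
The replacement path assumes explicit quantization: scale factors travel with the network as quantize/dequantize node pairs (e.g., exported by NVIDIA Model Optimizer), so the builder no longer needs an IInt8Calibrator to derive them. A rough sketch of one such per-tensor Q/DQ pair at the nvinfer1 level; the scale value is invented, and an existing network and input tensor are assumed:

// Sketch: a per-tensor Q/DQ pair in an explicitly quantized network.
// Assumes an existing nvinfer1::INetworkDefinition* network and ITensor* input.
static float kScale = 0.0125f;  // hypothetical per-tensor scale
nvinfer1::Weights scale_weights{nvinfer1::DataType::kFLOAT, &kScale, 1};
nvinfer1::Dims scalar_dims{};
scalar_dims.nbDims = 0;  // 0-D scale tensor => per-tensor quantization
auto* scale = network->addConstant(scalar_dims, scale_weights)->getOutput(0);
auto* q = network->addQuantize(*input, *scale);               // float -> int8
auto* dq = network->addDequantize(*q->getOutput(0), *scale);  // int8 -> float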

core/conversion/conversionctx/ConversionCtx.h

Lines changed: 0 additions & 1 deletion
@@ -26,7 +26,6 @@ struct BuilderSettings {
   bool allow_shape_tensors = false;
   ir::Device device;
   nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
-  nvinfer1::IInt8Calibrator* calibrator = nullptr;
   uint64_t num_avg_timing_iters = 1;
   uint64_t workspace_size = 0;
   uint64_t dla_sram_size = DLA_SRAM_SIZE;
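
After this removal, requesting INT8 through BuilderSettings is a matter of the precision set alone; there is no calibrator pointer left to populate. A sketch, under the assumption that BuilderSettings carries the enabled_precisions set referenced as ctx->enabled_precisions in conversion.cpp:

// Sketch: INT8 is requested purely via the precision set now.
BuilderSettings settings;
settings.enabled_precisions.insert(nvinfer1::DataType::kINT8);
// settings.calibrator = my_calibrator;  // no longer compiles: field removed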

cpp/BUILD

Lines changed: 0 additions & 2 deletions
@@ -7,14 +7,12 @@ cc_library(
     srcs = [
         "src/compile_spec.cpp",
         "src/logging.cpp",
-        "src/ptq.cpp",
         "src/torch_tensorrt.cpp",
         "src/types.cpp",
     ],
     hdrs = [
         "include/torch_tensorrt/logging.h",
         "include/torch_tensorrt/macros.h",
-        "include/torch_tensorrt/ptq.h",
         "include/torch_tensorrt/torch_tensorrt.h",
     ],
     linkstatic = True,

cpp/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
@@ -4,15 +4,13 @@ add_library(${lib_name} OBJECT)
 set(CXX_SRCS
     "${CMAKE_CURRENT_SOURCE_DIR}/src/compile_spec.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/logging.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/src/ptq.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/torch_tensorrt.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/types.cpp"
 )
 
 set(HEADER_FILES
     "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/logging.h"
     "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/macros.h"
-    "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/ptq.h"
     "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/torch_tensorrt.h"
 )
 
1816

cpp/bin/torchtrtc/fileio.h

Lines changed: 0 additions & 1 deletion
@@ -23,7 +23,6 @@
 #include "torch/torch.h"
 
 #include "torch_tensorrt/logging.h"
-#include "torch_tensorrt/ptq.h"
 #include "torch_tensorrt/torch_tensorrt.h"
 
 namespace torchtrtc {

cpp/bin/torchtrtc/main.cpp

Lines changed: 3 additions & 10 deletions
@@ -7,7 +7,6 @@
 #include "torch/script.h"
 
 #include "torch_tensorrt/logging.h"
-#include "torch_tensorrt/ptq.h"
 #include "torch_tensorrt/torch_tensorrt.h"
 
 #include "accuracy.h"
@@ -335,8 +334,6 @@ int main(int argc, char** argv) {
     calibration_cache_file_path = torchtrtc::fileio::resolve_path(args::get(calibration_cache_file));
   }
 
-  auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path);
-
   compile_settings.require_full_compilation = require_full_compilation;
 
   if (torch_executed_ops || torch_executed_mods) {
@@ -367,13 +364,9 @@ int main(int argc, char** argv) {
     compile_settings.enabled_precisions.insert(torch::kF16);
   } else if (dtype == torchtrt::DataType::kChar) {
     compile_settings.enabled_precisions.insert(torch::kI8);
-    if (calibration_cache_file) {
-      compile_settings.ptq_calibrator = calibrator;
-    } else {
-      torchtrt::logging::log(
-          torchtrt::logging::Level::kINFO,
-          "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
-    }
+    torchtrt::logging::log(
+        torchtrt::logging::Level::kDEBUG,
+        "Int8 precision has been enabled which assumes the network has Q/DQ nodes obtained");
   } else {
     std::stringstream ss;
     ss << "Invalid precision given for enabled kernel precision, options are [ float | float32 | f32 | fp32 | half | float16 | f16 | fp16 | char | int8 | i8 ], found: ";
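
On the public C++ API side, the same shape of change applies: INT8 is enabled through enabled_precisions on a Q/DQ network, with no calibrator attached to the compile spec. A hedged sketch of post-change usage, assuming the existing TorchScript-frontend CompileSpec and compile call; the input shape is hypothetical:

#include "torch_tensorrt/torch_tensorrt.h"

// Sketch: compiling a Q/DQ TorchScript module with INT8 enabled and no
// calibrator; the former spec.ptq_calibrator field is gone along with ptq.h.
torch::jit::Module CompileInt8(torch::jit::Module& mod) {
  auto spec = torch_tensorrt::torchscript::CompileSpec({{1, 3, 224, 224}});
  spec.enabled_precisions.insert(torch_tensorrt::DataType::kChar);  // INT8
  return torch_tensorrt::torchscript::compile(mod, spec);
}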

cpp/bin/torchtrtc/parser_util.h

Lines changed: 0 additions & 1 deletion
@@ -9,7 +9,6 @@
 #include "torch/torch.h"
 
 #include "torch_tensorrt/logging.h"
-#include "torch_tensorrt/ptq.h"
 #include "torch_tensorrt/torch_tensorrt.h"
 
 namespace torchtrtc {

cpp/include/torch_tensorrt/macros.h

Lines changed: 0 additions & 3 deletions
@@ -30,9 +30,6 @@
     STR(TORCH_TENSORRT_MAJOR_VERSION) \
     "." STR(TORCH_TENSORRT_MINOR_VERSION) "." STR(TORCH_TENSORRT_PATCH_VERSION)
 
-#define TORCH_TENSORRT_PTQ_DEPRECATION \
-  [[deprecated( \
-      "Int8 PTQ Calibrator has been deprecated by TensorRT, please plan on porting to a NVIDIA Model Optimizer Toolkit based workflow. See: https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_ptq.html for more details")]]
 // Setup namespace aliases for ease of use
 namespace torch_tensorrt {
 namespace torchscript {}
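
For reference, macros of this shape expand to a standard C++14 [[deprecated]] attribute placed ahead of a declaration, which is how the PTQ entry points emitted their migration warning before being deleted outright. A sketch of the pattern; the macro name and declaration below are illustrative, not the exact ptq.h contents:

// Sketch: how an attribute macro like the removed one is applied.
#define EXAMPLE_PTQ_DEPRECATION \
  [[deprecated("Int8 PTQ calibrators are deprecated; port to a Model Optimizer based workflow")]]

EXAMPLE_PTQ_DEPRECATION
int make_example_calibrator();  // any call site of this declaration now warns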
