Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions core/conversion/conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,13 +202,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef<const torch::jit::Value*> input
TORCHTRT_CHECK(
profile->isValid(),
"Optimization profile is invalid, please check the input range provided (conversion.AddInputs)");

ctx->cfg->addOptimizationProfile(profile);
#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)
if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) {
ctx->cfg->setCalibrationProfile(profile);
}
#endif
}

void MarkOutputs(ConversionCtx* ctx, at::ArrayRef<const torch::jit::Value*> outputs) {
Expand Down
13 changes: 2 additions & 11 deletions core/conversion/conversionctx/ConversionCtx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
if (s.device.device_type == nvinfer1::DeviceType::kDLA) {
os << "\n DLACore: " << s.device.dla_core;
}
os << "\n Engine Capability: " << s.capability \
<< "\n Calibrator Created: " << (s.calibrator != nullptr);
os << "\n Engine Capability: " << s.capability;
return os;
}
// clang-format on
Expand Down Expand Up @@ -64,15 +63,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
break;
case nvinfer1::DataType::kINT8:
TORCHTRT_CHECK(
builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
if (!settings.calibrator) {
LOG_INFO(
"Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
} else {
cfg->setInt8Calibrator(settings.calibrator);
}
LOG_WARNING("INT8 precision has been enabled, we assume the network has Q/DQ nodes obtained from modelopt");
break;
case nvinfer1::DataType::kFLOAT:
break;
Expand Down
1 change: 0 additions & 1 deletion core/conversion/conversionctx/ConversionCtx.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ struct BuilderSettings {
bool allow_shape_tensors = false;
ir::Device device;
nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
nvinfer1::IInt8Calibrator* calibrator = nullptr;
uint64_t num_avg_timing_iters = 1;
uint64_t workspace_size = 0;
uint64_t dla_sram_size = DLA_SRAM_SIZE;
Expand Down
2 changes: 0 additions & 2 deletions cpp/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@ cc_library(
srcs = [
"src/compile_spec.cpp",
"src/logging.cpp",
"src/ptq.cpp",
"src/torch_tensorrt.cpp",
"src/types.cpp",
],
hdrs = [
"include/torch_tensorrt/logging.h",
"include/torch_tensorrt/macros.h",
"include/torch_tensorrt/ptq.h",
"include/torch_tensorrt/torch_tensorrt.h",
],
linkstatic = True,
Expand Down
2 changes: 0 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@ add_library(${lib_name} OBJECT)
set(CXX_SRCS
"${CMAKE_CURRENT_SOURCE_DIR}/src/compile_spec.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/logging.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ptq.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/torch_tensorrt.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/types.cpp"
)

set(HEADER_FILES
"${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/logging.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/macros.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/ptq.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/torch_tensorrt.h"
)

Expand Down
1 change: 0 additions & 1 deletion cpp/bin/torchtrtc/fileio.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include "torch/torch.h"

#include "torch_tensorrt/logging.h"
#include "torch_tensorrt/ptq.h"
#include "torch_tensorrt/torch_tensorrt.h"

namespace torchtrtc {
Expand Down
13 changes: 3 additions & 10 deletions cpp/bin/torchtrtc/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "torch/script.h"

#include "torch_tensorrt/logging.h"
#include "torch_tensorrt/ptq.h"
#include "torch_tensorrt/torch_tensorrt.h"

#include "accuracy.h"
Expand Down Expand Up @@ -335,8 +334,6 @@ int main(int argc, char** argv) {
calibration_cache_file_path = torchtrtc::fileio::resolve_path(args::get(calibration_cache_file));
}

auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path);

compile_settings.require_full_compilation = require_full_compilation;

if (torch_executed_ops || torch_executed_mods) {
Expand Down Expand Up @@ -367,13 +364,9 @@ int main(int argc, char** argv) {
compile_settings.enabled_precisions.insert(torch::kF16);
} else if (dtype == torchtrt::DataType::kChar) {
compile_settings.enabled_precisions.insert(torch::kI8);
if (calibration_cache_file) {
compile_settings.ptq_calibrator = calibrator;
} else {
torchtrt::logging::log(
torchtrt::logging::Level::kINFO,
"Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
}
torchtrt::logging::log(
torchtrt::logging::Level::kINFO,
"Int8 precision has been enabled which assumes the network has Q/DQ nodes obtained");
} else {
std::stringstream ss;
ss << "Invalid precision given for enabled kernel precision, options are [ float | float32 | f32 | fp32 | half | float16 | f16 | fp16 | char | int8 | i8 ], found: ";
Expand Down
1 change: 0 additions & 1 deletion cpp/bin/torchtrtc/parser_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#include "torch/torch.h"

#include "torch_tensorrt/logging.h"
#include "torch_tensorrt/ptq.h"
#include "torch_tensorrt/torch_tensorrt.h"

namespace torchtrtc {
Expand Down
3 changes: 0 additions & 3 deletions cpp/include/torch_tensorrt/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@
STR(TORCH_TENSORRT_MAJOR_VERSION) \
"." STR(TORCH_TENSORRT_MINOR_VERSION) "." STR(TORCH_TENSORRT_PATCH_VERSION)

#define TORCH_TENSORRT_PTQ_DEPRECATION \
[[deprecated( \
"Int8 PTQ Calibrator has been deprecated by TensorRT, please plan on porting to a NVIDIA Model Optimizer Toolkit based workflow. See: https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_ptq.html for more details")]]
// Setup namespace aliases for ease of use
namespace torch_tensorrt {
namespace torchscript {}
Expand Down
Loading
Loading