Merge pull request #95 from NVIDIA/int8_mixed_precision_fix

narendasan · web-flow · commit bbcf2caec1fa · 2020-06-11T16:32:49.000-06:00
Enable FP16 mixed precision with Int8
diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp
@@ -13,7 +13,7 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
        << "\n    Operating Precision: " << s.op_precision                                  \
        << "\n    Make Refittable Engine: " << s.refit                                      \
        << "\n    Debuggable Engine: " << s.debug                                           \
-       << "\n    Strict Type: " << s.strict_types                                          \
+       << "\n    Strict Types: " << s.strict_types                                         \
        << "\n    Allow GPU Fallback (if running on DLA): " << s.allow_gpu_fallback         \
        << "\n    Min Timing Iterations: " << s.num_min_timing_iters                        \
        << "\n    Avg Timing Iterations: " << s.num_avg_timing_iters                        \
@@ -51,6 +51,9 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
     case nvinfer1::DataType::kINT8:
         TRTORCH_CHECK(builder->platformHasFastInt8(), "Requested inference in INT8 but platform does support INT8");
         cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
+        if (!settings.strict_types) {
+            cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
+        }
         input_type = nvinfer1::DataType::kFLOAT;
         TRTORCH_CHECK(settings.calibrator != nullptr, "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the ExtraInfo struct with your calibrator");
         cfg->setInt8Calibrator(settings.calibrator);