Skip to content

Commit 8156465

Browse files
authored
Merge pull request #407 from inocsin/double_long_ival
feat: support truncate long/double to int/float with option
2 parents b333543 + 5c1bf0c commit 8156465

File tree

9 files changed

+35
-5
lines changed

core/conversion/conversionctx/ConversionCtx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ struct BuilderSettings {
2828
bool refit = false;
2929
bool debug = false;
3030
bool strict_types = false;
31+
bool truncate_long_and_double = false;
3132
Device device;
3233
nvinfer1::EngineCapability capability = nvinfer1::EngineCapability::kDEFAULT;
3334
nvinfer1::IInt8Calibrator* calibrator = nullptr;

core/conversion/var/Var.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ nvinfer1::ITensor* Var::ITensorOrFreeze(ConversionCtx* ctx) {
8989
if (isIValue()) {
9090
LOG_DEBUG(ctx->logger, "Found IValue containing object of type " << *(ptr_.ivalue->type()));
9191
}
92+
9293
TRTORCH_CHECK(
9394
isITensor() || (isIValue() && (ptr_.ivalue->isTensor() || ptr_.ivalue->isCustomClass())),
9495
"Requested either IValue containing a Tensor, or ITensor, however Var type is " << type_name());
@@ -97,11 +98,22 @@ nvinfer1::ITensor* Var::ITensorOrFreeze(ConversionCtx* ctx) {
9798

9899
if (isIValue()) {
99100
if (ptr_.ivalue->isTensor()) {
100-
auto weights = converters::Weights(ctx, ptr_.ivalue->toTensor());
101-
101+
auto weights = converters::Weights();
102+
auto tensor = ptr_.ivalue->toTensor();
103+
if ((tensor.scalar_type() == at::kLong || tensor.scalar_type() == at::kDouble) && !ctx->settings.truncate_long_and_double) {
104+
TRTORCH_THROW_ERROR("Unable to freeze tensor of type Int64/Float64 into constant layer, try to compile model with truncate_long_and_double enabled");
105+
} else if (tensor.scalar_type() == at::kLong && ctx->settings.truncate_long_and_double) {
106+
weights = converters::Weights(ctx, tensor.toType(at::kInt));
107+
LOG_WARNING("Truncating weight (constant in the graph) from Int64 to Int32");
108+
} else if (tensor.scalar_type() == at::kDouble && ctx->settings.truncate_long_and_double) {
109+
weights = converters::Weights(ctx, tensor.toType(at::kFloat));
110+
LOG_WARNING("Truncating weight (constant in the graph) from Float64 to Float32");
111+
} else {
112+
weights = converters::Weights(ctx, tensor);
113+
}
114+
102115
auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
103116
TRTORCH_CHECK(const_layer, "Unable to freeze tensor into constant layer");
104-
105117
out = const_layer->getOutput(0);
106118

107119
std::ostringstream tensor_id;
@@ -119,7 +131,6 @@ nvinfer1::ITensor* Var::ITensorOrFreeze(ConversionCtx* ctx) {
119131
}
120132

121133
LOG_DEBUG("Frozen tensor shape: " << out->getDimensions());
122-
123134
return out;
124135
}
125136

cpp/api/include/trtorch/trtorch.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,11 @@ struct TRTORCH_API CompileSpec {
258258
*/
259259
bool debug = false;
260260

261+
/**
262+
* Truncate long/double type to int/float type
263+
*/
264+
bool truncate_long_and_double = false;
265+
261266
/**
262267
* Restrict operating type to only set default operation precision
263268
* (op_precision)

cpp/api/src/compile_spec.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ core::CompileSpec to_internal_compile_spec(CompileSpec external) {
9292
internal.convert_info.engine_settings.disable_tf32 = external.disable_tf32;
9393
internal.convert_info.engine_settings.refit = external.refit;
9494
internal.convert_info.engine_settings.debug = external.debug;
95+
internal.convert_info.engine_settings.truncate_long_and_double = external.truncate_long_and_double;
9596
internal.convert_info.engine_settings.strict_types = external.strict_types;
9697
internal.convert_info.engine_settings.device.allow_gpu_fallback = external.device.allow_gpu_fallback;
9798
internal.convert_info.engine_settings.max_batch_size = external.max_batch_size;

py/trtorch/_compile_spec.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec:
176176
if "max_batch_size" in compile_spec:
177177
assert type(compile_spec["max_batch_size"]) is int
178178
info.max_batch_size = compile_spec["max_batch_size"]
179+
180+
if "truncate_long_and_double" in compile_spec:
181+
assert type(compile_spec["truncate_long_and_double"]) is bool
182+
info.truncate_long_and_double = compile_spec["truncate_long_and_double"]
179183

180184
return info
181185

@@ -217,6 +221,7 @@ def TensorRTCompileSpec(compile_spec: Dict[str, Any]) -> torch.classes.tensorrt.
217221
"num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels
218222
"workspace_size": 0, # Maximum size of workspace given to TensorRT
219223
"max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set)
224+
"truncate_long_and_double": False, # Truncate long and double into int and float
220225
})
221226
}
222227

@@ -257,6 +262,7 @@ def TensorRTCompileSpec(compile_spec: Dict[str, Any]) -> torch.classes.tensorrt.
257262
backend_spec.set_num_avg_timing_iters(parsed_spec.num_avg_timing_iters)
258263
backend_spec.set_workspace_size(parsed_spec.workspace_size)
259264
backend_spec.set_max_batch_size(parsed_spec.max_batch_size)
265+
backend_spec.set_truncate_long_and_double(parsed_spec.truncate_long_and_double)
260266
backend_spec._set_ptq_calibrator(parsed_spec._get_calibrator_handle())
261267

262268
return backend_spec

py/trtorch/csrc/register_tensorrt_classes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ void RegisterTRTCompileSpec() {
4242
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, trtorch::pyapi::CompileSpec, num_avg_timing_iters);
4343
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, trtorch::pyapi::CompileSpec, workspace_size);
4444
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, trtorch::pyapi::CompileSpec, max_batch_size);
45+
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, trtorch::pyapi::CompileSpec, truncate_long_and_double);
4546
}
4647

4748
struct TRTTSRegistrations {

py/trtorch/csrc/tensorrt_classes.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
108108
info.convert_info.engine_settings.device.gpu_id = device.gpu_id;
109109
info.convert_info.engine_settings.device.dla_core = device.dla_core;
110110
info.convert_info.engine_settings.device.allow_gpu_fallback = device.allow_gpu_fallback;
111+
info.convert_info.engine_settings.truncate_long_and_double = truncate_long_and_double;
111112

112113
info.convert_info.engine_settings.capability = toTRTEngineCapability(capability);
113114
TRTORCH_CHECK(num_min_timing_iters >= 0, "num_min_timing_iters must be 0 or greater");
@@ -143,6 +144,7 @@ std::string CompileSpec::stringify() {
143144
ss << " \"Num Avg Timing Iters\": " << num_avg_timing_iters << std::endl;
144145
ss << " \"Workspace Size\": " << workspace_size << std::endl;
145146
ss << " \"Max Batch Size\": " << max_batch_size << std::endl;
147+
ss << " \"Truncate long and double\": " << truncate_long_and_double << std::endl;
146148
ss << "}";
147149
return ss.str();
148150
}

py/trtorch/csrc/tensorrt_classes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ struct CompileSpec : torch::CustomClassHolder {
115115
ADD_FIELD_GET_SET(num_min_timing_iters, int64_t);
116116
ADD_FIELD_GET_SET(num_avg_timing_iters, int64_t);
117117
ADD_FIELD_GET_SET(workspace_size, int64_t);
118+
ADD_FIELD_GET_SET(truncate_long_and_double, bool);
118119
ADD_FIELD_GET_SET(max_batch_size, int64_t);
119120
ADD_FIELD_GET_SET(device, Device);
120121
ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*);
@@ -126,6 +127,7 @@ struct CompileSpec : torch::CustomClassHolder {
126127
bool refit = false;
127128
bool debug = false;
128129
bool strict_types = false;
130+
bool truncate_long_and_double = false;
129131
Device device;
130132
EngineCapability capability = EngineCapability::kDEFAULT;
131133
int64_t num_min_timing_iters = 2;

py/trtorch/csrc/trtorch_py.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,8 @@ PYBIND11_MODULE(_C, m) {
246246
.def_readwrite("num_min_timing_iters", &CompileSpec::num_min_timing_iters)
247247
.def_readwrite("num_avg_timing_iters", &CompileSpec::num_avg_timing_iters)
248248
.def_readwrite("workspace_size", &CompileSpec::workspace_size)
249-
.def_readwrite("max_batch_size", &CompileSpec::max_batch_size);
249+
.def_readwrite("max_batch_size", &CompileSpec::max_batch_size)
250+
.def_readwrite("truncate_long_and_double", &CompileSpec::truncate_long_and_double);
250251

251252
py::class_<Device>(m, "Device")
252253
.def(py::init<>())

0 commit comments

Comments (0)