@@ -1016,6 +1016,17 @@ InferenceRequest::Normalize()
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
       *input.MutableShape() = input.OriginalShape();
+
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
+      if (input_config->is_shape_tensor()) {
+        // For a shape tensor, mark that the input is a shape tensor.
+        input.SetIsShapeTensor();
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input
+        // uses a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
+      }
     }
   } else {
     // Model does support Triton-style batching so each input tensor
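Note: the classification added above is repeated, with the same comments, in the batching branch of the next hunk. A minimal sketch of a shared helper the two branches could call instead, assuming only the `inference::ModelInput` accessors and `Input` setters that appear in this diff (the helper name `SetInputTensorType` is hypothetical, not part of the commit):

// Hypothetical helper (illustration only, not part of this commit):
// classify one request input from its model-config entry. Uses the
// is_shape_tensor() / is_non_linear_format_io() proto accessors and the
// SetIsShapeTensor() / SetIsNonLinearFormatIo() setters shown in this diff.
static Status
SetInputTensorType(
    const inference::ModelInput& input_config, InferenceRequest::Input* input)
{
  if (input_config.is_shape_tensor()) {
    // Shape tensors carry shape values rather than data.
    RETURN_IF_ERROR(input->SetIsShapeTensor());
  } else if (input_config.is_non_linear_format_io()) {
    // Byte size validation for non-linear IO format tensors is deferred to
    // the backend (see the validation hunk below).
    RETURN_IF_ERROR(input->SetIsNonLinearFormatIo());
  }
  return Status::Success;
}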
@@ -1025,15 +1036,19 @@ InferenceRequest::Normalize()
     batch_size_ = 0;
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
 
       // For a shape tensor, keep the tensor's shape as it is and mark
       // that the input is a shape tensor.
-      const inference::ModelInput* input_config;
-      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
       if (input_config->is_shape_tensor()) {
         *input.MutableShape() = input.OriginalShape();
-        input.SetIsShapeTensor(true);
+        input.SetIsShapeTensor();
         continue;
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input
+        // uses a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }
 
       if (input.OriginalShape().size() == 0) {
@@ -1183,15 +1198,9 @@ InferenceRequest::Normalize()
   {
     const auto& data_type = input.DType();
 
-    // FIXME: Skip byte size validation for TensorRT backend because it breaks
-    // shape-size assumption. See DLIS-6805 for proper fix for TRT backend
-    // reformat_free tensors.
-    bool skip_byte_size_check = false;
-    constexpr char trt_prefix[] = "tensorrt_";
-    const std::string& platform = model_raw_->Config().platform();
-    skip_byte_size_check |= (platform.rfind(trt_prefix) == 0);
-
-    if (!skip_byte_size_check) {
+    // Non-linear IO format input byte size validation will be handled in the
+    // TensorRT backend.
+    if (!input.IsNonLinearFormatIo()) {
       TRITONSERVER_MemoryType input_memory_type;
       // Because Triton expects STRING type to be in special format
       // (prepend 4 bytes to specify string length), so need to add all the
@@ -1201,10 +1210,13 @@ InferenceRequest::Normalize()
             input_name, input, model_name, &input_memory_type));
         // FIXME: Temporarily skips byte size checks for GPU tensors. See
         // DLIS-6820.
-        skip_byte_size_check |=
-            (input_memory_type == TRITONSERVER_MEMORY_GPU);
       } else {
-        const auto& input_dims = input.ShapeWithBatchDim();
+        // Shape tensor with dynamic batching does not introduce a new
+        // dimension to the tensor but adds an additional value to the 1-D
+        // array.
+        const std::vector<int64_t>& input_dims =
+            input.IsShapeTensor() ? input.OriginalShape()
+                                  : input.ShapeWithBatchDim();
         int64_t expected_byte_size = INT_MAX;
         expected_byte_size =
             triton::common::GetByteSize(data_type, input_dims);
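The ternary above implements the comment's reasoning: batching a regular tensor prepends a new dimension (multiplying the element count), while batching a shape tensor only appends one more value to its 1-D array, so the expected byte size must come from the request's original shape. A standalone arithmetic illustration (plain C++, not Triton code; the dims and batch size are made up):

#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Dense-tensor byte size: element size times the product of the dims.
static int64_t
ByteSize(int64_t elem_byte_size, const std::vector<int64_t>& dims)
{
  return std::accumulate(
      dims.begin(), dims.end(), elem_byte_size, std::multiplies<int64_t>());
}

int
main()
{
  // Regular FP32 input with per-request dims [3, 4] at batch size 8:
  // batching adds a leading dimension, so validation sees [8, 3, 4].
  std::cout << ByteSize(4, {8, 3, 4}) << "\n";  // 384 bytes

  // An INT32 shape tensor holding two values stays 1-D under batching: the
  // batch size becomes one extra value ([2] -> [3]), not a new dimension,
  // so the expected byte size is computed from the original shape [2].
  std::cout << ByteSize(4, {2}) << "\n";  // 8 bytes
  return 0;
}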
@@ -1524,7 +1536,7 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
 // Input
 //
 InferenceRequest::Input::Input()
-    : is_shape_tensor_(false), data_(new MemoryReference),
+    : tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1533,16 +1545,17 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }
 
 InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const std::vector<int64_t>& shape)
     : name_(name), datatype_(datatype), original_shape_(shape),
-      is_shape_tensor_(false), data_(new MemoryReference),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1558,9 +1571,16 @@ InferenceRequest::Input::SetMetadata(
15581571}
15591572
15601573Status
1561- InferenceRequest::Input::SetIsShapeTensor (const bool is_shape_tensor)
1574+ InferenceRequest::Input::SetIsShapeTensor ()
1575+ {
1576+ tensor_type_ = TensorType::SHAPE_TENSOR;
1577+ return Status::Success;
1578+ }
1579+
1580+ Status
1581+ InferenceRequest::Input::SetIsNonLinearFormatIo ()
15621582{
1563- is_shape_tensor_ = is_shape_tensor ;
1583+ tensor_type_ = TensorType::NON_LINEAR ;
15641584 return Status::Success;
15651585}
15661586
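The new setters collapse the old is_shape_tensor_ bool into a single discriminant, so an input is exactly one of: regular tensor, shape tensor, or non-linear IO format. The header is not part of this excerpt; a minimal sketch of the declarations the diff implies, where the enum values and the tensor_type_ member come from the lines above but the accessor bodies are assumptions:

// Sketch of the assumed header-side state (infer_request.h, not shown).
class Input {
 public:
  enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

  bool IsShapeTensor() const
  {
    return tensor_type_ == TensorType::SHAPE_TENSOR;
  }
  bool IsNonLinearFormatIo() const
  {
    return tensor_type_ == TensorType::NON_LINEAR;
  }

 private:
  // Replaces the removed is_shape_tensor_ flag.
  TensorType tensor_type_{TensorType::TENSOR};
};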