@@ -1015,6 +1015,17 @@ InferenceRequest::Normalize()
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
       *input.MutableShape() = input.OriginalShape();
+
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
+      if (input_config->is_shape_tensor()) {
+        // For a shape tensor, mark that the input is a shape tensor.
+        input.SetIsShapeTensor();
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
+      }
     }
   } else {
     // Model does support Triton-style batching so each input tensor
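Note: this hunk and the next rely on a new three-state tensor type on InferenceRequest::Input, replacing the old is_shape_tensor_ boolean. The header changes land in the same commit but are not shown here, so the following is only a minimal sketch of the declarations the diff implies; the accessor bodies are assumptions inferred from the call sites later in this commit.

// Sketch (assumption): declarations implied by this diff; the real ones
// live in infer_request.h, which is not shown in these hunks.
struct Status;  // Triton's status type, elided here.

class Input {
 public:
  // Replaces the former boolean member is_shape_tensor_ with three states.
  enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

  Status SetIsShapeTensor();        // tensor_type_ = SHAPE_TENSOR (defined below)
  Status SetIsNonLinearFormatIo();  // tensor_type_ = NON_LINEAR (defined below)

  // Accessors inferred from their uses later in this commit.
  bool IsShapeTensor() const { return tensor_type_ == TensorType::SHAPE_TENSOR; }
  bool IsNonLinearFormatIo() const { return tensor_type_ == TensorType::NON_LINEAR; }

 private:
  TensorType tensor_type_;
};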
@@ -1024,15 +1035,19 @@ InferenceRequest::Normalize()
     batch_size_ = 0;
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));

       // For a shape tensor, keep the tensor's shape as it is and mark
       // that the input is a shape tensor.
-      const inference::ModelInput* input_config;
-      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
       if (input_config->is_shape_tensor()) {
         *input.MutableShape() = input.OriginalShape();
-        input.SetIsShapeTensor(true);
+        input.SetIsShapeTensor();
         continue;
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input uses
+        // a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }

       if (input.OriginalShape().size() == 0) {
@@ -1182,28 +1197,26 @@ InferenceRequest::Normalize()
 {
   const auto& data_type = input.DType();

-  // FIXME: Skip byte size validation for TensorRT backend because it breaks
-  // shape-size assumption. See DLIS-6805 for proper fix for TRT backend
-  // reformat_free tensors.
-  bool skip_byte_size_check = false;
-  constexpr char trt_prefix[] = "tensorrt_";
-  const std::string& platform = model_raw_->Config().platform();
-  skip_byte_size_check |= (platform.rfind(trt_prefix) == 0);
-
-  if (!skip_byte_size_check) {
+  // Non-linear IO format input byte size validation will be handled in the
+  // TensorRT backend.
+  if (!input.IsNonLinearFormatIo()) {
     TRITONSERVER_MemoryType input_memory_type;
     // Because Triton expects STRING type to be in special format
     // (prepend 4 bytes to specify string length), so need to add all the
     // first 4 bytes for each element to find expected byte size
     if (data_type == inference::DataType::TYPE_STRING) {
       RETURN_IF_ERROR(
           ValidateBytesInputs(input_id, input, &input_memory_type));
+
       // FIXME: Temporarily skips byte size checks for GPU tensors. See
       // DLIS-6820.
-      skip_byte_size_check |=
-          (input_memory_type == TRITONSERVER_MEMORY_GPU);
     } else {
-      const auto& input_dims = input.ShapeWithBatchDim();
+      // Shape tensor with dynamic batching does not introduce a new
+      // dimension to the tensor but adds an additional value to the 1-D
+      // array.
+      const std::vector<int64_t>& input_dims =
+          input.IsShapeTensor() ? input.OriginalShape()
+                                : input.ShapeWithBatchDim();
       int64_t expected_byte_size = INT_MAX;
       expected_byte_size =
           triton::common::GetByteSize(data_type, input_dims);
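Note: the ternary above is the crux of this hunk. For an ordinary tensor the batch dimension multiplies into the expected byte size, while for a shape tensor batching only appends one value to the 1-D array, so validation must use the original shape. A standalone illustration of that arithmetic follows; ByteSize is a stand-in mimicking what triton::common::GetByteSize does for fixed-size datatypes, not the actual Triton helper.

#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for triton::common::GetByteSize, fixed-size datatypes only.
int64_t ByteSize(int64_t element_size, const std::vector<int64_t>& dims)
{
  int64_t size = element_size;
  for (int64_t d : dims) {
    size *= d;  // assumes wildcard (-1) dims were resolved during Normalize()
  }
  return size;
}

int main()
{
  // Ordinary TYPE_FP32 input [3, 224, 224] with batch size 8: the batch
  // dimension is prepended (ShapeWithBatchDim), so the expected byte size
  // scales with the batch.
  std::cout << ByteSize(4, {8, 3, 224, 224}) << "\n";  // 4816896

  // TYPE_INT32 shape tensor: batching appends a value to the 1-D array
  // instead of adding a dimension, so the check uses OriginalShape().
  std::cout << ByteSize(4, {3}) << "\n";  // 12
}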
@@ -1506,7 +1519,7 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
 // Input
 //
 InferenceRequest::Input::Input()
-    : is_shape_tensor_(false), data_(new MemoryReference),
+    : tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1515,16 +1528,17 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }

 InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const std::vector<int64_t>& shape)
     : name_(name), datatype_(datatype), original_shape_(shape),
-      is_shape_tensor_(false), data_(new MemoryReference),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1540,9 +1554,16 @@ InferenceRequest::Input::SetMetadata(
 }

 Status
-InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
+InferenceRequest::Input::SetIsShapeTensor()
+{
+  tensor_type_ = TensorType::SHAPE_TENSOR;
+  return Status::Success;
+}
+
+Status
+InferenceRequest::Input::SetIsNonLinearFormatIo()
 {
-  is_shape_tensor_ = is_shape_tensor;
+  tensor_type_ = TensorType::NON_LINEAR;
   return Status::Success;
 }

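Note: one consequence of funneling both setters into a single tensor_type_, which the constructors above default to TensorType::TENSOR, is that the states are mutually exclusive by construction, a guarantee the old pair of independent flags could not give. A small self-contained check of that invariant follows; InputState is a hypothetical reduction of the class, not the real InferenceRequest::Input, whose setters also return Status::Success.

#include <cassert>

// Hypothetical reduction of InferenceRequest::Input's new state.
enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

struct InputState {
  TensorType tensor_type_ = TensorType::TENSOR;  // matches the constructors above
  void SetIsShapeTensor() { tensor_type_ = TensorType::SHAPE_TENSOR; }
  void SetIsNonLinearFormatIo() { tensor_type_ = TensorType::NON_LINEAR; }
};

int main()
{
  InputState input;
  input.SetIsShapeTensor();
  // A single enum cannot hold both states at once, unlike two booleans, so
  // an input is never reported as both a shape tensor and non-linear format.
  assert(input.tensor_type_ == TensorType::SHAPE_TENSOR);
  input.SetIsNonLinearFormatIo();
  assert(input.tensor_type_ == TensorType::NON_LINEAR);
}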