
Commit 557af46

Merge branch 'main' of https://github.com/triton-inference-server/core into yinggeh-DLIS-6657-client-input-byte-size-check

2 parents: 73d374e + 9ed1544

5 files changed: +99 -27 lines

python/test/test_api.py

Lines changed: 6 additions & 1 deletion
@@ -70,7 +70,7 @@
         exit_on_error=True,
         strict_model_config=False,
         model_control_mode=tritonserver.ModelControlMode.EXPLICIT,
-        exit_timeout=10,
+        exit_timeout=30,
     )
 
 
@@ -357,6 +357,11 @@ def test_stop(self):
         {
             "backend": "python",
             "parameters": {"decoupled": {"string_value": "False"}},
+            # Keep instance count low for fast startup/cleanup.
+            # Alternatively can use KIND_CPU here, but keeping gpus/count explicit.
+            "instance_group": [
+                {"kind": "KIND_GPU", "gpus": [0], "count": 1}
+            ],
         }
     )
 },

src/infer_request.cc

Lines changed: 41 additions & 21 deletions
@@ -1016,6 +1016,17 @@ InferenceRequest::Normalize()
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
       *input.MutableShape() = input.OriginalShape();
+
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
+      if (input_config->is_shape_tensor()) {
+        // For a shape tensor, mark that the input is a shape tensor.
+        input.SetIsShapeTensor();
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input
+        // uses a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
+      }
     }
   } else {
     // Model does support Triton-style batching so each input tensor
@@ -1025,15 +1036,19 @@
     batch_size_ = 0;
     for (auto& pr : original_inputs_) {
       auto& input = pr.second;
+      const inference::ModelInput* input_config;
+      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
 
       // For a shape tensor, keep the tensor's shape as it is and mark
       // that the input is a shape tensor.
-      const inference::ModelInput* input_config;
-      RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config));
       if (input_config->is_shape_tensor()) {
         *input.MutableShape() = input.OriginalShape();
-        input.SetIsShapeTensor(true);
+        input.SetIsShapeTensor();
         continue;
+      } else if (input_config->is_non_linear_format_io()) {
+        // If a tensor uses a non-linear IO format, indicate that the input
+        // uses a non-linear IO format.
+        input.SetIsNonLinearFormatIo();
       }
 
       if (input.OriginalShape().size() == 0) {
@@ -1183,15 +1198,9 @@ InferenceRequest::Normalize()
   {
     const auto& data_type = input.DType();
 
-    // FIXME: Skip byte size validation for TensorRT backend because it breaks
-    // shape-size assumption. See DLIS-6805 for proper fix for TRT backend
-    // reformat_free tensors.
-    bool skip_byte_size_check = false;
-    constexpr char trt_prefix[] = "tensorrt_";
-    const std::string& platform = model_raw_->Config().platform();
-    skip_byte_size_check |= (platform.rfind(trt_prefix) == 0);
-
-    if (!skip_byte_size_check) {
+    // Non-linear IO format input byte size validation will be handled in the
+    // TensorRT backend.
+    if (!input.IsNonLinearFormatIo()) {
       TRITONSERVER_MemoryType input_memory_type;
       // Because Triton expects STRING type to be in special format
       // (prepend 4 bytes to specify string length), so need to add all the
@@ -1201,10 +1210,13 @@
           input_name, input, model_name, &input_memory_type));
       // FIXME: Temporarily skips byte size checks for GPU tensors. See
       // DLIS-6820.
-      skip_byte_size_check |=
-          (input_memory_type == TRITONSERVER_MEMORY_GPU);
     } else {
-      const auto& input_dims = input.ShapeWithBatchDim();
+      // Shape tensor with dynamic batching does not introduce a new
+      // dimension to the tensor but adds an additional value to the 1-D
+      // array.
+      const std::vector<int64_t>& input_dims =
+          input.IsShapeTensor() ? input.OriginalShape()
+                                : input.ShapeWithBatchDim();
       int64_t expected_byte_size = INT_MAX;
       expected_byte_size =
           triton::common::GetByteSize(data_type, input_dims);
@@ -1524,7 +1536,7 @@ InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter)
 // Input
 //
 InferenceRequest::Input::Input()
-    : is_shape_tensor_(false), data_(new MemoryReference),
+    : tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1533,16 +1545,17 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }
 
 InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const std::vector<int64_t>& shape)
     : name_(name), datatype_(datatype), original_shape_(shape),
-      is_shape_tensor_(false), data_(new MemoryReference),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
       has_host_policy_specific_data_(false)
 {
 }
@@ -1558,9 +1571,16 @@ InferenceRequest::Input::SetMetadata(
 }
 
 Status
-InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
+InferenceRequest::Input::SetIsShapeTensor()
+{
+  tensor_type_ = TensorType::SHAPE_TENSOR;
+  return Status::Success;
+}
+
+Status
+InferenceRequest::Input::SetIsNonLinearFormatIo()
 {
-  is_shape_tensor_ = is_shape_tensor;
+  tensor_type_ = TensorType::NON_LINEAR;
   return Status::Success;
 }
 
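The input_dims selection in the byte-size hunk encodes a TensorRT subtlety: under dynamic batching an ordinary input of shape [3] with batch size 4 is validated as [4, 3], while a shape tensor stays one-dimensional and simply carries one more value. A minimal self-contained sketch of that arithmetic, not the Triton source; ByteSize below is a simplified stand-in for triton::common::GetByteSize:

#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Byte size = element size * product of dims (simplified stand-in for
// triton::common::GetByteSize).
int64_t ByteSize(int64_t element_size, const std::vector<int64_t>& dims)
{
  return std::accumulate(
      dims.begin(), dims.end(), element_size, std::multiplies<int64_t>());
}

int main()
{
  const int64_t kInt32Size = 4;

  // Ordinary tensor: batching prepends a dimension, [3] -> [4, 3].
  std::cout << ByteSize(kInt32Size, {4, 3}) << "\n";  // 48 bytes

  // Shape tensor: batching appends a value to the 1-D array, [3] -> [4],
  // so validation must use the tensor's original shape, not a shape with
  // a prepended batch dimension.
  std::cout << ByteSize(kInt32Size, {4}) << "\n";  // 16 bytes
  return 0;
}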

src/infer_request.h

Lines changed: 17 additions & 3 deletions
@@ -82,6 +82,8 @@ class InferenceRequest {
   // Input tensor
   class Input {
    public:
+    enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };
+
     Input();
     Input(
         const std::string& name, const inference::DataType datatype,
@@ -134,10 +136,22 @@
     }
 
     // Whether or not the input is a tensorrt shape tensor
-    bool IsShapeTensor() const { return is_shape_tensor_; }
+    bool IsShapeTensor() const
+    {
+      return tensor_type_ == TensorType::SHAPE_TENSOR;
+    }
+
+    // Whether or not the input uses a non-linear IO format
+    bool IsNonLinearFormatIo() const
+    {
+      return tensor_type_ == TensorType::NON_LINEAR;
+    }
 
     // Set the input to be treated as a shape tensor.
-    Status SetIsShapeTensor(const bool is_shape_tensor);
+    Status SetIsShapeTensor();
+
+    // Set the input to be treated as using a non-linear IO format.
+    Status SetIsNonLinearFormatIo();
 
     // The data for this input.
     const std::shared_ptr<Memory>& Data() const { return data_; }
@@ -240,7 +254,7 @@
     std::vector<int64_t> original_shape_;
     std::vector<int64_t> shape_;
     std::vector<int64_t> shape_with_batch_dim_;
-    bool is_shape_tensor_;
+    TensorType tensor_type_;
     std::shared_ptr<Memory> data_;
 
     bool has_host_policy_specific_data_;
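The header change replaces the old is_shape_tensor_ boolean with a single three-state enum, so an input cannot be marked both shape tensor and non-linear format. A stripped-down sketch of the pattern; this is illustrative only, the real class additionally carries name, shape, and data members:

#include <cassert>

// Three mutually exclusive input kinds; separate bools would permit
// impossible states such as "both shape tensor and non-linear format".
enum class TensorType { TENSOR, SHAPE_TENSOR, NON_LINEAR };

class Input {
 public:
  bool IsShapeTensor() const { return tensor_type_ == TensorType::SHAPE_TENSOR; }
  bool IsNonLinearFormatIo() const { return tensor_type_ == TensorType::NON_LINEAR; }

  void SetIsShapeTensor() { tensor_type_ = TensorType::SHAPE_TENSOR; }
  void SetIsNonLinearFormatIo() { tensor_type_ = TensorType::NON_LINEAR; }

 private:
  TensorType tensor_type_ = TensorType::TENSOR;  // default: ordinary tensor
};

int main()
{
  Input input;
  assert(!input.IsShapeTensor() && !input.IsNonLinearFormatIo());

  input.SetIsNonLinearFormatIo();
  assert(input.IsNonLinearFormatIo() && !input.IsShapeTensor());
  return 0;
}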

src/model_config_utils.cc

Lines changed: 32 additions & 0 deletions
@@ -418,6 +418,34 @@ ValidateIOShape(
   return Status::Success;
 }
 
+/// Validate that non-linear format inputs or outputs are specified correctly
+/// in a model configuration.
+template <class ModelIO>
+Status
+ValidateNonLinearFormatIO(
+    const ModelIO& io, const std::string& platform, bool is_input)
+{
+  if (!io.is_non_linear_format_io()) {
+    // Nothing to validate as the tensor is not in a non-linear format.
+    return Status::Success;
+  }
+
+  if (platform != kTensorRTPlanPlatform) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format is only supported for the TensorRT platform");
+  }
+
+  if (io.dims_size() != 3) {
+    std::string io_type = is_input ? "input" : "output";
+    return Status(
+        Status::Code::INVALID_ARG,
+        "Non-linear IO format " + io_type + " requires 3 dims");
+  }
+
+  return Status::Success;
+}
+
 }  // namespace
 
 Status
@@ -1732,6 +1760,8 @@ ValidateModelInput(
         "shape tensors are only supported for TensorRT platform");
   }
 
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, true /* is_input */));
+
   return Status::Success;
 }
 
@@ -1768,6 +1798,8 @@ ValidateModelOutput(
         "shape tensors are only supported for TensorRT platform");
   }
 
+  RETURN_IF_ERROR(ValidateNonLinearFormatIO(io, platform, false /* is_input */));
+
   return Status::Success;
 }
 
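For experimentation outside the Triton tree, the helper's logic can be approximated standalone. In the sketch below the Status type is replaced by a plain error string (empty means success) and kTensorRTPlanPlatform is assumed to equal "tensorrt_plan"; both are stand-ins, not Triton's actual definitions:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

const std::string kTensorRTPlanPlatform = "tensorrt_plan";  // assumed value

// Simplified stand-in for ValidateNonLinearFormatIO: an empty string means
// success, anything else is the error message.
std::string ValidateNonLinearFormatIO(
    bool is_non_linear_format_io, const std::vector<int64_t>& dims,
    const std::string& platform, bool is_input)
{
  if (!is_non_linear_format_io) {
    return "";  // nothing to validate
  }
  if (platform != kTensorRTPlanPlatform) {
    return "Non-linear IO format is only supported for the TensorRT platform";
  }
  if (dims.size() != 3) {
    const std::string io_type = is_input ? "input" : "output";
    return "Non-linear IO format " + io_type + " requires 3 dims";
  }
  return "";
}

int main()
{
  // A CHW-style 3-dim input on a TensorRT plan passes (prints "[]").
  std::cout << "[" << ValidateNonLinearFormatIO(
      true, {3, 224, 224}, "tensorrt_plan", true) << "]\n";

  // Any other platform is rejected with an error message.
  std::cout << "[" << ValidateNonLinearFormatIO(
      true, {3, 224, 224}, "onnxruntime_onnx", true) << "]\n";
  return 0;
}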

src/test/response_cache_test.cc

Lines changed: 3 additions & 2 deletions
@@ -70,8 +70,9 @@ InferenceRequest::Input::Input(
     const std::string& name, const inference::DataType datatype,
     const int64_t* shape, const uint64_t dim_count)
     : name_(name), datatype_(datatype),
-      original_shape_(shape, shape + dim_count), is_shape_tensor_(false),
-      data_(new MemoryReference), has_host_policy_specific_data_(false)
+      original_shape_(shape, shape + dim_count),
+      tensor_type_(TensorType::TENSOR), data_(new MemoryReference),
+      has_host_policy_specific_data_(false)
 {
 }
 
