
Commit 334f81f

ci: Fix shape and reformat free tensor handling in the input byte size check (#7444)
1 parent: aca16ba

11 files changed: +312, -2 lines

docs/user_guide/model_configuration.md

Lines changed: 34 additions & 0 deletions
```diff
@@ -598,6 +598,40 @@ input1: [4, 4, 6] <== shape of this tensor [3]
 Currently, only TensorRT supports shape tensors. Read [Shape Tensor I/O](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#shape_tensor_io)
 to learn more about shape tensors.
 
+## Non-Linear I/O Formats
+
+For models that process input or output data in non-linear formats, the _is_non_linear_format_io_ property
+must be set. The following example model configuration shows how to specify that INPUT0 and INPUT1 use
+non-linear I/O data formats.
+
+```
+name: "mytensorrtmodel"
+platform: "tensorrt_plan"
+max_batch_size: 8
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP16
+    dims: [ 3,224,224 ]
+    is_non_linear_format_io: true
+  },
+  {
+    name: "INPUT1"
+    data_type: TYPE_FP16
+    dims: [ 3,224,224 ]
+    is_non_linear_format_io: true
+  }
+]
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP16
+    dims: [ 1,3 ]
+  }
+]
+```
+
+Currently, only TensorRT supports this property. To learn more about I/O formats, refer to the
+[I/O Formats documentation](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#reformat-free-network-tensors).
+
 ## Version Policy
 
 Each model can have one or more
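
For reference, a minimal Python client sketch for the example configuration above. This is not part of the commit, and it assumes a Triton server at localhost:8001 serving the hypothetical "mytensorrtmodel". For tensors marked is_non_linear_format_io, the raw bytes sent are expected to already be laid out in the non-linear format the TensorRT engine uses; Triton does not reformat them.

```python
import numpy as np
import tritonclient.grpc as tritongrpcclient
from tritonclient.utils import np_to_triton_dtype

triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")

# One FP16 image per input, matching dims [3, 224, 224] plus the batch dimension.
input0_data = np.random.rand(1, 3, 224, 224).astype(np.float16)
input1_data = np.random.rand(1, 3, 224, 224).astype(np.float16)

inputs = [
    tritongrpcclient.InferInput("INPUT0", input0_data.shape, np_to_triton_dtype(np.float16)),
    tritongrpcclient.InferInput("INPUT1", input1_data.shape, np_to_triton_dtype(np.float16)),
]
inputs[0].set_data_from_numpy(input0_data)
inputs[1].set_data_from_numpy(input1_data)

outputs = [tritongrpcclient.InferRequestedOutput("OUTPUT0")]
response = triton_client.infer(model_name="mytensorrtmodel", inputs=inputs, outputs=outputs)
print(response.as_numpy("OUTPUT0"))
```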

qa/L0_input_validation/input_validation_test.py

Lines changed: 72 additions & 0 deletions
```diff
@@ -34,6 +34,7 @@
 import infer_util as iu
 import numpy as np
 import tritonclient.grpc as tritongrpcclient
+import tritonclient.utils.shared_memory as shm
 from tritonclient.utils import InferenceServerException, np_to_triton_dtype
 
 
@@ -211,6 +212,77 @@ def get_input_array(input_size, np_dtype):
                 err_str,
             )
 
+    def test_wrong_input_shape_tensor_size(self):
+        def inference_helper(model_name, batch_size=1):
+            triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+            if batch_size > 1:
+                dummy_input_data = np.random.rand(batch_size, 32, 32).astype(np.float32)
+            else:
+                dummy_input_data = np.random.rand(32, 32).astype(np.float32)
+            shape_tensor_data = np.asarray([4, 4], dtype=np.int32)
+
+            # Pass an incorrect input byte size for the shape tensor.
+            # Use shared memory to bypass the shape check in the client library.
+            input_byte_size = (shape_tensor_data.size - 1) * np.dtype(np.int32).itemsize
+
+            input_shm_handle = shm.create_shared_memory_region(
+                "INPUT0_SHM",
+                "/INPUT0_SHM",
+                input_byte_size,
+            )
+            shm.set_shared_memory_region(
+                input_shm_handle,
+                [
+                    shape_tensor_data,
+                ],
+            )
+            triton_client.register_system_shared_memory(
+                "INPUT0_SHM",
+                "/INPUT0_SHM",
+                input_byte_size,
+            )
+
+            inputs = [
+                tritongrpcclient.InferInput(
+                    "DUMMY_INPUT0",
+                    dummy_input_data.shape,
+                    np_to_triton_dtype(np.float32),
+                ),
+                tritongrpcclient.InferInput(
+                    "INPUT0",
+                    shape_tensor_data.shape,
+                    np_to_triton_dtype(np.int32),
+                ),
+            ]
+            inputs[0].set_data_from_numpy(dummy_input_data)
+            inputs[1].set_shared_memory("INPUT0_SHM", input_byte_size)
+
+            outputs = [
+                tritongrpcclient.InferRequestedOutput("DUMMY_OUTPUT0"),
+                tritongrpcclient.InferRequestedOutput("OUTPUT0"),
+            ]
+
+            try:
+                # Perform inference; the server should reject the request.
+                with self.assertRaises(InferenceServerException) as e:
+                    triton_client.infer(
+                        model_name=model_name, inputs=inputs, outputs=outputs
+                    )
+                err_str = str(e.exception)
+                correct_input_byte_size = (
+                    shape_tensor_data.size * np.dtype(np.int32).itemsize
+                )
+                self.assertIn(
+                    f"input byte size mismatch for input 'INPUT0' for model '{model_name}'. Expected {correct_input_byte_size}, got {input_byte_size}",
+                    err_str,
+                )
+            finally:
+                shm.destroy_shared_memory_region(input_shm_handle)
+                triton_client.unregister_system_shared_memory("INPUT0_SHM")
+
+        inference_helper(model_name="plan_nobatch_zero_1_float32_int32")
+        inference_helper(model_name="plan_zero_1_float32_int32", batch_size=8)
+
 
 if __name__ == "__main__":
     unittest.main()
```
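
The mismatch this test provokes is plain arithmetic: the shape tensor holds two INT32 elements, so the correct byte size is 2 * 4 = 8, while the registered shared-memory region is deliberately one element short at 4 bytes. A standalone sketch of that expectation (the helper name expected_shape_tensor_bytes is illustrative, not a Triton API):

```python
import numpy as np

def expected_shape_tensor_bytes(shape_tensor: np.ndarray) -> int:
    # A shape tensor's byte size is element count times element size.
    return shape_tensor.size * shape_tensor.dtype.itemsize

shape_tensor_data = np.asarray([4, 4], dtype=np.int32)
registered_byte_size = (shape_tensor_data.size - 1) * np.dtype(np.int32).itemsize

assert expected_shape_tensor_bytes(shape_tensor_data) == 8
assert registered_byte_size == 4  # short by one element, as in the test above
```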

qa/L0_input_validation/test.sh

Lines changed: 2 additions & 0 deletions
```diff
@@ -123,6 +123,8 @@ dynamic_batching {
 EOL
 
 cp -r $DATADIR/qa_model_repository/graphdef_object_int32_int32 models/.
+cp -r $DATADIR/qa_shapetensor_model_repository/plan_nobatch_zero_1_float32_int32 models/.
+cp -r $DATADIR/qa_shapetensor_model_repository/plan_zero_1_float32_int32 models/.
 
 SERVER_ARGS="--model-repository=`pwd`/models"
 run_server
```

qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_non_linear_format_io/config.pbtxt

Lines changed: 26 additions & 0 deletions

```diff
@@ -0,0 +1,26 @@
+max_batch_size: 8
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+    is_non_linear_format_io: true
+  },
+  {
+    name: "INPUT1"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  }
+]
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  },
+  {
+    name: "OUTPUT1"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  }
+]
```

qa/L0_model_config/autofill_noplatform/tensorrt/bad_input_non_linear_format_io/expected

Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+'INPUT0' uses a linear IO format, but 'is_non_linear_format_io' is incorrectly set to true in the model configuration.
```

qa/L0_model_config/autofill_noplatform/tensorrt/bad_outut_non_linear_format_io/config.pbtxt

Lines changed: 26 additions & 0 deletions

```diff
@@ -0,0 +1,26 @@
+max_batch_size: 8
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  },
+  {
+    name: "INPUT1"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  }
+]
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  },
+  {
+    name: "OUTPUT1"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+    is_non_linear_format_io: true
+  }
+]
```

qa/L0_model_config/autofill_noplatform/tensorrt/bad_outut_non_linear_format_io/expected

Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+'OUTPUT1' uses a linear IO format, but 'is_non_linear_format_io' is incorrectly set to true in the model configuration.
```
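
These two fixture pairs exercise config validation against the actual engine: if the TensorRT plan reports a tensor as linear but the configuration flags it with is_non_linear_format_io, the model must fail to load with the expected message. A rough Python sketch of that comparison (hypothetical function and argument names; the real check lives in the C++ TensorRT backend):

```python
def validate_non_linear_format_io(config_flags: dict, engine_is_linear: dict) -> None:
    # Hypothetical sketch: reject config entries that flag a linear engine
    # tensor as non-linear, mirroring the expected error text above.
    for name, flagged_non_linear in config_flags.items():
        if flagged_non_linear and engine_is_linear[name]:
            raise ValueError(
                f"'{name}' uses a linear IO format, but 'is_non_linear_format_io' "
                "is incorrectly set to true in the model configuration."
            )

# The bad_input fixture: INPUT0 flagged non-linear, but the engine tensor is linear.
try:
    validate_non_linear_format_io({"INPUT0": True}, {"INPUT0": True})
except ValueError as e:
    print(e)  # matches the expected error message
```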

qa/L0_model_config/autofill_noplatform_success/tensorrt/no_config_non_linear_format_io/expected

Lines changed: 57 additions & 0 deletions

```diff
@@ -0,0 +1,57 @@
+name: "no_config_non_linear_format_io"
+platform: "tensorrt_plan"
+backend: "tensorrt"
+version_policy {
+  latest {
+    num_versions: 1
+  }
+}
+max_batch_size: 8
+input {
+  name: "INPUT0"
+  data_type: TYPE_FP32
+  dims: -1
+  dims: 2
+  dims: 1
+  is_non_linear_format_io: true
+}
+input {
+  name: "INPUT1"
+  data_type: TYPE_FP32
+  dims: -1
+  dims: 2
+  dims: 1
+  is_non_linear_format_io: true
+}
+output {
+  name: "OUTPUT0"
+  data_type: TYPE_FP32
+  dims: -1
+  dims: 2
+  dims: 1
+}
+output {
+  name: "OUTPUT1"
+  data_type: TYPE_FP32
+  dims: -1
+  dims: 2
+  dims: 1
+}
+optimization {
+  input_pinned_memory {
+    enable: true
+  }
+  output_pinned_memory {
+    enable: true
+  }
+}
+dynamic_batching {
+  preferred_batch_size: 8
+}
+instance_group {
+  name: "no_config_non_linear_format_io"
+  kind: KIND_GPU
+  count: 1
+  gpus: 0
+}
+default_model_filename: "model.plan"
```
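
Because the model directory ships no config.pbtxt, this expected file spells out what Triton should autofill from the plan, including is_non_linear_format_io on both inputs. One way to inspect the autofilled result against a running server (a sketch, assuming the model loaded and that the client and server versions carry the is_non_linear_format_io field):

```python
import tritonclient.grpc as tritongrpcclient

triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
# get_model_config returns a ModelConfigResponse; its .config field holds the
# completed (autofilled) configuration.
config = triton_client.get_model_config("no_config_non_linear_format_io").config
for io in list(config.input) + list(config.output):
    print(io.name, io.is_non_linear_format_io)
```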

qa/L0_model_config/test.sh

Lines changed: 12 additions & 1 deletion
```diff
@@ -56,10 +56,12 @@ for modelpath in \
         autofill_noplatform/tensorrt/bad_input_shape/1 \
         autofill_noplatform/tensorrt/bad_input_type/1 \
         autofill_noplatform/tensorrt/bad_input_shape_tensor/1 \
+        autofill_noplatform/tensorrt/bad_input_non_linear_format_io/1 \
         autofill_noplatform/tensorrt/bad_output_dims/1 \
         autofill_noplatform/tensorrt/bad_output_shape/1 \
         autofill_noplatform/tensorrt/bad_output_type/1 \
         autofill_noplatform/tensorrt/bad_output_shape_tensor/1 \
+        autofill_noplatform/tensorrt/bad_outut_non_linear_format_io/1 \
         autofill_noplatform/tensorrt/too_few_inputs/1 \
         autofill_noplatform/tensorrt/too_many_inputs/1 \
         autofill_noplatform/tensorrt/unknown_input/1 \
@@ -92,6 +94,14 @@ for modelpath in \
         $modelpath/.
 done
 
+# Copy TensorRT plans with non-linear format IO into the test model repositories.
+for modelpath in \
+        autofill_noplatform_success/tensorrt/no_config_non_linear_format_io/1 ; do
+    mkdir -p $modelpath
+    cp /data/inferenceserver/${REPO_VERSION}/qa_trt_format_model_repository/plan_CHW32_LINEAR_float32_float32_float32/1/model.plan \
+        $modelpath/.
+done
+
 # Copy variable-sized TensorRT plans into the test model repositories.
 for modelpath in \
         autofill_noplatform_success/tensorrt/no_name_platform_variable/1 \
@@ -593,7 +603,8 @@ for TARGET_DIR in `ls -d autofill_noplatform_success/*/*`; do
     # that the directory is an entire model repository.
     rm -fr models && mkdir models
     if [ -f ${TARGET_DIR}/config.pbtxt ] || [ "$TARGET" = "no_config" ] \
-            || [ "$TARGET" = "no_config_variable" ] || [ "$TARGET" = "no_config_shape_tensor" ] ; then
+            || [ "$TARGET" = "no_config_variable" ] || [ "$TARGET" = "no_config_shape_tensor" ] \
+            || [ "$TARGET" = "no_config_non_linear_format_io" ] ; then
         cp -r ${TARGET_DIR} models/.
     else
         cp -r ${TARGET_DIR}/* models/.
```

qa/L0_trt_reformat_free/test.sh

Lines changed: 1 addition & 1 deletion
```diff
@@ -75,7 +75,7 @@ if [ $? -ne 0 ]; then
     cat $CLIENT_LOG
     RET=1
 else
-    check_test_results $TEST_RESULT_FILE 4
+    check_test_results $TEST_RESULT_FILE 6
     if [ $? -ne 0 ]; then
         cat $CLIENT_LOG
         echo -e "\n***\n*** Test Result Verification Failed\n***"
```
