Skip to content

Commit be7d4b1

Browse files
authored
fix: Improve input size validation for large HTTP JSON requests (#8432)
1 parent c31c40f commit be7d4b1

File tree

5 files changed

+94
-14
lines changed

5 files changed

+94
-14
lines changed

qa/L0_cuda_shared_memory/cuda_shared_memory_test.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22

3-
# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
44
#
55
# Redistribution and use in source and binary forms, with or without
66
# modification, are permitted provided that the following conditions
@@ -723,7 +723,11 @@ def test_exceeds_cshm_handle_size_limit(self):
723723
try:
724724
error_message = response.json().get("error", "")
725725
self.assertIn(
726-
"'raw_handle' exceeds the maximum allowed data size limit INT_MAX",
726+
"Request JSON size",
727+
error_message,
728+
)
729+
self.assertIn(
730+
"exceeds the maximum allowed value",
727731
error_message,
728732
)
729733
except ValueError:

qa/L0_http/http_input_size_limit_test.py

Lines changed: 65 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
sys.path.append("../common")
3131

32+
import json
3233
import unittest
3334

3435
import numpy as np
@@ -39,6 +40,7 @@
3940
# Each FP32 value is 4 bytes, so we need to divide target byte sizes by 4 to get element counts
4041
BYTES_PER_FP32 = 4
4142
MB = 2**20 # 1 MB = 1,048,576 bytes
43+
GB = 2**30 # 1 GB = 1,073,741,824 bytes
4244
DEFAULT_LIMIT_BYTES = 64 * MB # 64MB default limit
4345
INCREASED_LIMIT_BYTES = 128 * MB # 128MB increased limit
4446

@@ -167,8 +169,11 @@ def test_default_limit_rejection_json(self):
167169
)
168170

169171
# Test case 2: Input just under the 64MB limit (should succeed)
170-
# (2^24 - 32) elements * 4 bytes = 64MB - 128 bytes = 67,108,736 bytes
171-
shape_size = DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS
172+
# The test creates a JSON payload with data, which adds overhead compared
173+
# to raw binary format. We adjust the shape size to ensure the final
174+
# JSON payload is under the size limit. An element is roughly 5
175+
# bytes in JSON, compared to 4 bytes as a raw FP32.
176+
shape_size = (DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5
172177

173178
payload = {
174179
"inputs": [
@@ -180,9 +185,8 @@ def test_default_limit_rejection_json(self):
180185
}
181186
]
182187
}
183-
assert (
184-
shape_size * BYTES_PER_FP32 < 64 * MB
185-
) # Verify we're actually under the 64MB limit
188+
# Verify we're actually under the 64MB limit
189+
self.assertLess(len(json.dumps(payload).encode("utf-8")), DEFAULT_LIMIT_BYTES)
186190

187191
response = requests.post(
188192
self._get_infer_url(model), headers=headers, json=payload
@@ -320,8 +324,11 @@ def test_large_input_json(self):
320324
)
321325

322326
# Test case 2: Input just under the 128MB configured limit (should succeed)
323-
# (2^25 - 32) elements * 4 bytes = 128MB - 128 bytes = 134,217,600 bytes
324-
shape_size = INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS
327+
# The test creates a JSON payload with data, which adds overhead compared
328+
# to raw binary format. We adjust the shape size to ensure the final
329+
# JSON payload is under the size limit. An element is roughly 5
330+
# bytes in JSON, compared to 4 bytes as a raw FP32.
331+
shape_size = (INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5
325332

326333
payload = {
327334
"inputs": [
@@ -333,9 +340,8 @@ def test_large_input_json(self):
333340
}
334341
]
335342
}
336-
assert (
337-
shape_size * BYTES_PER_FP32 < 128 * MB
338-
) # Verify we're actually under the 128MB limit
343+
# Verify we're actually under the 128MB limit
344+
self.assertLess(len(json.dumps(payload).encode("utf-8")), INCREASED_LIMIT_BYTES)
339345

340346
response = requests.post(
341347
self._get_infer_url(model), headers=headers, json=payload
@@ -360,6 +366,55 @@ def test_large_input_json(self):
360366
f"Expected shape {[1, shape_size]}, got {result['outputs'][0]['shape']}",
361367
)
362368

369+
def test_large_string_in_json(self):
370+
"""Test JSON request with large string input"""
371+
model = "simple_identity"
372+
373+
# Create a string (a large payload of about 2GB) that is far larger than the default limit of 64MB
374+
# (2^31 + 64) elements * 1 byte = 2GB + 64 bytes = 2,147,483,712 bytes
375+
large_string_size = 2 * GB + 64
376+
large_string = "A" * large_string_size
377+
378+
payload = {
379+
"inputs": [
380+
{
381+
"name": "INPUT0",
382+
"datatype": "BYTES",
383+
"shape": [1, 1],
384+
"data": [large_string],
385+
}
386+
]
387+
}
388+
389+
headers = {"Content-Type": "application/json"}
390+
response = requests.post(
391+
self._get_infer_url(model), headers=headers, json=payload
392+
)
393+
394+
# Should fail with 400 bad request
395+
self.assertEqual(
396+
400,
397+
response.status_code,
398+
"Expected error code for oversized JSON request, got: {}".format(
399+
response.status_code
400+
),
401+
)
402+
403+
# Verify error message
404+
error_msg = response.content.decode()
405+
self.assertIn(
406+
"Request JSON size",
407+
error_msg,
408+
)
409+
self.assertIn(
410+
"exceeds the maximum allowed value",
411+
error_msg,
412+
)
413+
self.assertIn(
414+
"Use --http-max-input-size to increase the limit",
415+
error_msg,
416+
)
417+
363418

364419
if __name__ == "__main__":
365420
unittest.main()

qa/L0_http/http_test.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -364,8 +364,11 @@ def test_loading_large_invalid_model(self):
364364
try:
365365
error_message = response.json().get("error", "")
366366
self.assertIn(
367-
"'file:1/model.onnx' exceeds the maximum allowed data size limit "
368-
"INT_MAX",
367+
"Request JSON size",
368+
error_message,
369+
)
370+
self.assertIn(
371+
"exceeds the maximum allowed value",
369372
error_message,
370373
)
371374
except ValueError:

qa/L0_http/test.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,7 @@ MODELDIR=http_input_size_limit_test_models
760760
mkdir -p $MODELDIR
761761
rm -rf ${MODELDIR}/*
762762
cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32 ${MODELDIR}/.
763+
cp -r ./models/simple_identity ${MODELDIR}/.
763764

764765
# First run with default size limit - large inputs should fail
765766
SERVER_ARGS="--model-repository=${MODELDIR}"
@@ -787,6 +788,13 @@ if [ $? -ne 0 ]; then
787788
echo -e "\n***\n*** Default Input Size Limit Test Failed for JSON input\n***"
788789
RET=1
789790
fi
791+
792+
python http_input_size_limit_test.py InferSizeLimitTest.test_large_string_in_json >> $CLIENT_LOG 2>&1
793+
if [ $? -ne 0 ]; then
794+
cat $CLIENT_LOG
795+
echo -e "\n***\n*** Default Input Size Limit Test Failed for large string in JSON\n***"
796+
RET=1
797+
fi
790798
set -e
791799

792800
kill $SERVER_PID

src/http_server.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3070,6 +3070,16 @@ HTTPAPIServer::EVBufferToJson(
30703070
triton::common::TritonJson::Value* document, evbuffer_iovec* v, int* v_idx,
30713071
const size_t length, int n)
30723072
{
3073+
if (length > max_input_size_) {
3074+
return TRITONSERVER_ErrorNew(
3075+
TRITONSERVER_ERROR_INVALID_ARG,
3076+
("Request JSON size of " + std::to_string(length) +
3077+
" bytes exceeds the maximum allowed value of " +
3078+
std::to_string(max_input_size_) +
3079+
" bytes. Use --http-max-input-size to increase the limit.")
3080+
.c_str());
3081+
}
3082+
30733083
size_t offset = 0, remaining_length = length;
30743084
char* json_base;
30753085
std::vector<char> json_buffer;

0 commit comments

Comments
 (0)