Skip to content

Commit af8bb93

Browse files
authored
fix: Update handling of large array sizes (#8174)
1 parent 17fcee4 commit af8bb93

File tree

8 files changed

+525
-9
lines changed

8 files changed

+525
-9
lines changed
Lines changed: 365 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,365 @@
1+
#!/usr/bin/python
2+
# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions
6+
# are met:
7+
# * Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# * Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in the
11+
# documentation and/or other materials provided with the distribution.
12+
# * Neither the name of NVIDIA CORPORATION nor the names of its
13+
# contributors may be used to endorse or promote products derived
14+
# from this software without specific prior written permission.
15+
#
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
28+
import sys
29+
30+
sys.path.append("../common")
31+
32+
import unittest
33+
34+
import numpy as np
35+
import requests
36+
import test_util as tu
37+
38+
# Constants for size calculations.
# Each FP32 value is 4 bytes, so target byte sizes are divided by
# BYTES_PER_FP32 to get element counts.
BYTES_PER_FP32 = 4
MB = 2**20  # 1 MB = 1,048,576 bytes
DEFAULT_LIMIT_BYTES = 64 * MB  # 64MB default limit
INCREASED_LIMIT_BYTES = 128 * MB  # 128MB increased limit

# Calculate element counts for size limits
DEFAULT_LIMIT_ELEMENTS = DEFAULT_LIMIT_BYTES // BYTES_PER_FP32  # 16,777,216 elements
INCREASED_LIMIT_ELEMENTS = (
    INCREASED_LIMIT_BYTES // BYTES_PER_FP32
)  # 33,554,432 elements

# Small offsets (in elements) to land just over/under the byte limits
OFFSET_ELEMENTS = 32
53+
54+
55+
class InferSizeLimitTest(tu.TestResultCollector):
    """Tests for the server's HTTP inference request size limits.

    Each test sends a request to the ``onnx_zero_1_float32`` identity model
    just over and just under a configured byte limit and checks that the
    oversized request is rejected with HTTP 400 (with a size-limit error
    message) while the in-bounds request succeeds and echoes the input back.

    Assumes a server is reachable on localhost:8000; the 128MB cases assume
    the server was started with the increased limit configured.
    """

    def _get_infer_url(self, model_name):
        """Return the KServe v2 infer endpoint URL for *model_name*."""
        return "http://localhost:8000/v2/models/{}/infer".format(model_name)

    def _assert_rejected(self, response):
        """Assert *response* is a 400 rejection citing the size limit."""
        self.assertEqual(
            400,
            response.status_code,
            "Expected error code for oversized request, got: {}".format(
                response.status_code
            ),
        )
        self.assertIn(
            "exceeds the maximum allowed value",
            response.content.decode(),
            "Expected error message about exceeding max input size",
        )

    def _run_raw_binary_case(self, model, limit_bytes):
        """Exercise raw-binary requests just over and just under *limit_bytes*.

        The over-limit request must be rejected with 400; the under-limit
        request must succeed and, since the model is an identity model,
        return the input bytes unchanged.
        """
        limit_elements = limit_bytes // BYTES_PER_FP32
        # Header length 0 => the entire request body is raw tensor data.
        headers = {"Inference-Header-Content-Length": "0"}

        # Case 1: just over the limit ((limit_elements + 32) * 4 bytes).
        large_input = np.ones(limit_elements + OFFSET_ELEMENTS, dtype=np.float32)
        input_bytes = large_input.tobytes()
        # Sanity-check the fixture itself (self.assert* so it survives -O).
        self.assertGreater(
            len(input_bytes), limit_bytes, "Test input is not over the limit"
        )
        response = requests.post(
            self._get_infer_url(model), data=input_bytes, headers=headers
        )
        self._assert_rejected(response)

        # Case 2: just under the limit ((limit_elements - 32) * 4 bytes).
        small_input = np.ones(limit_elements - OFFSET_ELEMENTS, dtype=np.float32)
        input_bytes = small_input.tobytes()
        self.assertLess(
            len(input_bytes), limit_bytes, "Test input is not under the limit"
        )
        response = requests.post(
            self._get_infer_url(model), data=input_bytes, headers=headers
        )
        self.assertEqual(
            200,
            response.status_code,
            "Expected success code for request within size limit, got: {}".format(
                response.status_code
            ),
        )

        # Identity model: the binary payload that follows the JSON response
        # header must equal the input.
        header_size = int(response.headers["Inference-Header-Content-Length"])
        output_array = np.frombuffer(response.content[header_size:], dtype=np.float32)
        self.assertTrue(
            np.array_equal(output_array, small_input),
            "Response data does not match input data",
        )

    def _run_json_case(self, model, limit_bytes):
        """Exercise JSON-encoded requests just over and just under *limit_bytes*.

        The over-limit request must be rejected with 400; the under-limit
        request must succeed and report an output of the same shape.
        """
        limit_elements = limit_bytes // BYTES_PER_FP32
        headers = {"Content-Type": "application/json"}

        def payload_for(shape_size):
            # One FP32 input tensor of shape [1, shape_size] filled with 1.0.
            return {
                "inputs": [
                    {
                        "name": "INPUT0",
                        "datatype": "FP32",
                        "shape": [1, shape_size],
                        "data": [1.0] * shape_size,
                    }
                ]
            }

        # Case 1: tensor just over the limit (should be rejected).
        shape_size = limit_elements + OFFSET_ELEMENTS
        self.assertGreater(
            shape_size * BYTES_PER_FP32,
            limit_bytes,
            "Test input is not over the limit",
        )
        response = requests.post(
            self._get_infer_url(model), headers=headers, json=payload_for(shape_size)
        )
        self._assert_rejected(response)

        # Case 2: tensor just under the limit (should succeed).
        shape_size = limit_elements - OFFSET_ELEMENTS
        self.assertLess(
            shape_size * BYTES_PER_FP32,
            limit_bytes,
            "Test input is not under the limit",
        )
        response = requests.post(
            self._get_infer_url(model), headers=headers, json=payload_for(shape_size)
        )
        self.assertEqual(
            200,
            response.status_code,
            "Expected success code for JSON request within size limit, got: {}".format(
                response.status_code
            ),
        )

        # Verify we got a valid response with the expected output shape.
        result = response.json()
        self.assertIn("outputs", result, "Response missing outputs field")
        self.assertEqual(1, len(result["outputs"]), "Expected 1 output")
        self.assertEqual(
            shape_size,
            result["outputs"][0]["shape"][1],
            f"Expected shape {[1, shape_size]}, got {result['outputs'][0]['shape']}",
        )

    def test_default_limit_rejection_raw_binary(self):
        """Raw binary inputs against the default 64MB limit."""
        self._run_raw_binary_case("onnx_zero_1_float32", DEFAULT_LIMIT_BYTES)

    def test_default_limit_rejection_json(self):
        """JSON inputs against the default 64MB limit."""
        self._run_json_case("onnx_zero_1_float32", DEFAULT_LIMIT_BYTES)

    def test_large_input_raw_binary(self):
        """Raw binary inputs against the increased 128MB limit."""
        self._run_raw_binary_case("onnx_zero_1_float32", INCREASED_LIMIT_BYTES)

    def test_large_input_json(self):
        """JSON inputs against the increased 128MB limit."""
        self._run_json_case("onnx_zero_1_float32", INCREASED_LIMIT_BYTES)
362+
363+
364+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)