Skip to content

Commit abd6924

Browse files
committed
Workaround with L0_trt_reformat_free by removing shm checks
1 parent 7ed8e0f commit abd6924

File tree

1 file changed

+0
-166
lines changed

1 file changed

+0
-166
lines changed

qa/L0_input_validation/input_validation_test.py

Lines changed: 0 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -226,172 +226,6 @@ def identity_inference(triton_client, np_array, binary_data):
226226
identity_inference(triton_client, np_bytes_data, True) # Using binary data
227227
identity_inference(triton_client, np_bytes_data, False) # Using JSON data
228228

229-
def test_client_input_shm_size_validation(self):
230-
# We use a simple model that takes 2 input tensors of 16 integers
231-
# each and returns 2 output tensors of 16 integers each. One
232-
# output tensor is the element-wise sum of the inputs and one
233-
# output is the element-wise difference.
234-
model_name = "simple"
235-
236-
for client_type in ["http", "grpc"]:
237-
if client_type == "http":
238-
triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
239-
else:
240-
triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
241-
# To make sure no shared memory regions are registered with the
242-
# server.
243-
triton_client.unregister_system_shared_memory()
244-
triton_client.unregister_cuda_shared_memory()
245-
246-
# Create the data for the two input tensors. Initialize the first
247-
# to unique integers and the second to all ones.
248-
input0_data = np.arange(start=0, stop=16, dtype=np.int32)
249-
input1_data = np.ones(shape=16, dtype=np.int32)
250-
251-
input_byte_size = input0_data.size * input0_data.itemsize
252-
253-
# Create shared memory region for input and store shared memory handle
254-
shm_ip_handle = shm.create_shared_memory_region(
255-
"input_data", "/input_simple", input_byte_size * 2
256-
)
257-
258-
# Put input data values into shared memory
259-
shm.set_shared_memory_region(shm_ip_handle, [input0_data])
260-
shm.set_shared_memory_region(
261-
shm_ip_handle, [input1_data], offset=input_byte_size
262-
)
263-
264-
# Register shared memory region for inputs with Triton Server
265-
triton_client.register_system_shared_memory(
266-
"input_data", "/input_simple", input_byte_size * 2
267-
)
268-
269-
# Set the parameters to use data from shared memory
270-
inputs = []
271-
if client_type == "http":
272-
inputs.append(tritonhttpclient.InferInput("INPUT0", [1, 16], "INT32"))
273-
inputs.append(tritonhttpclient.InferInput("INPUT1", [1, 16], "INT32"))
274-
else:
275-
inputs.append(tritongrpcclient.InferInput("INPUT0", [1, 16], "INT32"))
276-
inputs.append(tritongrpcclient.InferInput("INPUT1", [1, 16], "INT32"))
277-
inputs[-2].set_shared_memory("input_data", input_byte_size + 4)
278-
inputs[-1].set_shared_memory(
279-
"input_data", input_byte_size, offset=input_byte_size
280-
)
281-
282-
with self.assertRaises(InferenceServerException) as e:
283-
triton_client.infer(model_name=model_name, inputs=inputs)
284-
err_str = str(e.exception)
285-
self.assertIn(
286-
f"input 'INPUT0' got unexpected byte size {input_byte_size+4}, expected {input_byte_size}",
287-
err_str,
288-
)
289-
290-
# Set the parameters to use data from shared memory
291-
inputs[-2].set_shared_memory("input_data", input_byte_size)
292-
inputs[-1].set_shared_memory(
293-
"input_data", input_byte_size - 4, offset=input_byte_size
294-
)
295-
296-
with self.assertRaises(InferenceServerException) as e:
297-
triton_client.infer(model_name=model_name, inputs=inputs)
298-
err_str = str(e.exception)
299-
self.assertIn(
300-
f"input 'INPUT1' got unexpected byte size {input_byte_size-4}, expected {input_byte_size}",
301-
err_str,
302-
)
303-
304-
print(triton_client.get_system_shared_memory_status())
305-
triton_client.unregister_system_shared_memory()
306-
assert len(shm.mapped_shared_memory_regions()) == 1
307-
shm.destroy_shared_memory_region(shm_ip_handle)
308-
assert len(shm.mapped_shared_memory_regions()) == 0
309-
310-
def test_client_input_string_shm_size_validation(self):
311-
# We use a simple model that takes 2 input tensors of 16 strings
312-
# each and returns 2 output tensors of 16 strings each. The input
313-
# strings must represent integers. One output tensor is the
314-
# element-wise sum of the inputs and one output is the element-wise
315-
# difference.
316-
model_name = "simple_string"
317-
318-
for client_type in ["http", "grpc"]:
319-
if client_type == "http":
320-
triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
321-
else:
322-
triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
323-
324-
# To make sure no shared memory regions are registered with the
325-
# server.
326-
triton_client.unregister_system_shared_memory()
327-
triton_client.unregister_cuda_shared_memory()
328-
329-
# Create the data for the two input tensors. Initialize the first
330-
# to unique integers and the second to all ones.
331-
in0 = np.arange(start=0, stop=16, dtype=np.int32)
332-
in0n = np.array(
333-
[str(x).encode("utf-8") for x in in0.flatten()], dtype=object
334-
)
335-
input0_data = in0n.reshape(in0.shape)
336-
in1 = np.ones(shape=16, dtype=np.int32)
337-
in1n = np.array(
338-
[str(x).encode("utf-8") for x in in1.flatten()], dtype=object
339-
)
340-
input1_data = in1n.reshape(in1.shape)
341-
342-
input0_data_serialized = utils.serialize_byte_tensor(input0_data)
343-
input1_data_serialized = utils.serialize_byte_tensor(input1_data)
344-
input0_byte_size = utils.serialized_byte_size(input0_data_serialized)
345-
input1_byte_size = utils.serialized_byte_size(input1_data_serialized)
346-
347-
# Create Input0 and Input1 in Shared Memory and store shared memory handles
348-
shm_ip0_handle = shm.create_shared_memory_region(
349-
"input0_data", "/input0_simple", input0_byte_size
350-
)
351-
shm_ip1_handle = shm.create_shared_memory_region(
352-
"input1_data", "/input1_simple", input1_byte_size
353-
)
354-
355-
# Put input data values into shared memory
356-
shm.set_shared_memory_region(shm_ip0_handle, [input0_data_serialized])
357-
shm.set_shared_memory_region(shm_ip1_handle, [input1_data_serialized])
358-
359-
# Register Input0 and Input1 shared memory with Triton Server
360-
triton_client.register_system_shared_memory(
361-
"input0_data", "/input0_simple", input0_byte_size
362-
)
363-
triton_client.register_system_shared_memory(
364-
"input1_data", "/input1_simple", input1_byte_size
365-
)
366-
367-
# Set the parameters to use data from shared memory
368-
inputs = []
369-
if client_type == "http":
370-
inputs.append(tritonhttpclient.InferInput("INPUT0", [1, 16], "BYTES"))
371-
inputs.append(tritonhttpclient.InferInput("INPUT1", [1, 16], "BYTES"))
372-
else:
373-
inputs.append(tritongrpcclient.InferInput("INPUT0", [1, 16], "BYTES"))
374-
inputs.append(tritongrpcclient.InferInput("INPUT1", [1, 16], "BYTES"))
375-
inputs[-2].set_shared_memory("input0_data", input0_byte_size + 4)
376-
inputs[-1].set_shared_memory("input1_data", input1_byte_size)
377-
378-
with self.assertRaises(InferenceServerException) as e:
379-
triton_client.infer(model_name=model_name, inputs=inputs)
380-
err_str = str(e.exception)
381-
382-
# BYTES inputs in shared memory will skip the check at the client
383-
self.assertIn(
384-
f"Invalid offset + byte size for shared memory region: 'input0_data'",
385-
err_str,
386-
)
387-
388-
print(triton_client.get_system_shared_memory_status())
389-
triton_client.unregister_system_shared_memory()
390-
assert len(shm.mapped_shared_memory_regions()) == 2
391-
shm.destroy_shared_memory_region(shm_ip0_handle)
392-
shm.destroy_shared_memory_region(shm_ip1_handle)
393-
assert len(shm.mapped_shared_memory_regions()) == 0
394-
395229
def test_wrong_input_shape_tensor_size(self):
396230
def inference_helper(model_name, batch_size=1):
397231
triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")

0 commit comments

Comments
 (0)