@@ -226,172 +226,6 @@ def identity_inference(triton_client, np_array, binary_data):
226226            identity_inference (triton_client , np_bytes_data , True )  # Using binary data 
227227            identity_inference (triton_client , np_bytes_data , False )  # Using JSON data 
228228
def test_client_input_shm_size_validation(self):
    """Verify that wrong shared-memory byte sizes for INT32 inputs are rejected.

    For both the HTTP and the gRPC client, two INT32 tensors are placed
    back to back in a single system shared-memory region.  The test then
    declares a byte size that is 4 bytes too large for INPUT0, and
    afterwards one that is 4 bytes too small for INPUT1, and expects
    ``infer`` to raise ``InferenceServerException`` naming the offending
    input together with the received and expected sizes.

    The local shared-memory region is always destroyed, even when an
    assertion fails, so a failure cannot leak the region into the next
    loop iteration or into other tests.
    """
    # We use a simple model that takes 2 input tensors of 16 integers
    # each and returns 2 output tensors of 16 integers each. One
    # output tensor is the element-wise sum of the inputs and one
    # output is the element-wise difference.
    model_name = "simple"

    for client_type in ["http", "grpc"]:
        if client_type == "http":
            client_module = tritonhttpclient
            url = "localhost:8000"
        else:
            client_module = tritongrpcclient
            url = "localhost:8001"
        triton_client = client_module.InferenceServerClient(url)

        # To make sure no shared memory regions are registered with the
        # server.
        triton_client.unregister_system_shared_memory()
        triton_client.unregister_cuda_shared_memory()

        # Create the data for the two input tensors. Initialize the first
        # to unique integers and the second to all ones.
        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
        input1_data = np.ones(shape=16, dtype=np.int32)

        input_byte_size = input0_data.size * input0_data.itemsize

        # Create shared memory region for input and store shared memory handle
        shm_ip_handle = shm.create_shared_memory_region(
            "input_data", "/input_simple", input_byte_size * 2
        )
        try:
            # Put input data values into shared memory: INPUT0 first,
            # INPUT1 directly behind it.
            shm.set_shared_memory_region(shm_ip_handle, [input0_data])
            shm.set_shared_memory_region(
                shm_ip_handle, [input1_data], offset=input_byte_size
            )

            # Register shared memory region for inputs with Triton Server
            triton_client.register_system_shared_memory(
                "input_data", "/input_simple", input_byte_size * 2
            )

            inputs = [
                client_module.InferInput(name, [1, 16], "INT32")
                for name in ("INPUT0", "INPUT1")
            ]

            # Case 1: INPUT0 claims 4 bytes more than its shape requires.
            inputs[0].set_shared_memory("input_data", input_byte_size + 4)
            inputs[1].set_shared_memory(
                "input_data", input_byte_size, offset=input_byte_size
            )

            with self.assertRaises(InferenceServerException) as e:
                triton_client.infer(model_name=model_name, inputs=inputs)
            err_str = str(e.exception)
            self.assertIn(
                f"input 'INPUT0' got unexpected byte size {input_byte_size + 4}, expected {input_byte_size}",
                err_str,
            )

            # Case 2: INPUT0 is correct, INPUT1 claims 4 bytes too few.
            inputs[0].set_shared_memory("input_data", input_byte_size)
            inputs[1].set_shared_memory(
                "input_data", input_byte_size - 4, offset=input_byte_size
            )

            with self.assertRaises(InferenceServerException) as e:
                triton_client.infer(model_name=model_name, inputs=inputs)
            err_str = str(e.exception)
            self.assertIn(
                f"input 'INPUT1' got unexpected byte size {input_byte_size - 4}, expected {input_byte_size}",
                err_str,
            )

            print(triton_client.get_system_shared_memory_status())
            triton_client.unregister_system_shared_memory()
            # Unregistering from the server must not unmap the local region.
            self.assertEqual(len(shm.mapped_shared_memory_regions()), 1)
        finally:
            # Always release the local region so a failed assertion does
            # not leak it.
            shm.destroy_shared_memory_region(shm_ip_handle)
        self.assertEqual(len(shm.mapped_shared_memory_regions()), 0)
def test_client_input_string_shm_size_validation(self):
    """Verify that an oversized shared-memory byte size for a BYTES input is rejected.

    For both the HTTP and the gRPC client, two serialized BYTES tensors
    are placed in separate system shared-memory regions.  INPUT0 is then
    declared with a byte size 4 bytes larger than its region, and
    ``infer`` is expected to raise ``InferenceServerException`` reporting
    an invalid offset + byte size for region 'input0_data'.

    Both local shared-memory regions are always destroyed, even when an
    assertion or a setup step fails, so a failure cannot leak regions
    into the next loop iteration or into other tests.
    """
    # We use a simple model that takes 2 input tensors of 16 strings
    # each and returns 2 output tensors of 16 strings each. The input
    # strings must represent integers. One output tensor is the
    # element-wise sum of the inputs and one output is the element-wise
    # difference.
    model_name = "simple_string"

    for client_type in ["http", "grpc"]:
        if client_type == "http":
            client_module = tritonhttpclient
            url = "localhost:8000"
        else:
            client_module = tritongrpcclient
            url = "localhost:8001"
        triton_client = client_module.InferenceServerClient(url)

        # To make sure no shared memory regions are registered with the
        # server.
        triton_client.unregister_system_shared_memory()
        triton_client.unregister_cuda_shared_memory()

        # Create the data for the two input tensors. Initialize the first
        # to unique integers and the second to all ones, each encoded as
        # UTF-8 byte strings held in an object array.
        in0 = np.arange(start=0, stop=16, dtype=np.int32)
        in1 = np.ones(shape=16, dtype=np.int32)
        encoded = []
        for source in (in0, in1):
            as_bytes = np.array(
                [str(x).encode("utf-8") for x in source.flatten()], dtype=object
            )
            encoded.append(as_bytes.reshape(source.shape))
        input0_data, input1_data = encoded

        input0_data_serialized = utils.serialize_byte_tensor(input0_data)
        input1_data_serialized = utils.serialize_byte_tensor(input1_data)
        input0_byte_size = utils.serialized_byte_size(input0_data_serialized)
        input1_byte_size = utils.serialized_byte_size(input1_data_serialized)

        # Handles are collected as they are created so that the cleanup
        # below releases exactly the regions that exist.
        shm_handles = []
        try:
            # Create Input0 and Input1 in Shared Memory and store shared memory handles
            shm_ip0_handle = shm.create_shared_memory_region(
                "input0_data", "/input0_simple", input0_byte_size
            )
            shm_handles.append(shm_ip0_handle)
            shm_ip1_handle = shm.create_shared_memory_region(
                "input1_data", "/input1_simple", input1_byte_size
            )
            shm_handles.append(shm_ip1_handle)

            # Put input data values into shared memory
            shm.set_shared_memory_region(shm_ip0_handle, [input0_data_serialized])
            shm.set_shared_memory_region(shm_ip1_handle, [input1_data_serialized])

            # Register Input0 and Input1 shared memory with Triton Server
            triton_client.register_system_shared_memory(
                "input0_data", "/input0_simple", input0_byte_size
            )
            triton_client.register_system_shared_memory(
                "input1_data", "/input1_simple", input1_byte_size
            )

            # Set the parameters to use data from shared memory; INPUT0
            # claims 4 bytes more than its region holds.
            inputs = [
                client_module.InferInput(name, [1, 16], "BYTES")
                for name in ("INPUT0", "INPUT1")
            ]
            inputs[0].set_shared_memory("input0_data", input0_byte_size + 4)
            inputs[1].set_shared_memory("input1_data", input1_byte_size)

            with self.assertRaises(InferenceServerException) as e:
                triton_client.infer(model_name=model_name, inputs=inputs)
            err_str = str(e.exception)

            # BYTES inputs in shared memory will skip the check at the client
            self.assertIn(
                "Invalid offset + byte size for shared memory region: 'input0_data'",
                err_str,
            )

            print(triton_client.get_system_shared_memory_status())
            triton_client.unregister_system_shared_memory()
            # Unregistering from the server must not unmap the local regions.
            self.assertEqual(len(shm.mapped_shared_memory_regions()), 2)
        finally:
            # Always release the local regions so a failure does not leak them.
            for handle in shm_handles:
                shm.destroy_shared_memory_region(handle)
        self.assertEqual(len(shm.mapped_shared_memory_regions()), 0)
395229    def  test_wrong_input_shape_tensor_size (self ):
396230        def  inference_helper (model_name , batch_size = 1 ):
397231            triton_client  =  tritongrpcclient .InferenceServerClient ("localhost:8001" )
0 commit comments