Workaround for L0_trt_reformat_free: remove shm checks

yinggeh · yinggeh · commit abd692485f8d · 2024-08-02T14:47:27.000-07:00
diff --git a/qa/L0_input_validation/input_validation_test.py b/qa/L0_input_validation/input_validation_test.py
@@ -226,172 +226,6 @@ def identity_inference(triton_client, np_array, binary_data):
             identity_inference(triton_client, np_bytes_data, True)  # Using binary data
             identity_inference(triton_client, np_bytes_data, False)  # Using JSON data
 
-    def test_client_input_shm_size_validation(self):
-        # We use a simple model that takes 2 input tensors of 16 integers
-        # each and returns 2 output tensors of 16 integers each. One
-        # output tensor is the element-wise sum of the inputs and one
-        # output is the element-wise difference.
-        model_name = "simple"
-
-        for client_type in ["http", "grpc"]:
-            if client_type == "http":
-                triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
-            else:
-                triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
-            # To make sure no shared memory regions are registered with the
-            # server.
-            triton_client.unregister_system_shared_memory()
-            triton_client.unregister_cuda_shared_memory()
-
-            # Create the data for the two input tensors. Initialize the first
-            # to unique integers and the second to all ones.
-            input0_data = np.arange(start=0, stop=16, dtype=np.int32)
-            input1_data = np.ones(shape=16, dtype=np.int32)
-
-            input_byte_size = input0_data.size * input0_data.itemsize
-
-            # Create shared memory region for input and store shared memory handle
-            shm_ip_handle = shm.create_shared_memory_region(
-                "input_data", "/input_simple", input_byte_size * 2
-            )
-
-            # Put input data values into shared memory
-            shm.set_shared_memory_region(shm_ip_handle, [input0_data])
-            shm.set_shared_memory_region(
-                shm_ip_handle, [input1_data], offset=input_byte_size
-            )
-
-            # Register shared memory region for inputs with Triton Server
-            triton_client.register_system_shared_memory(
-                "input_data", "/input_simple", input_byte_size * 2
-            )
-
-            # Set the parameters to use data from shared memory
-            inputs = []
-            if client_type == "http":
-                inputs.append(tritonhttpclient.InferInput("INPUT0", [1, 16], "INT32"))
-                inputs.append(tritonhttpclient.InferInput("INPUT1", [1, 16], "INT32"))
-            else:
-                inputs.append(tritongrpcclient.InferInput("INPUT0", [1, 16], "INT32"))
-                inputs.append(tritongrpcclient.InferInput("INPUT1", [1, 16], "INT32"))
-            inputs[-2].set_shared_memory("input_data", input_byte_size + 4)
-            inputs[-1].set_shared_memory(
-                "input_data", input_byte_size, offset=input_byte_size
-            )
-
-            with self.assertRaises(InferenceServerException) as e:
-                triton_client.infer(model_name=model_name, inputs=inputs)
-            err_str = str(e.exception)
-            self.assertIn(
-                f"input 'INPUT0' got unexpected byte size {input_byte_size+4}, expected {input_byte_size}",
-                err_str,
-            )
-
-            # Set the parameters to use data from shared memory
-            inputs[-2].set_shared_memory("input_data", input_byte_size)
-            inputs[-1].set_shared_memory(
-                "input_data", input_byte_size - 4, offset=input_byte_size
-            )
-
-            with self.assertRaises(InferenceServerException) as e:
-                triton_client.infer(model_name=model_name, inputs=inputs)
-            err_str = str(e.exception)
-            self.assertIn(
-                f"input 'INPUT1' got unexpected byte size {input_byte_size-4}, expected {input_byte_size}",
-                err_str,
-            )
-
-            print(triton_client.get_system_shared_memory_status())
-            triton_client.unregister_system_shared_memory()
-            assert len(shm.mapped_shared_memory_regions()) == 1
-            shm.destroy_shared_memory_region(shm_ip_handle)
-            assert len(shm.mapped_shared_memory_regions()) == 0
-
-    def test_client_input_string_shm_size_validation(self):
-        # We use a simple model that takes 2 input tensors of 16 strings
-        # each and returns 2 output tensors of 16 strings each. The input
-        # strings must represent integers. One output tensor is the
-        # element-wise sum of the inputs and one output is the element-wise
-        # difference.
-        model_name = "simple_string"
-
-        for client_type in ["http", "grpc"]:
-            if client_type == "http":
-                triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
-            else:
-                triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
-
-            # To make sure no shared memory regions are registered with the
-            # server.
-            triton_client.unregister_system_shared_memory()
-            triton_client.unregister_cuda_shared_memory()
-
-            # Create the data for the two input tensors. Initialize the first
-            # to unique integers and the second to all ones.
-            in0 = np.arange(start=0, stop=16, dtype=np.int32)
-            in0n = np.array(
-                [str(x).encode("utf-8") for x in in0.flatten()], dtype=object
-            )
-            input0_data = in0n.reshape(in0.shape)
-            in1 = np.ones(shape=16, dtype=np.int32)
-            in1n = np.array(
-                [str(x).encode("utf-8") for x in in1.flatten()], dtype=object
-            )
-            input1_data = in1n.reshape(in1.shape)
-
-            input0_data_serialized = utils.serialize_byte_tensor(input0_data)
-            input1_data_serialized = utils.serialize_byte_tensor(input1_data)
-            input0_byte_size = utils.serialized_byte_size(input0_data_serialized)
-            input1_byte_size = utils.serialized_byte_size(input1_data_serialized)
-
-            # Create Input0 and Input1 in Shared Memory and store shared memory handles
-            shm_ip0_handle = shm.create_shared_memory_region(
-                "input0_data", "/input0_simple", input0_byte_size
-            )
-            shm_ip1_handle = shm.create_shared_memory_region(
-                "input1_data", "/input1_simple", input1_byte_size
-            )
-
-            # Put input data values into shared memory
-            shm.set_shared_memory_region(shm_ip0_handle, [input0_data_serialized])
-            shm.set_shared_memory_region(shm_ip1_handle, [input1_data_serialized])
-
-            # Register Input0 and Input1 shared memory with Triton Server
-            triton_client.register_system_shared_memory(
-                "input0_data", "/input0_simple", input0_byte_size
-            )
-            triton_client.register_system_shared_memory(
-                "input1_data", "/input1_simple", input1_byte_size
-            )
-
-            # Set the parameters to use data from shared memory
-            inputs = []
-            if client_type == "http":
-                inputs.append(tritonhttpclient.InferInput("INPUT0", [1, 16], "BYTES"))
-                inputs.append(tritonhttpclient.InferInput("INPUT1", [1, 16], "BYTES"))
-            else:
-                inputs.append(tritongrpcclient.InferInput("INPUT0", [1, 16], "BYTES"))
-                inputs.append(tritongrpcclient.InferInput("INPUT1", [1, 16], "BYTES"))
-            inputs[-2].set_shared_memory("input0_data", input0_byte_size + 4)
-            inputs[-1].set_shared_memory("input1_data", input1_byte_size)
-
-            with self.assertRaises(InferenceServerException) as e:
-                triton_client.infer(model_name=model_name, inputs=inputs)
-            err_str = str(e.exception)
-
-            # BYTES inputs in shared memory will skip the check at the client
-            self.assertIn(
-                f"Invalid offset + byte size for shared memory region: 'input0_data'",
-                err_str,
-            )
-
-            print(triton_client.get_system_shared_memory_status())
-            triton_client.unregister_system_shared_memory()
-            assert len(shm.mapped_shared_memory_regions()) == 2
-            shm.destroy_shared_memory_region(shm_ip0_handle)
-            shm.destroy_shared_memory_region(shm_ip1_handle)
-            assert len(shm.mapped_shared_memory_regions()) == 0
-
     def test_wrong_input_shape_tensor_size(self):
         def inference_helper(model_name, batch_size=1):
             triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")