Add client tests

yinggeh · yinggeh · commit 36114c44c16d · 2024-07-08T18:12:42.000-07:00
diff --git a/Dockerfile.QA b/Dockerfile.QA
@@ -149,8 +149,8 @@ RUN mkdir -p qa/common && \
     cp bin/triton_json_test qa/L0_json/. && \
     cp bin/backend_output_detail_test qa/L0_backend_output_detail/. && \
     cp -r deploy/mlflow-triton-plugin qa/L0_mlflow/. && \
-    cp bin/input_byte_size_test qa/L0_input_validation/. && \
-    cp -r docs/examples/model_repository/simple_identity qa/L0_input_validation/models
+    cp -r docs/examples/model_repository/{simple,simple_identity,simple_string} qa/L0_input_validation/models && \
+    cp bin/input_byte_size_test qa/L0_input_validation/.
 
 RUN mkdir -p qa/pkgs && \
     cp python/triton*.whl qa/pkgs/. && \
diff --git a/qa/L0_input_validation/input_validation_test.py b/qa/L0_input_validation/input_validation_test.py
@@ -34,7 +34,13 @@
 import infer_util as iu
 import numpy as np
 import tritonclient.grpc as tritongrpcclient
-from tritonclient.utils import InferenceServerException, np_to_triton_dtype
+import tritonclient.http as tritonhttpclient
+import tritonclient.utils as utils
+from tritonclient.utils import (
+    InferenceServerException,
+    np_to_triton_dtype,
+    shared_memory,
+)
 
 
 class InputValTest(unittest.TestCase):
@@ -115,101 +121,283 @@ def test_input_validation_all_optional(self):
 
 
 class InputShapeTest(unittest.TestCase):
-    def test_input_shape_validation(self):
-        input_size = 8
-        model_name = "pt_identity"
-        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+    def test_client_input_shape_validation(self):
+        model_name = "simple"
 
-        # Pass
-        input_data = np.arange(input_size)[None].astype(np.float32)
-        inputs = [
-            tritongrpcclient.InferInput(
-                "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
-            )
-        ]
-        inputs[0].set_data_from_numpy(input_data)
-        triton_client.infer(model_name=model_name, inputs=inputs)
-
-        # Larger input byte size than expected
-        input_data = np.arange(input_size + 2)[None].astype(np.float32)
-        inputs = [
-            tritongrpcclient.InferInput(
-                "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
-            )
-        ]
-        inputs[0].set_data_from_numpy(input_data)
-        # Compromised input shape
-        inputs[0].set_shape((1, input_size))
-        with self.assertRaises(InferenceServerException) as e:
-            triton_client.infer(
-                model_name=model_name,
-                inputs=inputs,
+        for client_type in ["http", "grpc"]:
+            if client_type == "http":
+                triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+            else:
+                triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+
+            # Infer
+            inputs = []
+            if client_type == "http":
+                inputs.append(tritonhttpclient.InferInput("INPUT0", [1, 16], "INT32"))
+                inputs.append(tritonhttpclient.InferInput("INPUT1", [1, 16], "INT32"))
+            else:
+                inputs.append(tritongrpcclient.InferInput("INPUT0", [1, 16], "INT32"))
+                inputs.append(tritongrpcclient.InferInput("INPUT1", [1, 16], "INT32"))
+
+            # Create the data for the two input tensors. Initialize the first
+            # to unique integers and the second to all ones.
+            input0_data = np.arange(start=0, stop=16, dtype=np.int32)
+            input0_data = np.expand_dims(input0_data, axis=0)
+            input1_data = np.ones(shape=(1, 16), dtype=np.int32)
+
+            # Initialize the data
+            inputs[0].set_data_from_numpy(input0_data)
+            inputs[1].set_data_from_numpy(input1_data)
+
+            # Compromised input shapes
+            inputs[0].set_shape([2, 8])
+            inputs[1].set_shape([2, 8])
+
+            with self.assertRaises(InferenceServerException) as e:
+                triton_client.infer(model_name=model_name, inputs=inputs)
+            err_str = str(e.exception)
+            self.assertIn(
+                f"unexpected shape for input 'INPUT1' for model 'simple'. Expected [-1,16], got [2,8]",
+                err_str,
             )
-        err_str = str(e.exception)
-        self.assertIn(
-            "input byte size mismatch for input 'INPUT0' for model 'pt_identity'. Expected 32, got 40",
-            err_str,
-        )
 
-    def test_input_string_shape_validation(self):
-        input_size = 16
-        model_name = "graphdef_object_int32_int32"
-        np_dtype_string = np.dtype(object)
-        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+            # Compromised input shapes
+            inputs[0].set_shape([1, 8])
+            inputs[1].set_shape([1, 8])
 
-        def get_input_array(input_size, np_dtype):
-            rinput_dtype = iu._range_repr_dtype(np_dtype)
-            input_array = np.random.randint(
-                low=0, high=127, size=(1, input_size), dtype=rinput_dtype
+            with self.assertRaises(InferenceServerException) as e:
+                triton_client.infer(model_name=model_name, inputs=inputs)
+            err_str = str(e.exception)
+            self.assertIn(
+                f"'INPUT0' got unexpected elements count 16, expected 8",
+                err_str,
             )
 
-            # Convert to string type
-            inn = np.array(
-                [str(x) for x in input_array.reshape(input_array.size)], dtype=object
+    def test_client_input_string_shape_validation(self):
+        for client_type in ["http", "grpc"]:
+
+            def identity_inference(triton_client, np_array, binary_data):
+                model_name = "simple_identity"
+
+                # Total elements no change
+                inputs = []
+                if client_type == "http":
+                    inputs.append(
+                        tritonhttpclient.InferInput("INPUT0", np_array.shape, "BYTES")
+                    )
+                    inputs[0].set_data_from_numpy(np_array, binary_data=binary_data)
+                    inputs[0].set_shape([2, 8])
+                else:
+                    inputs.append(
+                        tritongrpcclient.InferInput("INPUT0", np_array.shape, "BYTES")
+                    )
+                    inputs[0].set_data_from_numpy(np_array)
+                    inputs[0].set_shape([2, 8])
+                triton_client.infer(model_name=model_name, inputs=inputs)
+
+                # Compromised input shape
+                inputs[0].set_shape([1, 8])
+
+                with self.assertRaises(InferenceServerException) as e:
+                    triton_client.infer(model_name=model_name, inputs=inputs)
+                err_str = str(e.exception)
+                self.assertIn(
+                    f"'INPUT0' got unexpected elements count 16, expected 8",
+                    err_str,
+                )
+
+            if client_type == "http":
+                triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+            else:
+                triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+
+            # Example using BYTES input tensor with utf-8 encoded string that
+            # has an embedded null character.
+            null_chars_array = np.array(
+                ["he\x00llo".encode("utf-8") for i in range(16)], dtype=np.object_
             )
-            input_array = inn.reshape(input_array.shape)
+            null_char_data = null_chars_array.reshape([1, 16])
+            identity_inference(triton_client, null_char_data, True)  # Using binary data
+            identity_inference(triton_client, null_char_data, False)  # Using JSON data
+
+            # Example using BYTES input tensor with 16 elements, where each
+            # element is a 4-byte binary blob with value 0x00010203. Can use
+            # dtype=np.bytes_ in this case.
+            bytes_data = [b"\x00\x01\x02\x03" for i in range(16)]
+            np_bytes_data = np.array(bytes_data, dtype=np.bytes_)
+            np_bytes_data = np_bytes_data.reshape([1, 16])
+            identity_inference(triton_client, np_bytes_data, True)  # Using binary data
+            identity_inference(triton_client, np_bytes_data, False)  # Using JSON data
+
+    def test_client_input_shm_size_validation(self):
+        # We use a simple model that takes 2 input tensors of 16 integers
+        # each and returns 2 output tensors of 16 integers each. One
+        # output tensor is the element-wise sum of the inputs and one
+        # output is the element-wise difference.
+        model_name = "simple"
+
+        for client_type in ["http", "grpc"]:
+            if client_type == "http":
+                triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+            else:
+                triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+            # To make sure no shared memory regions are registered with the
+            # server.
+            triton_client.unregister_system_shared_memory()
+            triton_client.unregister_cuda_shared_memory()
+
+            # Create the data for the two input tensors. Initialize the first
+            # to unique integers and the second to all ones.
+            input0_data = np.arange(start=0, stop=16, dtype=np.int32)
+            input1_data = np.ones(shape=16, dtype=np.int32)
+
+            input_byte_size = input0_data.size * input0_data.itemsize
 
+            # Create shared memory region for input and store shared memory handle
+            shm_ip_handle = shared_memory.create_shared_memory_region(
+                "input_data", "/input_simple", input_byte_size * 2
+            )
+
+            # Put input data values into shared memory
+            shared_memory.set_shared_memory_region(shm_ip_handle, [input0_data])
+            shared_memory.set_shared_memory_region(
+                shm_ip_handle, [input1_data], offset=input_byte_size
+            )
+
+            # Register shared memory region for inputs with Triton Server
+            triton_client.register_system_shared_memory(
+                "input_data", "/input_simple", input_byte_size * 2
+            )
+
+            # Set the parameters to use data from shared memory
             inputs = []
-            inputs.append(
-                tritongrpcclient.InferInput(
-                    "INPUT0", input_array.shape, np_to_triton_dtype(np_dtype)
-                )
+            if client_type == "http":
+                inputs.append(tritonhttpclient.InferInput("INPUT0", [1, 16], "INT32"))
+                inputs.append(tritonhttpclient.InferInput("INPUT1", [1, 16], "INT32"))
+            else:
+                inputs.append(tritongrpcclient.InferInput("INPUT0", [1, 16], "INT32"))
+                inputs.append(tritongrpcclient.InferInput("INPUT1", [1, 16], "INT32"))
+            inputs[-2].set_shared_memory("input_data", input_byte_size + 4)
+            inputs[-1].set_shared_memory(
+                "input_data", input_byte_size, offset=input_byte_size
             )
-            inputs.append(
-                tritongrpcclient.InferInput(
-                    "INPUT1", input_array.shape, np_to_triton_dtype(np_dtype)
-                )
+
+            with self.assertRaises(InferenceServerException) as e:
+                triton_client.infer(model_name=model_name, inputs=inputs)
+            err_str = str(e.exception)
+            self.assertIn(
+                f"'INPUT0' got unexpected byte size {input_byte_size+4}, expected {input_byte_size}",
+                err_str,
             )
 
-            inputs[0].set_data_from_numpy(input_array)
-            inputs[1].set_data_from_numpy(input_array)
-            return inputs
+            # Set the parameters to use data from shared memory
+            inputs[-2].set_shared_memory("input_data", input_byte_size)
+            inputs[-1].set_shared_memory(
+                "input_data", input_byte_size - 4, offset=input_byte_size
+            )
 
-        # Input size is less than expected
-        inputs = get_input_array(input_size - 2, np_dtype_string)
-        # Compromised input shape
-        inputs[0].set_shape((1, input_size))
-        inputs[1].set_shape((1, input_size))
-        with self.assertRaises(InferenceServerException) as e:
-            triton_client.infer(model_name=model_name, inputs=inputs)
-        err_str = str(e.exception)
-        self.assertIn(
-            f"expected {input_size} string elements for inference input 'INPUT1', got {input_size-2}",
-            err_str,
-        )
+            with self.assertRaises(InferenceServerException) as e:
+                triton_client.infer(model_name=model_name, inputs=inputs)
+            err_str = str(e.exception)
+            self.assertIn(
+                f"'INPUT1' got unexpected byte size {input_byte_size-4}, expected {input_byte_size}",
+                err_str,
+            )
 
-        # Input size is greater than expected
-        inputs = get_input_array(input_size + 2, np_dtype_string)
-        # Compromised input shape
-        inputs[0].set_shape((1, input_size))
-        inputs[1].set_shape((1, input_size))
-        with self.assertRaises(InferenceServerException) as e:
-            triton_client.infer(model_name=model_name, inputs=inputs)
-        err_str = str(e.exception)
-        self.assertIn(
-            f"expected {input_size} string elements for inference input 'INPUT1', got {input_size+2}",
-            err_str,
-        )
+            print(triton_client.get_system_shared_memory_status())
+            triton_client.unregister_system_shared_memory()
+            assert len(shared_memory.mapped_shared_memory_regions()) == 1
+            shared_memory.destroy_shared_memory_region(shm_ip_handle)
+            assert len(shared_memory.mapped_shared_memory_regions()) == 0
+
+    def test_client_input_string_shm_size_validation(self):
+        # We use a simple model that takes 2 input tensors of 16 strings
+        # each and returns 2 output tensors of 16 strings each. The input
+        # strings must represent integers. One output tensor is the
+        # element-wise sum of the inputs and one output is the element-wise
+        # difference.
+        model_name = "simple_string"
+
+        for client_type in ["http", "grpc"]:
+            if client_type == "http":
+                triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+            else:
+                triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+
+            # To make sure no shared memory regions are registered with the
+            # server.
+            triton_client.unregister_system_shared_memory()
+            triton_client.unregister_cuda_shared_memory()
+
+            # Create the data for the two input tensors. Initialize the first
+            # to unique integers and the second to all ones.
+            in0 = np.arange(start=0, stop=16, dtype=np.int32)
+            in0n = np.array(
+                [str(x).encode("utf-8") for x in in0.flatten()], dtype=object
+            )
+            input0_data = in0n.reshape(in0.shape)
+            in1 = np.ones(shape=16, dtype=np.int32)
+            in1n = np.array(
+                [str(x).encode("utf-8") for x in in1.flatten()], dtype=object
+            )
+            input1_data = in1n.reshape(in1.shape)
+
+            input0_data_serialized = utils.serialize_byte_tensor(input0_data)
+            input1_data_serialized = utils.serialize_byte_tensor(input1_data)
+            input0_byte_size = utils.serialized_byte_size(input0_data_serialized)
+            input1_byte_size = utils.serialized_byte_size(input1_data_serialized)
+
+            # Create Input0 and Input1 in Shared Memory and store shared memory handles
+            shm_ip0_handle = shared_memory.create_shared_memory_region(
+                "input0_data", "/input0_simple", input0_byte_size
+            )
+            shm_ip1_handle = shared_memory.create_shared_memory_region(
+                "input1_data", "/input1_simple", input1_byte_size
+            )
+
+            # Put input data values into shared memory
+            shared_memory.set_shared_memory_region(
+                shm_ip0_handle, [input0_data_serialized]
+            )
+            shared_memory.set_shared_memory_region(
+                shm_ip1_handle, [input1_data_serialized]
+            )
+
+            # Register Input0 and Input1 shared memory with Triton Server
+            triton_client.register_system_shared_memory(
+                "input0_data", "/input0_simple", input0_byte_size
+            )
+            triton_client.register_system_shared_memory(
+                "input1_data", "/input1_simple", input1_byte_size
+            )
+
+            # Set the parameters to use data from shared memory
+            inputs = []
+            if client_type == "http":
+                inputs.append(tritonhttpclient.InferInput("INPUT0", [1, 16], "BYTES"))
+                inputs.append(tritonhttpclient.InferInput("INPUT1", [1, 16], "BYTES"))
+            else:
+                inputs.append(tritongrpcclient.InferInput("INPUT0", [1, 16], "BYTES"))
+                inputs.append(tritongrpcclient.InferInput("INPUT1", [1, 16], "BYTES"))
+            inputs[-2].set_shared_memory("input0_data", input0_byte_size + 4)
+            inputs[-1].set_shared_memory("input1_data", input1_byte_size)
+
+            with self.assertRaises(InferenceServerException) as e:
+                triton_client.infer(model_name=model_name, inputs=inputs)
+            err_str = str(e.exception)
+
+            # BYTES inputs in shared memory will skip the check at the client
+            self.assertIn(
+                f"Invalid offset + byte size for shared memory region: 'input0_data'",
+                err_str,
+            )
+
+            print(triton_client.get_system_shared_memory_status())
+            triton_client.unregister_system_shared_memory()
+            assert len(shared_memory.mapped_shared_memory_regions()) == 2
+            shared_memory.destroy_shared_memory_region(shm_ip0_handle)
+            shared_memory.destroy_shared_memory_region(shm_ip1_handle)
+            assert len(shared_memory.mapped_shared_memory_regions()) == 0
 
 
 if __name__ == "__main__":
diff --git a/qa/L0_input_validation/test.sh b/qa/L0_input_validation/test.sh