Commit 6dc2a0b

Workaround with L0_trt_reformat_free by removing shm checks
1 parent d4b585d commit 6dc2a0b

File tree

3 files changed: +110 -135 lines

  Dockerfile.QA
  qa/L0_input_validation/input_validation_test.py
  qa/L0_input_validation/test.sh

Dockerfile.QA
Lines changed: 2 additions & 2 deletions

@@ -149,8 +149,8 @@ RUN mkdir -p qa/common && \
     cp bin/triton_json_test qa/L0_json/. && \
     cp bin/backend_output_detail_test qa/L0_backend_output_detail/. && \
     cp -r deploy/mlflow-triton-plugin qa/L0_mlflow/. && \
-    cp bin/input_byte_size_test qa/L0_input_validation/. && \
-    cp -r docs/examples/model_repository/simple_identity qa/L0_input_validation/models
+    cp -r docs/examples/model_repository/{simple,simple_identity,simple_string} qa/L0_input_validation/models && \
+    cp bin/input_byte_size_test qa/L0_input_validation/.

 RUN mkdir -p qa/pkgs && \
     cp python/triton*.whl qa/pkgs/. && \

qa/L0_input_validation/input_validation_test.py
Lines changed: 102 additions & 88 deletions

@@ -34,8 +34,10 @@
 import infer_util as iu
 import numpy as np
 import tritonclient.grpc as tritongrpcclient
+import tritonclient.http as tritonhttpclient
+import tritonclient.utils as utils
 import tritonclient.utils.shared_memory as shm
-from tritonclient.utils import InferenceServerException, np_to_triton_dtype
+from tritonclient.utils import InferenceServerException


 class InputValTest(unittest.TestCase):
@@ -116,101 +118,113 @@ def test_input_validation_all_optional(self):


 class InputShapeTest(unittest.TestCase):
-    def test_input_shape_validation(self):
-        input_size = 8
-        model_name = "pt_identity"
-        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+    def test_client_input_shape_validation(self):
+        model_name = "simple"

-        # Pass
-        input_data = np.arange(input_size)[None].astype(np.float32)
-        inputs = [
-            tritongrpcclient.InferInput(
-                "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
-            )
-        ]
-        inputs[0].set_data_from_numpy(input_data)
-        triton_client.infer(model_name=model_name, inputs=inputs)
-
-        # Larger input byte size than expected
-        input_data = np.arange(input_size + 2)[None].astype(np.float32)
-        inputs = [
-            tritongrpcclient.InferInput(
-                "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
-            )
-        ]
-        inputs[0].set_data_from_numpy(input_data)
-        # Compromised input shape
-        inputs[0].set_shape((1, input_size))
-        with self.assertRaises(InferenceServerException) as e:
-            triton_client.infer(
-                model_name=model_name,
-                inputs=inputs,
+        for client_type in ["http", "grpc"]:
+            if client_type == "http":
+                triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+            else:
+                triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+
+            # Infer
+            inputs = []
+            if client_type == "http":
+                inputs.append(tritonhttpclient.InferInput("INPUT0", [1, 16], "INT32"))
+                inputs.append(tritonhttpclient.InferInput("INPUT1", [1, 16], "INT32"))
+            else:
+                inputs.append(tritongrpcclient.InferInput("INPUT0", [1, 16], "INT32"))
+                inputs.append(tritongrpcclient.InferInput("INPUT1", [1, 16], "INT32"))
+
+            # Create the data for the two input tensors. Initialize the first
+            # to unique integers and the second to all ones.
+            input0_data = np.arange(start=0, stop=16, dtype=np.int32)
+            input0_data = np.expand_dims(input0_data, axis=0)
+            input1_data = np.ones(shape=(1, 16), dtype=np.int32)
+
+            # Initialize the data
+            inputs[0].set_data_from_numpy(input0_data)
+            inputs[1].set_data_from_numpy(input1_data)
+
+            # Compromised input shapes
+            inputs[0].set_shape([2, 8])
+            inputs[1].set_shape([2, 8])
+
+            with self.assertRaises(InferenceServerException) as e:
+                triton_client.infer(model_name=model_name, inputs=inputs)
+            err_str = str(e.exception)
+            self.assertIn(
+                f"unexpected shape for input 'INPUT1' for model 'simple'. Expected [-1,16], got [2,8]",
+                err_str,
             )
-        err_str = str(e.exception)
-        self.assertIn(
-            "input byte size mismatch for input 'INPUT0' for model 'pt_identity'. Expected 32, got 40",
-            err_str,
-        )

-    def test_input_string_shape_validation(self):
-        input_size = 16
-        model_name = "graphdef_object_int32_int32"
-        np_dtype_string = np.dtype(object)
-        triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+            # Compromised input shapes
+            inputs[0].set_shape([1, 8])
+            inputs[1].set_shape([1, 8])

-        def get_input_array(input_size, np_dtype):
-            rinput_dtype = iu._range_repr_dtype(np_dtype)
-            input_array = np.random.randint(
-                low=0, high=127, size=(1, input_size), dtype=rinput_dtype
+            with self.assertRaises(InferenceServerException) as e:
+                triton_client.infer(model_name=model_name, inputs=inputs)
+            err_str = str(e.exception)
+            self.assertIn(
+                f"input 'INPUT0' got unexpected elements count 16, expected 8",
+                err_str,
             )

-        # Convert to string type
-        inn = np.array(
-            [str(x) for x in input_array.reshape(input_array.size)], dtype=object
-        )
-        input_array = inn.reshape(input_array.shape)
+    def test_client_input_string_shape_validation(self):
+        for client_type in ["http", "grpc"]:

-        inputs = []
-        inputs.append(
-            tritongrpcclient.InferInput(
-                "INPUT0", input_array.shape, np_to_triton_dtype(np_dtype)
-            )
-        )
-        inputs.append(
-            tritongrpcclient.InferInput(
-                "INPUT1", input_array.shape, np_to_triton_dtype(np_dtype)
-            )
-        )
+            def identity_inference(triton_client, np_array, binary_data):
+                model_name = "simple_identity"

-        inputs[0].set_data_from_numpy(input_array)
-        inputs[1].set_data_from_numpy(input_array)
-        return inputs
+                # Total elements no change
+                inputs = []
+                if client_type == "http":
+                    inputs.append(
+                        tritonhttpclient.InferInput("INPUT0", np_array.shape, "BYTES")
+                    )
+                    inputs[0].set_data_from_numpy(np_array, binary_data=binary_data)
+                    inputs[0].set_shape([2, 8])
+                else:
+                    inputs.append(
+                        tritongrpcclient.InferInput("INPUT0", np_array.shape, "BYTES")
+                    )
+                    inputs[0].set_data_from_numpy(np_array)
+                    inputs[0].set_shape([2, 8])
+                triton_client.infer(model_name=model_name, inputs=inputs)

-        # Input size is less than expected
-        inputs = get_input_array(input_size - 2, np_dtype_string)
-        # Compromised input shape
-        inputs[0].set_shape((1, input_size))
-        inputs[1].set_shape((1, input_size))
-        with self.assertRaises(InferenceServerException) as e:
-            triton_client.infer(model_name=model_name, inputs=inputs)
-        err_str = str(e.exception)
-        self.assertIn(
-            f"expected {input_size} string elements for inference input 'INPUT1', got {input_size-2}",
-            err_str,
-        )
+                # Compromised input shape
+                inputs[0].set_shape([1, 8])

-        # Input size is greater than expected
-        inputs = get_input_array(input_size + 2, np_dtype_string)
-        # Compromised input shape
-        inputs[0].set_shape((1, input_size))
-        inputs[1].set_shape((1, input_size))
-        with self.assertRaises(InferenceServerException) as e:
-            triton_client.infer(model_name=model_name, inputs=inputs)
-        err_str = str(e.exception)
-        self.assertIn(
-            f"expected {input_size} string elements for inference input 'INPUT1', got {input_size+2}",
-            err_str,
-        )
+                with self.assertRaises(InferenceServerException) as e:
+                    triton_client.infer(model_name=model_name, inputs=inputs)
+                err_str = str(e.exception)
+                self.assertIn(
+                    f"input 'INPUT0' got unexpected elements count 16, expected 8",
+                    err_str,
+                )
+
+            if client_type == "http":
+                triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+            else:
+                triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+
+            # Example using BYTES input tensor with utf-8 encoded string that
+            # has an embedded null character.
+            null_chars_array = np.array(
+                ["he\x00llo".encode("utf-8") for i in range(16)], dtype=np.object_
+            )
+            null_char_data = null_chars_array.reshape([1, 16])
+            identity_inference(triton_client, null_char_data, True)  # Using binary data
+            identity_inference(triton_client, null_char_data, False)  # Using JSON data
+
+            # Example using BYTES input tensor with 16 elements, where each
+            # element is a 4-byte binary blob with value 0x00010203. Can use
+            # dtype=np.bytes_ in this case.
+            bytes_data = [b"\x00\x01\x02\x03" for i in range(16)]
+            np_bytes_data = np.array(bytes_data, dtype=np.bytes_)
+            np_bytes_data = np_bytes_data.reshape([1, 16])
+            identity_inference(triton_client, np_bytes_data, True)  # Using binary data
+            identity_inference(triton_client, np_bytes_data, False)  # Using JSON data

     def test_wrong_input_shape_tensor_size(self):
         def inference_helper(model_name, batch_size=1):
@@ -246,12 +260,12 @@ def inference_helper(model_name, batch_size=1):
                 tritongrpcclient.InferInput(
                     "DUMMY_INPUT0",
                     dummy_input_data.shape,
-                    np_to_triton_dtype(np.float32),
+                    utils.np_to_triton_dtype(np.float32),
                 ),
                 tritongrpcclient.InferInput(
                     "INPUT0",
                     shape_tensor_data.shape,
-                    np_to_triton_dtype(np.int32),
+                    utils.np_to_triton_dtype(np.int32),
                 ),
             ]
             inputs[0].set_data_from_numpy(dummy_input_data)
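
For reference, a minimal standalone sketch of the client-side shape check that the new test_client_input_shape_validation exercises. It assumes a local tritonserver serving the example "simple" model over gRPC on port 8001, as in the test setup above; the variable names are illustrative only.

    import numpy as np
    import tritonclient.grpc as tritongrpcclient
    from tritonclient.utils import InferenceServerException

    client = tritongrpcclient.InferenceServerClient("localhost:8001")

    # Build two [1, 16] INT32 inputs, matching the "simple" model signature.
    inputs = [
        tritongrpcclient.InferInput("INPUT0", [1, 16], "INT32"),
        tritongrpcclient.InferInput("INPUT1", [1, 16], "INT32"),
    ]
    inputs[0].set_data_from_numpy(np.arange(16, dtype=np.int32).reshape(1, 16))
    inputs[1].set_data_from_numpy(np.ones((1, 16), dtype=np.int32))

    # Reshape to a layout the model's [-1, 16] signature does not allow.
    inputs[0].set_shape([2, 8])
    inputs[1].set_shape([2, 8])

    try:
        client.infer(model_name="simple", inputs=inputs)
    except InferenceServerException as e:
        # Expected message: unexpected shape for input 'INPUT1' for model 'simple'
        print(e)

The string/BYTES counterpart from test_client_input_string_shape_validation, reusing the client and imports above and assuming the example "simple_identity" model is loaded: reshaping [1, 16] to [2, 8] preserves the element count and is accepted, while [1, 8] drops elements and should be rejected.

    data = np.array([b"hello" for _ in range(16)], dtype=np.object_).reshape(1, 16)
    str_input = tritongrpcclient.InferInput("INPUT0", data.shape, "BYTES")
    str_input.set_data_from_numpy(data)

    # Same total element count: the request is accepted.
    str_input.set_shape([2, 8])
    client.infer(model_name="simple_identity", inputs=[str_input])

    # Fewer declared elements than serialized: the request is rejected.
    str_input.set_shape([1, 8])
    try:
        client.infer(model_name="simple_identity", inputs=[str_input])
    except InferenceServerException as e:
        # Expected message: input 'INPUT0' got unexpected elements count 16, expected 8
        print(e)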

qa/L0_input_validation/test.sh
Lines changed: 6 additions & 45 deletions

@@ -68,6 +68,7 @@ set +e
 python3 -m pytest --junitxml="input_validation.report.xml" $TEST_PY::InputValTest >> $CLIENT_LOG 2>&1

 if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
     echo -e "\n***\n*** input_validation_test.py FAILED. \n***"
     RET=1
 fi
@@ -80,49 +81,6 @@ wait $SERVER_PID
 pip install torch
 pip install pytest-asyncio

-mkdir -p models/pt_identity/1
-PYTHON_CODE=$(cat <<END
-import torch
-torch.jit.save(
-    torch.jit.script(torch.nn.Identity()),
-    "`pwd`/models/pt_identity/1/model.pt",
-)
-END
-)
-res="$(python3 -c "$PYTHON_CODE")"
-
-if [ $? -ne 0 ]; then
-    echo -e "\n***\n*** model "pt_identity" initialization FAILED. \n***"
-    echo $res
-    exit 1
-fi
-
-# Create the config.pbtxt file with the specified configuration
-cat > models/pt_identity/config.pbtxt << EOL
-name: "pt_identity"
-backend: "pytorch"
-max_batch_size: 8
-input [
-  {
-    name: "INPUT0"
-    data_type: TYPE_FP32
-    dims: [8]
-  }
-]
-output [
-  {
-    name: "OUTPUT0"
-    data_type: TYPE_FP32
-    dims: [8]
-  }
-]
-# ensure we batch requests together
-dynamic_batching {
-    max_queue_delay_microseconds: 1000000
-}
-EOL
-
-cp -r $DATADIR/qa_model_repository/graphdef_object_int32_int32 models/.
 cp -r $DATADIR/qa_shapetensor_model_repository/plan_nobatch_zero_1_float32_int32 models/.
 cp -r $DATADIR/qa_shapetensor_model_repository/plan_zero_1_float32_int32 models/.

@@ -138,6 +96,7 @@ set +e
 python3 -m pytest --junitxml="input_shape_validation.report.xml" $TEST_PY::InputShapeTest >> $CLIENT_LOG 2>&1

 if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
     echo -e "\n***\n*** input_validation_test.py FAILED. \n***"
     RET=1
 fi
@@ -147,18 +106,20 @@ kill $SERVER_PID
 wait $SERVER_PID

 # input_byte_size_test
+cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/{savedmodel_zero_1_float32,savedmodel_zero_1_object} ./models
+
 set +e
 LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $TEST_EXEC >>$TEST_LOG 2>&1
 if [ $? -ne 0 ]; then
-    echo -e "\n***\n*** Query Unit Test Failed\n***"
+    cat $TEST_LOG
+    echo -e "\n***\n*** input_byte_size_test FAILED\n***"
     RET=1
 fi
 set -e

 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Input Validation Test Passed\n***"
 else
-    cat $CLIENT_LOG
     cat $SERVER_LOG
     echo -e "\n***\n*** Input Validation Test FAILED\n***"
 fi
