
Commit be32a4e: Fix CPU-only mode, add test for it (#64)
Signed-off-by: szalpal <[email protected]>
1 parent: bb9204c

File tree: 10 files changed (+267, -13 lines)

qa/L0_identity_cpu/identity_client.py (path inferred; test.sh below invokes the client as identity_client.py)

Lines changed: 135 additions & 0 deletions (new file)

```python
#!/usr/bin/env python

# The MIT License (MIT)
#
# Copyright (c) 2020 NVIDIA CORPORATION
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import argparse
import math
import sys

import numpy as np
from numpy.random import randint
import tritongrpcclient

np.random.seed(100019)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action="store_true", required=False, default=False,
                        help='Enable verbose output')
    parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001',
                        help='Inference server URL. Default is localhost:8001.')
    parser.add_argument('--batch_size', type=int, required=False, default=4,
                        help='Batch size')
    parser.add_argument('--n_iter', type=int, required=False, default=-1,
                        help='Number of iterations, with `batch_size` samples each')
    parser.add_argument('--model_name', type=str, required=False, default="dali_identity_cpu",
                        help='Model name')
    return parser.parse_args()


def array_from_list(arrays):
    """
    Convert a list of ndarrays into a single ndarray with ndim += 1,
    zero-padding every array to the length of the longest one.
    """
    max_len = max(arr.shape[0] for arr in arrays)
    arrays = [np.pad(arr, (0, max_len - arr.shape[0])) for arr in arrays]
    for arr in arrays:
        assert arr.shape == arrays[0].shape, "Arrays must have the same shape"
    return np.stack(arrays)


def batcher(dataset, max_batch_size, n_iterations=-1):
    """
    Generator that splits the dataset into batches of random size
    (at most max_batch_size - 1, since randint's upper bound is exclusive).
    """
    iter_idx = 0
    data_idx = 0
    while data_idx < len(dataset):
        if 0 < n_iterations <= iter_idx:
            return  # raising StopIteration inside a generator is an error since PEP 479
        batch_size = min(randint(1, max_batch_size), len(dataset) - data_idx)
        iter_idx += 1
        yield dataset[data_idx : data_idx + batch_size]
        data_idx += batch_size


def main():
    FLAGS = parse_args()
    try:
        triton_client = tritongrpcclient.InferenceServerClient(url=FLAGS.url,
                                                               verbose=FLAGS.verbose)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # All three conditions must hold (hence `and`); note that the methods must be
    # called -- a bare bound method is always truthy.
    if not (triton_client.is_server_live() and
            triton_client.is_server_ready() and
            triton_client.is_model_ready(model_name=FLAGS.model_name)):
        print("Error connecting to server: Server live {}. Server ready {}. Model ready {}".format(
            triton_client.is_server_live(), triton_client.is_server_ready(),
            triton_client.is_model_ready(model_name=FLAGS.model_name)))
        sys.exit(1)

    model_name = FLAGS.model_name

    # Random uint8 samples of random lengths; the dataset size is also randomized.
    input_data = [randint(0, 255, size=randint(100), dtype='uint8')
                  for _ in range(randint(100) * FLAGS.batch_size)]
    input_data = array_from_list(input_data)

    # Infer
    outputs = []
    input_name = "DALI_INPUT_0"
    output_name = "DALI_OUTPUT_0"
    input_shape = list(input_data.shape)
    outputs.append(tritongrpcclient.InferRequestedOutput(output_name))

    for batch in batcher(input_data, FLAGS.batch_size, FLAGS.n_iter):
        print("Input mean before backend processing:", np.mean(batch))
        input_shape[0] = np.shape(batch)[0]
        print("Batch size: ", input_shape[0])
        inputs = [tritongrpcclient.InferInput(input_name, input_shape, "UINT8")]
        # Initialize the data
        inputs[0].set_data_from_numpy(batch)

        # Test with outputs
        results = triton_client.infer(model_name=model_name,
                                      inputs=inputs,
                                      outputs=outputs)

        # Get the output arrays from the results
        output0_data = results.as_numpy(output_name)
        print("Output mean after backend processing:", np.mean(output0_data))
        print("Output shape: ", np.shape(output0_data))
        if not math.isclose(np.mean(output0_data), np.mean(batch)):
            print("Pre/post average does not match")
            sys.exit(1)
        else:
            print("pass")

    statistics = triton_client.get_inference_statistics(model_name=model_name)
    if len(statistics.model_stats) != 1:
        print("FAILED: Inference Statistics")
        sys.exit(1)


if __name__ == '__main__':
    main()
```
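To make the padding helper above concrete, here is a minimal, standalone sketch of what array_from_list does with ragged 1-D inputs (illustrative only, not part of the commit):

```python
import numpy as np

# Two uint8 samples of different lengths, like the client's random inputs.
arrays = [np.array([1, 2, 3], dtype=np.uint8), np.array([7], dtype=np.uint8)]

# Zero-pad every sample to the longest one, then stack into a [2, 3] array.
max_len = max(a.shape[0] for a in arrays)
stacked = np.stack([np.pad(a, (0, max_len - a.shape[0])) for a in arrays])

print(stacked)  # [[1 2 3]
                #  [7 0 0]]
```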
qa/L0_identity_cpu/model_repository/dali_identity_cpu/config.pbtxt (path inferred from the model name and setup.sh below)

Lines changed: 39 additions & 0 deletions (new file)

```protobuf
# The MIT License (MIT)
#
# Copyright (c) 2020 NVIDIA CORPORATION
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

name: "dali_identity_cpu"
backend: "dali"
max_batch_size: 256
input [
  {
    name: "DALI_INPUT_0"
    data_type: TYPE_UINT8
    dims: [ -1 ]
  }
]

output [
  {
    name: "DALI_OUTPUT_0"
    data_type: TYPE_UINT8
    dims: [ -1 ]
  }
]
```
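The names, data type, and dims here form the contract between the config, the pipeline's external_source, and the client: dims: [ -1 ] declares one variable-length dimension per sample, and max_batch_size lets Triton prepend a batch dimension. A sketch of the tensor the client ends up sending (values are illustrative, not from the commit):

```python
import numpy as np

# With dims: [ -1 ], each sample is a 1-D uint8 tensor of arbitrary length;
# the client pads samples to a common length and sends [batch, length]:
batch = np.random.randint(0, 255, size=(4, 57), dtype=np.uint8)

# The matching request input is declared with the same name and shape:
# tritongrpcclient.InferInput("DALI_INPUT_0", [4, 57], "UINT8")
```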
qa/L0_identity_cpu/model_repository/identity_pipeline.py (path inferred; setup.sh below runs it from inside model_repository)

Lines changed: 44 additions & 0 deletions (new file)

```python
# The MIT License (MIT)
#
# Copyright (c) 2021 NVIDIA CORPORATION
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import nvidia.dali as dali


def _parse_args():
    import argparse
    parser = argparse.ArgumentParser(description="Serialize the pipeline and save it to a file")
    parser.add_argument('file_path', type=str,
                        help='The path where to save the serialized pipeline')
    return parser.parse_args()


# device_id=None builds a CPU-only pipeline: no GPU is needed to serialize or run it.
@dali.pipeline_def(batch_size=3, num_threads=1, device_id=None)
def pipe():
    data = dali.fn.external_source(device="cpu", name="DALI_INPUT_0")
    return data


def main(filename):
    pipe().serialize(filename=filename)


if __name__ == '__main__':
    args = _parse_args()
    main(args.file_path)
```
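device_id=None is what makes this pipeline CPU-only, which is exactly the mode this commit fixes. For contrast, a GPU variant of the same identity pipeline would differ only in the device arguments (a sketch, not part of the commit):

```python
import nvidia.dali as dali

# Hypothetical GPU counterpart of the identity pipeline above:
@dali.pipeline_def(batch_size=3, num_threads=1, device_id=0)
def gpu_pipe():
    data = dali.fn.external_source(device="gpu", name="DALI_INPUT_0")
    return data
```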

qa/L0_identity_cpu/setup.sh

Lines changed: 9 additions & 0 deletions (new file)

```bash
#!/bin/bash -ex

pushd model_repository

# Serialize the identity pipeline into version directory 1 of the dali_identity_cpu model.
mkdir -p dali_identity_cpu/1
python identity_pipeline.py dali_identity_cpu/1/model.dali
echo "Identity model ready."

popd
```
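After setup.sh runs, the model repository should look roughly like this (layout assumed from the script and standard Triton conventions, not shown in the commit):

```
model_repository/
└── dali_identity_cpu/
    ├── config.pbtxt      # the configuration shown above
    └── 1/
        └── model.dali    # serialized pipeline written by identity_pipeline.py
```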

qa/L0_identity_cpu/test.sh

Lines changed: 5 additions & 0 deletions (new file)

```bash
#!/bin/bash -ex

# GRPC_ADDR defaults to the first script argument, or localhost:8001 if neither is given.
: ${GRPC_ADDR:=${1:-"localhost:8001"}}

python identity_client.py -u "$GRPC_ADDR"
```

src/dali_backend.cc

Lines changed: 8 additions & 4 deletions

```diff
@@ -178,7 +178,7 @@ class DaliModelInstance : public ::triton::backend::BackendModelInstance {
   }

   void Execute(const std::vector<TritonRequest>& requests) {
-    DeviceGuard dg(device_id_);
+    DeviceGuard dg(DetermineDeviceId());
     int total_batch_size = 0;
     TimeInterval batch_compute_interval{};
     TimeInterval batch_exec_interval{};
@@ -215,7 +215,7 @@ class DaliModelInstance : public ::triton::backend::BackendModelInstance {
     auto serialized_pipeline = dali_model_->GetModelProvider().GetModel();
     auto max_batch_size = dali_model_->MaxBatchSize();
     auto num_threads = dali_model_->GetModelParamters().GetNumThreads();
-    DaliPipeline pipeline(serialized_pipeline, max_batch_size, num_threads, device_id_);
+    DaliPipeline pipeline(serialized_pipeline, max_batch_size, num_threads, DetermineDeviceId());
     dali_executor_ = std::make_unique<DaliExecutor>(std::move(pipeline));
   }

@@ -261,14 +261,18 @@ class DaliModelInstance : public ::triton::backend::BackendModelInstance {
       std::vector<IBufferDescr> buffers;
       buffers.reserve(input_buffer_count);
       for (uint32_t buffer_idx = 0; buffer_idx < input_buffer_count; ++buffer_idx) {
-        auto buffer = input.GetBuffer(buffer_idx, device_type_t::CPU, device_id_);
+        auto buffer = input.GetBuffer(buffer_idx, device_type_t::CPU, DetermineDeviceId());
        buffers.push_back(buffer);
      }
      ret.push_back({input.Meta(), std::move(buffers)});
    }
    return ret;
  }

+  int32_t DetermineDeviceId() {
+    return !CudaStream() ? ::dali::CPU_ONLY_DEVICE_ID : device_id_;
+  }
+
   /**
    * @brief Allocate outputs required by a given request.
    *
@@ -292,7 +296,7 @@ class DaliModelInstance : public ::triton::backend::BackendModelInstance {
       out_meta.type = outputs_info[output_idx].type;
       out_meta.shape = outputs_info[output_idx].shape;
       auto output = response.GetOutput(out_meta);
-      auto buffer = output.AllocateBuffer(outputs_info[output_idx].device, device_id_);
+      auto buffer = output.AllocateBuffer(outputs_info[output_idx].device, DetermineDeviceId());
       outputs[output_idx] = {out_meta, {buffer}};
     }
     return outputs;
```
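The heart of the fix is DetermineDeviceId(): an instance without a CUDA stream is treated as CPU-only, and DALI receives the CPU_ONLY_DEVICE_ID sentinel instead of a real device ordinal. A one-line restatement of the rule in Python (illustrative only; the sentinel's value is assumed from NoGpu() in dali_pipeline.h, which checks device_id_ < 0):

```python
CPU_ONLY_DEVICE_ID = -1  # assumed value; NoGpu() treats negative ids as CPU-only

def determine_device_id(cuda_stream, device_id):
    # No CUDA stream means a CPU-only instance: hand DALI the sentinel id.
    return CPU_ONLY_DEVICE_ID if cuda_stream is None else device_id
```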

src/dali_executor/dali_executor.cc

Lines changed: 2 additions & 0 deletions (whitespace only)

```diff
@@ -50,6 +50,7 @@ void DaliExecutor::SetupInputs(const std::vector<IDescr>& inputs) {
   }
 }

+
 IDescr DaliExecutor::ScheduleInputCopy(const IDescr& input) {
   assert(input.buffers.size() > 0);
   IOBufferI* buffer;
@@ -79,6 +80,7 @@ void DaliExecutor::RunInputCopy() {
   thread_pool_.RunAll();
 }

+
 bool DaliExecutor::IsNoCopy(const IDescr& input) {
   return input.buffers.size() == 1 && (input.buffers[0].device == device_type_t::CPU ||
                                        input.buffers[0].device_id == pipeline_.DeviceId());
```

src/dali_executor/dali_executor.h

Lines changed: 3 additions & 1 deletion

```diff
@@ -61,7 +61,9 @@ class DaliExecutor {
   void SetupInputs(const std::vector<IDescr>& inputs);

   /**
-   * @brief Schedule copy to a continous buffer and return IDecr to the new buffer.
+   * @brief Schedule a copy of all buffers within the input IDescr to a contiguous buffer.
+   * The copy is performed after calling RunInputCopy().
+   * @return IDescr pointing to the new, contiguous buffer.
    */
   IDescr ScheduleInputCopy(const IDescr& buffers);
```

src/dali_executor/dali_pipeline.cc

Lines changed: 3 additions & 1 deletion

```diff
@@ -78,8 +78,10 @@ void DaliPipeline::SetInput(const IDescr& io_descr) {
 }

 void DaliPipeline::SyncOutputStream() {
+  if (NoGpu())
+    return;
   DeviceGuard dg(device_id_);
-  CUDA_CALL(cudaStreamSynchronize(output_stream_));
+  CUDA_CALL_GUARD(cudaStreamSynchronize(output_stream_));
 }

 void DaliPipeline::PutOutput(void* destination, int output_idx, device_type_t destination_device) {
```

src/dali_executor/dali_pipeline.h

Lines changed: 19 additions & 7 deletions

```diff
@@ -32,8 +32,6 @@
 #include "src/dali_executor/utils/utils.h"
 #include "src/error_handling.h"

-using std::cout;
-using std::endl;

 namespace triton { namespace backend { namespace dali {

@@ -138,15 +136,25 @@
     CreatePipeline();
   }

-  int DeviceId() {
+
+  int DeviceId() const {
     return device_id_;
   }

-  int NumThreadsArg() {
+
+  int NumThreadsArg() const {
     return num_threads_;
   }

+
  private:
+  /**
+   * @return True if this DALI pipeline does not have a GPU available.
+   */
+  bool NoGpu() const noexcept {
+    return device_id_ < 0;
+  }
+
   void CreatePipeline() {
     daliCreatePipeline(&handle_, serialized_pipeline_.c_str(), serialized_pipeline_.length(),
                        max_batch_size_, num_threads_, device_id_, 0, 1, 0, 0, 0);
@@ -160,9 +168,11 @@
   }

   void ReleaseStream() {
+    if (NoGpu())
+      return;
     if (output_stream_) {
-      CUDA_CALL(cudaStreamSynchronize(output_stream_));
-      CUDA_CALL(cudaStreamDestroy(output_stream_));
+      CUDA_CALL_GUARD(cudaStreamSynchronize(output_stream_));
+      CUDA_CALL_GUARD(cudaStreamDestroy(output_stream_));
       output_stream_ = nullptr;
     }
   }
@@ -175,7 +185,9 @@
   }

   void InitStream() {
-    CUDA_CALL(cudaStreamCreate(&output_stream_));
+    if (NoGpu())
+      return;
+    CUDA_CALL_GUARD(cudaStreamCreate(&output_stream_));
   }

   std::string serialized_pipeline_{};
```
