From b84940eed9672ee1d33a036cf3bdd6a5c9c92d9c Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Tue, 26 Mar 2024 08:51:07 +0100 Subject: [PATCH 01/15] update openvino backend version --- README.md | 4 +++- tools/gen_openvino_dockerfile.py | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e93dbe4..56433c1 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Follow the steps below to build the backend shared library. ``` $ mkdir build $ cd build -$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_OPENVINO_VERSION=2021.2.200 -DTRITON_BUILD_CONTAINER_VERSION=20.12 .. +$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_OPENVINO_VERSION=2024.0.0 -DTRITON_BUILD_CONTAINER_VERSION=24.02 .. $ make install ``` @@ -71,6 +71,8 @@ but the listed CMake argument can be used to override. * triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] + + ## Using the OpenVINO Backend ### Parameters diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 8c97950..1b8cd92 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -62,7 +62,6 @@ def dockerfile_for_linux(output_file): RUN apt-get update && apt-get install -y --no-install-recommends \ cmake \ libglib2.0-dev \ - libtbb-dev \ patchelf \ git \ make \ @@ -104,11 +103,10 @@ def dockerfile_for_linux(output_file): WORKDIR /opt/openvino RUN cp -r /workspace/openvino/licensing LICENSE.openvino RUN mkdir -p include && \ - cp -r /workspace/install/runtime/include/ngraph include/. && \ cp -r /workspace/install/runtime/include/openvino include/. RUN mkdir -p lib && \ - cp -P /usr/lib/x86_64-linux-gnu/libtbb.so* lib/. && \ - cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. \ + cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \ + cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* /lib/. """ df += """ From d69adca23644f6c5d1c57f01e529148787a6733e Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Tue, 26 Mar 2024 11:47:52 +0100 Subject: [PATCH 02/15] fix tbb --- tools/gen_openvino_dockerfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 1b8cd92..2691571 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -103,10 +103,10 @@ def dockerfile_for_linux(output_file): WORKDIR /opt/openvino RUN cp -r /workspace/openvino/licensing LICENSE.openvino RUN mkdir -p include && \ - cp -r /workspace/install/runtime/include/openvino include/. + cp -r /workspace/install/runtime/include/* include/. RUN mkdir -p lib && \ cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \ - cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* /lib/. + cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* lib/. 
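# NOTE: libtbb is taken from OpenVINO's bundled 3rdparty/tbb tree here (copied
# above), so the libtbb-dev system package dropped earlier in this series is
# no longer needed at build time.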
""" df += """ From f6a696c215012483366b648600accf3ebba0997a Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 27 Mar 2024 15:23:37 +0100 Subject: [PATCH 03/15] added support for intel gpu and virtual devices --- CMakeLists.txt | 1 - README.md | 41 +++++++++++++++++++++ src/openvino.cc | 97 ++++++++++++++++++++++++++++++++----------------- 3 files changed, 105 insertions(+), 34 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0948e11..071cdb8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,7 +210,6 @@ else() COMMAND rm -fr openvino COMMAND docker cp openvino_backend_ov:/opt/openvino openvino COMMAND docker rm openvino_backend_ov - COMMAND echo '' >> openvino/lib/plugins.xml COMMENT "Building OpenVino" ) endif() # WIN32 diff --git a/README.md b/README.md index 56433c1..ea25b08 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,11 @@ but the listed CMake argument can be used to override. * triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] +## Build a complete image with OpenVINO backend including Intel GPU drivers +``` +python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics \ +--backend openvino:pull/74/head --enable-intel-gpu +``` ## Using the OpenVINO Backend @@ -90,6 +95,7 @@ to skip the dynamic batch sizes in backend. * `ENABLE_BATCH_PADDING`: By default an error will be generated if backend receives a request with batch size less than max_batch_size specified in the configuration. This error can be avoided at a cost of performance by specifying `ENABLE_BATCH_PADDING` parameter as `YES`. * `RESHAPE_IO_LAYERS`: By setting this parameter as `YES`, the IO layers are reshaped to the dimensions provided in model configuration. By default, the dimensions in the model is used. +* `TARGET_DEVICE`: Choose the OpenVINO device for running the inference. It could be CPU (default), GPU or any of the virtual devices like AUTO, MULTI, HETERO. Note: using Intel GPU is possible only if `--device /dev/dri` is passed to the container and is supported only on linux with x86_64 arch. @@ -232,6 +238,41 @@ string_value:"yes" } } ``` +### Running the models on Intel GPU + +Build the custom triton image with the required runtime drivers using the script from . 
+ +``` +python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics +``` +Add to your config.pbtxt a parameter `TARGET_DEVICE`: +``` +parameters: [ +{ + key: "NUM_STREAMS" + value: { + string_value: "1" + } +}, +{ + key: "PERFORMANCE_HINT" + value: { + string_value: "THROUGHPUT" + } +}, +{ + key: "TARGET_DEVICE" + value: { + string_value: "GPU" + } +} +] +``` + +Start the container with extra parameter to pass the device `/dev/dri`: +``` +docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* ) tritonserver:latest +``` ## Known Issues diff --git a/src/openvino.cc b/src/openvino.cc index bcd556f..5fd398f 100644 --- a/src/openvino.cc +++ b/src/openvino.cc @@ -84,6 +84,9 @@ class ModelState : public BackendModel { TRITONSERVER_Error* ParseParameter( const std::string& mkey, triton::common::TritonJson::Value& params, std::vector>* device_config); + TRITONSERVER_Error* ParseStringParameter( + const std::string& mkey, triton::common::TritonJson::Value& params, + std::string* value); TRITONSERVER_Error* ParseParameterHelper( const std::string& mkey, std::string* value, std::pair* ov_property); @@ -118,6 +121,7 @@ class ModelState : public BackendModel { bool SkipDynamicBatchSize() { return skip_dynamic_batchsize_; } bool EnableBatchPadding() { return enable_padding_; } + std::string TargetDevice() {return target_device_;} private: ModelState(TRITONBACKEND_Model* triton_model); @@ -140,6 +144,7 @@ class ModelState : public BackendModel { bool skip_dynamic_batchsize_; bool enable_padding_; bool reshape_io_layers_; + std::string target_device_; }; TRITONSERVER_Error* @@ -179,7 +184,7 @@ ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state) ModelState::ModelState(TRITONBACKEND_Model* triton_model) : BackendModel(triton_model), model_read_(false), skip_dynamic_batchsize_(false), enable_padding_(false), - reshape_io_layers_(false) + reshape_io_layers_(false), target_device_("CPU") { } @@ -238,12 +243,10 @@ ModelState::ParseParameters() bool status = model_config_.Find("parameters", ¶ms); if (status) { RETURN_IF_ERROR(LoadCpuExtensions(params)); - RETURN_IF_ERROR(ParseBoolParameter( - "SKIP_OV_DYNAMIC_BATCHSIZE", params, &skip_dynamic_batchsize_)); - RETURN_IF_ERROR( - ParseBoolParameter("ENABLE_BATCH_PADDING", params, &enable_padding_)); - RETURN_IF_ERROR( - ParseBoolParameter("RESHAPE_IO_LAYERS", params, &reshape_io_layers_)); + ParseBoolParameter("SKIP_OV_DYNAMIC_BATCHSIZE", params, &skip_dynamic_batchsize_); + ParseBoolParameter("ENABLE_BATCH_PADDING", params, &enable_padding_); + ParseBoolParameter("RESHAPE_IO_LAYERS", params, &reshape_io_layers_); + ParseStringParameter("TARGET_DEVICE", params, &target_device_); } return nullptr; @@ -256,18 +259,13 @@ ModelState::ParseParameters(const std::string& device) triton::common::TritonJson::Value params; bool status = model_config_.Find("parameters", ¶ms); if (status) { - if (device == "CPU") { - config_[device] = {}; - auto& device_config = config_.at(device); - RETURN_IF_ERROR( - ParseParameter("INFERENCE_NUM_THREADS", params, &device_config)); - RETURN_IF_ERROR( - ParseParameter("COMPILATION_NUM_THREADS", params, &device_config)); - RETURN_IF_ERROR(ParseParameter("HINT_BF16", params, &device_config)); - RETURN_IF_ERROR(ParseParameter("NUM_STREAMS", params, &device_config)); - RETURN_IF_ERROR( - ParseParameter("PERFORMANCE_HINT", params, &device_config)); - } + config_[device] = {}; + auto& device_config = config_.at(device); + 
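// With the former CPU-only guard removed, the properties below are now
// collected for whichever device was configured (GPU, AUTO, MULTI, ...).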
ParseParameter("INFERENCE_NUM_THREADS", params, &device_config); + ParseParameter("COMPILATION_NUM_THREADS", params, &device_config); + ParseParameter("HINT_BF16", params, &device_config); + ParseParameter("NUM_STREAMS", params, &device_config); + ParseParameter("PERFORMANCE_HINT", params, &device_config); } return nullptr; @@ -277,9 +275,7 @@ TRITONSERVER_Error* ModelState::LoadCpuExtensions(triton::common::TritonJson::Value& params) { std::string cpu_ext_path; - LOG_IF_ERROR( - ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path)), - "error when reading parameters"); + ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path)); if (!cpu_ext_path.empty()) { // CPU (MKLDNN) extensions is loaded as a shared library and passed as a // pointer to base extension @@ -301,8 +297,8 @@ ModelState::ParseBoolParameter( bool* setting) { std::string value; - LOG_IF_ERROR( - ReadParameter(params, mkey, &(value)), "error when reading parameters"); + RETURN_IF_ERROR( + ReadParameter(params, mkey, &(value))); std::transform( value.begin(), value.end(), value.begin(), [](unsigned char c) { return std::tolower(c); }); @@ -313,14 +309,32 @@ ModelState::ParseBoolParameter( return nullptr; } +TRITONSERVER_Error* +ModelState::ParseStringParameter( + const std::string& mkey, triton::common::TritonJson::Value& params, + std::string* setting) +{ + std::string value; + RETURN_IF_ERROR( + ReadParameter(params, mkey, &(value))); + std::transform( + value.begin(), value.end(), value.begin(), + [](unsigned char c) { return std::toupper(c); }); + if (value.length() > 0) { + *setting = value; + } + + return nullptr; +} + TRITONSERVER_Error* ModelState::ParseParameter( const std::string& mkey, triton::common::TritonJson::Value& params, std::vector>* device_config) { std::string value; - LOG_IF_ERROR( - ReadParameter(params, mkey, &(value)), "error when reading parameters"); + RETURN_IF_ERROR( + ReadParameter(params, mkey, &(value))); if (!value.empty()) { std::pair ov_property; RETURN_IF_ERROR(ParseParameterHelper(mkey, &value, &ov_property)); @@ -410,6 +424,16 @@ ModelState::ParseParameterHelper( TRITONSERVER_Error* ModelState::ConfigureOpenvinoCore() { + auto availableDevices = ov_core_.get_available_devices(); + std::stringstream list_of_devices; + + for (auto & element : availableDevices) { + list_of_devices << element << ","; + } + LOG_MESSAGE( + TRITONSERVER_LOG_VERBOSE, + (std::string("Available OpenVINO devices: " + list_of_devices.str())) + .c_str()); for (auto&& item : config_) { std::string device_name = item.first; std::vector> properties = item.second; @@ -438,9 +462,10 @@ ModelState::LoadModel( std::to_string(OPENVINO_VERSION_MINOR) + "." 
+ std::to_string(OPENVINO_VERSION_PATCH)) .c_str()); + LOG_MESSAGE( TRITONSERVER_LOG_VERBOSE, - (std::string("Device info: \n") + + (std::string("Device info: ") + ConvertVersionMapToString(ov_core_.get_versions(device))) .c_str()); @@ -932,19 +957,26 @@ ModelInstanceState::Create( ModelInstanceState::ModelInstanceState( ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance) : BackendModelInstance(model_state, triton_model_instance), - model_state_(model_state), device_("CPU"), batch_pad_size_(0) + model_state_(model_state), device_(model_state->TargetDevice()), batch_pad_size_(0) { - if (Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) { + if ((Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) && (Kind() != TRITONSERVER_INSTANCEGROUPKIND_AUTO)) { throw triton::backend::BackendModelInstanceException(TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, (std::string("unable to load model '") + model_state_->Name() + - "', Triton openVINO backend supports only CPU device") + "', Triton OpenVINO backend supports only Kind CPU and AUTO") .c_str())); } if (model_state_->ModelNotRead()) { std::string model_path; THROW_IF_BACKEND_INSTANCE_ERROR(model_state_->ParseParameters()); + device_ = model_state->TargetDevice(); + LOG_MESSAGE( + TRITONSERVER_LOG_INFO, + (std::string("Target device " + device_)) + .c_str()); + + THROW_IF_BACKEND_INSTANCE_ERROR( model_state_->ReadModel(ArtifactFilename(), &model_path)); THROW_IF_BACKEND_INSTANCE_ERROR(model_state_->ValidateConfigureModel()); @@ -1518,8 +1550,7 @@ TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) LOG_MESSAGE( TRITONSERVER_LOG_INFO, (std::string("TRITONBACKEND_ModelInstanceInitialize: ") + name + " (" + - TRITONSERVER_InstanceGroupKindString(kind) + " device " + - std::to_string(device_id) + ")") + TRITONSERVER_InstanceGroupKindString(kind)+")") .c_str()); // Get the model state associated with this instance's model. @@ -1607,7 +1638,7 @@ TRITONBACKEND_GetBackendAttribute( TRITONSERVER_LOG_VERBOSE, "TRITONBACKEND_GetBackendAttribute: setting attributes"); RETURN_IF_ERROR(TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup( - backend_attributes, TRITONSERVER_INSTANCEGROUPKIND_CPU, 0, nullptr, 0)); + backend_attributes, TRITONSERVER_INSTANCEGROUPKIND_AUTO, 0, nullptr, 0)); return nullptr; } From 2f41e5f7f3c3f29299d754542148bf19c248dd30 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 27 Mar 2024 15:41:17 +0100 Subject: [PATCH 04/15] readme corrections --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ea25b08..c7239e3 100644 --- a/README.md +++ b/README.md @@ -243,7 +243,7 @@ string_value:"yes" Build the custom triton image with the required runtime drivers using the script from . 
``` -python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics +python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 ``` Add to your config.pbtxt a parameter `TARGET_DEVICE`: ``` From 174aede58ed6c1d47d71ad0739589b71173b58d8 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 27 Mar 2024 15:45:07 +0100 Subject: [PATCH 05/15] link to updated build.py --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c7239e3..c1df23d 100644 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ string_value:"yes" ``` ### Running the models on Intel GPU -Build the custom triton image with the required runtime drivers using the script from . +Build the custom triton image with the required runtime drivers using the script from [build.py](https://github.com/dtrawins/server/blob/igpu/build.py). ``` python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 From 76ab8237257b9b5160df87f66df26063d8e0ac89 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Tue, 9 Apr 2024 16:51:59 +0200 Subject: [PATCH 06/15] separate ov update and fix style --- README.md | 12 +++++------- src/openvino.cc | 33 ++++++++++++++++---------------- tools/gen_openvino_dockerfile.py | 8 +++++--- 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index c1df23d..887a5cc 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Follow the steps below to build the backend shared library. ``` $ mkdir build $ cd build -$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_OPENVINO_VERSION=2024.0.0 -DTRITON_BUILD_CONTAINER_VERSION=24.02 .. +$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_OPENVINO_VERSION=2021.2.200 -DTRITON_BUILD_CONTAINER_VERSION=20.12 .. $ make install ``` @@ -72,8 +72,11 @@ but the listed CMake argument can be used to override. * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] ## Build a complete image with OpenVINO backend including Intel GPU drivers + +Build the custom triton image with the required runtime drivers using the script from [build.py](https://github.com/dtrawins/server/blob/igpu/build.py). + ``` -python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics \ +python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 \ --backend openvino:pull/74/head --enable-intel-gpu ``` @@ -240,11 +243,6 @@ string_value:"yes" ``` ### Running the models on Intel GPU -Build the custom triton image with the required runtime drivers using the script from [build.py](https://github.com/dtrawins/server/blob/igpu/build.py). 
- -``` -python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 -``` Add to your config.pbtxt a parameter `TARGET_DEVICE`: ``` parameters: [ diff --git a/src/openvino.cc b/src/openvino.cc index 5fd398f..e116b90 100644 --- a/src/openvino.cc +++ b/src/openvino.cc @@ -121,7 +121,7 @@ class ModelState : public BackendModel { bool SkipDynamicBatchSize() { return skip_dynamic_batchsize_; } bool EnableBatchPadding() { return enable_padding_; } - std::string TargetDevice() {return target_device_;} + std::string TargetDevice() { return target_device_; } private: ModelState(TRITONBACKEND_Model* triton_model); @@ -243,7 +243,8 @@ ModelState::ParseParameters() bool status = model_config_.Find("parameters", ¶ms); if (status) { RETURN_IF_ERROR(LoadCpuExtensions(params)); - ParseBoolParameter("SKIP_OV_DYNAMIC_BATCHSIZE", params, &skip_dynamic_batchsize_); + ParseBoolParameter( + "SKIP_OV_DYNAMIC_BATCHSIZE", params, &skip_dynamic_batchsize_); ParseBoolParameter("ENABLE_BATCH_PADDING", params, &enable_padding_); ParseBoolParameter("RESHAPE_IO_LAYERS", params, &reshape_io_layers_); ParseStringParameter("TARGET_DEVICE", params, &target_device_); @@ -297,8 +298,7 @@ ModelState::ParseBoolParameter( bool* setting) { std::string value; - RETURN_IF_ERROR( - ReadParameter(params, mkey, &(value))); + RETURN_IF_ERROR(ReadParameter(params, mkey, &(value))); std::transform( value.begin(), value.end(), value.begin(), [](unsigned char c) { return std::tolower(c); }); @@ -315,8 +315,7 @@ ModelState::ParseStringParameter( std::string* setting) { std::string value; - RETURN_IF_ERROR( - ReadParameter(params, mkey, &(value))); + RETURN_IF_ERROR(ReadParameter(params, mkey, &(value))); std::transform( value.begin(), value.end(), value.begin(), [](unsigned char c) { return std::toupper(c); }); @@ -333,8 +332,7 @@ ModelState::ParseParameter( std::vector>* device_config) { std::string value; - RETURN_IF_ERROR( - ReadParameter(params, mkey, &(value))); + RETURN_IF_ERROR(ReadParameter(params, mkey, &(value))); if (!value.empty()) { std::pair ov_property; RETURN_IF_ERROR(ParseParameterHelper(mkey, &value, &ov_property)); @@ -427,8 +425,8 @@ ModelState::ConfigureOpenvinoCore() auto availableDevices = ov_core_.get_available_devices(); std::stringstream list_of_devices; - for (auto & element : availableDevices) { - list_of_devices << element << ","; + for (auto& element : availableDevices) { + list_of_devices << element << ","; } LOG_MESSAGE( TRITONSERVER_LOG_VERBOSE, @@ -957,9 +955,11 @@ ModelInstanceState::Create( ModelInstanceState::ModelInstanceState( ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance) : BackendModelInstance(model_state, triton_model_instance), - model_state_(model_state), device_(model_state->TargetDevice()), batch_pad_size_(0) + model_state_(model_state), device_(model_state->TargetDevice()), + batch_pad_size_(0) { - if ((Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) && (Kind() != TRITONSERVER_INSTANCEGROUPKIND_AUTO)) { + if ((Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) && + (Kind() != TRITONSERVER_INSTANCEGROUPKIND_AUTO)) { throw triton::backend::BackendModelInstanceException(TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, (std::string("unable to load model '") + model_state_->Name() + @@ -972,10 +972,9 @@ ModelInstanceState::ModelInstanceState( THROW_IF_BACKEND_INSTANCE_ERROR(model_state_->ParseParameters()); device_ = model_state->TargetDevice(); LOG_MESSAGE( - 
TRITONSERVER_LOG_INFO, - (std::string("Target device " + device_)) - .c_str()); - + TRITONSERVER_LOG_INFO, + (std::string("Target device " + device_)).c_str()); + THROW_IF_BACKEND_INSTANCE_ERROR( model_state_->ReadModel(ArtifactFilename(), &model_path)); @@ -1550,7 +1549,7 @@ TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) LOG_MESSAGE( TRITONSERVER_LOG_INFO, (std::string("TRITONBACKEND_ModelInstanceInitialize: ") + name + " (" + - TRITONSERVER_InstanceGroupKindString(kind)+")") + TRITONSERVER_InstanceGroupKindString(kind) + ")") .c_str()); // Get the model state associated with this instance's model. diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 2691571..8c97950 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -62,6 +62,7 @@ def dockerfile_for_linux(output_file): RUN apt-get update && apt-get install -y --no-install-recommends \ cmake \ libglib2.0-dev \ + libtbb-dev \ patchelf \ git \ make \ @@ -103,10 +104,11 @@ def dockerfile_for_linux(output_file): WORKDIR /opt/openvino RUN cp -r /workspace/openvino/licensing LICENSE.openvino RUN mkdir -p include && \ - cp -r /workspace/install/runtime/include/* include/. + cp -r /workspace/install/runtime/include/ngraph include/. && \ + cp -r /workspace/install/runtime/include/openvino include/. RUN mkdir -p lib && \ - cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \ - cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* lib/. + cp -P /usr/lib/x86_64-linux-gnu/libtbb.so* lib/. && \ + cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. \ """ df += """ From c05a182b6ed2f82ba91f8ed729cc6831e6b9d840 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Tue, 9 Apr 2024 16:54:55 +0200 Subject: [PATCH 07/15] drop trailing space --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 887a5cc..8341b30 100644 --- a/README.md +++ b/README.md @@ -269,7 +269,7 @@ parameters: [ Start the container with extra parameter to pass the device `/dev/dri`: ``` -docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* ) tritonserver:latest +docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* ) tritonserver:latest ``` ## Known Issues From ffc18892a7a3a4ba53e8b1206e5372a62247ed0f Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 24 Jul 2024 08:23:00 +0200 Subject: [PATCH 08/15] included runtime libraries to execute on GPU --- tools/gen_openvino_dockerfile.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 8c97950..0a2543c 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -77,6 +77,15 @@ def dockerfile_for_linux(output_file): # pre-build archive. # TODO: Unify build steps between linux and windows. 
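# The driver packages fetched in the next step are only unpacked with
# `dpkg-deb -X`, not installed; their shared objects are copied into the
# backend lib directory further down.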
+# Get intel GPU drivers
+WORKDIR /drv
+RUN curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.15468.11/intel-igc-core_1.0.15468.11_amd64.deb ; \
+    curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.15468.11/intel-igc-opencl_1.0.15468.11_amd64.deb ; \
+    curl -L -O https://github.com/intel/compute-runtime/releases/download/23.43.27642.18/intel-opencl-icd_23.43.27642.18_amd64.deb ; \
+    curl -L -O https://github.com/intel/compute-runtime/releases/download/23.43.27642.18/libigdgmm12_22.3.11_amd64.deb ; \
+    apt-get download ocl-icd-libopencl1 ; \
+    find . -iname '*.deb' -exec dpkg-deb -X {} . \;
+
 ARG OPENVINO_VERSION
 ARG OPENVINO_BUILD_TYPE
 WORKDIR /workspace

@@ -104,11 +113,11 @@ def dockerfile_for_linux(output_file):
 WORKDIR /opt/openvino
 RUN cp -r /workspace/openvino/licensing LICENSE.openvino
 RUN mkdir -p include && \
-    cp -r /workspace/install/runtime/include/ngraph include/. && \
-    cp -r /workspace/install/runtime/include/openvino include/.
+    cp -r /workspace/install/runtime/include/* include/.
 RUN mkdir -p lib && \
     cp -P /usr/lib/x86_64-linux-gnu/libtbb.so* lib/. && \
-    cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. \
+    cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. && \
+    find /drv/usr/ -iname '*.so*' -exec cp -P {} lib/. \;

 """
 df += """

From cda8a940f99fa265f2d00dc95e26b14499dba503 Mon Sep 17 00:00:00 2001
From: Dariusz Trawinski
Date: Wed, 24 Jul 2024 23:54:51 +0200
Subject: [PATCH 09/15] update readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c336adc..6495f6b 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ Build the custom triton image with the required runtime drivers using the script
 ```
 python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 \
---backend openvino:pull/74/head
+--backend openvino
 ```

From 81381d2cade9e34ba07256cb90bbbd650b075169 Mon Sep 17 00:00:00 2001
From: Dariusz Trawinski
Date: Wed, 31 Jul 2024 14:40:47 +0200
Subject: [PATCH 10/15] trailing spaces

---
 tools/gen_openvino_dockerfile.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py
index a1cb8f9..7b567b9 100755
--- a/tools/gen_openvino_dockerfile.py
+++ b/tools/gen_openvino_dockerfile.py
@@ -112,11 +112,11 @@ def dockerfile_for_linux(output_file):
 WORKDIR /opt/openvino
 RUN cp -r /workspace/openvino/licensing LICENSE.openvino
 RUN mkdir -p include && \
-    cp -r /workspace/install/runtime/include/* include/. 
+    cp -r /workspace/install/runtime/include/* include/.
 RUN mkdir -p lib && \
     cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \
     cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. && \
-    find /drv/usr/ -iname '*.so*' -exec cp -P {} lib/. 
\; """ df += """ From b3f5126e4434c516a2ea5a6bdd8d64d828f24333 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 27 Nov 2024 23:39:49 +0100 Subject: [PATCH 11/15] review changes --- Dockerfile.drivers | 16 +++++++++++++ README.md | 40 ++++++++++++++++++++++++++++---- src/openvino.cc | 30 +++++++++++++----------- src/openvino_utils.cc | 11 +++++---- src/openvino_utils.h | 2 +- tools/gen_openvino_dockerfile.py | 12 +--------- 6 files changed, 76 insertions(+), 35 deletions(-) create mode 100644 Dockerfile.drivers diff --git a/Dockerfile.drivers b/Dockerfile.drivers new file mode 100644 index 0000000..52d3227 --- /dev/null +++ b/Dockerfile.drivers @@ -0,0 +1,16 @@ +ARG BASE_OS=tritonserver:latest +FROM $BASE_OS +RUN mkdir /tmp/neo && cd /tmp/neo && \ + apt-get update && apt-get install -y libtbb12 curl && \ + curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb && \ + curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb && \ + curl -L -O https://github.com/oneapi-src/level-zero/releases/download/v1.17.44/level-zero_1.17.44+u24.04_amd64.deb && \ + curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-driver-compiler-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ + curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-fw-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ + curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-level-zero-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ + dpkg -i *.deb && \ + apt-get install -y ocl-icd-libopencl1 && \ + rm -Rf /tmp/neo diff --git a/README.md b/README.md index f46cd2f..1e9a8d6 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ $ cd build $ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_OPENVINO_VERSION=2024.4.0 -DTRITON_BUILD_CONTAINER_VERSION=24.03 .. $ make install ``` +The compiled backend will be added to `build/install/backends/openvino` folder. The following required Triton repositories will be pulled and used in the build. By default the "main" branch/tag will be used for each repo @@ -71,14 +72,24 @@ but the listed CMake argument can be used to override. * triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] -## Build a complete image with OpenVINO backend including Intel GPU drivers - -Build the custom triton image with the required runtime drivers using the script from [build.py](https://github.com/dtrawins/server/blob/igpu/build.py). 
+## Build a complete triton custom image with OpenVINO backend ``` +git clone https://github.com/triton-inference-server/server +cd server +pip install distro requests python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 \ --backend openvino ``` +It will create an image called `tritonserver:latest` + +## Add Intel GPU and NPU dependencies to the image + +The `Dockerfile.drivers` adds OpenVINO runtime drivers needed to run inference on the accelerators. Use, as the base image, public image with OpenVINO backend or the custom one. + +``` +docker build -f Dockerfile.drivers --build-arg BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.11-py3 -t tritonserver:latest . +``` ## Using the OpenVINO Backend @@ -98,7 +109,7 @@ to skip the dynamic batch sizes in backend. * `ENABLE_BATCH_PADDING`: By default an error will be generated if backend receives a request with batch size less than max_batch_size specified in the configuration. This error can be avoided at a cost of performance by specifying `ENABLE_BATCH_PADDING` parameter as `YES`. * `RESHAPE_IO_LAYERS`: By setting this parameter as `YES`, the IO layers are reshaped to the dimensions provided in model configuration. By default, the dimensions in the model is used. -* `TARGET_DEVICE`: Choose the OpenVINO device for running the inference. It could be CPU (default), GPU or any of the virtual devices like AUTO, MULTI, HETERO. Note: using Intel GPU is possible only if `--device /dev/dri` is passed to the container and is supported only on linux with x86_64 arch. +* `TARGET_DEVICE`: Choose the OpenVINO device for running the inference. It could be CPU (default), GPU, NPU or any of the virtual devices like AUTO, MULTI, HETERO. 
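For illustration, a minimal `config.pbtxt` parameters block selecting one of the virtual devices named above could look as follows (here `AUTO`, which defers device selection to OpenVINO); it differs from the GPU and NPU examples elsewhere in this README only in the device string:

```
parameters: [
{
  key: "TARGET_DEVICE"
  value: {
    string_value: "AUTO"
  }
}
]
```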
@@ -270,7 +281,26 @@ parameters: [

 Start the container with extra parameter to pass the device `/dev/dri`:
 ```
-docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* ) tritonserver:latest
+docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1 ) tritonserver:latest
 ```

+### Running the models on Intel NPU
+
+Add to your config.pbtxt a parameter `TARGET_DEVICE`:
+```
+parameters: [
+{
+  key: "TARGET_DEVICE"
+  value: {
+    string_value: "NPU"
+  }
+}
+]
+```
+
+Start the container with extra parameter to pass the device `/dev/accel`:
+```
+docker run -it --rm --device /dev/accel --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) tritonserver:latest
+```

 ## Known Issues

diff --git a/src/openvino.cc b/src/openvino.cc
index 4070c70..7273251 100644
--- a/src/openvino.cc
+++ b/src/openvino.cc
@@ -262,11 +262,16 @@ ModelState::ParseParameters(const std::string& device)
   if (status) {
     config_[device] = {};
     auto& device_config = config_.at(device);
-    ParseParameter("INFERENCE_NUM_THREADS", params, &device_config);
-    ParseParameter("COMPILATION_NUM_THREADS", params, &device_config);
-    ParseParameter("HINT_BF16", params, &device_config);
-    ParseParameter("NUM_STREAMS", params, &device_config);
-    ParseParameter("PERFORMANCE_HINT", params, &device_config);
+    RETURN_IF_ERROR(
+        ParseParameter("INFERENCE_NUM_THREADS", params, &device_config));
+    RETURN_IF_ERROR(
+        ParseParameter("COMPILATION_NUM_THREADS", params, &device_config));
+    RETURN_IF_ERROR(
+        ParseParameter("HINT_BF16", params, &device_config));
+    RETURN_IF_ERROR(
+        ParseParameter("NUM_STREAMS", params, &device_config));
+    RETURN_IF_ERROR(
+        ParseParameter("PERFORMANCE_HINT", params, &device_config));
   }

   return nullptr;
@@ -276,7 +281,7 @@ TRITONSERVER_Error*
 ModelState::LoadCpuExtensions(triton::common::TritonJson::Value& params)
 {
   std::string cpu_ext_path;
-  ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path));
+  RETURN_IF_ERROR(ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path), ""));
   if (!cpu_ext_path.empty()) {
     // CPU (MKLDNN) extensions is loaded as a shared library and passed as a
     // pointer to base extension
@@ -284,7 +289,7 @@ ModelState::LoadCpuExtensions(triton::common::TritonJson::Value& params)
         ov_core_.add_extension(cpu_ext_path), " loading custom CPU extensions");
     LOG_MESSAGE(
         TRITONSERVER_LOG_INFO,
-        (std::string("CPU (MKLDNN) extensions is loaded") + cpu_ext_path)
+        (std::string("CPU extensions is loaded") + cpu_ext_path)
             .c_str());
   }

@@ -298,7 +303,7 @@ ModelState::ParseBoolParameter(
     bool* setting)
 {
   std::string value;
-  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value)));
+  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value),""));
   std::transform(
       value.begin(), value.end(), value.begin(),
       [](unsigned char c) { return std::tolower(c); });
@@ -315,7 +320,7 @@ ModelState::ParseStringParameter(
     std::string* setting)
 {
   std::string value;
-  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value)));
+  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value), ""));
   std::transform(
       value.begin(), value.end(), value.begin(),
       [](unsigned char c) { return std::toupper(c); });
@@ -332,7 +337,7 @@ ModelState::ParseParameter(
     std::vector>* device_config)
 {
   std::string value;
-  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value)));
+  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value),""));
   if (!value.empty()) {
     std::pair ov_property;
     RETURN_IF_ERROR(ParseParameterHelper(mkey, &value, &ov_property));
@@ -958,8 +963,7 @@
ModelInstanceState::ModelInstanceState( model_state_(model_state), device_(model_state->TargetDevice()), batch_pad_size_(0) { - if ((Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) && - (Kind() != TRITONSERVER_INSTANCEGROUPKIND_AUTO)) { + if (Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) { throw triton::backend::BackendModelInstanceException(TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, (std::string("unable to load model '") + model_state_->Name() + @@ -1638,7 +1642,7 @@ TRITONBACKEND_GetBackendAttribute( TRITONSERVER_LOG_VERBOSE, "TRITONBACKEND_GetBackendAttribute: setting attributes"); RETURN_IF_ERROR(TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup( - backend_attributes, TRITONSERVER_INSTANCEGROUPKIND_AUTO, 0, nullptr, 0)); + backend_attributes, TRITONSERVER_INSTANCEGROUPKIND_CPU, 0, nullptr, 0)); return nullptr; } diff --git a/src/openvino_utils.cc b/src/openvino_utils.cc index c5bc1a1..ac5a666 100644 --- a/src/openvino_utils.cc +++ b/src/openvino_utils.cc @@ -277,13 +277,14 @@ CompareDimsSupported( TRITONSERVER_Error* ReadParameter( triton::common::TritonJson::Value& params, const std::string& key, - std::string* param) + std::string* param, const std::string default_value) { triton::common::TritonJson::Value value; - RETURN_ERROR_IF_FALSE( - params.Find(key.c_str(), &value), TRITONSERVER_ERROR_INVALID_ARG, - std::string("model configuration is missing the parameter ") + key); - RETURN_IF_ERROR(value.MemberAsString("string_value", param)); + if (params.Find(key.c_str(), &value)){ + RETURN_IF_ERROR(value.MemberAsString("string_value", param)); + } else { + *param = default_value; + } return nullptr; // success } diff --git a/src/openvino_utils.h b/src/openvino_utils.h index 2fbaadb..a894937 100644 --- a/src/openvino_utils.h +++ b/src/openvino_utils.h @@ -97,7 +97,7 @@ TRITONSERVER_Error* CompareDimsSupported( TRITONSERVER_Error* ReadParameter( triton::common::TritonJson::Value& params, const std::string& key, - std::string* param); + std::string* param, const std::string default_value); std::vector ConvertToSignedShape(const ov::PartialShape& shape); diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 5adcace..4b0d835 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -76,15 +76,6 @@ def dockerfile_for_linux(output_file): # pre-build archive. # TODO: Unify build steps between linux and windows. -# Get intel GPU drivers -WORKDIR /drv -RUN curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.15468.11/intel-igc-core_1.0.15468.11_amd64.deb ; \ - curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.15468.11/intel-igc-opencl_1.0.15468.11_amd64.deb ; \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/23.43.27642.18/intel-opencl-icd_23.43.27642.18_amd64.deb ; \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/23.43.27642.18/libigdgmm12_22.3.11_amd64.deb ; \ - apt-get download ocl-icd-libopencl1 ; \ - find . -iname '*.deb' -exec dpkg-deb -X {} . \; - ARG OPENVINO_VERSION ARG OPENVINO_BUILD_TYPE WORKDIR /workspace @@ -115,8 +106,7 @@ def dockerfile_for_linux(output_file): cp -r /workspace/install/runtime/include/* include/. RUN mkdir -p lib && \ cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \ - cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. && \ - find /drv/usr/ -iname '*.so*' -exec cp -P {} lib/. 
\; + cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. """ df += """ From aba01374c08c80808de29facdabbcab408c7c424 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 28 Nov 2024 16:22:30 +0100 Subject: [PATCH 12/15] style and minor fixes --- Dockerfile.drivers | 8 ++++---- README.md | 11 ++++++----- src/openvino.cc | 23 ++++++++++------------- src/openvino_utils.cc | 2 +- 4 files changed, 21 insertions(+), 23 deletions(-) diff --git a/Dockerfile.drivers b/Dockerfile.drivers index 52d3227..1994015 100644 --- a/Dockerfile.drivers +++ b/Dockerfile.drivers @@ -1,5 +1,5 @@ -ARG BASE_OS=tritonserver:latest -FROM $BASE_OS +ARG BASE_IMAGE=tritonserver:latest +FROM $BASE_IMAGE RUN mkdir /tmp/neo && cd /tmp/neo && \ apt-get update && apt-get install -y libtbb12 curl && \ curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb && \ @@ -12,5 +12,5 @@ RUN mkdir /tmp/neo && cd /tmp/neo && \ curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-fw-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-level-zero-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ dpkg -i *.deb && \ - apt-get install -y ocl-icd-libopencl1 && \ - rm -Rf /tmp/neo + apt-get install -y ocl-icd-libopencl1 --no-install-recommends && \ + rm -rf /var/lib/apt/lists/* && rm -Rf /tmp/neo diff --git a/README.md b/README.md index 1e9a8d6..8356f33 100644 --- a/README.md +++ b/README.md @@ -79,8 +79,9 @@ git clone https://github.com/triton-inference-server/server cd server pip install distro requests python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 \ ---backend openvino +--backend openvino:pull/87/head ``` +In the backend value, the pull request is optional. Use `--backend openvino` to build from `main` branch. It will create an image called `tritonserver:latest` ## Add Intel GPU and NPU dependencies to the image @@ -88,7 +89,7 @@ It will create an image called `tritonserver:latest` The `Dockerfile.drivers` adds OpenVINO runtime drivers needed to run inference on the accelerators. Use, as the base image, public image with OpenVINO backend or the custom one. ``` -docker build -f Dockerfile.drivers --build-arg BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.11-py3 -t tritonserver:latest . +docker build -f Dockerfile.drivers --build-arg BASE_IMAGE=tritonserver:latest -t tritonserver:xpu . 
```
 ```

@@ -281,7 +282,7 @@ parameters: [

 Start the container with extra parameter to pass the device `/dev/dri`:
 ```
-docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1 ) tritonserver:latest
+docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1 ) tritonserver:xpu
 ```

 ### Running the models on Intel NPU
@@ -300,10 +301,10 @@ parameters: [

 Start the container with extra parameter to pass the device `/dev/accel`:
 ```
-docker run -it --rm --device /dev/accel --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) tritonserver:latest
+docker run -it --rm --device /dev/accel --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) tritonserver:xpu
 ```

 ## Known Issues

 * Models with a scalar on the input (a shape without any dimension) are not supported
-* Reshaping using [dimension ranges](https://docs.openvino.ai/2023.3/ovms_docs_dynamic_shape_dynamic_model.html) is not supported.
+* Reshaping using [dimension ranges](https://docs.openvino.ai/2024/openvino-workflow/running-inference/dynamic-shapes.html#dimension-bounds) is not supported.

diff --git a/src/openvino.cc b/src/openvino.cc
index 7273251..1bde0a9 100644
--- a/src/openvino.cc
+++ b/src/openvino.cc
@@ -263,15 +263,12 @@ ModelState::ParseParameters(const std::string& device)
     config_[device] = {};
     auto& device_config = config_.at(device);
     RETURN_IF_ERROR(
-        ParseParameter("INFERENCE_NUM_THREADS", params, &device_config));
+      ParseParameter("INFERENCE_NUM_THREADS", params, &device_config));
     RETURN_IF_ERROR(
-        ParseParameter("COMPILATION_NUM_THREADS", params, &device_config));
-    RETURN_IF_ERROR(
-        ParseParameter("HINT_BF16", params, &device_config));
-    RETURN_IF_ERROR(
-        ParseParameter("NUM_STREAMS", params, &device_config));
-    RETURN_IF_ERROR(
-        ParseParameter("PERFORMANCE_HINT", params, &device_config));
+      ParseParameter("COMPILATION_NUM_THREADS", params, &device_config));
+    RETURN_IF_ERROR(ParseParameter("HINT_BF16", params, &device_config));
+    RETURN_IF_ERROR(ParseParameter("NUM_STREAMS", params, &device_config));
+    RETURN_IF_ERROR(ParseParameter("PERFORMANCE_HINT", params, &device_config));
   }

   return nullptr;
@@ -281,7 +278,8 @@ TRITONSERVER_Error*
 ModelState::LoadCpuExtensions(triton::common::TritonJson::Value& params)
 {
   std::string cpu_ext_path;
-  RETURN_IF_ERROR(ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path), ""));
+  RETURN_IF_ERROR(
+      ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path), ""));
   if (!cpu_ext_path.empty()) {
     // CPU (MKLDNN) extensions is loaded as a shared library and passed as a
     // pointer to base extension
@@ -289,8 +287,7 @@ ModelState::LoadCpuExtensions(triton::common::TritonJson::Value& params)
         ov_core_.add_extension(cpu_ext_path), " loading custom CPU extensions");
     LOG_MESSAGE(
         TRITONSERVER_LOG_INFO,
-        (std::string("CPU extensions is loaded") + cpu_ext_path)
-            .c_str());
+        (std::string("CPU extensions is loaded") + cpu_ext_path).c_str());
   }

@@ -303,7 +300,7 @@ ModelState::ParseBoolParameter(
     bool* setting)
 {
   std::string value;
-  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value),""));
+  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value), ""));
   std::transform(
       value.begin(), value.end(), value.begin(),
       [](unsigned char c) { return std::tolower(c); });
@@ -337,7 +334,7 @@ ModelState::ParseParameter(
     std::vector>* device_config)
 {
   std::string value;
-  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value),""));
+  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value), ""));
   if
(!value.empty()) { std::pair ov_property; RETURN_IF_ERROR(ParseParameterHelper(mkey, &value, &ov_property)); diff --git a/src/openvino_utils.cc b/src/openvino_utils.cc index ac5a666..9c6f312 100644 --- a/src/openvino_utils.cc +++ b/src/openvino_utils.cc @@ -280,7 +280,7 @@ ReadParameter( std::string* param, const std::string default_value) { triton::common::TritonJson::Value value; - if (params.Find(key.c_str(), &value)){ + if (params.Find(key.c_str(), &value)) { RETURN_IF_ERROR(value.MemberAsString("string_value", param)); } else { *param = default_value; From 107a41a469776711b04d4040bae75653e653661b Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 3 Sep 2025 19:33:07 +0200 Subject: [PATCH 13/15] update --- Dockerfile.drivers | 18 +++++++++--------- tools/gen_openvino_dockerfile.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Dockerfile.drivers b/Dockerfile.drivers index 1994015..e955040 100644 --- a/Dockerfile.drivers +++ b/Dockerfile.drivers @@ -2,15 +2,15 @@ ARG BASE_IMAGE=tritonserver:latest FROM $BASE_IMAGE RUN mkdir /tmp/neo && cd /tmp/neo && \ apt-get update && apt-get install -y libtbb12 curl && \ - curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb && \ - curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb && \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb && \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb && \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb && \ - curl -L -O https://github.com/oneapi-src/level-zero/releases/download/v1.17.44/level-zero_1.17.44+u24.04_amd64.deb && \ - curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-driver-compiler-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ - curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-fw-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ - curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-level-zero-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/libze-intel-gpu1_25.31.34666.3-0_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/intel-opencl-icd_25.31.34666.3-0_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/libigdgmm12_22.8.1_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/intel-ocloc_25.31.34666.3-0_amd64.deb && \ + curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.16.0/intel-igc-core-2_2.16.0+19683_amd64.deb && \ + curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.16.0/intel-igc-opencl-2_2.16.0+19683_amd64.deb && \ + curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.23.0/linux-npu-driver-v1.23.0.20250827-17270089246-ubuntu2404.tar.gz && \ + tar -xf linux-npu-driver-v1.23.0.20250827-17270089246-ubuntu2404.tar.gz && \ + curl -L -O https://github.com/oneapi-src/level-zero/releases/download/v1.22.4/level-zero_1.22.4+u24.04_amd64.deb && \ 
dpkg -i *.deb && \ apt-get install -y ocl-icd-libopencl1 --no-install-recommends && \ rm -rf /var/lib/apt/lists/* && rm -Rf /tmp/neo diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 2702ac2..e0fc499 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -124,7 +124,7 @@ def dockerfile_for_linux(output_file): RUN mkdir -p include && \ cp -r /workspace/install/runtime/include/* include/. RUN mkdir -p lib && \ - cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \ + cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* lib/. \ cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. """ From b95056a3c1cb48d270e6e55fb04996560a19f175 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 3 Sep 2025 19:45:44 +0200 Subject: [PATCH 14/15] fix --- tools/gen_openvino_dockerfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index e0fc499..0d31784 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -124,7 +124,7 @@ def dockerfile_for_linux(output_file): RUN mkdir -p include && \ cp -r /workspace/install/runtime/include/* include/. RUN mkdir -p lib && \ - cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* lib/. \ + cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* lib/. && \ cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. """ From bd97dda0682884a10b33beacc7eff91df2e31b3a Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 3 Sep 2025 20:24:43 +0200 Subject: [PATCH 15/15] update readme --- README.md | 2 +- tests/README.md | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5b3a3dd..8096904 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ git clone https://github.com/triton-inference-server/server cd server pip install distro requests python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 \ ---backend openvino:pull/87/head +--backend openvino ``` In the backend value, the pull request is optional. Use `--backend openvino` to build from `main` branch. It will create an image called `tritonserver:latest` diff --git a/tests/README.md b/tests/README.md index 5a7e6f9..a1ec7b0 100644 --- a/tests/README.md +++ b/tests/README.md @@ -3,16 +3,17 @@ Installing and running tests ```bash pip install -r requirements.txt -pytest +pytest -sv --image=tritonserver:latest ``` Running tests with gpu ```bash -pytest --gpu +pytest -sv --gpu --image=tritonserver:latest ``` Run tests while caching downloaded models ```bash -pytest --model-cache ./cache +mkdir cache +pytest -sv --model-cache ./cache --image=tritonserver:latest ```
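The `--image`, `--gpu` and `--model-cache` flags above are custom pytest options, so the suite has to declare them itself, conventionally in a `conftest.py`. Below is a minimal sketch of how such options are typically wired up; the option names match the commands above, while the `triton_image` fixture is purely illustrative and not taken from this repository:

```python
# conftest.py -- declares the suite's custom command-line options (sketch)
import pytest


def pytest_addoption(parser):
    # Triton server image the tests are run against.
    parser.addoption(
        "--image", action="store", default="tritonserver:latest",
        help="Triton server image under test")
    # Opt-in switch for the test cases that need an Intel GPU (/dev/dri).
    parser.addoption(
        "--gpu", action="store_true", default=False,
        help="also run the GPU test cases")
    # Optional directory reused across runs to avoid re-downloading models.
    parser.addoption(
        "--model-cache", action="store", default=None,
        help="directory used to cache downloaded models")


@pytest.fixture(scope="session")
def triton_image(request):
    # Expose the selected image name to individual tests.
    return request.config.getoption("--image")
```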