From b84940eed9672ee1d33a036cf3bdd6a5c9c92d9c Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Tue, 26 Mar 2024 08:51:07 +0100 Subject: [PATCH 01/15] update openvino backend version --- README.md | 4 +++- tools/gen_openvino_dockerfile.py | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e93dbe4..56433c1 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Follow the steps below to build the backend shared library. ``` $ mkdir build $ cd build -$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_OPENVINO_VERSION=2021.2.200 -DTRITON_BUILD_CONTAINER_VERSION=20.12 .. +$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_OPENVINO_VERSION=2024.0.0 -DTRITON_BUILD_CONTAINER_VERSION=24.02 .. $ make install ``` @@ -71,6 +71,8 @@ but the listed CMake argument can be used to override. * triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] + + ## Using the OpenVINO Backend ### Parameters diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 8c97950..1b8cd92 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -62,7 +62,6 @@ def dockerfile_for_linux(output_file): RUN apt-get update && apt-get install -y --no-install-recommends \ cmake \ libglib2.0-dev \ - libtbb-dev \ patchelf \ git \ make \ @@ -104,11 +103,10 @@ def dockerfile_for_linux(output_file): WORKDIR /opt/openvino RUN cp -r /workspace/openvino/licensing LICENSE.openvino RUN mkdir -p include && \ - cp -r /workspace/install/runtime/include/ngraph include/. && \ cp -r /workspace/install/runtime/include/openvino include/. RUN mkdir -p lib && \ - cp -P /usr/lib/x86_64-linux-gnu/libtbb.so* lib/. && \ - cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. \ + cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \ + cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* /lib/. """ df += """ From d69adca23644f6c5d1c57f01e529148787a6733e Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Tue, 26 Mar 2024 11:47:52 +0100 Subject: [PATCH 02/15] fix tbb --- tools/gen_openvino_dockerfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 1b8cd92..2691571 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -103,10 +103,10 @@ def dockerfile_for_linux(output_file): WORKDIR /opt/openvino RUN cp -r /workspace/openvino/licensing LICENSE.openvino RUN mkdir -p include && \ - cp -r /workspace/install/runtime/include/openvino include/. + cp -r /workspace/install/runtime/include/* include/. RUN mkdir -p lib && \ cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \ - cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* /lib/. + cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* lib/. 
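# NOTE: libtbb is taken from OpenVINO's bundled 3rdparty/tbb tree here (copied
# above), so the libtbb-dev system package dropped earlier in this series is
# no longer needed at build time.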
""" df += """ From f6a696c215012483366b648600accf3ebba0997a Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 27 Mar 2024 15:23:37 +0100 Subject: [PATCH 03/15] added support for intel gpu and virtual devices --- CMakeLists.txt | 1 - README.md | 41 +++++++++++++++++++++ src/openvino.cc | 97 ++++++++++++++++++++++++++++++++----------------- 3 files changed, 105 insertions(+), 34 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0948e11..071cdb8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,7 +210,6 @@ else() COMMAND rm -fr openvino COMMAND docker cp openvino_backend_ov:/opt/openvino openvino COMMAND docker rm openvino_backend_ov - COMMAND echo '' >> openvino/lib/plugins.xml COMMENT "Building OpenVino" ) endif() # WIN32 diff --git a/README.md b/README.md index 56433c1..ea25b08 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,11 @@ but the listed CMake argument can be used to override. * triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] +## Build a complete image with OpenVINO backend including Intel GPU drivers +``` +python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics \ +--backend openvino:pull/74/head --enable-intel-gpu +``` ## Using the OpenVINO Backend @@ -90,6 +95,7 @@ to skip the dynamic batch sizes in backend. * `ENABLE_BATCH_PADDING`: By default an error will be generated if backend receives a request with batch size less than max_batch_size specified in the configuration. This error can be avoided at a cost of performance by specifying `ENABLE_BATCH_PADDING` parameter as `YES`. * `RESHAPE_IO_LAYERS`: By setting this parameter as `YES`, the IO layers are reshaped to the dimensions provided in model configuration. By default, the dimensions in the model is used. +* `TARGET_DEVICE`: Choose the OpenVINO device for running the inference. It could be CPU (default), GPU or any of the virtual devices like AUTO, MULTI, HETERO. Note: using Intel GPU is possible only if `--device /dev/dri` is passed to the container and is supported only on linux with x86_64 arch. @@ -232,6 +238,41 @@ string_value:"yes" } } ``` +### Running the models on Intel GPU + +Build the custom triton image with the required runtime drivers using the script from . 
+ +``` +python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics +``` +Add to your config.pbtxt a parameter `TARGET_DEVICE`: +``` +parameters: [ +{ + key: "NUM_STREAMS" + value: { + string_value: "1" + } +}, +{ + key: "PERFORMANCE_HINT" + value: { + string_value: "THROUGHPUT" + } +}, +{ + key: "TARGET_DEVICE" + value: { + string_value: "GPU" + } +} +] +``` + +Start the container with extra parameter to pass the device `/dev/dri`: +``` +docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* ) tritonserver:latest +``` ## Known Issues diff --git a/src/openvino.cc b/src/openvino.cc index bcd556f..5fd398f 100644 --- a/src/openvino.cc +++ b/src/openvino.cc @@ -84,6 +84,9 @@ class ModelState : public BackendModel { TRITONSERVER_Error* ParseParameter( const std::string& mkey, triton::common::TritonJson::Value& params, std::vector>* device_config); + TRITONSERVER_Error* ParseStringParameter( + const std::string& mkey, triton::common::TritonJson::Value& params, + std::string* value); TRITONSERVER_Error* ParseParameterHelper( const std::string& mkey, std::string* value, std::pair* ov_property); @@ -118,6 +121,7 @@ class ModelState : public BackendModel { bool SkipDynamicBatchSize() { return skip_dynamic_batchsize_; } bool EnableBatchPadding() { return enable_padding_; } + std::string TargetDevice() {return target_device_;} private: ModelState(TRITONBACKEND_Model* triton_model); @@ -140,6 +144,7 @@ class ModelState : public BackendModel { bool skip_dynamic_batchsize_; bool enable_padding_; bool reshape_io_layers_; + std::string target_device_; }; TRITONSERVER_Error* @@ -179,7 +184,7 @@ ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state) ModelState::ModelState(TRITONBACKEND_Model* triton_model) : BackendModel(triton_model), model_read_(false), skip_dynamic_batchsize_(false), enable_padding_(false), - reshape_io_layers_(false) + reshape_io_layers_(false), target_device_("CPU") { } @@ -238,12 +243,10 @@ ModelState::ParseParameters() bool status = model_config_.Find("parameters", ¶ms); if (status) { RETURN_IF_ERROR(LoadCpuExtensions(params)); - RETURN_IF_ERROR(ParseBoolParameter( - "SKIP_OV_DYNAMIC_BATCHSIZE", params, &skip_dynamic_batchsize_)); - RETURN_IF_ERROR( - ParseBoolParameter("ENABLE_BATCH_PADDING", params, &enable_padding_)); - RETURN_IF_ERROR( - ParseBoolParameter("RESHAPE_IO_LAYERS", params, &reshape_io_layers_)); + ParseBoolParameter("SKIP_OV_DYNAMIC_BATCHSIZE", params, &skip_dynamic_batchsize_); + ParseBoolParameter("ENABLE_BATCH_PADDING", params, &enable_padding_); + ParseBoolParameter("RESHAPE_IO_LAYERS", params, &reshape_io_layers_); + ParseStringParameter("TARGET_DEVICE", params, &target_device_); } return nullptr; @@ -256,18 +259,13 @@ ModelState::ParseParameters(const std::string& device) triton::common::TritonJson::Value params; bool status = model_config_.Find("parameters", ¶ms); if (status) { - if (device == "CPU") { - config_[device] = {}; - auto& device_config = config_.at(device); - RETURN_IF_ERROR( - ParseParameter("INFERENCE_NUM_THREADS", params, &device_config)); - RETURN_IF_ERROR( - ParseParameter("COMPILATION_NUM_THREADS", params, &device_config)); - RETURN_IF_ERROR(ParseParameter("HINT_BF16", params, &device_config)); - RETURN_IF_ERROR(ParseParameter("NUM_STREAMS", params, &device_config)); - RETURN_IF_ERROR( - ParseParameter("PERFORMANCE_HINT", params, &device_config)); - } + config_[device] = {}; + auto& device_config = config_.at(device); + 
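// With the former CPU-only guard removed, the properties below are now
// collected for whichever device was configured (GPU, AUTO, MULTI, ...).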
ParseParameter("INFERENCE_NUM_THREADS", params, &device_config); + ParseParameter("COMPILATION_NUM_THREADS", params, &device_config); + ParseParameter("HINT_BF16", params, &device_config); + ParseParameter("NUM_STREAMS", params, &device_config); + ParseParameter("PERFORMANCE_HINT", params, &device_config); } return nullptr; @@ -277,9 +275,7 @@ TRITONSERVER_Error* ModelState::LoadCpuExtensions(triton::common::TritonJson::Value& params) { std::string cpu_ext_path; - LOG_IF_ERROR( - ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path)), - "error when reading parameters"); + ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path)); if (!cpu_ext_path.empty()) { // CPU (MKLDNN) extensions is loaded as a shared library and passed as a // pointer to base extension @@ -301,8 +297,8 @@ ModelState::ParseBoolParameter( bool* setting) { std::string value; - LOG_IF_ERROR( - ReadParameter(params, mkey, &(value)), "error when reading parameters"); + RETURN_IF_ERROR( + ReadParameter(params, mkey, &(value))); std::transform( value.begin(), value.end(), value.begin(), [](unsigned char c) { return std::tolower(c); }); @@ -313,14 +309,32 @@ ModelState::ParseBoolParameter( return nullptr; } +TRITONSERVER_Error* +ModelState::ParseStringParameter( + const std::string& mkey, triton::common::TritonJson::Value& params, + std::string* setting) +{ + std::string value; + RETURN_IF_ERROR( + ReadParameter(params, mkey, &(value))); + std::transform( + value.begin(), value.end(), value.begin(), + [](unsigned char c) { return std::toupper(c); }); + if (value.length() > 0) { + *setting = value; + } + + return nullptr; +} + TRITONSERVER_Error* ModelState::ParseParameter( const std::string& mkey, triton::common::TritonJson::Value& params, std::vector>* device_config) { std::string value; - LOG_IF_ERROR( - ReadParameter(params, mkey, &(value)), "error when reading parameters"); + RETURN_IF_ERROR( + ReadParameter(params, mkey, &(value))); if (!value.empty()) { std::pair ov_property; RETURN_IF_ERROR(ParseParameterHelper(mkey, &value, &ov_property)); @@ -410,6 +424,16 @@ ModelState::ParseParameterHelper( TRITONSERVER_Error* ModelState::ConfigureOpenvinoCore() { + auto availableDevices = ov_core_.get_available_devices(); + std::stringstream list_of_devices; + + for (auto & element : availableDevices) { + list_of_devices << element << ","; + } + LOG_MESSAGE( + TRITONSERVER_LOG_VERBOSE, + (std::string("Available OpenVINO devices: " + list_of_devices.str())) + .c_str()); for (auto&& item : config_) { std::string device_name = item.first; std::vector> properties = item.second; @@ -438,9 +462,10 @@ ModelState::LoadModel( std::to_string(OPENVINO_VERSION_MINOR) + "." 
+ std::to_string(OPENVINO_VERSION_PATCH)) .c_str()); + LOG_MESSAGE( TRITONSERVER_LOG_VERBOSE, - (std::string("Device info: \n") + + (std::string("Device info: ") + ConvertVersionMapToString(ov_core_.get_versions(device))) .c_str()); @@ -932,19 +957,26 @@ ModelInstanceState::Create( ModelInstanceState::ModelInstanceState( ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance) : BackendModelInstance(model_state, triton_model_instance), - model_state_(model_state), device_("CPU"), batch_pad_size_(0) + model_state_(model_state), device_(model_state->TargetDevice()), batch_pad_size_(0) { - if (Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) { + if ((Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) && (Kind() != TRITONSERVER_INSTANCEGROUPKIND_AUTO)) { throw triton::backend::BackendModelInstanceException(TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, (std::string("unable to load model '") + model_state_->Name() + - "', Triton openVINO backend supports only CPU device") + "', Triton OpenVINO backend supports only Kind CPU and AUTO") .c_str())); } if (model_state_->ModelNotRead()) { std::string model_path; THROW_IF_BACKEND_INSTANCE_ERROR(model_state_->ParseParameters()); + device_ = model_state->TargetDevice(); + LOG_MESSAGE( + TRITONSERVER_LOG_INFO, + (std::string("Target device " + device_)) + .c_str()); + + THROW_IF_BACKEND_INSTANCE_ERROR( model_state_->ReadModel(ArtifactFilename(), &model_path)); THROW_IF_BACKEND_INSTANCE_ERROR(model_state_->ValidateConfigureModel()); @@ -1518,8 +1550,7 @@ TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) LOG_MESSAGE( TRITONSERVER_LOG_INFO, (std::string("TRITONBACKEND_ModelInstanceInitialize: ") + name + " (" + - TRITONSERVER_InstanceGroupKindString(kind) + " device " + - std::to_string(device_id) + ")") + TRITONSERVER_InstanceGroupKindString(kind)+")") .c_str()); // Get the model state associated with this instance's model. @@ -1607,7 +1638,7 @@ TRITONBACKEND_GetBackendAttribute( TRITONSERVER_LOG_VERBOSE, "TRITONBACKEND_GetBackendAttribute: setting attributes"); RETURN_IF_ERROR(TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup( - backend_attributes, TRITONSERVER_INSTANCEGROUPKIND_CPU, 0, nullptr, 0)); + backend_attributes, TRITONSERVER_INSTANCEGROUPKIND_AUTO, 0, nullptr, 0)); return nullptr; } From 2f41e5f7f3c3f29299d754542148bf19c248dd30 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 27 Mar 2024 15:41:17 +0100 Subject: [PATCH 04/15] readme corrections --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ea25b08..c7239e3 100644 --- a/README.md +++ b/README.md @@ -243,7 +243,7 @@ string_value:"yes" Build the custom triton image with the required runtime drivers using the script from . 
``` -python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics +python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 ``` Add to your config.pbtxt a parameter `TARGET_DEVICE`: ``` From 174aede58ed6c1d47d71ad0739589b71173b58d8 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 27 Mar 2024 15:45:07 +0100 Subject: [PATCH 05/15] link to updated build.py --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c7239e3..c1df23d 100644 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ string_value:"yes" ``` ### Running the models on Intel GPU -Build the custom triton image with the required runtime drivers using the script from . +Build the custom triton image with the required runtime drivers using the script from [build.py](https://github.com/dtrawins/server/blob/igpu/build.py). ``` python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 From 76ab8237257b9b5160df87f66df26063d8e0ac89 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Tue, 9 Apr 2024 16:51:59 +0200 Subject: [PATCH 06/15] separate ov update and fix style --- README.md | 12 +++++------- src/openvino.cc | 33 ++++++++++++++++---------------- tools/gen_openvino_dockerfile.py | 8 +++++--- 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index c1df23d..887a5cc 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Follow the steps below to build the backend shared library. ``` $ mkdir build $ cd build -$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_OPENVINO_VERSION=2024.0.0 -DTRITON_BUILD_CONTAINER_VERSION=24.02 .. +$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_OPENVINO_VERSION=2021.2.200 -DTRITON_BUILD_CONTAINER_VERSION=20.12 .. $ make install ``` @@ -72,8 +72,11 @@ but the listed CMake argument can be used to override. * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] ## Build a complete image with OpenVINO backend including Intel GPU drivers + +Build the custom triton image with the required runtime drivers using the script from [build.py](https://github.com/dtrawins/server/blob/igpu/build.py). + ``` -python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics \ +python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 \ --backend openvino:pull/74/head --enable-intel-gpu ``` @@ -240,11 +243,6 @@ string_value:"yes" ``` ### Running the models on Intel GPU -Build the custom triton image with the required runtime drivers using the script from [build.py](https://github.com/dtrawins/server/blob/igpu/build.py). 
- -``` -python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 -``` Add to your config.pbtxt a parameter `TARGET_DEVICE`: ``` parameters: [ diff --git a/src/openvino.cc b/src/openvino.cc index 5fd398f..e116b90 100644 --- a/src/openvino.cc +++ b/src/openvino.cc @@ -121,7 +121,7 @@ class ModelState : public BackendModel { bool SkipDynamicBatchSize() { return skip_dynamic_batchsize_; } bool EnableBatchPadding() { return enable_padding_; } - std::string TargetDevice() {return target_device_;} + std::string TargetDevice() { return target_device_; } private: ModelState(TRITONBACKEND_Model* triton_model); @@ -243,7 +243,8 @@ ModelState::ParseParameters() bool status = model_config_.Find("parameters", ¶ms); if (status) { RETURN_IF_ERROR(LoadCpuExtensions(params)); - ParseBoolParameter("SKIP_OV_DYNAMIC_BATCHSIZE", params, &skip_dynamic_batchsize_); + ParseBoolParameter( + "SKIP_OV_DYNAMIC_BATCHSIZE", params, &skip_dynamic_batchsize_); ParseBoolParameter("ENABLE_BATCH_PADDING", params, &enable_padding_); ParseBoolParameter("RESHAPE_IO_LAYERS", params, &reshape_io_layers_); ParseStringParameter("TARGET_DEVICE", params, &target_device_); @@ -297,8 +298,7 @@ ModelState::ParseBoolParameter( bool* setting) { std::string value; - RETURN_IF_ERROR( - ReadParameter(params, mkey, &(value))); + RETURN_IF_ERROR(ReadParameter(params, mkey, &(value))); std::transform( value.begin(), value.end(), value.begin(), [](unsigned char c) { return std::tolower(c); }); @@ -315,8 +315,7 @@ ModelState::ParseStringParameter( std::string* setting) { std::string value; - RETURN_IF_ERROR( - ReadParameter(params, mkey, &(value))); + RETURN_IF_ERROR(ReadParameter(params, mkey, &(value))); std::transform( value.begin(), value.end(), value.begin(), [](unsigned char c) { return std::toupper(c); }); @@ -333,8 +332,7 @@ ModelState::ParseParameter( std::vector>* device_config) { std::string value; - RETURN_IF_ERROR( - ReadParameter(params, mkey, &(value))); + RETURN_IF_ERROR(ReadParameter(params, mkey, &(value))); if (!value.empty()) { std::pair ov_property; RETURN_IF_ERROR(ParseParameterHelper(mkey, &value, &ov_property)); @@ -427,8 +425,8 @@ ModelState::ConfigureOpenvinoCore() auto availableDevices = ov_core_.get_available_devices(); std::stringstream list_of_devices; - for (auto & element : availableDevices) { - list_of_devices << element << ","; + for (auto& element : availableDevices) { + list_of_devices << element << ","; } LOG_MESSAGE( TRITONSERVER_LOG_VERBOSE, @@ -957,9 +955,11 @@ ModelInstanceState::Create( ModelInstanceState::ModelInstanceState( ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance) : BackendModelInstance(model_state, triton_model_instance), - model_state_(model_state), device_(model_state->TargetDevice()), batch_pad_size_(0) + model_state_(model_state), device_(model_state->TargetDevice()), + batch_pad_size_(0) { - if ((Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) && (Kind() != TRITONSERVER_INSTANCEGROUPKIND_AUTO)) { + if ((Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) && + (Kind() != TRITONSERVER_INSTANCEGROUPKIND_AUTO)) { throw triton::backend::BackendModelInstanceException(TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, (std::string("unable to load model '") + model_state_->Name() + @@ -972,10 +972,9 @@ ModelInstanceState::ModelInstanceState( THROW_IF_BACKEND_INSTANCE_ERROR(model_state_->ParseParameters()); device_ = model_state->TargetDevice(); LOG_MESSAGE( - 
TRITONSERVER_LOG_INFO, - (std::string("Target device " + device_)) - .c_str()); - + TRITONSERVER_LOG_INFO, + (std::string("Target device " + device_)).c_str()); + THROW_IF_BACKEND_INSTANCE_ERROR( model_state_->ReadModel(ArtifactFilename(), &model_path)); @@ -1550,7 +1549,7 @@ TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) LOG_MESSAGE( TRITONSERVER_LOG_INFO, (std::string("TRITONBACKEND_ModelInstanceInitialize: ") + name + " (" + - TRITONSERVER_InstanceGroupKindString(kind)+")") + TRITONSERVER_InstanceGroupKindString(kind) + ")") .c_str()); // Get the model state associated with this instance's model. diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 2691571..8c97950 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -62,6 +62,7 @@ def dockerfile_for_linux(output_file): RUN apt-get update && apt-get install -y --no-install-recommends \ cmake \ libglib2.0-dev \ + libtbb-dev \ patchelf \ git \ make \ @@ -103,10 +104,11 @@ def dockerfile_for_linux(output_file): WORKDIR /opt/openvino RUN cp -r /workspace/openvino/licensing LICENSE.openvino RUN mkdir -p include && \ - cp -r /workspace/install/runtime/include/* include/. + cp -r /workspace/install/runtime/include/ngraph include/. && \ + cp -r /workspace/install/runtime/include/openvino include/. RUN mkdir -p lib && \ - cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \ - cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* lib/. + cp -P /usr/lib/x86_64-linux-gnu/libtbb.so* lib/. && \ + cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. \ """ df += """ From c05a182b6ed2f82ba91f8ed729cc6831e6b9d840 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Tue, 9 Apr 2024 16:54:55 +0200 Subject: [PATCH 07/15] drop trailing space --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 887a5cc..8341b30 100644 --- a/README.md +++ b/README.md @@ -269,7 +269,7 @@ parameters: [ Start the container with extra parameter to pass the device `/dev/dri`: ``` -docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* ) tritonserver:latest +docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* ) tritonserver:latest ``` ## Known Issues From ffc18892a7a3a4ba53e8b1206e5372a62247ed0f Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 24 Jul 2024 08:23:00 +0200 Subject: [PATCH 08/15] included runtime libraries to execute on GPU --- tools/gen_openvino_dockerfile.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 8c97950..0a2543c 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -77,6 +77,15 @@ def dockerfile_for_linux(output_file): # pre-build archive. # TODO: Unify build steps between linux and windows. 
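# The driver packages fetched in the next step are only unpacked with
# `dpkg-deb -X`, not installed; their shared objects are copied into the
# backend lib directory further down.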
+# Get intel GPU drivers
+WORKDIR /drv
+RUN curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.15468.11/intel-igc-core_1.0.15468.11_amd64.deb ; \
+    curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.15468.11/intel-igc-opencl_1.0.15468.11_amd64.deb ; \
+    curl -L -O https://github.com/intel/compute-runtime/releases/download/23.43.27642.18/intel-opencl-icd_23.43.27642.18_amd64.deb ; \
+    curl -L -O https://github.com/intel/compute-runtime/releases/download/23.43.27642.18/libigdgmm12_22.3.11_amd64.deb ; \
+    apt-get download ocl-icd-libopencl1 ; \
+    find . -iname '*.deb' -exec dpkg-deb -X {} . \;
+
 ARG OPENVINO_VERSION
 ARG OPENVINO_BUILD_TYPE
 WORKDIR /workspace

@@ -104,11 +113,11 @@ def dockerfile_for_linux(output_file):
 WORKDIR /opt/openvino
 RUN cp -r /workspace/openvino/licensing LICENSE.openvino
 RUN mkdir -p include && \
-    cp -r /workspace/install/runtime/include/ngraph include/. && \
-    cp -r /workspace/install/runtime/include/openvino include/.
+    cp -r /workspace/install/runtime/include/* include/.
 RUN mkdir -p lib && \
     cp -P /usr/lib/x86_64-linux-gnu/libtbb.so* lib/. && \
-    cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. \
+    cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. && \
+    find /drv/usr/ -iname '*.so*' -exec cp -P {} lib/. \;

 """
 df += """

From cda8a940f99fa265f2d00dc95e26b14499dba503 Mon Sep 17 00:00:00 2001
From: Dariusz Trawinski
Date: Wed, 24 Jul 2024 23:54:51 +0200
Subject: [PATCH 09/15] update readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c336adc..6495f6b 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ Build the custom triton image with the required runtime drivers using the script
 ```
 python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 \
---backend openvino:pull/74/head
+--backend openvino
 ```

From 81381d2cade9e34ba07256cb90bbbd650b075169 Mon Sep 17 00:00:00 2001
From: Dariusz Trawinski
Date: Wed, 31 Jul 2024 14:40:47 +0200
Subject: [PATCH 10/15] trailing spaces

---
 tools/gen_openvino_dockerfile.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py
index a1cb8f9..7b567b9 100755
--- a/tools/gen_openvino_dockerfile.py
+++ b/tools/gen_openvino_dockerfile.py
@@ -112,11 +112,11 @@ def dockerfile_for_linux(output_file):
 WORKDIR /opt/openvino
 RUN cp -r /workspace/openvino/licensing LICENSE.openvino
 RUN mkdir -p include && \
-    cp -r /workspace/install/runtime/include/* include/. 
+    cp -r /workspace/install/runtime/include/* include/.
 RUN mkdir -p lib && \
     cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \
     cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. && \
-    find /drv/usr/ -iname '*.so*' -exec cp -P {} lib/. 
\; """ df += """ From b3f5126e4434c516a2ea5a6bdd8d64d828f24333 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 27 Nov 2024 23:39:49 +0100 Subject: [PATCH 11/15] review changes --- Dockerfile.drivers | 16 +++++++++++++ README.md | 40 ++++++++++++++++++++++++++++---- src/openvino.cc | 30 +++++++++++++----------- src/openvino_utils.cc | 11 +++++---- src/openvino_utils.h | 2 +- tools/gen_openvino_dockerfile.py | 12 +--------- 6 files changed, 76 insertions(+), 35 deletions(-) create mode 100644 Dockerfile.drivers diff --git a/Dockerfile.drivers b/Dockerfile.drivers new file mode 100644 index 0000000..52d3227 --- /dev/null +++ b/Dockerfile.drivers @@ -0,0 +1,16 @@ +ARG BASE_OS=tritonserver:latest +FROM $BASE_OS +RUN mkdir /tmp/neo && cd /tmp/neo && \ + apt-get update && apt-get install -y libtbb12 curl && \ + curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb && \ + curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb && \ + curl -L -O https://github.com/oneapi-src/level-zero/releases/download/v1.17.44/level-zero_1.17.44+u24.04_amd64.deb && \ + curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-driver-compiler-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ + curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-fw-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ + curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-level-zero-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ + dpkg -i *.deb && \ + apt-get install -y ocl-icd-libopencl1 && \ + rm -Rf /tmp/neo diff --git a/README.md b/README.md index f46cd2f..1e9a8d6 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ $ cd build $ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_BUILD_OPENVINO_VERSION=2024.4.0 -DTRITON_BUILD_CONTAINER_VERSION=24.03 .. $ make install ``` +The compiled backend will be added to `build/install/backends/openvino` folder. The following required Triton repositories will be pulled and used in the build. By default the "main" branch/tag will be used for each repo @@ -71,14 +72,24 @@ but the listed CMake argument can be used to override. * triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] -## Build a complete image with OpenVINO backend including Intel GPU drivers - -Build the custom triton image with the required runtime drivers using the script from [build.py](https://github.com/dtrawins/server/blob/igpu/build.py). 
+## Build a complete triton custom image with OpenVINO backend ``` +git clone https://github.com/triton-inference-server/server +cd server +pip install distro requests python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 \ --backend openvino ``` +It will create an image called `tritonserver:latest` + +## Add Intel GPU and NPU dependencies to the image + +The `Dockerfile.drivers` adds OpenVINO runtime drivers needed to run inference on the accelerators. Use, as the base image, public image with OpenVINO backend or the custom one. + +``` +docker build -f Dockerfile.drivers --build-arg BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.11-py3 -t tritonserver:latest . +``` ## Using the OpenVINO Backend @@ -98,7 +109,7 @@ to skip the dynamic batch sizes in backend. * `ENABLE_BATCH_PADDING`: By default an error will be generated if backend receives a request with batch size less than max_batch_size specified in the configuration. This error can be avoided at a cost of performance by specifying `ENABLE_BATCH_PADDING` parameter as `YES`. * `RESHAPE_IO_LAYERS`: By setting this parameter as `YES`, the IO layers are reshaped to the dimensions provided in model configuration. By default, the dimensions in the model is used. -* `TARGET_DEVICE`: Choose the OpenVINO device for running the inference. It could be CPU (default), GPU or any of the virtual devices like AUTO, MULTI, HETERO. Note: using Intel GPU is possible only if `--device /dev/dri` is passed to the container and is supported only on linux with x86_64 arch. +* `TARGET_DEVICE`: Choose the OpenVINO device for running the inference. It could be CPU (default), GPU, NPU or any of the virtual devices like AUTO, MULTI, HETERO. 
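For illustration, a minimal `config.pbtxt` parameters block selecting one of the virtual devices named above could look as follows (here `AUTO`, which defers device selection to OpenVINO); it differs from the GPU and NPU examples elsewhere in this README only in the device string:

```
parameters: [
{
  key: "TARGET_DEVICE"
  value: {
    string_value: "AUTO"
  }
}
]
```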
@@ -270,7 +281,26 @@ parameters: [

 Start the container with extra parameter to pass the device `/dev/dri`:
 ```
-docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* ) tritonserver:latest
+docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1 ) tritonserver:latest
 ```

+### Running the models on Intel NPU
+
+Add to your config.pbtxt a parameter `TARGET_DEVICE`:
+```
+parameters: [
+{
+  key: "TARGET_DEVICE"
+  value: {
+    string_value: "NPU"
+  }
+}
+]
+```
+
+Start the container with extra parameter to pass the device `/dev/accel`:
+```
+docker run -it --rm --device /dev/accel --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) tritonserver:latest
+```

 ## Known Issues

diff --git a/src/openvino.cc b/src/openvino.cc
index 4070c70..7273251 100644
--- a/src/openvino.cc
+++ b/src/openvino.cc
@@ -262,11 +262,16 @@ ModelState::ParseParameters(const std::string& device)
   if (status) {
     config_[device] = {};
     auto& device_config = config_.at(device);
-    ParseParameter("INFERENCE_NUM_THREADS", params, &device_config);
-    ParseParameter("COMPILATION_NUM_THREADS", params, &device_config);
-    ParseParameter("HINT_BF16", params, &device_config);
-    ParseParameter("NUM_STREAMS", params, &device_config);
-    ParseParameter("PERFORMANCE_HINT", params, &device_config);
+    RETURN_IF_ERROR(
+        ParseParameter("INFERENCE_NUM_THREADS", params, &device_config));
+    RETURN_IF_ERROR(
+        ParseParameter("COMPILATION_NUM_THREADS", params, &device_config));
+    RETURN_IF_ERROR(
+        ParseParameter("HINT_BF16", params, &device_config));
+    RETURN_IF_ERROR(
+        ParseParameter("NUM_STREAMS", params, &device_config));
+    RETURN_IF_ERROR(
+        ParseParameter("PERFORMANCE_HINT", params, &device_config));
   }

   return nullptr;
@@ -276,7 +281,7 @@ TRITONSERVER_Error*
 ModelState::LoadCpuExtensions(triton::common::TritonJson::Value& params)
 {
   std::string cpu_ext_path;
-  ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path));
+  RETURN_IF_ERROR(ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path), ""));
   if (!cpu_ext_path.empty()) {
     // CPU (MKLDNN) extensions is loaded as a shared library and passed as a
     // pointer to base extension
@@ -284,7 +289,7 @@ ModelState::LoadCpuExtensions(triton::common::TritonJson::Value& params)
         ov_core_.add_extension(cpu_ext_path), " loading custom CPU extensions");
     LOG_MESSAGE(
         TRITONSERVER_LOG_INFO,
-        (std::string("CPU (MKLDNN) extensions is loaded") + cpu_ext_path)
+        (std::string("CPU extensions is loaded") + cpu_ext_path)
             .c_str());
   }

@@ -298,7 +303,7 @@ ModelState::ParseBoolParameter(
     bool* setting)
 {
   std::string value;
-  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value)));
+  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value),""));
   std::transform(
       value.begin(), value.end(), value.begin(),
       [](unsigned char c) { return std::tolower(c); });
@@ -315,7 +320,7 @@ ModelState::ParseStringParameter(
     std::string* setting)
 {
   std::string value;
-  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value)));
+  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value), ""));
   std::transform(
       value.begin(), value.end(), value.begin(),
       [](unsigned char c) { return std::toupper(c); });
@@ -332,7 +337,7 @@ ModelState::ParseParameter(
     std::vector>* device_config)
 {
   std::string value;
-  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value)));
+  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value),""));
   if (!value.empty()) {
     std::pair ov_property;
     RETURN_IF_ERROR(ParseParameterHelper(mkey, &value, &ov_property));
@@ -958,8 +963,7 @@
ModelInstanceState::ModelInstanceState( model_state_(model_state), device_(model_state->TargetDevice()), batch_pad_size_(0) { - if ((Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) && - (Kind() != TRITONSERVER_INSTANCEGROUPKIND_AUTO)) { + if (Kind() != TRITONSERVER_INSTANCEGROUPKIND_CPU) { throw triton::backend::BackendModelInstanceException(TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, (std::string("unable to load model '") + model_state_->Name() + @@ -1638,7 +1642,7 @@ TRITONBACKEND_GetBackendAttribute( TRITONSERVER_LOG_VERBOSE, "TRITONBACKEND_GetBackendAttribute: setting attributes"); RETURN_IF_ERROR(TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup( - backend_attributes, TRITONSERVER_INSTANCEGROUPKIND_AUTO, 0, nullptr, 0)); + backend_attributes, TRITONSERVER_INSTANCEGROUPKIND_CPU, 0, nullptr, 0)); return nullptr; } diff --git a/src/openvino_utils.cc b/src/openvino_utils.cc index c5bc1a1..ac5a666 100644 --- a/src/openvino_utils.cc +++ b/src/openvino_utils.cc @@ -277,13 +277,14 @@ CompareDimsSupported( TRITONSERVER_Error* ReadParameter( triton::common::TritonJson::Value& params, const std::string& key, - std::string* param) + std::string* param, const std::string default_value) { triton::common::TritonJson::Value value; - RETURN_ERROR_IF_FALSE( - params.Find(key.c_str(), &value), TRITONSERVER_ERROR_INVALID_ARG, - std::string("model configuration is missing the parameter ") + key); - RETURN_IF_ERROR(value.MemberAsString("string_value", param)); + if (params.Find(key.c_str(), &value)){ + RETURN_IF_ERROR(value.MemberAsString("string_value", param)); + } else { + *param = default_value; + } return nullptr; // success } diff --git a/src/openvino_utils.h b/src/openvino_utils.h index 2fbaadb..a894937 100644 --- a/src/openvino_utils.h +++ b/src/openvino_utils.h @@ -97,7 +97,7 @@ TRITONSERVER_Error* CompareDimsSupported( TRITONSERVER_Error* ReadParameter( triton::common::TritonJson::Value& params, const std::string& key, - std::string* param); + std::string* param, const std::string default_value); std::vector ConvertToSignedShape(const ov::PartialShape& shape); diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 5adcace..4b0d835 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -76,15 +76,6 @@ def dockerfile_for_linux(output_file): # pre-build archive. # TODO: Unify build steps between linux and windows. -# Get intel GPU drivers -WORKDIR /drv -RUN curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.15468.11/intel-igc-core_1.0.15468.11_amd64.deb ; \ - curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.15468.11/intel-igc-opencl_1.0.15468.11_amd64.deb ; \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/23.43.27642.18/intel-opencl-icd_23.43.27642.18_amd64.deb ; \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/23.43.27642.18/libigdgmm12_22.3.11_amd64.deb ; \ - apt-get download ocl-icd-libopencl1 ; \ - find . -iname '*.deb' -exec dpkg-deb -X {} . \; - ARG OPENVINO_VERSION ARG OPENVINO_BUILD_TYPE WORKDIR /workspace @@ -115,8 +106,7 @@ def dockerfile_for_linux(output_file): cp -r /workspace/install/runtime/include/* include/. RUN mkdir -p lib && \ cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \ - cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. && \ - find /drv/usr/ -iname '*.so*' -exec cp -P {} lib/. 
\; + cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. """ df += """ From aba01374c08c80808de29facdabbcab408c7c424 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 28 Nov 2024 16:22:30 +0100 Subject: [PATCH 12/15] style and minor fixes --- Dockerfile.drivers | 8 ++++---- README.md | 11 ++++++----- src/openvino.cc | 23 ++++++++++------------- src/openvino_utils.cc | 2 +- 4 files changed, 21 insertions(+), 23 deletions(-) diff --git a/Dockerfile.drivers b/Dockerfile.drivers index 52d3227..1994015 100644 --- a/Dockerfile.drivers +++ b/Dockerfile.drivers @@ -1,5 +1,5 @@ -ARG BASE_OS=tritonserver:latest -FROM $BASE_OS +ARG BASE_IMAGE=tritonserver:latest +FROM $BASE_IMAGE RUN mkdir /tmp/neo && cd /tmp/neo && \ apt-get update && apt-get install -y libtbb12 curl && \ curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb && \ @@ -12,5 +12,5 @@ RUN mkdir /tmp/neo && cd /tmp/neo && \ curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-fw-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-level-zero-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ dpkg -i *.deb && \ - apt-get install -y ocl-icd-libopencl1 && \ - rm -Rf /tmp/neo + apt-get install -y ocl-icd-libopencl1 --no-install-recommends && \ + rm -rf /var/lib/apt/lists/* && rm -Rf /tmp/neo diff --git a/README.md b/README.md index 1e9a8d6..8356f33 100644 --- a/README.md +++ b/README.md @@ -79,8 +79,9 @@ git clone https://github.com/triton-inference-server/server cd server pip install distro requests python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 \ ---backend openvino +--backend openvino:pull/87/head ``` +In the backend value, the pull request is optional. Use `--backend openvino` to build from `main` branch. It will create an image called `tritonserver:latest` ## Add Intel GPU and NPU dependencies to the image @@ -88,7 +89,7 @@ It will create an image called `tritonserver:latest` The `Dockerfile.drivers` adds OpenVINO runtime drivers needed to run inference on the accelerators. Use, as the base image, public image with OpenVINO backend or the custom one. ``` -docker build -f Dockerfile.drivers --build-arg BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.11-py3 -t tritonserver:latest . +docker build -f Dockerfile.drivers --build-arg BASE_IMAGE=tritonserver:latest -t tritonserver:xpu . 
```
 ```

@@ -281,7 +282,7 @@ parameters: [

 Start the container with extra parameter to pass the device `/dev/dri`:
 ```
-docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1 ) tritonserver:latest
+docker run -it --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1 ) tritonserver:xpu
 ```

 ### Running the models on Intel NPU
@@ -300,10 +301,10 @@ parameters: [

 Start the container with extra parameter to pass the device `/dev/accel`:
 ```
-docker run -it --rm --device /dev/accel --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) tritonserver:latest
+docker run -it --rm --device /dev/accel --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) tritonserver:xpu
 ```

 ## Known Issues

 * Models with a scalar on the input (a shape without any dimension) are not supported
-* Reshaping using [dimension ranges](https://docs.openvino.ai/2023.3/ovms_docs_dynamic_shape_dynamic_model.html) is not supported.
+* Reshaping using [dimension ranges](https://docs.openvino.ai/2024/openvino-workflow/running-inference/dynamic-shapes.html#dimension-bounds) is not supported.

diff --git a/src/openvino.cc b/src/openvino.cc
index 7273251..1bde0a9 100644
--- a/src/openvino.cc
+++ b/src/openvino.cc
@@ -263,15 +263,12 @@ ModelState::ParseParameters(const std::string& device)
     config_[device] = {};
     auto& device_config = config_.at(device);
     RETURN_IF_ERROR(
-        ParseParameter("INFERENCE_NUM_THREADS", params, &device_config));
+      ParseParameter("INFERENCE_NUM_THREADS", params, &device_config));
     RETURN_IF_ERROR(
-        ParseParameter("COMPILATION_NUM_THREADS", params, &device_config));
-    RETURN_IF_ERROR(
-        ParseParameter("HINT_BF16", params, &device_config));
-    RETURN_IF_ERROR(
-        ParseParameter("NUM_STREAMS", params, &device_config));
-    RETURN_IF_ERROR(
-        ParseParameter("PERFORMANCE_HINT", params, &device_config));
+      ParseParameter("COMPILATION_NUM_THREADS", params, &device_config));
+    RETURN_IF_ERROR(ParseParameter("HINT_BF16", params, &device_config));
+    RETURN_IF_ERROR(ParseParameter("NUM_STREAMS", params, &device_config));
+    RETURN_IF_ERROR(ParseParameter("PERFORMANCE_HINT", params, &device_config));
   }

   return nullptr;
@@ -281,7 +278,8 @@ TRITONSERVER_Error*
 ModelState::LoadCpuExtensions(triton::common::TritonJson::Value& params)
 {
   std::string cpu_ext_path;
-  RETURN_IF_ERROR(ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path), ""));
+  RETURN_IF_ERROR(
+      ReadParameter(params, "CPU_EXTENSION_PATH", &(cpu_ext_path), ""));
   if (!cpu_ext_path.empty()) {
     // CPU (MKLDNN) extensions is loaded as a shared library and passed as a
     // pointer to base extension
@@ -289,8 +287,7 @@ ModelState::LoadCpuExtensions(triton::common::TritonJson::Value& params)
         ov_core_.add_extension(cpu_ext_path), " loading custom CPU extensions");
     LOG_MESSAGE(
         TRITONSERVER_LOG_INFO,
-        (std::string("CPU extensions is loaded") + cpu_ext_path)
-            .c_str());
+        (std::string("CPU extensions is loaded") + cpu_ext_path).c_str());
   }

@@ -303,7 +300,7 @@ ModelState::ParseBoolParameter(
     bool* setting)
 {
   std::string value;
-  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value),""));
+  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value), ""));
   std::transform(
       value.begin(), value.end(), value.begin(),
       [](unsigned char c) { return std::tolower(c); });
@@ -337,7 +334,7 @@ ModelState::ParseParameter(
     std::vector>* device_config)
 {
   std::string value;
-  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value),""));
+  RETURN_IF_ERROR(ReadParameter(params, mkey, &(value), ""));
   if
(!value.empty()) { std::pair ov_property; RETURN_IF_ERROR(ParseParameterHelper(mkey, &value, &ov_property)); diff --git a/src/openvino_utils.cc b/src/openvino_utils.cc index ac5a666..9c6f312 100644 --- a/src/openvino_utils.cc +++ b/src/openvino_utils.cc @@ -280,7 +280,7 @@ ReadParameter( std::string* param, const std::string default_value) { triton::common::TritonJson::Value value; - if (params.Find(key.c_str(), &value)){ + if (params.Find(key.c_str(), &value)) { RETURN_IF_ERROR(value.MemberAsString("string_value", param)); } else { *param = default_value; From 107a41a469776711b04d4040bae75653e653661b Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 3 Sep 2025 19:33:07 +0200 Subject: [PATCH 13/15] update --- Dockerfile.drivers | 18 +++++++++--------- tools/gen_openvino_dockerfile.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Dockerfile.drivers b/Dockerfile.drivers index 1994015..e955040 100644 --- a/Dockerfile.drivers +++ b/Dockerfile.drivers @@ -2,15 +2,15 @@ ARG BASE_IMAGE=tritonserver:latest FROM $BASE_IMAGE RUN mkdir /tmp/neo && cd /tmp/neo && \ apt-get update && apt-get install -y libtbb12 curl && \ - curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb && \ - curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb && \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb && \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb && \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb && \ - curl -L -O https://github.com/oneapi-src/level-zero/releases/download/v1.17.44/level-zero_1.17.44+u24.04_amd64.deb && \ - curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-driver-compiler-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ - curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-fw-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ - curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-level-zero-npu_1.10.0.20241107-11729849322_ubuntu24.04_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/libze-intel-gpu1_25.31.34666.3-0_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/intel-opencl-icd_25.31.34666.3-0_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/libigdgmm12_22.8.1_amd64.deb && \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/25.31.34666.3/intel-ocloc_25.31.34666.3-0_amd64.deb && \ + curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.16.0/intel-igc-core-2_2.16.0+19683_amd64.deb && \ + curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/v2.16.0/intel-igc-opencl-2_2.16.0+19683_amd64.deb && \ + curl -L -O https://github.com/intel/linux-npu-driver/releases/download/v1.23.0/linux-npu-driver-v1.23.0.20250827-17270089246-ubuntu2404.tar.gz && \ + tar -xf linux-npu-driver-v1.23.0.20250827-17270089246-ubuntu2404.tar.gz && \ + curl -L -O https://github.com/oneapi-src/level-zero/releases/download/v1.22.4/level-zero_1.22.4+u24.04_amd64.deb && \ 
dpkg -i *.deb && \ apt-get install -y ocl-icd-libopencl1 --no-install-recommends && \ rm -rf /var/lib/apt/lists/* && rm -Rf /tmp/neo diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index 2702ac2..e0fc499 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -124,7 +124,7 @@ def dockerfile_for_linux(output_file): RUN mkdir -p include && \ cp -r /workspace/install/runtime/include/* include/. RUN mkdir -p lib && \ - cp -P /workspace/install/runtime/lib/intel64/*.so* lib/. && \ + cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* lib/. \ cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. """ From b95056a3c1cb48d270e6e55fb04996560a19f175 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 3 Sep 2025 19:45:44 +0200 Subject: [PATCH 14/15] fix --- tools/gen_openvino_dockerfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/gen_openvino_dockerfile.py b/tools/gen_openvino_dockerfile.py index e0fc499..0d31784 100755 --- a/tools/gen_openvino_dockerfile.py +++ b/tools/gen_openvino_dockerfile.py @@ -124,7 +124,7 @@ def dockerfile_for_linux(output_file): RUN mkdir -p include && \ cp -r /workspace/install/runtime/include/* include/. RUN mkdir -p lib && \ - cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* lib/. \ + cp -P /workspace/install/runtime/3rdparty/tbb/lib/libtbb.so* lib/. && \ cp -P /workspace/install/runtime/lib/intel64/libopenvino*.so* lib/. """ From bd97dda0682884a10b33beacc7eff91df2e31b3a Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 3 Sep 2025 20:24:43 +0200 Subject: [PATCH 15/15] update readme --- README.md | 2 +- tests/README.md | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5b3a3dd..8096904 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ git clone https://github.com/triton-inference-server/server cd server pip install distro requests python3 build.py --target-platform linux --enable-logging --enable-stats --enable-metrics --enable-cpu-metrics --endpoint grpc --endpoint http --filesystem s3 \ ---backend openvino:pull/87/head +--backend openvino ``` In the backend value, the pull request is optional. Use `--backend openvino` to build from `main` branch. It will create an image called `tritonserver:latest` diff --git a/tests/README.md b/tests/README.md index 5a7e6f9..a1ec7b0 100644 --- a/tests/README.md +++ b/tests/README.md @@ -3,16 +3,17 @@ Installing and running tests ```bash pip install -r requirements.txt -pytest +pytest -sv --image=tritonserver:latest ``` Running tests with gpu ```bash -pytest --gpu +pytest -sv --gpu --image=tritonserver:latest ``` Run tests while caching downloaded models ```bash -pytest --model-cache ./cache +mkdir cache +pytest -sv --model-cache ./cache --image=tritonserver:latest ```
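The `--image`, `--gpu` and `--model-cache` flags above are custom pytest options, so the suite has to declare them itself, conventionally in a `conftest.py`. Below is a minimal sketch of how such options are typically wired up; the option names match the commands above, while the `triton_image` fixture is purely illustrative and not taken from this repository:

```python
# conftest.py -- declares the suite's custom command-line options (sketch)
import pytest


def pytest_addoption(parser):
    # Triton server image the tests are run against.
    parser.addoption(
        "--image", action="store", default="tritonserver:latest",
        help="Triton server image under test")
    # Opt-in switch for the test cases that need an Intel GPU (/dev/dri).
    parser.addoption(
        "--gpu", action="store_true", default=False,
        help="also run the GPU test cases")
    # Optional directory reused across runs to avoid re-downloading models.
    parser.addoption(
        "--model-cache", action="store", default=None,
        help="directory used to cache downloaded models")


@pytest.fixture(scope="session")
def triton_image(request):
    # Expose the selected image name to individual tests.
    return request.config.getoption("--image")
```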