diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
index 51403d304718bd..cadc4f6aa8d270 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
@@ -842,7 +842,7 @@ struct COMPILER_TYPE final : OptionBase
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h
new file mode 100644
index 00000000000000..409798e64b4a1b
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h
@@ -0,0 +1,342 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef VPUX_COMPILER_L0_H
+#define VPUX_COMPILER_L0_H
+
+#if defined(__cplusplus)
+#    include <cstddef>
+#    include <cstdint>
+#else
+#    include <stddef.h>
+#    include <stdint.h>
+#endif
+
+#if defined(__cplusplus)
+#    pragma once
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define VCL_COMPILER_VERSION_MAJOR 7
+#define VCL_COMPILER_VERSION_MINOR 6
+#define VCL_PROFILING_VERSION_MAJOR 2
+#define VCL_PROFILING_VERSION_MINOR 0
+
+#ifndef DEPRECATED
+#    define DEPRECATED  // for documentation only
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+#ifndef VCL_APICALL
+#    if defined(_WIN32)
+/// @brief Calling convention for all API functions
+#        define VCL_APICALL __cdecl
+#    else
+#        define VCL_APICALL
+#    endif  // defined(_WIN32)
+#endif      // VCL_APICALL
+
+///////////////////////////////////////////////////////////////////////////////
+#ifndef VCL_APIEXPORT
+#    if defined(_WIN32)
+/// @brief Windows-specific dllexport storage-class attribute
+#        define VCL_APIEXPORT __declspec(dllexport)
+#    else
+#        define VCL_APIEXPORT
+#    endif  // defined(_WIN32)
+#endif      // VCL_APIEXPORT
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Compiler handle
+typedef struct __vcl_compiler_handle_t* vcl_compiler_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Executable handle
+typedef struct __vcl_executable_handle_t* vcl_executable_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Profiling handle
+typedef struct __vcl_profiling_handle_t* vcl_profiling_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+
+/// @brief QueryNetwork handle
+typedef struct __vcl_query_handle_t* vcl_query_handle_t;
+
+/// @brief Error log handle
+typedef struct __vcl_log_handle_t* vcl_log_handle_t;
+
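+///////////////////////////////////////////////////////////////////////////////
+/// Typical handle lifecycle (an illustrative sketch only; descriptor setup and
+/// error handling are elided, and all variable names are placeholders):
+///
+///   vcl_compiler_handle_t compiler = NULL;
+///   vcl_log_handle_t log = NULL;
+///   vclCompilerCreate(&compilerDesc, &deviceDesc, &compiler, &log);
+///   vcl_executable_handle_t exe = NULL;
+///   vclExecutableCreate(compiler, exeDesc, &exe);
+///   uint64_t blobSize = 0;
+///   vclExecutableGetSerializableBlob(exe, NULL, &blobSize);  /* query size */
+///   vclExecutableGetSerializableBlob(exe, blob, &blobSize);  /* copy blob  */
+///   vclExecutableDestroy(exe);
+///   vclCompilerDestroy(compiler);
+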
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines the type of requested data.
+/// Must be in sync with \b _ze_graph_profiling_type_t
+typedef enum __vcl_profiling_request_type_t {
+    VCL_PROFILING_LAYER_LEVEL = 0x1,
+    VCL_PROFILING_TASK_LEVEL = 0x2,
+    VCL_PROFILING_RAW = 0x3,
+
+    VCL_PROFILING_FORCE_UINT32 = 0x7fffffff
+} vcl_profiling_request_type_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines version info for the VPUXCompilerL0 API
+typedef struct __vcl_version_info_t {
+    uint16_t major;
+    uint16_t minor;
+
+} vcl_version_info_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines return/error codes
+typedef enum __vcl_result_t {
+    VCL_RESULT_SUCCESS = 0,                             ///< [Core] success
+    VCL_RESULT_ERROR_OUT_OF_MEMORY = 0x70000002,        ///< [Core] insufficient memory to satisfy call
+    VCL_RESULT_ERROR_UNSUPPORTED_FEATURE = 0x78000003,  ///< [Validation] generic error code for unsupported features
+    VCL_RESULT_ERROR_INVALID_ARGUMENT = 0x78000004,     ///< [Validation] generic error code for invalid arguments
+    VCL_RESULT_ERROR_INVALID_NULL_HANDLE = 0x78000005,  ///< [Validation] handle argument is not valid
+    VCL_RESULT_ERROR_IO = 0x78000006,                   ///< [Core] IO error
+    VCL_RESULT_ERROR_INVALID_IR = 0x78000007,           ///< [Validation] the member of modelIR is not valid
+    VCL_RESULT_ERROR_UNKNOWN = 0x7ffffffe,              ///< [Core] unknown or internal error
+
+} vcl_result_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines compiler properties
+typedef struct __vcl_compiler_properties_t {
+    const char* id;
+    vcl_version_info_t version;
+    uint32_t supportedOpsets;
+
+} vcl_compiler_properties_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines profiling properties
+typedef struct __vcl_profiling_properties_t {
+    vcl_version_info_t version;  ///< Profiling module version
+
+} vcl_profiling_properties_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines the debug level for VCL
+typedef enum __vcl_log_level_t {
+    VCL_LOG_NONE = 0,     ///< Log is disabled
+    VCL_LOG_ERROR = 1,    ///< Events which are not expected, containing the probable reason
+    VCL_LOG_WARNING = 2,  ///< Events which are unusual
+    VCL_LOG_INFO = 3,     ///< Short messages about ongoing activity
+    VCL_LOG_DEBUG = 4,    ///< Messages with particular data and explanations
+    VCL_LOG_TRACE = 5,    ///< Messages with detailed information about execution
+
+} vcl_log_level_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines the device description to be passed during creation
+///
+/// For online compilation, revision always holds a valid value; for offline compilation it is -1u.
+/// 1. In offline mode the driver does not know the stepping and provides -1 (unknown) to VCL.
+/// 2. In VCL:
+///    If the driver provides a valid revision, that value becomes the default value for NPU_STEPPING.
+///    If the driver provides -1u as the value for revision, VCL will not set NPU_STEPPING.
+/// 3. If NPU_STEPPING is set by the user via config, VCL uses the user config instead of the default value.
+/// 4. If NPU_STEPPING is not passed to the compiler, the compiler chooses a default stepping.
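+///
+/// Illustrative example (hypothetical values): an online driver that detected
+/// stepping 2 would pass revision = 2, so NPU_STEPPING defaults to 2; an
+/// offline caller passes revision = (uint16_t)-1u, so NPU_STEPPING stays unset
+/// unless the user provides it via config.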
+typedef struct __vcl_device_desc_t { + uint64_t size; /// Size of vcl_device_desc_t + uint32_t deviceID; /// The lower 16 bits equal to PCI Device ID, the upper 16 bits are zero + uint16_t revision; /// NPU Revision Identifier, -1u as invalid value + uint32_t tileCount; /// Value equals maximum number of slices +} vcl_device_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines compiler desc to be passed during creation +typedef struct __vcl_compiler_desc_t { + vcl_version_info_t version; /// The host vcl version + vcl_log_level_t debugLevel; /// Debug level for VCL +} vcl_compiler_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines executable description to be passed during executable +/// creation +/// +/// Format of modelIRData (defined in L0 adaptor): +/// 1. API version : vcl_version_info_t +/// 2. Num of data elements (now only xml + weights = 2) : uint32_t +/// 3. Size of data 1 (xml) : uint64_t +/// 4. Data 1 : $2 bytes +/// 5. Size of data 2 (weights) : uint64_t +/// 6. Data 2 : $4 bytes +typedef struct __vcl_executable_desc_t { + const uint8_t* modelIRData; + uint64_t modelIRSize; ///< Size of modelIRData + const char* options; ///< Compiler config options + uint64_t optionsSize; ///< Size of options +} vcl_executable_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines query description to be passed during query network creation +/// +/// Format of modelIRData (defined in L0 adaptor): +/// 1. API version : vcl_version_info_t +/// 2. Num of data elements (now only xml + weights = 2) : uint32_t +/// 3. Size of data 1 (xml) : uint64_t +/// 4. Data 1 : $2 bytes +/// 5. Size of data 2 (weights) : uint64_t +/// 6. 
Data 2                     : $4 bytes
+typedef struct __vcl_query_desc_t {
+    const uint8_t* modelIRData;
+    uint64_t modelIRSize;  ///< Size of modelIRData
+    const char* options;   ///< Compiler config options
+    uint64_t optionsSize;  ///< Size of options
+} vcl_query_desc_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines the input that is required to create a profiling handler
+typedef struct __vcl_profiling_input_t {
+    const uint8_t* blobData;  ///< Pointer to the buffer with the blob
+    uint64_t blobSize;        ///< Size of the blob in bytes
+    const uint8_t* profData;  ///< Pointer to the raw profiling output
+    uint64_t profSize;        ///< Size of the raw profiling output
+} vcl_profiling_input_t, *p_vcl_profiling_input_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decoded profiling output
+typedef struct __vcl_profiling_output_t {
+    const uint8_t* data;  ///< Either a pointer to raw data or a pointer to the array of structures
+    uint64_t size;        ///< Size of the buffer in bytes
+} vcl_profiling_output_t, *p_vcl_profiling_output_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Returns the VCL API version to the caller; this interface shall never change, so that the
+/// backward-compatibility check keeps working
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetVersion(vcl_version_info_t* compilerVersion,
+                                                     vcl_version_info_t* profilingVersion);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Creates a compiler object and returns the compiler handle
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerCreate(vcl_compiler_desc_t* compilerDesc,
+                                                         vcl_device_desc_t* deviceDesc,
+                                                         vcl_compiler_handle_t* compiler,
+                                                         vcl_log_handle_t* logHandle);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Destroys the compiler
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerDestroy(vcl_compiler_handle_t compiler);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Retrieves the compiler properties, including the version and supported opsets
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerGetProperties(vcl_compiler_handle_t compiler,
+                                                                vcl_compiler_properties_t* properties);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Creates a query network object and returns the handle
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetworkCreate(vcl_compiler_handle_t compiler,
+                                                             vcl_query_desc_t desc,
+                                                             vcl_query_handle_t* query);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Retrieves the result of the network query
+/// @attention Should be called twice: the first time to retrieve the data size, the second time to get the data.
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetwork(vcl_query_handle_t query, uint8_t* queryResult, uint64_t* size);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Destroys the query network and releases the cached query result
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetworkDestroy(vcl_query_handle_t query);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Creates an executable object and returns the executable handle.
+/// Parses modelIRData in the executable descriptor into a blob and stores it in the executable.
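+///
+/// A minimal packing sketch for modelIRData following the layout above (names
+/// are illustrative, not part of the API): one contiguous buffer of
+///   sizeof(vcl_version_info_t) + sizeof(uint32_t) + 2 * sizeof(uint64_t) + xmlSize + weightsSize
+/// bytes, filled in order with {major, minor}, numData = 2, xmlSize, the xml
+/// bytes, weightsSize, and the weight bytes.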
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableCreate(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_executable_handle_t* executable); + +DEPRECATED typedef struct __vcl_allocator_t { + uint8_t* (*allocate)(uint64_t); + void (*deallocate)(uint8_t*); +} vcl_allocator_t; + +typedef struct __vcl_allocator2_t { + uint8_t* (*allocate)(struct __vcl_allocator2_t*, uint64_t); + void (*deallocate)(struct __vcl_allocator2_t*, uint8_t*); +} vcl_allocator2_t; + +DEPRECATED VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + const vcl_allocator_t* allocator, + uint8_t** blobBuffer, + uint64_t* blobSize); + +VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate2(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_allocator2_t* allocator, + uint8_t** blobBuffer, + uint64_t* blobSize); + +VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreateWSOneShot(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_allocator2_t* allocator); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the executable and releases the cached blob. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableDestroy(vcl_executable_handle_t executable); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief If blobBuffer is null, the function returns the size of the blob stored in the executable. +/// Otherwise the function copies the executable cached blob to the blobBuffer provided by the caller. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableGetSerializableBlob(vcl_executable_handle_t executable, + uint8_t* blobBuffer, + uint64_t* blobSize); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates a buffer with decoded profiling info. +/// This is the most computationally expensive profiling API. +/// It does all memory allocations and postprocessing. +/// @warning Caller must keep \b p_vcl_profiling_input_t::profData buffer alive until +/// \b vclProfilingDestroy call if \b VCL_PROFILING_RAW request is expected. +/// \b vclProfilingCreate function doesn't copy profiling output buffer but will +/// return pointer to it as a response to \b VCL_PROFILING_RAW request. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingCreate(p_vcl_profiling_input_t profilingInput, + vcl_profiling_handle_t* profilingHandle, + vcl_log_handle_t* logHandle); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Provides profiling information based on request argument. +/// @warning For \b VCL_PROFILING_RAW request it returns a pointer to the buffer that was provided to +/// \b vclProfilingCreate function call. This means that original buffer with profiling output must +/// be alive till this call. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetDecodedProfilingBuffer(vcl_profiling_handle_t profilingHandle, + vcl_profiling_request_type_t requestType, + p_vcl_profiling_output_t profilingOutput); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the buffer with decoded profiling info. +/// Now caller may safely dispose raw profiling output. 
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingDestroy(vcl_profiling_handle_t profilingHandle); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get version of post-processing module +VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingGetProperties(vcl_profiling_handle_t profilingHandle, + vcl_profiling_properties_t* properties); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves error message from log handler. +/// Handle is released automatically with related compiler or Profiler. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclLogHandleGetString(vcl_log_handle_t logHandle, size_t* logSize, char* log); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieve the list of supported compiler options +/// @attention Should be called twice, first time to retrieve data size, second time to get data. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetCompilerSupportedOptions(vcl_compiler_handle_t compiler, + char* result, + uint64_t* size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Verifies if a given config option (or option-value pair) is supported by the compiler +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetCompilerIsOptionSupported(vcl_compiler_handle_t compiler, + const char* option, + const char* value); + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // VPUX_COMPILER_L0_H diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp new file mode 100644 index 00000000000000..a55c12faded9ec --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp @@ -0,0 +1,73 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "compiler.h" +#include "intel_npu/common/filtered_config.hpp" +#include "intel_npu/icompiler.hpp" +#include "openvino/core/except.hpp" + +namespace intel_npu { + +bool isUseBaseModelSerializer(const FilteredConfig& config); +std::string supportVclCompiler(int major, int minor); +class VCLApi; + +class VCLCompilerImpl final : public intel_npu::ICompiler { +public: + VCLCompilerImpl(); + ~VCLCompilerImpl() override; + + static std::shared_ptr getInstance() { + static std::mutex mutex; + static std::weak_ptr weak_compiler; + + std::lock_guard lock(mutex); + auto compiler = weak_compiler.lock(); + if (!compiler) { + compiler = std::make_shared(); + weak_compiler = compiler; + } + return compiler; + } + + NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override; + + std::vector> compileWsOneShot(const std::shared_ptr& model, + const Config& config) const override; + + NetworkDescription compileWsIterative(const std::shared_ptr& model, + const Config& config, + size_t callNumber) const override; + + ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; + + NetworkMetadata parse(const std::vector& network, const Config& config) const override; + + uint32_t get_version() const override; + + std::vector process_profiling_output(const std::vector& profData, + const std::vector& network, + const intel_npu::Config& config) const final override; + + bool get_supported_options(std::vector& options) const; + + bool is_option_supported(const std::string& option) const; + + std::shared_ptr 
getLinkedLibrary() const;
+
+private:
+    vcl_log_handle_t _logHandle = nullptr;
+    vcl_compiler_handle_t _compilerHandle = nullptr;
+    vcl_compiler_properties_t _compilerProperties;
+    vcl_version_info_t _vclVersion;
+    vcl_version_info_t _vclProfilingVersion;
+    Logger _logger;
+};
+
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp
new file mode 100644
index 00000000000000..d32abbd6ab4509
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp
@@ -0,0 +1,24 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/core/model.hpp"
+
+namespace intel_npu {
+/**
+ * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon
+ * serialization.
+ * @details Constant nodes (weights) may contain as metadata the "WeightlessCacheAttribute", that is, information
+ * regarding the offset of the weights within the binary file, as well as the original size and precision. This
+ * information is required within the "weights separation" flow, therefore this function is here to store it.
+ * @note Not calling this function in the weights separation flow would lead to this information being lost upon
+ * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent
+ * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be
+ * misinformed and lookups of weights offsets could fail.
+ *
+ * @param model Both source and target.
+ */
+void storeWeightlessCacheAttribute(const std::shared_ptr<ov::Model>& model);
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp
index eb529a34efee9b..5af539901ef786 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp
@@ -75,4 +75,7 @@ class ZeGraphExtWrappers {
     Logger _logger;
 };
 
+// Parses the query result string into an unordered_set of strings
+std::unordered_set<std::string> parseQueryResult(std::vector<char>& data);
+
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp
new file mode 100644
index 00000000000000..2ebdce1aec4898
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp
@@ -0,0 +1,719 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "compiler_impl.hpp"
+
+#include "intel_npu/config/options.hpp"
+#include "intel_npu/npu_private_properties.hpp"
+#include "intel_npu/profiling.hpp"
+#include "openvino/runtime/make_tensor.hpp"
+#include "openvino/util/file_util.hpp"
+#include "openvino/util/shared_object.hpp"
+#include "vcl_serializer.hpp"
+#include "ze_graph_ext_wrappers.hpp"
+
+namespace intel_npu {
+
+// clang-format off
+#define vcl_symbols_list()                                \
+    vcl_symbol_statement(vclGetVersion)                   \
+    vcl_symbol_statement(vclCompilerCreate)               \
+    vcl_symbol_statement(vclCompilerDestroy)              \
+    vcl_symbol_statement(vclCompilerGetProperties)        \
+    vcl_symbol_statement(vclQueryNetworkCreate)           \
+    vcl_symbol_statement(vclQueryNetwork)                 \
+
vcl_symbol_statement(vclQueryNetworkDestroy) \ + vcl_symbol_statement(vclExecutableCreate) \ + vcl_symbol_statement(vclAllocatedExecutableCreate) \ + vcl_symbol_statement(vclExecutableDestroy) \ + vcl_symbol_statement(vclExecutableGetSerializableBlob) \ + vcl_symbol_statement(vclProfilingCreate) \ + vcl_symbol_statement(vclGetDecodedProfilingBuffer) \ + vcl_symbol_statement(vclProfilingDestroy) \ + vcl_symbol_statement(vclProfilingGetProperties) \ + vcl_symbol_statement(vclLogHandleGetString) \ + vcl_symbol_statement(vclAllocatedExecutableCreate2) \ + vcl_symbol_statement(vclGetCompilerSupportedOptions) \ + vcl_symbol_statement(vclGetCompilerIsOptionSupported) \ + + +//unsupported symbols with older ze_loader versions +#define vcl_weak_symbols_list() \ + vcl_symbol_statement(vclAllocatedExecutableCreateWSOneShot) +// clang-format on + +class VCLApi { +public: + VCLApi(); + VCLApi(const VCLApi& other) = delete; + VCLApi(VCLApi&& other) = delete; + void operator=(const VCLApi&) = delete; + void operator=(VCLApi&&) = delete; + + static const std::shared_ptr& getInstance(); + std::shared_ptr getLibrary() const { + return lib; + } + +#define vcl_symbol_statement(vcl_symbol) decltype(&::vcl_symbol) vcl_symbol; + vcl_symbols_list(); + vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +private: + std::shared_ptr lib; + Logger _logger; +}; + +#define vcl_symbol_statement(vcl_symbol) \ + template \ + inline typename std::invoke_result::type wrapped_##vcl_symbol(Args... args) { \ + const auto& ptr = VCLApi::getInstance(); \ + if (ptr->vcl_symbol == nullptr) { \ + OPENVINO_THROW("Unsupported vcl_symbol " #vcl_symbol); \ + } \ + return ptr->vcl_symbol(std::forward(args)...); \ + } +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement +#define vcl_symbol_statement(vcl_symbol) inline decltype(&::vcl_symbol) vcl_symbol = wrapped_##vcl_symbol; +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +static inline std::string getLatestVCLLog(vcl_log_handle_t logHandle) { + Logger _logger("VCLAPI", Logger::global().level()); + _logger.debug("getLatestVCLLog start"); + + vcl_version_info_t compilerVersion; + vcl_version_info_t profilingVersion; + vcl_result_t ret = vclGetVersion(&compilerVersion, &profilingVersion); + + if (ret != VCL_RESULT_SUCCESS || compilerVersion.major < 3) { + _logger.warning("Failed to get VCL version: 0x%x", ret); + return "Can not get VCL log, VCL version is too old!"; + } + + // Get log size + size_t size = 0; + // Null graph handle to get error log + ret = vclLogHandleGetString(logHandle, &size, nullptr); + if (VCL_RESULT_SUCCESS != ret) { + return "Failed to get size of latest VCL log"; + } + + if (size <= 0) { + return "No error stored in VCL when error detected"; + } + + // Get log content + std::string logContent{}; + logContent.resize(size); + ret = vclLogHandleGetString(logHandle, &size, const_cast(logContent.data())); + if (VCL_RESULT_SUCCESS != ret) { + return "Size of latest error log > 0, failed to get content"; + } + _logger.debug("getLatestBuildError end"); + return logContent; +} + +#define THROW_ON_FAIL_FOR_VCL(step, ret, logHandle) \ + { \ + vcl_result_t result = ret; \ + if (result != VCL_RESULT_SUCCESS) { \ + OPENVINO_THROW("Failed to call VCL API : ", \ + step, \ + " result: 0x", \ + std::hex, \ + result, \ + " - ", \ + getLatestVCLLog(logHandle)); \ + } \ + } + +VCLApi::VCLApi() : _logger("VCLApi", Logger::global().level()) { + const std::string baseName = "openvino_intel_npu_compiler"; + try { + auto libpath = 
ov::util::make_plugin_library_name({}, baseName);
+        _logger.debug("Try to load openvino_intel_npu_compiler");
+
+#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
+        this->lib = ov::util::load_shared_object(ov::util::string_to_wstring(libpath).c_str());
+#else
+        this->lib = ov::util::load_shared_object(libpath.c_str());
+#endif
+    } catch (const std::runtime_error& error) {
+        _logger.debug("Failed to load openvino_intel_npu_compiler");
+        OPENVINO_THROW(error.what());
+    }
+
+    try {
+#define vcl_symbol_statement(vcl_symbol) \
+    this->vcl_symbol = reinterpret_cast<decltype(this->vcl_symbol)>(ov::util::get_symbol(lib, #vcl_symbol));
+        vcl_symbols_list();
+#undef vcl_symbol_statement
+    } catch (const std::runtime_error& error) {
+        _logger.debug("Failed to get formal symbols from openvino_intel_npu_compiler");
+        OPENVINO_THROW(error.what());
+    }
+
+#define vcl_symbol_statement(vcl_symbol)                                                                          \
+    try {                                                                                                         \
+        this->vcl_symbol = reinterpret_cast<decltype(this->vcl_symbol)>(ov::util::get_symbol(lib, #vcl_symbol)); \
+    } catch (const std::runtime_error&) {                                                                         \
+        _logger.debug("Failed to get %s from openvino_intel_npu_compiler", #vcl_symbol);                          \
+        this->vcl_symbol = nullptr;                                                                               \
+    }
+    vcl_weak_symbols_list();
+#undef vcl_symbol_statement
+
+#define vcl_symbol_statement(vcl_symbol) vcl_symbol = this->vcl_symbol;
+    vcl_symbols_list();
+    vcl_weak_symbols_list();
+#undef vcl_symbol_statement
+}
+
+const std::shared_ptr<VCLApi>& VCLApi::getInstance() {
+    static std::shared_ptr<VCLApi> instance = std::make_shared<VCLApi>();
+    return instance;
+}
+
+VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerImpl", Logger::global().level()) {
+    _logger.debug("VCLCompilerImpl constructor start");
+
+    // Load VCL library
+    (void)VCLApi::getInstance();
+
+    // Initialize the VCL API
+    THROW_ON_FAIL_FOR_VCL("vclGetVersion", vclGetVersion(&_vclVersion, &_vclProfilingVersion), nullptr);
+
+    _logger.info("Plugin VCL API Version: %d.%d", VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR);
+    _logger.info("Plugin VCL Profiling API Version: %d.%d", VCL_PROFILING_VERSION_MAJOR, VCL_PROFILING_VERSION_MINOR);
+    _logger.info("Lib VCL Compiler Version: %d.%d", _vclVersion.major, _vclVersion.minor);
+    _logger.info("Lib VCL Profiling Version: %d.%d", _vclProfilingVersion.major, _vclProfilingVersion.minor);
+    if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major ||
+        (VCL_COMPILER_VERSION_MAJOR == _vclVersion.major && VCL_COMPILER_VERSION_MINOR < _vclVersion.minor)) {
+        _logger.warning("The VCL version supported by the plugin is lower than the loaded VCL API:\n plugin was "
+                        "built with VCL %d.%d,\n but the loaded VCL is %d.%d.\n"
+                        "Will downgrade to the latest VCL version the plugin supports!",
+                        VCL_COMPILER_VERSION_MAJOR,
+                        VCL_COMPILER_VERSION_MINOR,
+                        _vclVersion.major,
+                        _vclVersion.minor);
+    }
+
+    _logger.info("Use Lib VCL version to create compiler");
+    vcl_compiler_desc_t compilerDesc;
+    compilerDesc.version = _vclVersion;
+    compilerDesc.debugLevel = static_cast<__vcl_log_level_t>(static_cast<int>(Logger::global().level()) - 1);
+
+    // Set the device description as empty; the related info will be processed in the compile phase if passed by the user.
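+    // Illustrative only: a driver-populated descriptor would carry real values,
+    // e.g. {sizeof(vcl_device_desc_t), <PCI device id>, <stepping>, <tile count>}
+    // per the vcl_device_desc_t field descriptions; here the plugin has no
+    // device context yet, so the descriptor stays zero-initialized.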
+    vcl_device_desc_t device_desc = {};
+
+    THROW_ON_FAIL_FOR_VCL("vclCompilerCreate",
+                          vclCompilerCreate(&compilerDesc, &device_desc, &_compilerHandle, &_logHandle),
+                          nullptr);
+
+    THROW_ON_FAIL_FOR_VCL("vclCompilerGetProperties",
+                          vclCompilerGetProperties(_compilerHandle, &_compilerProperties),
+                          _logHandle);
+
+    _logger.info("VCL Compiler created successfully");
+    _logger.info("VCL Compiler Properties: ID: %s, Version: %d.%d, Supported Opsets: %u",
+                 _compilerProperties.id,
+                 _compilerProperties.version.major,
+                 _compilerProperties.version.minor,
+                 _compilerProperties.supportedOpsets);
+}
+
+VCLCompilerImpl::~VCLCompilerImpl() {
+    if (_compilerHandle) {
+        THROW_ON_FAIL_FOR_VCL("vclCompilerDestroy", vclCompilerDestroy(_compilerHandle), _logHandle);
+    }
+    if (_logHandle) {
+        _logHandle = nullptr;  // Log handle is released automatically with the compiler
+    }
+    _logger.info("VCL Compiler destroyed successfully");
+}
+
+std::shared_ptr<VCLApi> VCLCompilerImpl::getLinkedLibrary() const {
+    return VCLApi::getInstance();
+}
+
+struct vcl_allocator_vector : vcl_allocator2_t {
+    vcl_allocator_vector() : vcl_allocator2_t{vector_allocate, vector_deallocate} {}
+
+    static uint8_t* vector_allocate(vcl_allocator2_t* allocator, uint64_t size) {
+        vcl_allocator_vector* vecAllocator = static_cast<vcl_allocator_vector*>(allocator);
+        vecAllocator->m_vec.resize(size);
+        return vecAllocator->m_vec.data();
+    }
+
+    static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
+        vcl_allocator_vector* vecAllocator = static_cast<vcl_allocator_vector*>(allocator);
+        vecAllocator->m_vec.clear();
+        vecAllocator->m_vec.shrink_to_fit();
+    }
+
+    std::vector<uint8_t> m_vec;
+};
+
+struct vcl_allocator_vector_2 : vcl_allocator2_t {
+    vcl_allocator_vector_2() : vcl_allocator2_t{vector_allocate, vector_deallocate} {}
+
+    static uint8_t* vector_allocate(vcl_allocator2_t* allocator, uint64_t size) {
+        vcl_allocator_vector_2* vecAllocator = static_cast<vcl_allocator_vector_2*>(allocator);
+        auto newVec = std::make_shared<std::vector<uint8_t>>();
+        newVec->resize(size);
+        uint8_t* ptr = newVec->data();
+        vecAllocator->m_vector.emplace_back(newVec);
+        return ptr;
+    }
+
+    static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
+        vcl_allocator_vector_2* vecAllocator = static_cast<vcl_allocator_vector_2*>(allocator);
+        vecAllocator->m_vector.clear();
+        vecAllocator->m_vector.shrink_to_fit();
+    }
+
+    std::vector<std::shared_ptr<std::vector<uint8_t>>> m_vector;
+};
+
+struct vcl_allocator_malloc {
+    static uint8_t* vcl_allocate(uint64_t size) {
+        return reinterpret_cast<uint8_t*>(malloc(size));
+    }
+
+    static void vcl_deallocate(uint8_t* ptr) {
+        free(ptr);
+    }
+};
+
+bool isUseBaseModelSerializer(const FilteredConfig& config) {
+    // The user passed the use_base_model_serializer config
+    if (config.isAvailable(ov::intel_npu::use_base_model_serializer.name()) &&
+        config.has(ov::intel_npu::use_base_model_serializer.name())) {
+        return config.get<USE_BASE_MODEL_SERIALIZER>();
+    }
+
+    // The user passed the model_serializer_version config
+    if (config.isAvailable(ov::intel_npu::model_serializer_version.name()) &&
+        config.has(ov::intel_npu::model_serializer_version.name())) {
+        return (config.get<MODEL_SERIALIZER_VERSION>() ==
+                ov::intel_npu::ModelSerializerVersion::ALL_WEIGHTS_COPY);
+    }
+
+    // The serializer method was not set by the user; default to the VCL serializer (no weights copy).
+    return false;
+}
+
+std::string supportVclCompiler(int major, int minor) {
+    if (major > 7 || (major == 7 && minor >= 4)) {
+        return "vclAllocatedExecutableCreate2";
+    } else if (major > 6 || (major == 6 && minor >= 1)) {
+        return "vclAllocatedExecutableCreate";
+    } else {
+        return "vclExecutableCreate";
+    }
+}
+
+NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const {
+    _logger.debug("compile start");
+
+    /// Check whether the linked VCL version is supported by the plugin
+    uint16_t usedMajor = VCL_COMPILER_VERSION_MAJOR, usedMinor = VCL_COMPILER_VERSION_MINOR;
+    if (static_cast<uint16_t>(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) {
+        usedMinor = std::min(static_cast<uint16_t>(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor);
+    } else if (static_cast<uint16_t>(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) {
+        usedMajor = _vclVersion.major;
+        usedMinor = _vclVersion.minor;
+    }
+    _logger.debug("the final VCL version used is %d.%d", usedMajor, usedMinor);
+
+    const auto maxOpsetVersion = _compilerProperties.supportedOpsets;
+    _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion);
+
+    _logger.debug("serialize IR");
+    ze_graph_compiler_version_info_t compilerVersion;
+    compilerVersion.major = _compilerProperties.version.major;
+    compilerVersion.minor = _compilerProperties.version.minor;
+
+    const FilteredConfig* filteredConfig = dynamic_cast<const FilteredConfig*>(&config);
+    if (filteredConfig == nullptr) {
+        OPENVINO_THROW("config is not FilteredConfig");
+    }
+    FilteredConfig updatedConfig = *filteredConfig;
+    bool useBaseModelSerializer = true;
+
+    // The VCL serializer is only supported for VCL version >= 7.5
+    if (usedMajor > 7 || (usedMajor == 7 && usedMinor >= 5)) {
+        useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig);
+    }
+
+    if (useBaseModelSerializer) {
+        _logger.debug("serialize IR with the base method, useBaseModelSerializer is %d", useBaseModelSerializer);
+    } else {
+        _logger.debug("serialize IR with the vcl method, useBaseModelSerializer is %d", useBaseModelSerializer);
+
+        // To resolve the issue with the default configuration, where the user passes no serializer config, the VCL
+        // serializer is used as the default in the plugin adapter. The serializer config then has to be passed to
+        // the compiler explicitly; otherwise a deserialization issue occurs within the compiler.
+ _logger.warning("Add serializer config"); + if (updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name())) { + updatedConfig.update({{ov::intel_npu::use_base_model_serializer.name(), "NO"}}); + } else if (updatedConfig.isAvailable(ov::intel_npu::model_serializer_version.name())) { + updatedConfig.update({{ov::intel_npu::model_serializer_version.name(), "NO_WEIGHTS_COPY"}}); + } + } + + _logger.error("manual set serializeIR(model, compilerVersion, maxOpsetVersion, true) in compile0"); + auto serializedIR = + driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer); + _logger.error("manual set serializeIR(model, compilerVersion, maxOpsetVersion, true) in compile1"); + + std::string buildFlags; + + _logger.debug("create build flags"); + buildFlags += driver_compiler_utils::serializeIOInfo(model, true); + _logger.error("manual set serializeIR(model, compilerVersion, maxOpsetVersion, true) in compile2"); + buildFlags += " "; + buildFlags += driver_compiler_utils::serializeConfig(updatedConfig, compilerVersion); + _logger.error("manual set serializeIR(model, compilerVersion, maxOpsetVersion, true) in compile3"); + _logger.debug("final build flags to compiler: %s", buildFlags.c_str()); + + vcl_executable_desc_t exeDesc = {serializedIR.second.get(), + serializedIR.first, + buildFlags.c_str(), + buildFlags.size()}; + _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); + if (usedMajor >= 7 && usedMinor >= 4) { + if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { + _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " + "%d.%d, \n but loaded VCL is %d.%d.\n" + "Will downgrade to form %s to use vclAllocatedExecutableCreate2", + VCL_COMPILER_VERSION_MAJOR, + VCL_COMPILER_VERSION_MINOR, + _vclVersion.major, + _vclVersion.minor, + supportVclCompiler(usedMajor, usedMinor).c_str()); + } + // support the lastest vcl api + // For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2 + _logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL"); + vcl_allocator_vector allocator; + uint8_t* blob = nullptr; + size_t size = 0; + _logger.error("manual set serializeIR(model, compilerVersion, maxOpsetVersion, true) in compile4"); + + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreate2", + vclAllocatedExecutableCreate2(_compilerHandle, exeDesc, &allocator, &blob, &size), + _logHandle);/// get issue form here + + if (size == 0 || blob == nullptr) { + OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null"); + } + + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + + _logger.debug("compile end, blob size:%d", allocator.m_vec.size()); + return NetworkDescription(std::move(allocator.m_vec), std::move(metadata)); + } else if (usedMajor >= 6 && usedMinor >= 1) { + if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { + _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " + "%d.%d, \n but loaded VCL is %d.%d.\n" + "Will downgrade to form %s to use vclAllocatedExecutableCreate2", + VCL_COMPILER_VERSION_MAJOR, + VCL_COMPILER_VERSION_MINOR, + _vclVersion.major, + _vclVersion.minor, + supportVclCompiler(usedMajor, usedMinor).c_str()); + } + // For older versions, we use vclAllocatedExecutableCreate + _logger.debug("Using vclAllocatedExecutableCreate for 6.1 < VCL < 7.4"); + + vcl_allocator_t allocator; + allocator.allocate = 
vcl_allocator_malloc::vcl_allocate; + allocator.deallocate = vcl_allocator_malloc::vcl_deallocate; + uint8_t* blob = nullptr; + size_t size = 0; + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreate", + vclAllocatedExecutableCreate(_compilerHandle, exeDesc, &allocator, &blob, &size), + _logHandle); + if (size == 0 || blob == nullptr) { + OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null"); + } + + std::vector compiledNetwork(blob, blob + size); + allocator.deallocate(blob); + + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + + _logger.debug("compile end, blob size:%d", compiledNetwork.size()); + return NetworkDescription(std::move(compiledNetwork), std::move(metadata)); + } else { + OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later", + _vclVersion.major, + _vclVersion.minor); + } +} + +std::vector> VCLCompilerImpl::compileWsOneShot( + const std::shared_ptr& model, + const Config& config) const { + _logger.debug("compileWsOneShot start"); + + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; + _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); + + _logger.debug("serialize IR"); + ze_graph_compiler_version_info_t compilerVersion; + compilerVersion.major = _compilerProperties.version.major; + compilerVersion.minor = _compilerProperties.version.minor; + + const FilteredConfig* filteredConfig = dynamic_cast(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + bool useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig); + auto serializedIR = + driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer); + + std::string buildFlags; + + _logger.debug("create build flags"); + buildFlags += driver_compiler_utils::serializeIOInfo(model, true); + buildFlags += " "; + buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); + _logger.debug("final build flags to compiler: %s", buildFlags.c_str()); + + vcl_executable_desc_t exeDesc = {serializedIR.second.get(), + serializedIR.first, + buildFlags.c_str(), + buildFlags.size()}; + _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); + + _logger.debug("Using vclAllocatedExecutableCreateWSOneShot"); + vcl_allocator_vector_2 allocator; + + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreateWSOneShot", + vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, &allocator), + _logHandle); + + if (allocator.m_vector.size() == 0) { + OPENVINO_THROW("Failed to create VCL executable, blobCount is zero"); + } + + std::vector> networkDescrs; + for (uint32_t i = 0; i < allocator.m_vector.size(); i++) { + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + networkDescrs.emplace_back( + std::make_shared(std::move(*allocator.m_vector[i]), std::move(metadata))); + } + return networkDescrs; +} + +NetworkDescription VCLCompilerImpl::compileWsIterative(const std::shared_ptr& model, + const Config& config, + size_t callNumber) const { + _logger.debug("compileWsIterative start"); + const FilteredConfig* filteredConfig = dynamic_cast(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + updatedConfig.update({{ov::intel_npu::ws_compile_call_number.name(), 
std::to_string(callNumber)}}); + return compile(model, config); +} + +intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, const Config& config) const { + _logger.debug("parse start"); + // VCL does not support parse, return empty metadata + return intel_npu::NetworkMetadata(); +} + +std::vector VCLCompilerImpl::process_profiling_output(const std::vector& profData, + const std::vector& network, + const intel_npu::Config& config) const { + _logger.debug("process_profiling_output start"); + + vcl_profiling_handle_t profilingHandle; + vcl_profiling_input_t profilingInput = {network.data(), network.size(), profData.data(), profData.size()}; + vcl_log_handle_t logHandle; + THROW_ON_FAIL_FOR_VCL("vclProfilingCreate", + vclProfilingCreate(&profilingInput, &profilingHandle, &logHandle), + nullptr); + + vcl_profiling_properties_t profProperties; + THROW_ON_FAIL_FOR_VCL("vclProfilingGetProperties", + vclProfilingGetProperties(profilingHandle, &profProperties), + logHandle); + + _logger.info("VCL Profiling Properties: Version: %d.%d", + profProperties.version.major, + profProperties.version.minor); + + // We only use layer level info + vcl_profiling_request_type_t request = VCL_PROFILING_LAYER_LEVEL; + + vcl_profiling_output_t profOutput; + profOutput.data = NULL; + THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", + vclGetDecodedProfilingBuffer(profilingHandle, request, &profOutput), + logHandle); + if (profOutput.data == NULL) { + OPENVINO_THROW("Failed to get VCL profiling output"); + } + + std::vector layerInfo(profOutput.size / sizeof(ze_profiling_layer_info)); + if (profOutput.size > 0) { + _logger.debug("VCL profiling output size: %d", profOutput.size); + std::memcpy(layerInfo.data(), profOutput.data, profOutput.size); + } + + THROW_ON_FAIL_FOR_VCL("vclProfilingDestroy", vclProfilingDestroy(profilingHandle), logHandle); + + // Return processed profiling info + return intel_npu::profiling::convertLayersToIeProfilingInfo(layerInfo); +} + +uint32_t VCLCompilerImpl::get_version() const { + return ZE_MAKE_VERSION(_compilerProperties.version.major, _compilerProperties.version.minor); +} + +ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, const Config& config) const { + _logger.debug("query start"); + + /// Check the linked vcl version whether supported in plugin + uint16_t usedMajor = VCL_COMPILER_VERSION_MAJOR, usedMinor = VCL_COMPILER_VERSION_MINOR; + if (static_cast(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) { + usedMinor = std::min(static_cast(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor); + } else if (static_cast(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) { + usedMajor = _vclVersion.major; + usedMinor = _vclVersion.minor; + } + _logger.debug("the finally used vcl version is %d.%d", usedMajor, usedMinor); + + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; + _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); + + _logger.debug("serialize IR"); + ze_graph_compiler_version_info_t compilerVersion; + compilerVersion.major = _compilerProperties.version.major; + compilerVersion.minor = _compilerProperties.version.minor; + const FilteredConfig* filteredConfig = dynamic_cast(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + bool useBaseModelSerializer = true; + // vcl serializer is only support for vcl version >= 7.5 + if (usedMajor >= 7 && usedMinor >= 5) { + 
+        useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig);
+    }
+
+    if (useBaseModelSerializer) {
+        _logger.debug("serialize IR with the base method (copy weights), useBaseModelSerializer is %d",
+                      useBaseModelSerializer);
+    } else {
+        _logger.debug("serialize IR with the vcl method (no weights copy), useBaseModelSerializer is %d",
+                      useBaseModelSerializer);
+
+        // To resolve the issue with the default configuration, where the user passes no serializer config, the VCL
+        // serializer is used as the default in the plugin adapter. The serializer config then has to be passed to
+        // the compiler explicitly; otherwise a deserialization issue occurs within the compiler.
+        _logger.warning("Add serializer config");
+        if (updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name())) {
+            updatedConfig.update({{ov::intel_npu::use_base_model_serializer.name(), "NO"}});
+        } else if (updatedConfig.isAvailable(ov::intel_npu::model_serializer_version.name())) {
+            updatedConfig.update({{ov::intel_npu::model_serializer_version.name(), "NO_WEIGHTS_COPY"}});
+        }
+    }
+    auto serializedIR =
+        driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer);
+
+    std::string buildFlags;
+    buildFlags += driver_compiler_utils::serializeConfig(updatedConfig, compilerVersion);
+    _logger.debug("queryImpl build flags : %s", buildFlags.c_str());
+
+    vcl_query_handle_t queryHandle;
+    vcl_query_desc_t queryDesc = {serializedIR.second.get(), serializedIR.first, buildFlags.c_str(), buildFlags.size()};
+    THROW_ON_FAIL_FOR_VCL("vclQueryNetworkCreate",
+                          vclQueryNetworkCreate(_compilerHandle, queryDesc, &queryHandle),
+                          _logHandle);
+
+    uint64_t size = 0;
+    THROW_ON_FAIL_FOR_VCL("vclQueryNetwork", vclQueryNetwork(queryHandle, nullptr, &size), _logHandle);
+
+    std::vector<char> supportedLayers(size);
+    THROW_ON_FAIL_FOR_VCL("vclQueryNetwork",
+                          vclQueryNetwork(queryHandle, reinterpret_cast<uint8_t*>(supportedLayers.data()), &size),
+                          _logHandle);
+
+    THROW_ON_FAIL_FOR_VCL("vclQueryNetworkDestroy", vclQueryNetworkDestroy(queryHandle), _logHandle);
+
+    const std::string deviceName = "NPU";
+    ov::SupportedOpsMap result;
+    const auto parsedSupportedLayers = parseQueryResult(supportedLayers);
+    for (auto&& layerName : parsedSupportedLayers) {
+        result.emplace(layerName, deviceName);
+    }
+    _logger.info("For the given model, there are %d supported layers", parsedSupportedLayers.size());
+
+    return result;
+}
+
+bool VCLCompilerImpl::get_supported_options(std::vector<char>& options) const {
+    _logger.debug("get_supported_options start");
+    // 1. get the size of the compiler-supported options list
+    uint64_t str_size = 0;
+    try {
+        THROW_ON_FAIL_FOR_VCL("vclGetCompilerSupportedOptions",
+                              vclGetCompilerSupportedOptions(_compilerHandle, nullptr, &str_size),
+                              _logHandle);
+
+        if (str_size > 0) {
+            _logger.debug("obtain list");
+            // 2. allocate a buffer for it
+            options.resize(str_size);
+            // 3. populate the char list
+            THROW_ON_FAIL_FOR_VCL("vclGetCompilerSupportedOptions",
+                                  vclGetCompilerSupportedOptions(_compilerHandle, options.data(), &str_size),
+                                  _logHandle);
+
+            _logger.debug("Option list size %d, got option list", str_size);
+            return true;
+        } else {
+            _logger.debug("Option list size 0 - skipping!");
+        }
+    } catch (const std::exception& e) {
+        // The API is only supported in newer versions; just log here
+        _logger.debug("Exception in get_supported_options: %s", e.what());
+    }
+    _logger.debug("get_supported_options end, no options found");
+    return false;
+}
+
+bool VCLCompilerImpl::is_option_supported(const std::string& option) const {
+    try {
+        const char* optname_ch = option.c_str();
+        _logger.debug("is_option_supported start for option: %s", optname_ch);
+        THROW_ON_FAIL_FOR_VCL("vclGetCompilerIsOptionSupported",
+                              vclGetCompilerIsOptionSupported(_compilerHandle, optname_ch, nullptr),
+                              _logHandle);
+        return true;
+    } catch (const std::exception& e) {
+        // The API is only supported in newer versions; just log here
+        _logger.debug("Exception in is_option_supported: %s", e.what());
+    }
+    _logger.debug("option: %s is not supported", option.c_str());
+    return false;
+}
+
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp
index 15d0091e73bd4f..f8d91edb8d62bb 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp
@@ -13,9 +13,9 @@
 #include "intel_npu/utils/logger/logger.hpp"
 #include "mem_usage.hpp"
 #include "openvino/core/model.hpp"
-#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
 #include "vcl_serializer.hpp"
 #include "weightless_graph.hpp"
+#include "weightless_utils.hpp"
 
 namespace {
 
@@ -26,38 +26,6 @@ bool isInitMetadata(const intel_npu::NetworkMetadata& networkMetadata) {
     return networkMetadata.inputs.at(0).isInitInputWeights;
 }
 
-/**
- * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon
- * serialization.
- * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information
- * regarding the offset of the weights within the binary file, as well as the original size and precision. This
- * information is required within the "weights separation" flow, therefore this function is here to store it.
- * @note Not calling this function in the weights separation flow would lead to this information being lost upon
- * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent
- * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be
- * misinformed and lookups of weights offsets could fail.
- *
- * @param model Both source and target.
- */ -void storeWeightlessCacheAttribute(const std::shared_ptr& model) { - size_t constantId = 0; - for (auto&& node : model->get_ordered_ops()) { - if (ov::is_type(node)) { - ov::RTMap& runtimeInfoMap = node->get_rt_info(); - const auto& weightlessCacheAttrIt = - runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static()); - - const std::string constantIdString = std::to_string(constantId++); - if (weightlessCacheAttrIt != runtimeInfoMap.end()) { - auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as(); - model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString); - } - } - } -} - /** * @brief On-going migration from "use_base_model_serializer" to "model_serializer_version". So we have to check both, * depending on which one is supported by the compiler. diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index c0c0b3ddff3d15..62466ab3aae8af 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -7,6 +7,7 @@ #include #include +#include "compiler_impl.hpp" #include "graph.hpp" #include "intel_npu/common/device_helpers.hpp" #include "intel_npu/common/itt.hpp" @@ -22,6 +23,7 @@ #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" #include "weightless_graph.hpp" +#include "weightless_utils.hpp" namespace { @@ -61,6 +63,13 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { return ov::make_tensor(impl); } +bool isInitMetadata(const intel_npu::NetworkMetadata& networkMetadata) { + if (networkMetadata.inputs.size() == 0) { + return false; + } + return networkMetadata.inputs.at(0).isInitInputWeights; +} + } // namespace namespace intel_npu { @@ -70,10 +79,32 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptrgetLinkedLibrary(); + _logger.info("PLUGIN VCL compiler is loading"); + if (vclCompilerPtr && vclLib) { + _compiler = ov::SoPtr(vclCompilerPtr, vclLib); + } else { + throw std::runtime_error("VCL compiler or library is nullptr"); + } + } catch (const std::exception& vcl_exception) { + _logger.warning("VCL compiler load failed: %s. Trying to load MLIR compiler...", vcl_exception.what()); + std::string baseName = "npu_mlir_compiler"; + auto libPath = ov::util::make_plugin_library_name(ov::util::get_ov_lib_path(), baseName + OV_BUILD_POSTFIX); + try { + _compiler = load_compiler(libPath); + if (!_compiler) { + throw std::runtime_error("MLIR compiler load returned nullptr"); + } else { + _logger.info("MLIR compiler loaded successfully. PLUGIN compiler will be used."); + } + } catch (const std::exception& mlir_exception) { + _logger.error("MLIR compiler load failed: %s", mlir_exception.what()); + throw std::runtime_error("Both VCL and MLIR compiler load failed, aborting."); + } + } if (_zeroInitStruct == nullptr) { return; @@ -108,10 +139,17 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); + networkMeta.name = model->get_friendly_name(); + } catch (const std::exception& ex) { + _logger.info("Failed to use the level zero graph handle: %s. 
Inference requests for this model are not " + "allowed. Only exports are available", + ex.what()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } + } else { + _logger.warning("No driver is found, zeGraphExt is nullptr, so metadata is empty. Only exports are available"); } return std::make_shared( @@ -129,23 +167,11 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr> initNetworkDescriptions; - std::shared_ptr mainNetworkDescription; + storeWeightlessCacheAttribute(model); _logger.debug("compile start"); - const auto starts_with = [](const std::string& str, const std::string& prefix) { - return str.substr(0, prefix.size()) == prefix; - }; - const auto isInit = [&](std::string name) { - return starts_with(name, "init"); - }; - - const auto isMain = [&](std::string name) { - return starts_with(name, "main"); - }; - - Config localConfig = config; + FilteredConfig localConfig = config; if (!localConfig.has()) { localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}}); } @@ -157,37 +183,89 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr= ov::log::Level::INFO) { compile_model_mem_start = get_peak_memory_usage(); } + + std::vector initGraphDescriptors; + std::vector tensorsInits; + std::vector initNetworkMetadata; + std::vector> initNetworkDescriptions; + + ov::Tensor tensorMain; + GraphDescriptor mainGraphDesc; + NetworkMetadata mainNetworkMetadata; + std::shared_ptr mainNetworkDescription; + switch (localConfig.get()) { case ov::intel_npu::WSVersion::ONE_SHOT: { std::vector> initMainNetworkDescriptions = _compiler->compileWsOneShot(model, localConfig); -#if 0 // TODO: it is not clear whether we should change the name - OPENVINO_ASSERT(isMain(initMainNetworkDescriptions.back()->metadata.name), - "Unexpected network name for main:", - initMainNetworkDescriptions.back()->metadata.name); -#endif - mainNetworkDescription = initMainNetworkDescriptions.back(); initMainNetworkDescriptions.pop_back(); + OPENVINO_ASSERT(initMainNetworkDescriptions.size() > 0, + "The initMainNetworkDescriptions after getting mainNetworkDescription must not be empty!"); initNetworkDescriptions = std::move(initMainNetworkDescriptions); + + tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork); + if (_zeGraphExt) { + // Depending on the config, we may get an error when trying to + // get the graph handle from the compiled network + try { + mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size()); + mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc); + } catch (...) { + _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " + "allowed. Only exports are available"); + } + } + + initGraphDescriptors.reserve(initNetworkDescriptions.size()); + tensorsInits.reserve(initNetworkDescriptions.size()); + initNetworkMetadata.reserve(initNetworkDescriptions.size()); + for (auto& networkDesc : initNetworkDescriptions) { + ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork); + GraphDescriptor initGraphDesc; + NetworkMetadata initNetworkMeta; + if (_zeGraphExt) { + try { + initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); + initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc); + } catch (...) 
@@ -129,23 +167,11 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr
-    std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions;
-    std::shared_ptr<NetworkDescription> mainNetworkDescription;
+    storeWeightlessCacheAttribute(model);

     _logger.debug("compile start");
-    const auto starts_with = [](const std::string& str, const std::string& prefix) {
-        return str.substr(0, prefix.size()) == prefix;
-    };
-    const auto isInit = [&](std::string name) {
-        return starts_with(name, "init");
-    };
-
-    const auto isMain = [&](std::string name) {
-        return starts_with(name, "main");
-    };
-
-    Config localConfig = config;
+    FilteredConfig localConfig = config;
     if (!localConfig.has()) {
         localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}});
     }
@@ -157,37 +183,89 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr
     if (localConfig.get() >= ov::log::Level::INFO) {
         compile_model_mem_start = get_peak_memory_usage();
     }
+
+    std::vector<GraphDescriptor> initGraphDescriptors;
+    std::vector<ov::Tensor> tensorsInits;
+    std::vector<NetworkMetadata> initNetworkMetadata;
+    std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions;
+
+    ov::Tensor tensorMain;
+    GraphDescriptor mainGraphDesc;
+    NetworkMetadata mainNetworkMetadata;
+    std::shared_ptr<NetworkDescription> mainNetworkDescription;
+
     switch (localConfig.get()) {
     case ov::intel_npu::WSVersion::ONE_SHOT: {
         std::vector<std::shared_ptr<NetworkDescription>> initMainNetworkDescriptions =
             _compiler->compileWsOneShot(model, localConfig);
-#if 0  // TODO: it is not clear whether we should change the name
-        OPENVINO_ASSERT(isMain(initMainNetworkDescriptions.back()->metadata.name),
-                        "Unexpected network name for main:",
-                        initMainNetworkDescriptions.back()->metadata.name);
-#endif
-
         mainNetworkDescription = initMainNetworkDescriptions.back();
         initMainNetworkDescriptions.pop_back();
+        OPENVINO_ASSERT(initMainNetworkDescriptions.size() > 0,
+                        "initMainNetworkDescriptions must not be empty after the main network description has been "
+                        "extracted!");
         initNetworkDescriptions = std::move(initMainNetworkDescriptions);
+
+        tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork);
+        if (_zeGraphExt) {
+            // Depending on the config, we may get an error when trying to
+            // get the graph handle from the compiled network
+            try {
+                mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size());
+                mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc);
+            } catch (...) {
+                _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not "
+                             "allowed. Only exports are available");
+            }
+        }
+
+        initGraphDescriptors.reserve(initNetworkDescriptions.size());
+        tensorsInits.reserve(initNetworkDescriptions.size());
+        initNetworkMetadata.reserve(initNetworkDescriptions.size());
+        for (auto& networkDesc : initNetworkDescriptions) {
+            ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork);
+            GraphDescriptor initGraphDesc;
+            NetworkMetadata initNetworkMeta;
+            if (_zeGraphExt) {
+                try {
+                    initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
+                    initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc);
+                } catch (...) {
+                    // Best effort: leave the descriptor and metadata empty; only exports remain available.
+                }
+            }
+
+            initGraphDescriptors.push_back(initGraphDesc);
+            tensorsInits.push_back(std::move(tensor));
+            initNetworkMetadata.push_back(std::move(initNetworkMeta));
+        }
     } break;
     case ov::intel_npu::WSVersion::ITERATIVE: {
+        OPENVINO_ASSERT(_zeGraphExt,
+                        "The \"iterative\" implementation of the weights separation feature requires a Level Zero "
+                        "graph handle to compile a model.");
+
+        // The state of the model needs to be reset every iteration
         const std::shared_ptr<ov::Model> originalModel = model->clone();
         std::shared_ptr<ov::Model> targetModel = model;
         size_t i = 0;

         while (auto networkDescription = std::make_shared<NetworkDescription>(
                    _compiler->compileWsIterative(targetModel, localConfig, i++))) {
-            if (isInit(networkDescription->metadata.name)) {
-                initNetworkDescriptions.push_back(networkDescription);
+            ov::Tensor tensor = make_tensor_from_vector(networkDescription->compiledNetwork);
+            GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
+            NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc);
+
+            if (isInitMetadata(networkDescription->metadata)) {
                 targetModel = originalModel->clone();
+                initGraphDescriptors.push_back(graphDesc);
+                tensorsInits.push_back(std::move(tensor));
+                initNetworkMetadata.push_back(std::move(networkMetadata));
+                initNetworkDescriptions.push_back(networkDescription);
                 continue;
             }

-            OPENVINO_ASSERT(isMain(networkDescription->metadata.name),
-                            "Unexpected network name: ",
-                            networkDescription->metadata.name);
+            tensorMain = std::move(tensor);
+            mainGraphDesc = graphDesc;
+            mainNetworkMetadata = std::move(networkMetadata);
             mainNetworkDescription = std::move(networkDescription);
             break;
         }
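In the ONE_SHOT branch above, compileWsOneShot() returns the init schedules followed by the main schedule in a single vector, and the code peels the main entry off the back before asserting that at least one init remains. A standalone sketch of that ordering contract — Schedule is a placeholder type, not the plugin's NetworkDescription:

// Sketch: split a batch of compiled schedules into inits + main, assuming the
// convention above that the main schedule is the vector's last element.
#include <cassert>
#include <string>
#include <utility>
#include <vector>

struct Schedule {
    std::string name;  // illustrative payload only
};

std::pair<std::vector<Schedule>, Schedule> splitInitsAndMain(std::vector<Schedule> schedules) {
    assert(!schedules.empty() && "the compiler must return at least the main schedule");
    Schedule main = std::move(schedules.back());
    schedules.pop_back();
    // What remains are the init schedules; the patch above additionally
    // asserts that at least one init is left after the pop.
    return {std::move(schedules), std::move(main)};
}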
@@ -208,44 +286,6 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr
-    ov::Tensor tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork);
-    GraphDescriptor mainGraphDesc;
-    NetworkMetadata mainNetworkMetadata;
-    if (_zeGraphExt) {
-        // Depending on the config, we may get an error when trying to
-        // get the graph handle from the compiled network
-        try {
-            mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size());
-            mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc);
-        } catch (...) {
-            _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not "
-                         "allowed. Only exports are available");
-        }
-    }
-
-    std::vector initGraphDescriptors;
-    std::vector tensorsInits;
-    std::vector initNetworkMetadata;
-    initGraphDescriptors.reserve(initNetworkDescriptions.size());
-    tensorsInits.reserve(initNetworkDescriptions.size());
-    initNetworkMetadata.reserve(initNetworkDescriptions.size());
-    for (auto& networkDesc : initNetworkDescriptions) {
-        ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork);
-        GraphDescriptor initGraphDesc;
-        NetworkMetadata initNetworkMeta;
-        if (_zeGraphExt) {
-            try {
-                initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
-                initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc);
-            } catch (...) {
-            }
-        }
-
-        initGraphDescriptors.push_back(initGraphDesc);
-        tensorsInits.push_back(std::move(tensor));
-        initNetworkMetadata.push_back(std::move(initNetworkMeta));
-    }
-
     return std::make_shared(
         _zeGraphExt,
         _zeroInitStruct,
@@ -276,6 +316,13 @@ std::shared_ptr PluginCompilerAdapter::parse(
         mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size());
         mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc);
         _logger.debug("main schedule parse end");
+        if (model) {
+            mainNetworkMetadata.name = model.value()->get_friendly_name();
+        } else {
+            _logger.warning("No model was provided; networkMeta.name stays empty in parse!");
+        }
+    } else {
+        _logger.warning("No zeGraphExt; the metadata coming from the VCL compiler is empty.");
     }

     // exporting the blob when we get it from cache or ov::hint::compiled_blob property
@@ -338,15 +385,55 @@ uint32_t PluginCompilerAdapter::get_version() const {
 }

 std::vector<std::string> PluginCompilerAdapter::get_supported_options() const {
-    // PluginCompiler has all the same options as plugin
-    // Returing empty string to let the plugin fallback to legacy registration
-    return {};
+    // For VCL, we can return the supported options from the compiler
+    VCLCompilerImpl* vclCompiler = dynamic_cast<VCLCompilerImpl*>(_compiler.operator->());
+    if (vclCompiler == nullptr) {
+        // If _compiler cannot be cast to VCLCompilerImpl, it should be using the MLIR library.
+        // PluginCompiler has all the same options as the plugin.
+        // Returning an empty list to let the plugin fall back to legacy registration.
+        _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning empty supported options.");
+        return {};
+    }
+    std::vector<char> options;
+    if (!vclCompiler->get_supported_options(options)) {
+        _logger.warning("VCLCompilerImpl get_supported_options failed. Returning empty supported options.");
+        return {};
+    }
+
+    if (options.empty()) {
+        _logger.warning("get_supported_options returned empty options.");
+        return {};
+    }
+
+    std::string compilerOptionsStr(options.data(), options.size());
+    _logger.debug("VCLCompilerImpl returned supported_options: %s", compilerOptionsStr.c_str());
+    // Split the whitespace-separated option names into a vector
+    std::istringstream suppstream(compilerOptionsStr);
+    std::vector<std::string> compilerOpts = {};
+    std::string option;
+    while (suppstream >> option) {
+        compilerOpts.push_back(option);
+    }
+    return compilerOpts;
 }
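The option list arrives from the compiler as one whitespace-separated character buffer; the code above splits it with istringstream extraction. The same mechanism in isolation, with illustrative option names:

// Sketch: turn a whitespace-separated option buffer, as returned by the
// compiler above, into a vector of option names.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> splitOptions(const std::string& buffer) {
    std::istringstream stream(buffer);
    std::vector<std::string> options;
    std::string option;
    while (stream >> option) {  // operator>> skips any run of whitespace
        options.push_back(option);
    }
    return options;
}

int main() {
    // Illustrative names only; the real list comes from the compiler.
    for (const auto& opt : splitOptions("NPU_PLATFORM NPU_STEPPING LOG_LEVEL")) {
        std::cout << opt << '\n';
    }
}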
 bool PluginCompilerAdapter::is_option_supported(std::string optname) const {
-    // This functions has no utility in PluginCompiler
-    // returning false for any request to avoid the option of spaming the plugin
-    return false;
+    VCLCompilerImpl* vclCompiler = dynamic_cast<VCLCompilerImpl*>(_compiler.operator->());
+    if (vclCompiler == nullptr) {
+        // If _compiler cannot be cast to VCLCompilerImpl, it should be using the MLIR library.
+        // This function has no utility in PluginCompiler;
+        // returning false for any request to avoid spamming the plugin.
+        _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning false for the check.");
+        return false;
+    }
+
+    if (vclCompiler->is_option_supported(optname)) {
+        _logger.debug("Option %s is supported by VCLCompilerImpl", optname.c_str());
+        return true;
+    } else {
+        _logger.debug("Option %s is not supported by VCLCompilerImpl", optname.c_str());
+        return false;
+    }
 }

 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp
index 1d8549f57f24b8..ec74095e410105 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp
@@ -275,6 +275,10 @@ std::pair>> WeightlessGraph::expor
 }

 void WeightlessGraph::initialize(const Config& config) {
+    if (!_zeroInitStruct) {
+        _wgLogger.warning("_zeroInitStruct is nullptr!");
+        return;
+    }
     // Simplified version for init schedules
     const size_t numberOfInits = _initsGraphDesc.size();
     _initsCommandQueueOrdinals.resize(numberOfInits);
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp
new file mode 100644
index 00000000000000..987ff372ea8532
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp
@@ -0,0 +1,42 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "weightless_utils.hpp"
+
+#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
+#include "openvino/op/constant.hpp"
+
+namespace intel_npu {
+/**
+ * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon
+ * serialization.
+ * @details Constant nodes (weights) may contain as metadata the "WeightlessCacheAttribute", i.e. information
+ * regarding the offset of the weights within the binary file, as well as the original size and precision. This
+ * information is required within the "weights separation" flow, therefore this function is here to store it.
+ * @note Not calling this function in the weights separation flow would lead to this information being lost upon
+ * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent
+ * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be
+ * misinformed and lookups of weights offsets could fail.
+ *
+ * @param model Both source and target.
+ */
+void storeWeightlessCacheAttribute(const std::shared_ptr<ov::Model>& model) {
+    size_t constantId = 0;
+    for (auto&& node : model->get_ordered_ops()) {
+        if (ov::is_type<ov::op::v0::Constant>(node)) {
+            ov::RTMap& runtimeInfoMap = node->get_rt_info();
+            const auto& weightlessCacheAttrIt =
+                runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static());
+
+            const std::string constantIdString = std::to_string(constantId++);
+            if (weightlessCacheAttrIt != runtimeInfoMap.end()) {
+                auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as<ov::WeightlessCacheAttribute>();
+                model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString);
+                model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString);
+                model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString);
+            }
+        }
+    }
+}
+}  // namespace intel_npu
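A caller-side sketch of how is_option_supported() above could gate what is forwarded to the compiler versus kept on the plugin's legacy path — the adapter type, wiring, and option names here are illustrative, not the plugin's actual call sites:

// Sketch: forward a config entry only when the compiler reports support,
// mirroring the intent of is_option_supported() above. All names assumed.
#include <map>
#include <string>

struct CompilerAdapter {
    bool is_option_supported(std::string optname) const {
        return optname == "NPU_STEPPING";  // stand-in for the VCL query
    }
};

void forwardIfSupported(const CompilerAdapter& adapter,
                        std::map<std::string, std::string>& compilerConfig,
                        const std::string& name,
                        const std::string& value) {
    if (adapter.is_option_supported(name)) {
        compilerConfig[name] = value;  // compiler understands it: pass through
    }
    // otherwise: keep it plugin-side (legacy registration path)
}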
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
index 52e8e4a3455540..50b042f46f9b26 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
@@ -252,7 +252,7 @@ void ZeGraphExtWrappers::initializeGraphThroughCommandList(ze_graph_handle_t gra
 }

 // Parse the result string of query from format <name_0><name_1><name_2> to unordered_set of string
-static std::unordered_set<std::string> parseQueryResult(std::vector<char>& data) {
+std::unordered_set<std::string> parseQueryResult(std::vector<char>& data) {
     std::string dataString(data.begin(), data.end());
     std::unordered_set<std::string> result;
     size_t i = 0, start = 0;
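parseQueryResult() consumes the "<name_0><name_1>..." format mentioned in the comment above. A standalone rewrite of that parsing for illustration — not the plugin's exact implementation, which scans the buffer by index:

// Sketch: parse a "<name_0><name_1>..." query result into a set of names.
#include <iostream>
#include <string>
#include <unordered_set>

std::unordered_set<std::string> parseAngleBracketList(const std::string& data) {
    std::unordered_set<std::string> result;
    size_t pos = 0;
    while ((pos = data.find('<', pos)) != std::string::npos) {
        const size_t end = data.find('>', pos);
        if (end == std::string::npos) {
            break;  // malformed tail: ignore the unterminated entry
        }
        result.insert(data.substr(pos + 1, end - pos - 1));
        pos = end + 1;
    }
    return result;
}

int main() {
    for (const auto& name : parseAngleBracketList("<Add><MatMul><Softmax>")) {
        std::cout << name << '\n';
    }
}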
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 97c6a7e09ceeac..a9345390a4823c 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -670,20 +670,44 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config");
     auto localConfig = fork_local_config(localPropertiesMap, compiler);

-    const auto set_cache_dir = localConfig.get();
-    if (!set_cache_dir.empty()) {
-        const auto compilerType = localConfig.get();
-        if (compilerType == ov::intel_npu::CompilerType::PLUGIN) {
-            OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type");
-        }
-    }
+    // const auto set_cache_dir = localConfig.get();
+    // if (!set_cache_dir.empty()) {
+    //     const auto compilerType = localConfig.get();
+    //     if (compilerType == ov::intel_npu::CompilerType::PLUGIN) {
+    //         OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type");
+    //     }
+    // }

     const auto platform =
         utils::getCompilationPlatform(localConfig.get(),
                                       localConfig.get(),
                                       _backend == nullptr ? std::vector() : _backend->getDeviceNames());
     auto device = _backend == nullptr ? nullptr : _backend->getDevice(localConfig.get());
-    localConfig.update({{ov::intel_npu::platform.name(), platform}});
+    std::cout << " ==plugin check =0===fin=> the get platform is " << platform << std::endl;
+    std::cout << " ==plugin check =1====> localConfig.get() is " << localConfig.get() << std::endl;
+    std::cout << " ==plugin check =2====> localConfig.get() is " << localConfig.get() << std::endl;
+    auto name_vector = _backend == nullptr ? std::vector() : _backend->getDeviceNames();
+    std::cout << " ===plugin check=4====> name_vector's size is " << name_vector.size() << std::endl;
+    if (name_vector.size() == 0) {
+        std::cout << " =====> _backend->getDeviceNames() size is zero " << std::endl;
+    } else {
+        for (size_t i = 0; i < name_vector.size(); ++i) {
+            std::cout << " =====> _backend->getDeviceNames() name_vector[" << i << "] is " << name_vector[i]
+                      << std::endl;
+        }
+    }
+
+    std::cout << " =====> updating the platform manually: " << platform << std::endl;
+    const auto compilerType = localConfig.get();
+    if (compilerType == ov::intel_npu::CompilerType::PLUGIN) {
+        std::cout << " =====> the compilerType is PLUGIN " << std::endl;
+        auto deviceBeforeCompilerCreate = _backend == nullptr ? nullptr : _backend->getDevice();
+        std::string deviceName = deviceBeforeCompilerCreate != nullptr ? deviceBeforeCompilerCreate->getName() : "";
+        std::cout << " =====> the deviceName before compiler create is " << deviceName << "=====" << std::endl;
+        localConfig.update({{ov::intel_npu::platform.name(), deviceName}});
+    } else if (compilerType == ov::intel_npu::CompilerType::DRIVER) {
+        localConfig.update({{ov::intel_npu::platform.name(), platform}});
+    }

     auto updateBatchMode = [&](ov::intel_npu::BatchMode mode) {
         std::stringstream strStream;
@@ -790,11 +814,13 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     try {
         _logger.debug("performing compile");
-
+        std::cout << " ======> before compile, print the config: " << localConfig.toString() << std::endl;
         // Determine which model to use
-        auto modelToCompile = successfullyDebatched ? batchedModel : model->clone();
+        auto modelToCompile = successfullyDebatched ? batchedModel : model->clone();
+        std::cout << " ==========plugin-check successfullyDebatched is " << successfullyDebatched << std::endl;

         if (successfullyDebatched && localConfig.get() == ov::hint::PerformanceMode::LATENCY) {
+            std::cout << " ====line 805======graph = compileWithConfig(modelToCompile, modifiedConfig); " << std::endl;
             _logger.info("Override performance mode to THROUGHPUT for compilation");
             auto modifiedConfig = localConfig;  // Copy only when needed
@@ -804,10 +830,11 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
             graph = compileWithConfig(modelToCompile, modifiedConfig);
         } else {
+            std::cout << " ====line 809======graph = compileWithConfig(modelToCompile, localConfig); " << std::endl;
             graph = compileWithConfig(modelToCompile, localConfig);  // No copy
         }
     } catch (const std::exception& ex) {
-        OPENVINO_THROW(ex.what());
+        OPENVINO_THROW(ex.what());  // rethrow the compiler exception with its original message
     } catch (...) {
         _logger.error("Unexpected exception");
         OPENVINO_THROW("NPU plugin: got an unexpected exception from compiler");
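The try block above copies localConfig only on the branch that mutates it for the THROUGHPUT override, leaving the common path copy-free. The same copy-only-when-needed pattern in isolation — Config and compileWithConfig here are stand-ins, not the plugin's types:

// Sketch: copy the configuration only on the branch that needs to mutate it,
// as the compile path above does for the performance-mode override.
#include <map>
#include <string>

using Config = std::map<std::string, std::string>;

int compileWithConfig(const Config& config) {
    return static_cast<int>(config.size());  // placeholder for real compilation
}

int compileMaybeOverridden(const Config& localConfig, bool overridePerfMode) {
    if (overridePerfMode) {
        Config modifiedConfig = localConfig;  // copy only when needed
        modifiedConfig["PERFORMANCE_HINT"] = "THROUGHPUT";
        return compileWithConfig(modifiedConfig);
    }
    return compileWithConfig(localConfig);  // no copy on the common path
}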