diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
index 51403d304718bd..cadc4f6aa8d270 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
@@ -842,7 +842,7 @@ struct COMPILER_TYPE final : OptionBase
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h
new file mode 100644
index 00000000000000..409798e64b4a1b
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h
@@ -0,0 +1,342 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef VPUX_COMPILER_L0_H
+#define VPUX_COMPILER_L0_H
+
+#if defined(__cplusplus)
+#    include <cstddef>
+#    include <cstdint>
+#else
+#    include <stddef.h>
+#    include <stdint.h>
+#endif
+
+#if defined(__cplusplus)
+#    pragma once
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define VCL_COMPILER_VERSION_MAJOR 7
+#define VCL_COMPILER_VERSION_MINOR 6
+#define VCL_PROFILING_VERSION_MAJOR 2
+#define VCL_PROFILING_VERSION_MINOR 0
+
+#ifndef DEPRECATED
+#    define DEPRECATED  // for documentation only
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+#ifndef VCL_APICALL
+#    if defined(_WIN32)
+/// @brief Calling convention for all API functions
+#        define VCL_APICALL __cdecl
+#    else
+#        define VCL_APICALL
+#    endif  // defined(_WIN32)
+#endif      // VCL_APICALL
+
+///////////////////////////////////////////////////////////////////////////////
+#ifndef VCL_APIEXPORT
+#    if defined(_WIN32)
+/// @brief Windows-specific dllexport storage-class attribute
+#        define VCL_APIEXPORT __declspec(dllexport)
+#    else
+#        define VCL_APIEXPORT
+#    endif  // defined(_WIN32)
+#endif      // VCL_APIEXPORT
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Compiler handle
+typedef struct __vcl_compiler_handle_t* vcl_compiler_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Executable handle
+typedef struct __vcl_executable_handle_t* vcl_executable_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Profiling handle
+typedef struct __vcl_profiling_handle_t* vcl_profiling_handle_t;
+
+///////////////////////////////////////////////////////////////////////////////
+
+/// @brief QueryNetwork handle
+typedef struct __vcl_query_handle_t* vcl_query_handle_t;
+
+/// @brief Error log handle
+typedef struct __vcl_log_handle_t* vcl_log_handle_t;
+
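+///////////////////////////////////////////////////////////////////////////////
+/// Typical handle lifecycle (an illustrative sketch only; descriptor setup and
+/// error handling are elided, and all variable names are placeholders):
+///
+///   vcl_compiler_handle_t compiler = NULL;
+///   vcl_log_handle_t log = NULL;
+///   vclCompilerCreate(&compilerDesc, &deviceDesc, &compiler, &log);
+///   vcl_executable_handle_t exe = NULL;
+///   vclExecutableCreate(compiler, exeDesc, &exe);
+///   uint64_t blobSize = 0;
+///   vclExecutableGetSerializableBlob(exe, NULL, &blobSize);  /* query size */
+///   vclExecutableGetSerializableBlob(exe, blob, &blobSize);  /* copy blob  */
+///   vclExecutableDestroy(exe);
+///   vclCompilerDestroy(compiler);
+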
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines the type of requested data.
+/// Must be in sync with \b _ze_graph_profiling_type_t
+typedef enum __vcl_profiling_request_type_t {
+    VCL_PROFILING_LAYER_LEVEL = 0x1,
+    VCL_PROFILING_TASK_LEVEL = 0x2,
+    VCL_PROFILING_RAW = 0x3,
+
+    VCL_PROFILING_FORCE_UINT32 = 0x7fffffff
+} vcl_profiling_request_type_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines version info for the VPUXCompilerL0 API
+typedef struct __vcl_version_info_t {
+    uint16_t major;
+    uint16_t minor;
+
+} vcl_version_info_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines return/error codes
+typedef enum __vcl_result_t {
+    VCL_RESULT_SUCCESS = 0,                             ///< [Core] success
+    VCL_RESULT_ERROR_OUT_OF_MEMORY = 0x70000002,        ///< [Core] insufficient memory to satisfy call
+    VCL_RESULT_ERROR_UNSUPPORTED_FEATURE = 0x78000003,  ///< [Validation] generic error code for unsupported features
+    VCL_RESULT_ERROR_INVALID_ARGUMENT = 0x78000004,     ///< [Validation] generic error code for invalid arguments
+    VCL_RESULT_ERROR_INVALID_NULL_HANDLE = 0x78000005,  ///< [Validation] handle argument is not valid
+    VCL_RESULT_ERROR_IO = 0x78000006,                   ///< [Core] IO error
+    VCL_RESULT_ERROR_INVALID_IR = 0x78000007,           ///< [Validation] the member of modelIR is not valid
+    VCL_RESULT_ERROR_UNKNOWN = 0x7ffffffe,              ///< [Core] unknown or internal error
+
+} vcl_result_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines compiler properties
+typedef struct __vcl_compiler_properties_t {
+    const char* id;
+    vcl_version_info_t version;
+    uint32_t supportedOpsets;
+
+} vcl_compiler_properties_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines profiling properties
+typedef struct __vcl_profiling_properties_t {
+    vcl_version_info_t version;  ///< Profiling module version
+
+} vcl_profiling_properties_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines the debug level for VCL
+typedef enum __vcl_log_level_t {
+    VCL_LOG_NONE = 0,     ///< Log is disabled
+    VCL_LOG_ERROR = 1,    ///< Events which are not expected, containing the probable reason
+    VCL_LOG_WARNING = 2,  ///< Events which are unusual
+    VCL_LOG_INFO = 3,     ///< Short messages about ongoing activity
+    VCL_LOG_DEBUG = 4,    ///< Messages with particular data and explanations
+    VCL_LOG_TRACE = 5,    ///< Messages with detailed information about execution
+
+} vcl_log_level_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines the device description to be passed during creation
+///
+/// For online compilation, revision always holds a valid value; for offline compilation it is -1u.
+/// 1. In offline mode the driver does not know the stepping and provides -1 (unknown) to VCL.
+/// 2. In VCL:
+///    If the driver provides a valid revision, that value becomes the default value for NPU_STEPPING.
+///    If the driver provides -1u as the value for revision, VCL will not set NPU_STEPPING.
+/// 3. If NPU_STEPPING is set by the user via config, VCL uses the user config instead of the default value.
+/// 4. If NPU_STEPPING is not passed to the compiler, the compiler chooses a default stepping.
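+///
+/// Illustrative example (hypothetical values): an online driver that detected
+/// stepping 2 would pass revision = 2, so NPU_STEPPING defaults to 2; an
+/// offline caller passes revision = (uint16_t)-1u, so NPU_STEPPING stays unset
+/// unless the user provides it via config.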
+typedef struct __vcl_device_desc_t { + uint64_t size; /// Size of vcl_device_desc_t + uint32_t deviceID; /// The lower 16 bits equal to PCI Device ID, the upper 16 bits are zero + uint16_t revision; /// NPU Revision Identifier, -1u as invalid value + uint32_t tileCount; /// Value equals maximum number of slices +} vcl_device_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines compiler desc to be passed during creation +typedef struct __vcl_compiler_desc_t { + vcl_version_info_t version; /// The host vcl version + vcl_log_level_t debugLevel; /// Debug level for VCL +} vcl_compiler_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines executable description to be passed during executable +/// creation +/// +/// Format of modelIRData (defined in L0 adaptor): +/// 1. API version : vcl_version_info_t +/// 2. Num of data elements (now only xml + weights = 2) : uint32_t +/// 3. Size of data 1 (xml) : uint64_t +/// 4. Data 1 : $2 bytes +/// 5. Size of data 2 (weights) : uint64_t +/// 6. Data 2 : $4 bytes +typedef struct __vcl_executable_desc_t { + const uint8_t* modelIRData; + uint64_t modelIRSize; ///< Size of modelIRData + const char* options; ///< Compiler config options + uint64_t optionsSize; ///< Size of options +} vcl_executable_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines query description to be passed during query network creation +/// +/// Format of modelIRData (defined in L0 adaptor): +/// 1. API version : vcl_version_info_t +/// 2. Num of data elements (now only xml + weights = 2) : uint32_t +/// 3. Size of data 1 (xml) : uint64_t +/// 4. Data 1 : $2 bytes +/// 5. Size of data 2 (weights) : uint64_t +/// 6. 
Data 2                     : $4 bytes
+typedef struct __vcl_query_desc_t {
+    const uint8_t* modelIRData;
+    uint64_t modelIRSize;  ///< Size of modelIRData
+    const char* options;   ///< Compiler config options
+    uint64_t optionsSize;  ///< Size of options
+} vcl_query_desc_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Defines the input that is required to create a profiling handler
+typedef struct __vcl_profiling_input_t {
+    const uint8_t* blobData;  ///< Pointer to the buffer with the blob
+    uint64_t blobSize;        ///< Size of the blob in bytes
+    const uint8_t* profData;  ///< Pointer to the raw profiling output
+    uint64_t profSize;        ///< Size of the raw profiling output
+} vcl_profiling_input_t, *p_vcl_profiling_input_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Decoded profiling output
+typedef struct __vcl_profiling_output_t {
+    const uint8_t* data;  ///< Either a pointer to raw data or a pointer to the array of structures
+    uint64_t size;        ///< Size of the buffer in bytes
+} vcl_profiling_output_t, *p_vcl_profiling_output_t;
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Returns the VCL API version to the caller; this interface shall never change, so that the
+/// backward-compatibility check keeps working
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetVersion(vcl_version_info_t* compilerVersion,
+                                                     vcl_version_info_t* profilingVersion);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Creates a compiler object and returns the compiler handle
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerCreate(vcl_compiler_desc_t* compilerDesc,
+                                                         vcl_device_desc_t* deviceDesc,
+                                                         vcl_compiler_handle_t* compiler,
+                                                         vcl_log_handle_t* logHandle);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Destroys the compiler
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerDestroy(vcl_compiler_handle_t compiler);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Retrieves the compiler properties, including the version and supported opsets
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerGetProperties(vcl_compiler_handle_t compiler,
+                                                                vcl_compiler_properties_t* properties);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Creates a query network object and returns the handle
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetworkCreate(vcl_compiler_handle_t compiler,
+                                                             vcl_query_desc_t desc,
+                                                             vcl_query_handle_t* query);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Retrieves the result of the network query
+/// @attention Should be called twice: the first time to retrieve the data size, the second time to get the data.
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetwork(vcl_query_handle_t query, uint8_t* queryResult, uint64_t* size);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Destroys the query network and releases the cached query result
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetworkDestroy(vcl_query_handle_t query);
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Creates an executable object and returns the executable handle.
+/// Parses modelIRData in the executable descriptor into a blob and stores it in the executable.
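+///
+/// A minimal packing sketch for modelIRData following the layout above (names
+/// are illustrative, not part of the API): one contiguous buffer of
+///   sizeof(vcl_version_info_t) + sizeof(uint32_t) + 2 * sizeof(uint64_t) + xmlSize + weightsSize
+/// bytes, filled in order with {major, minor}, numData = 2, xmlSize, the xml
+/// bytes, weightsSize, and the weight bytes.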
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableCreate(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_executable_handle_t* executable); + +DEPRECATED typedef struct __vcl_allocator_t { + uint8_t* (*allocate)(uint64_t); + void (*deallocate)(uint8_t*); +} vcl_allocator_t; + +typedef struct __vcl_allocator2_t { + uint8_t* (*allocate)(struct __vcl_allocator2_t*, uint64_t); + void (*deallocate)(struct __vcl_allocator2_t*, uint8_t*); +} vcl_allocator2_t; + +DEPRECATED VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + const vcl_allocator_t* allocator, + uint8_t** blobBuffer, + uint64_t* blobSize); + +VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate2(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_allocator2_t* allocator, + uint8_t** blobBuffer, + uint64_t* blobSize); + +VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreateWSOneShot(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_allocator2_t* allocator); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the executable and releases the cached blob. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableDestroy(vcl_executable_handle_t executable); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief If blobBuffer is null, the function returns the size of the blob stored in the executable. +/// Otherwise the function copies the executable cached blob to the blobBuffer provided by the caller. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableGetSerializableBlob(vcl_executable_handle_t executable, + uint8_t* blobBuffer, + uint64_t* blobSize); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates a buffer with decoded profiling info. +/// This is the most computationally expensive profiling API. +/// It does all memory allocations and postprocessing. +/// @warning Caller must keep \b p_vcl_profiling_input_t::profData buffer alive until +/// \b vclProfilingDestroy call if \b VCL_PROFILING_RAW request is expected. +/// \b vclProfilingCreate function doesn't copy profiling output buffer but will +/// return pointer to it as a response to \b VCL_PROFILING_RAW request. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingCreate(p_vcl_profiling_input_t profilingInput, + vcl_profiling_handle_t* profilingHandle, + vcl_log_handle_t* logHandle); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Provides profiling information based on request argument. +/// @warning For \b VCL_PROFILING_RAW request it returns a pointer to the buffer that was provided to +/// \b vclProfilingCreate function call. This means that original buffer with profiling output must +/// be alive till this call. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetDecodedProfilingBuffer(vcl_profiling_handle_t profilingHandle, + vcl_profiling_request_type_t requestType, + p_vcl_profiling_output_t profilingOutput); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the buffer with decoded profiling info. +/// Now caller may safely dispose raw profiling output. 
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingDestroy(vcl_profiling_handle_t profilingHandle); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get version of post-processing module +VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingGetProperties(vcl_profiling_handle_t profilingHandle, + vcl_profiling_properties_t* properties); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves error message from log handler. +/// Handle is released automatically with related compiler or Profiler. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclLogHandleGetString(vcl_log_handle_t logHandle, size_t* logSize, char* log); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieve the list of supported compiler options +/// @attention Should be called twice, first time to retrieve data size, second time to get data. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetCompilerSupportedOptions(vcl_compiler_handle_t compiler, + char* result, + uint64_t* size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Verifies if a given config option (or option-value pair) is supported by the compiler +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetCompilerIsOptionSupported(vcl_compiler_handle_t compiler, + const char* option, + const char* value); + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // VPUX_COMPILER_L0_H diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp new file mode 100644 index 00000000000000..a55c12faded9ec --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp @@ -0,0 +1,73 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "compiler.h" +#include "intel_npu/common/filtered_config.hpp" +#include "intel_npu/icompiler.hpp" +#include "openvino/core/except.hpp" + +namespace intel_npu { + +bool isUseBaseModelSerializer(const FilteredConfig& config); +std::string supportVclCompiler(int major, int minor); +class VCLApi; + +class VCLCompilerImpl final : public intel_npu::ICompiler { +public: + VCLCompilerImpl(); + ~VCLCompilerImpl() override; + + static std::shared_ptr getInstance() { + static std::mutex mutex; + static std::weak_ptr weak_compiler; + + std::lock_guard lock(mutex); + auto compiler = weak_compiler.lock(); + if (!compiler) { + compiler = std::make_shared(); + weak_compiler = compiler; + } + return compiler; + } + + NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override; + + std::vector> compileWsOneShot(const std::shared_ptr& model, + const Config& config) const override; + + NetworkDescription compileWsIterative(const std::shared_ptr& model, + const Config& config, + size_t callNumber) const override; + + ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; + + NetworkMetadata parse(const std::vector& network, const Config& config) const override; + + uint32_t get_version() const override; + + std::vector process_profiling_output(const std::vector& profData, + const std::vector& network, + const intel_npu::Config& config) const final override; + + bool get_supported_options(std::vector& options) const; + + bool is_option_supported(const std::string& option) const; + + std::shared_ptr 
getLinkedLibrary() const;
+
+private:
+    vcl_log_handle_t _logHandle = nullptr;
+    vcl_compiler_handle_t _compilerHandle = nullptr;
+    vcl_compiler_properties_t _compilerProperties;
+    vcl_version_info_t _vclVersion;
+    vcl_version_info_t _vclProfilingVersion;
+    Logger _logger;
+};
+
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp
new file mode 100644
index 00000000000000..d32abbd6ab4509
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp
@@ -0,0 +1,24 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/core/model.hpp"
+
+namespace intel_npu {
+/**
+ * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon
+ * serialization.
+ * @details Constant nodes (weights) may contain as metadata the "WeightlessCacheAttribute", that is, information
+ * regarding the offset of the weights within the binary file, as well as the original size and precision. This
+ * information is required within the "weights separation" flow, therefore this function is here to store it.
+ * @note Not calling this function in the weights separation flow would lead to this information being lost upon
+ * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent
+ * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be
+ * misinformed and lookups of weights offsets could fail.
+ *
+ * @param model Both source and target.
+ */
+void storeWeightlessCacheAttribute(const std::shared_ptr<ov::Model>& model);
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp
index eb529a34efee9b..5af539901ef786 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp
@@ -75,4 +75,7 @@ class ZeGraphExtWrappers {
     Logger _logger;
 };
 
+// Parses the query result string into an unordered_set of strings
+std::unordered_set<std::string> parseQueryResult(std::vector<char>& data);
+
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp
new file mode 100644
index 00000000000000..2ebdce1aec4898
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp
@@ -0,0 +1,719 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "compiler_impl.hpp"
+
+#include "intel_npu/config/options.hpp"
+#include "intel_npu/npu_private_properties.hpp"
+#include "intel_npu/profiling.hpp"
+#include "openvino/runtime/make_tensor.hpp"
+#include "openvino/util/file_util.hpp"
+#include "openvino/util/shared_object.hpp"
+#include "vcl_serializer.hpp"
+#include "ze_graph_ext_wrappers.hpp"
+
+namespace intel_npu {
+
+// clang-format off
+#define vcl_symbols_list()                                \
+    vcl_symbol_statement(vclGetVersion)                   \
+    vcl_symbol_statement(vclCompilerCreate)               \
+    vcl_symbol_statement(vclCompilerDestroy)              \
+    vcl_symbol_statement(vclCompilerGetProperties)        \
+    vcl_symbol_statement(vclQueryNetworkCreate)           \
+    vcl_symbol_statement(vclQueryNetwork)                 \
+
vcl_symbol_statement(vclQueryNetworkDestroy) \ + vcl_symbol_statement(vclExecutableCreate) \ + vcl_symbol_statement(vclAllocatedExecutableCreate) \ + vcl_symbol_statement(vclExecutableDestroy) \ + vcl_symbol_statement(vclExecutableGetSerializableBlob) \ + vcl_symbol_statement(vclProfilingCreate) \ + vcl_symbol_statement(vclGetDecodedProfilingBuffer) \ + vcl_symbol_statement(vclProfilingDestroy) \ + vcl_symbol_statement(vclProfilingGetProperties) \ + vcl_symbol_statement(vclLogHandleGetString) \ + vcl_symbol_statement(vclAllocatedExecutableCreate2) \ + vcl_symbol_statement(vclGetCompilerSupportedOptions) \ + vcl_symbol_statement(vclGetCompilerIsOptionSupported) \ + + +//unsupported symbols with older ze_loader versions +#define vcl_weak_symbols_list() \ + vcl_symbol_statement(vclAllocatedExecutableCreateWSOneShot) +// clang-format on + +class VCLApi { +public: + VCLApi(); + VCLApi(const VCLApi& other) = delete; + VCLApi(VCLApi&& other) = delete; + void operator=(const VCLApi&) = delete; + void operator=(VCLApi&&) = delete; + + static const std::shared_ptr& getInstance(); + std::shared_ptr getLibrary() const { + return lib; + } + +#define vcl_symbol_statement(vcl_symbol) decltype(&::vcl_symbol) vcl_symbol; + vcl_symbols_list(); + vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +private: + std::shared_ptr lib; + Logger _logger; +}; + +#define vcl_symbol_statement(vcl_symbol) \ + template \ + inline typename std::invoke_result::type wrapped_##vcl_symbol(Args... args) { \ + const auto& ptr = VCLApi::getInstance(); \ + if (ptr->vcl_symbol == nullptr) { \ + OPENVINO_THROW("Unsupported vcl_symbol " #vcl_symbol); \ + } \ + return ptr->vcl_symbol(std::forward(args)...); \ + } +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement +#define vcl_symbol_statement(vcl_symbol) inline decltype(&::vcl_symbol) vcl_symbol = wrapped_##vcl_symbol; +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +static inline std::string getLatestVCLLog(vcl_log_handle_t logHandle) { + Logger _logger("VCLAPI", Logger::global().level()); + _logger.debug("getLatestVCLLog start"); + + vcl_version_info_t compilerVersion; + vcl_version_info_t profilingVersion; + vcl_result_t ret = vclGetVersion(&compilerVersion, &profilingVersion); + + if (ret != VCL_RESULT_SUCCESS || compilerVersion.major < 3) { + _logger.warning("Failed to get VCL version: 0x%x", ret); + return "Can not get VCL log, VCL version is too old!"; + } + + // Get log size + size_t size = 0; + // Null graph handle to get error log + ret = vclLogHandleGetString(logHandle, &size, nullptr); + if (VCL_RESULT_SUCCESS != ret) { + return "Failed to get size of latest VCL log"; + } + + if (size <= 0) { + return "No error stored in VCL when error detected"; + } + + // Get log content + std::string logContent{}; + logContent.resize(size); + ret = vclLogHandleGetString(logHandle, &size, const_cast(logContent.data())); + if (VCL_RESULT_SUCCESS != ret) { + return "Size of latest error log > 0, failed to get content"; + } + _logger.debug("getLatestBuildError end"); + return logContent; +} + +#define THROW_ON_FAIL_FOR_VCL(step, ret, logHandle) \ + { \ + vcl_result_t result = ret; \ + if (result != VCL_RESULT_SUCCESS) { \ + OPENVINO_THROW("Failed to call VCL API : ", \ + step, \ + " result: 0x", \ + std::hex, \ + result, \ + " - ", \ + getLatestVCLLog(logHandle)); \ + } \ + } + +VCLApi::VCLApi() : _logger("VCLApi", Logger::global().level()) { + const std::string baseName = "openvino_intel_npu_compiler"; + try { + auto libpath = 
ov::util::make_plugin_library_name({}, baseName);
+        _logger.debug("Try to load openvino_intel_npu_compiler");
+
+#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
+        this->lib = ov::util::load_shared_object(ov::util::string_to_wstring(libpath).c_str());
+#else
+        this->lib = ov::util::load_shared_object(libpath.c_str());
+#endif
+    } catch (const std::runtime_error& error) {
+        _logger.debug("Failed to load openvino_intel_npu_compiler");
+        OPENVINO_THROW(error.what());
+    }
+
+    try {
+#define vcl_symbol_statement(vcl_symbol) \
+    this->vcl_symbol = reinterpret_cast<decltype(this->vcl_symbol)>(ov::util::get_symbol(lib, #vcl_symbol));
+        vcl_symbols_list();
+#undef vcl_symbol_statement
+    } catch (const std::runtime_error& error) {
+        _logger.debug("Failed to get formal symbols from openvino_intel_npu_compiler");
+        OPENVINO_THROW(error.what());
+    }
+
+#define vcl_symbol_statement(vcl_symbol)                                                                          \
+    try {                                                                                                         \
+        this->vcl_symbol = reinterpret_cast<decltype(this->vcl_symbol)>(ov::util::get_symbol(lib, #vcl_symbol)); \
+    } catch (const std::runtime_error&) {                                                                         \
+        _logger.debug("Failed to get %s from openvino_intel_npu_compiler", #vcl_symbol);                          \
+        this->vcl_symbol = nullptr;                                                                               \
+    }
+    vcl_weak_symbols_list();
+#undef vcl_symbol_statement
+
+#define vcl_symbol_statement(vcl_symbol) vcl_symbol = this->vcl_symbol;
+    vcl_symbols_list();
+    vcl_weak_symbols_list();
+#undef vcl_symbol_statement
+}
+
+const std::shared_ptr<VCLApi>& VCLApi::getInstance() {
+    static std::shared_ptr<VCLApi> instance = std::make_shared<VCLApi>();
+    return instance;
+}
+
+VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerImpl", Logger::global().level()) {
+    _logger.debug("VCLCompilerImpl constructor start");
+
+    // Load VCL library
+    (void)VCLApi::getInstance();
+
+    // Initialize the VCL API
+    THROW_ON_FAIL_FOR_VCL("vclGetVersion", vclGetVersion(&_vclVersion, &_vclProfilingVersion), nullptr);
+
+    _logger.info("Plugin VCL API Version: %d.%d", VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR);
+    _logger.info("Plugin VCL Profiling API Version: %d.%d", VCL_PROFILING_VERSION_MAJOR, VCL_PROFILING_VERSION_MINOR);
+    _logger.info("Lib VCL Compiler Version: %d.%d", _vclVersion.major, _vclVersion.minor);
+    _logger.info("Lib VCL Profiling Version: %d.%d", _vclProfilingVersion.major, _vclProfilingVersion.minor);
+    if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major ||
+        (VCL_COMPILER_VERSION_MAJOR == _vclVersion.major && VCL_COMPILER_VERSION_MINOR < _vclVersion.minor)) {
+        _logger.warning("The VCL version supported by the plugin is lower than the loaded VCL API:\n plugin was "
+                        "built with VCL %d.%d,\n but the loaded VCL is %d.%d.\n"
+                        "Will downgrade to the latest VCL version the plugin supports!",
+                        VCL_COMPILER_VERSION_MAJOR,
+                        VCL_COMPILER_VERSION_MINOR,
+                        _vclVersion.major,
+                        _vclVersion.minor);
+    }
+
+    _logger.info("Use Lib VCL version to create compiler");
+    vcl_compiler_desc_t compilerDesc;
+    compilerDesc.version = _vclVersion;
+    compilerDesc.debugLevel = static_cast<__vcl_log_level_t>(static_cast<int>(Logger::global().level()) - 1);
+
+    // Set the device description as empty; the related info will be processed in the compile phase if passed by the user.
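+    // Illustrative only: a driver-populated descriptor would carry real values,
+    // e.g. {sizeof(vcl_device_desc_t), <PCI device id>, <stepping>, <tile count>}
+    // per the vcl_device_desc_t field descriptions; here the plugin has no
+    // device context yet, so the descriptor stays zero-initialized.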
+    vcl_device_desc_t device_desc = {};
+
+    THROW_ON_FAIL_FOR_VCL("vclCompilerCreate",
+                          vclCompilerCreate(&compilerDesc, &device_desc, &_compilerHandle, &_logHandle),
+                          nullptr);
+
+    THROW_ON_FAIL_FOR_VCL("vclCompilerGetProperties",
+                          vclCompilerGetProperties(_compilerHandle, &_compilerProperties),
+                          _logHandle);
+
+    _logger.info("VCL Compiler created successfully");
+    _logger.info("VCL Compiler Properties: ID: %s, Version: %d.%d, Supported Opsets: %u",
+                 _compilerProperties.id,
+                 _compilerProperties.version.major,
+                 _compilerProperties.version.minor,
+                 _compilerProperties.supportedOpsets);
+}
+
+VCLCompilerImpl::~VCLCompilerImpl() {
+    if (_compilerHandle) {
+        THROW_ON_FAIL_FOR_VCL("vclCompilerDestroy", vclCompilerDestroy(_compilerHandle), _logHandle);
+    }
+    if (_logHandle) {
+        _logHandle = nullptr;  // Log handle is released automatically with the compiler
+    }
+    _logger.info("VCL Compiler destroyed successfully");
+}
+
+std::shared_ptr<VCLApi> VCLCompilerImpl::getLinkedLibrary() const {
+    return VCLApi::getInstance();
+}
+
+struct vcl_allocator_vector : vcl_allocator2_t {
+    vcl_allocator_vector() : vcl_allocator2_t{vector_allocate, vector_deallocate} {}
+
+    static uint8_t* vector_allocate(vcl_allocator2_t* allocator, uint64_t size) {
+        vcl_allocator_vector* vecAllocator = static_cast<vcl_allocator_vector*>(allocator);
+        vecAllocator->m_vec.resize(size);
+        return vecAllocator->m_vec.data();
+    }
+
+    static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
+        vcl_allocator_vector* vecAllocator = static_cast<vcl_allocator_vector*>(allocator);
+        vecAllocator->m_vec.clear();
+        vecAllocator->m_vec.shrink_to_fit();
+    }
+
+    std::vector<uint8_t> m_vec;
+};
+
+struct vcl_allocator_vector_2 : vcl_allocator2_t {
+    vcl_allocator_vector_2() : vcl_allocator2_t{vector_allocate, vector_deallocate} {}
+
+    static uint8_t* vector_allocate(vcl_allocator2_t* allocator, uint64_t size) {
+        vcl_allocator_vector_2* vecAllocator = static_cast<vcl_allocator_vector_2*>(allocator);
+        auto newVec = std::make_shared<std::vector<uint8_t>>();
+        newVec->resize(size);
+        uint8_t* ptr = newVec->data();
+        vecAllocator->m_vector.emplace_back(newVec);
+        return ptr;
+    }
+
+    static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) {
+        vcl_allocator_vector_2* vecAllocator = static_cast<vcl_allocator_vector_2*>(allocator);
+        vecAllocator->m_vector.clear();
+        vecAllocator->m_vector.shrink_to_fit();
+    }
+
+    std::vector<std::shared_ptr<std::vector<uint8_t>>> m_vector;
+};
+
+struct vcl_allocator_malloc {
+    static uint8_t* vcl_allocate(uint64_t size) {
+        return reinterpret_cast<uint8_t*>(malloc(size));
+    }
+
+    static void vcl_deallocate(uint8_t* ptr) {
+        free(ptr);
+    }
+};
+
+bool isUseBaseModelSerializer(const FilteredConfig& config) {
+    // The user passed the use_base_model_serializer config
+    if (config.isAvailable(ov::intel_npu::use_base_model_serializer.name()) &&
+        config.has(ov::intel_npu::use_base_model_serializer.name())) {
+        return config.get<USE_BASE_MODEL_SERIALIZER>();
+    }
+
+    // The user passed the model_serializer_version config
+    if (config.isAvailable(ov::intel_npu::model_serializer_version.name()) &&
+        config.has(ov::intel_npu::model_serializer_version.name())) {
+        return (config.get<MODEL_SERIALIZER_VERSION>() ==
+                ov::intel_npu::ModelSerializerVersion::ALL_WEIGHTS_COPY);
+    }
+
+    // The serializer method was not set by the user; default to the VCL serializer (no weights copy).
+    return false;
+}
+
+std::string supportVclCompiler(int major, int minor) {
+    if (major > 7 || (major == 7 && minor >= 4)) {
+        return "vclAllocatedExecutableCreate2";
+    } else if (major > 6 || (major == 6 && minor >= 1)) {
+        return "vclAllocatedExecutableCreate";
+    } else {
+        return "vclExecutableCreate";
+    }
+}
+
+NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const {
+    _logger.debug("compile start");
+
+    /// Check whether the linked VCL version is supported by the plugin
+    uint16_t usedMajor = VCL_COMPILER_VERSION_MAJOR, usedMinor = VCL_COMPILER_VERSION_MINOR;
+    if (static_cast<uint16_t>(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) {
+        usedMinor = std::min(static_cast<uint16_t>(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor);
+    } else if (static_cast<uint16_t>(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) {
+        usedMajor = _vclVersion.major;
+        usedMinor = _vclVersion.minor;
+    }
+    _logger.debug("the final VCL version used is %d.%d", usedMajor, usedMinor);
+
+    const auto maxOpsetVersion = _compilerProperties.supportedOpsets;
+    _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion);
+
+    _logger.debug("serialize IR");
+    ze_graph_compiler_version_info_t compilerVersion;
+    compilerVersion.major = _compilerProperties.version.major;
+    compilerVersion.minor = _compilerProperties.version.minor;
+
+    const FilteredConfig* filteredConfig = dynamic_cast<const FilteredConfig*>(&config);
+    if (filteredConfig == nullptr) {
+        OPENVINO_THROW("config is not FilteredConfig");
+    }
+    FilteredConfig updatedConfig = *filteredConfig;
+    bool useBaseModelSerializer = true;
+
+    // The VCL serializer is only supported for VCL version >= 7.5
+    if (usedMajor > 7 || (usedMajor == 7 && usedMinor >= 5)) {
+        useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig);
+    }
+
+    if (useBaseModelSerializer) {
+        _logger.debug("serialize IR with the base method, useBaseModelSerializer is %d", useBaseModelSerializer);
+    } else {
+        _logger.debug("serialize IR with the vcl method, useBaseModelSerializer is %d", useBaseModelSerializer);
+
+        // To resolve the issue with the default configuration, where the user passes no serializer config, the VCL
+        // serializer is used as the default in the plugin adapter. The serializer config then has to be passed to
+        // the compiler explicitly; otherwise a deserialization issue occurs within the compiler.
+ _logger.warning("Add serializer config"); + if (updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name())) { + updatedConfig.update({{ov::intel_npu::use_base_model_serializer.name(), "NO"}}); + } else if (updatedConfig.isAvailable(ov::intel_npu::model_serializer_version.name())) { + updatedConfig.update({{ov::intel_npu::model_serializer_version.name(), "NO_WEIGHTS_COPY"}}); + } + } + + _logger.error("manual set serializeIR(model, compilerVersion, maxOpsetVersion, true) in compile0"); + auto serializedIR = + driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer); + _logger.error("manual set serializeIR(model, compilerVersion, maxOpsetVersion, true) in compile1"); + + std::string buildFlags; + + _logger.debug("create build flags"); + buildFlags += driver_compiler_utils::serializeIOInfo(model, true); + _logger.error("manual set serializeIR(model, compilerVersion, maxOpsetVersion, true) in compile2"); + buildFlags += " "; + buildFlags += driver_compiler_utils::serializeConfig(updatedConfig, compilerVersion); + _logger.error("manual set serializeIR(model, compilerVersion, maxOpsetVersion, true) in compile3"); + _logger.debug("final build flags to compiler: %s", buildFlags.c_str()); + + vcl_executable_desc_t exeDesc = {serializedIR.second.get(), + serializedIR.first, + buildFlags.c_str(), + buildFlags.size()}; + _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); + if (usedMajor >= 7 && usedMinor >= 4) { + if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { + _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " + "%d.%d, \n but loaded VCL is %d.%d.\n" + "Will downgrade to form %s to use vclAllocatedExecutableCreate2", + VCL_COMPILER_VERSION_MAJOR, + VCL_COMPILER_VERSION_MINOR, + _vclVersion.major, + _vclVersion.minor, + supportVclCompiler(usedMajor, usedMinor).c_str()); + } + // support the lastest vcl api + // For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2 + _logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL"); + vcl_allocator_vector allocator; + uint8_t* blob = nullptr; + size_t size = 0; + _logger.error("manual set serializeIR(model, compilerVersion, maxOpsetVersion, true) in compile4"); + + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreate2", + vclAllocatedExecutableCreate2(_compilerHandle, exeDesc, &allocator, &blob, &size), + _logHandle);/// get issue form here + + if (size == 0 || blob == nullptr) { + OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null"); + } + + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + + _logger.debug("compile end, blob size:%d", allocator.m_vec.size()); + return NetworkDescription(std::move(allocator.m_vec), std::move(metadata)); + } else if (usedMajor >= 6 && usedMinor >= 1) { + if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { + _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " + "%d.%d, \n but loaded VCL is %d.%d.\n" + "Will downgrade to form %s to use vclAllocatedExecutableCreate2", + VCL_COMPILER_VERSION_MAJOR, + VCL_COMPILER_VERSION_MINOR, + _vclVersion.major, + _vclVersion.minor, + supportVclCompiler(usedMajor, usedMinor).c_str()); + } + // For older versions, we use vclAllocatedExecutableCreate + _logger.debug("Using vclAllocatedExecutableCreate for 6.1 < VCL < 7.4"); + + vcl_allocator_t allocator; + allocator.allocate = 
vcl_allocator_malloc::vcl_allocate; + allocator.deallocate = vcl_allocator_malloc::vcl_deallocate; + uint8_t* blob = nullptr; + size_t size = 0; + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreate", + vclAllocatedExecutableCreate(_compilerHandle, exeDesc, &allocator, &blob, &size), + _logHandle); + if (size == 0 || blob == nullptr) { + OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null"); + } + + std::vector compiledNetwork(blob, blob + size); + allocator.deallocate(blob); + + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + + _logger.debug("compile end, blob size:%d", compiledNetwork.size()); + return NetworkDescription(std::move(compiledNetwork), std::move(metadata)); + } else { + OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later", + _vclVersion.major, + _vclVersion.minor); + } +} + +std::vector> VCLCompilerImpl::compileWsOneShot( + const std::shared_ptr& model, + const Config& config) const { + _logger.debug("compileWsOneShot start"); + + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; + _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); + + _logger.debug("serialize IR"); + ze_graph_compiler_version_info_t compilerVersion; + compilerVersion.major = _compilerProperties.version.major; + compilerVersion.minor = _compilerProperties.version.minor; + + const FilteredConfig* filteredConfig = dynamic_cast(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + bool useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig); + auto serializedIR = + driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer); + + std::string buildFlags; + + _logger.debug("create build flags"); + buildFlags += driver_compiler_utils::serializeIOInfo(model, true); + buildFlags += " "; + buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); + _logger.debug("final build flags to compiler: %s", buildFlags.c_str()); + + vcl_executable_desc_t exeDesc = {serializedIR.second.get(), + serializedIR.first, + buildFlags.c_str(), + buildFlags.size()}; + _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); + + _logger.debug("Using vclAllocatedExecutableCreateWSOneShot"); + vcl_allocator_vector_2 allocator; + + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreateWSOneShot", + vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, &allocator), + _logHandle); + + if (allocator.m_vector.size() == 0) { + OPENVINO_THROW("Failed to create VCL executable, blobCount is zero"); + } + + std::vector> networkDescrs; + for (uint32_t i = 0; i < allocator.m_vector.size(); i++) { + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + networkDescrs.emplace_back( + std::make_shared(std::move(*allocator.m_vector[i]), std::move(metadata))); + } + return networkDescrs; +} + +NetworkDescription VCLCompilerImpl::compileWsIterative(const std::shared_ptr& model, + const Config& config, + size_t callNumber) const { + _logger.debug("compileWsIterative start"); + const FilteredConfig* filteredConfig = dynamic_cast(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + updatedConfig.update({{ov::intel_npu::ws_compile_call_number.name(), 
std::to_string(callNumber)}}); + return compile(model, config); +} + +intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, const Config& config) const { + _logger.debug("parse start"); + // VCL does not support parse, return empty metadata + return intel_npu::NetworkMetadata(); +} + +std::vector VCLCompilerImpl::process_profiling_output(const std::vector& profData, + const std::vector& network, + const intel_npu::Config& config) const { + _logger.debug("process_profiling_output start"); + + vcl_profiling_handle_t profilingHandle; + vcl_profiling_input_t profilingInput = {network.data(), network.size(), profData.data(), profData.size()}; + vcl_log_handle_t logHandle; + THROW_ON_FAIL_FOR_VCL("vclProfilingCreate", + vclProfilingCreate(&profilingInput, &profilingHandle, &logHandle), + nullptr); + + vcl_profiling_properties_t profProperties; + THROW_ON_FAIL_FOR_VCL("vclProfilingGetProperties", + vclProfilingGetProperties(profilingHandle, &profProperties), + logHandle); + + _logger.info("VCL Profiling Properties: Version: %d.%d", + profProperties.version.major, + profProperties.version.minor); + + // We only use layer level info + vcl_profiling_request_type_t request = VCL_PROFILING_LAYER_LEVEL; + + vcl_profiling_output_t profOutput; + profOutput.data = NULL; + THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", + vclGetDecodedProfilingBuffer(profilingHandle, request, &profOutput), + logHandle); + if (profOutput.data == NULL) { + OPENVINO_THROW("Failed to get VCL profiling output"); + } + + std::vector layerInfo(profOutput.size / sizeof(ze_profiling_layer_info)); + if (profOutput.size > 0) { + _logger.debug("VCL profiling output size: %d", profOutput.size); + std::memcpy(layerInfo.data(), profOutput.data, profOutput.size); + } + + THROW_ON_FAIL_FOR_VCL("vclProfilingDestroy", vclProfilingDestroy(profilingHandle), logHandle); + + // Return processed profiling info + return intel_npu::profiling::convertLayersToIeProfilingInfo(layerInfo); +} + +uint32_t VCLCompilerImpl::get_version() const { + return ZE_MAKE_VERSION(_compilerProperties.version.major, _compilerProperties.version.minor); +} + +ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, const Config& config) const { + _logger.debug("query start"); + + /// Check the linked vcl version whether supported in plugin + uint16_t usedMajor = VCL_COMPILER_VERSION_MAJOR, usedMinor = VCL_COMPILER_VERSION_MINOR; + if (static_cast(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) { + usedMinor = std::min(static_cast(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor); + } else if (static_cast(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) { + usedMajor = _vclVersion.major; + usedMinor = _vclVersion.minor; + } + _logger.debug("the finally used vcl version is %d.%d", usedMajor, usedMinor); + + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; + _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); + + _logger.debug("serialize IR"); + ze_graph_compiler_version_info_t compilerVersion; + compilerVersion.major = _compilerProperties.version.major; + compilerVersion.minor = _compilerProperties.version.minor; + const FilteredConfig* filteredConfig = dynamic_cast(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + bool useBaseModelSerializer = true; + // vcl serializer is only support for vcl version >= 7.5 + if (usedMajor >= 7 && usedMinor >= 5) { + 
+        useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig);
+    }
+
+    if (useBaseModelSerializer) {
+        _logger.debug("serialize IR with the base method (copy weights), useBaseModelSerializer is %d",
+                      useBaseModelSerializer);
+    } else {
+        _logger.debug("serialize IR with the vcl method (no weights copy), useBaseModelSerializer is %d",
+                      useBaseModelSerializer);
+
+        // To resolve the issue with the default configuration, where the user passes no serializer config, the VCL
+        // serializer is used as the default in the plugin adapter. The serializer config then has to be passed to
+        // the compiler explicitly; otherwise a deserialization issue occurs within the compiler.
+        _logger.warning("Add serializer config");
+        if (updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name())) {
+            updatedConfig.update({{ov::intel_npu::use_base_model_serializer.name(), "NO"}});
+        } else if (updatedConfig.isAvailable(ov::intel_npu::model_serializer_version.name())) {
+            updatedConfig.update({{ov::intel_npu::model_serializer_version.name(), "NO_WEIGHTS_COPY"}});
+        }
+    }
+    auto serializedIR =
+        driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer);
+
+    std::string buildFlags;
+    buildFlags += driver_compiler_utils::serializeConfig(updatedConfig, compilerVersion);
+    _logger.debug("queryImpl build flags : %s", buildFlags.c_str());
+
+    vcl_query_handle_t queryHandle;
+    vcl_query_desc_t queryDesc = {serializedIR.second.get(), serializedIR.first, buildFlags.c_str(), buildFlags.size()};
+    THROW_ON_FAIL_FOR_VCL("vclQueryNetworkCreate",
+                          vclQueryNetworkCreate(_compilerHandle, queryDesc, &queryHandle),
+                          _logHandle);
+
+    uint64_t size = 0;
+    THROW_ON_FAIL_FOR_VCL("vclQueryNetwork", vclQueryNetwork(queryHandle, nullptr, &size), _logHandle);
+
+    std::vector<char> supportedLayers(size);
+    THROW_ON_FAIL_FOR_VCL("vclQueryNetwork",
+                          vclQueryNetwork(queryHandle, reinterpret_cast<uint8_t*>(supportedLayers.data()), &size),
+                          _logHandle);
+
+    THROW_ON_FAIL_FOR_VCL("vclQueryNetworkDestroy", vclQueryNetworkDestroy(queryHandle), _logHandle);
+
+    const std::string deviceName = "NPU";
+    ov::SupportedOpsMap result;
+    const auto parsedSupportedLayers = parseQueryResult(supportedLayers);
+    for (auto&& layerName : parsedSupportedLayers) {
+        result.emplace(layerName, deviceName);
+    }
+    _logger.info("For the given model, there are %d supported layers", parsedSupportedLayers.size());
+
+    return result;
+}
+
+bool VCLCompilerImpl::get_supported_options(std::vector<char>& options) const {
+    _logger.debug("get_supported_options start");
+    // 1. get the size of the compiler-supported options list
+    uint64_t str_size = 0;
+    try {
+        THROW_ON_FAIL_FOR_VCL("vclGetCompilerSupportedOptions",
+                              vclGetCompilerSupportedOptions(_compilerHandle, nullptr, &str_size),
+                              _logHandle);
+
+        if (str_size > 0) {
+            _logger.debug("obtain list");
+            // 2. allocate a buffer for it
+            options.resize(str_size);
+            // 3. populate the char list
+            THROW_ON_FAIL_FOR_VCL("vclGetCompilerSupportedOptions",
+                                  vclGetCompilerSupportedOptions(_compilerHandle, options.data(), &str_size),
+                                  _logHandle);
+
+            _logger.debug("Option list size %d, got option list", str_size);
+            return true;
+        } else {
+            _logger.debug("Option list size 0 - skipping!");
+        }
+    } catch (const std::exception& e) {
+        // The API is only supported in newer versions; just log here
+        _logger.debug("Exception in get_supported_options: %s", e.what());
+    }
+    _logger.debug("get_supported_options end, no options found");
+    return false;
+}
+
+bool VCLCompilerImpl::is_option_supported(const std::string& option) const {
+    try {
+        const char* optname_ch = option.c_str();
+        _logger.debug("is_option_supported start for option: %s", optname_ch);
+        THROW_ON_FAIL_FOR_VCL("vclGetCompilerIsOptionSupported",
+                              vclGetCompilerIsOptionSupported(_compilerHandle, optname_ch, nullptr),
+                              _logHandle);
+        return true;
+    } catch (const std::exception& e) {
+        // The API is only supported in newer versions; just log here
+        _logger.debug("Exception in is_option_supported: %s", e.what());
+    }
+    _logger.debug("option: %s is not supported", option.c_str());
+    return false;
+}
+
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp
index 15d0091e73bd4f..f8d91edb8d62bb 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp
@@ -13,9 +13,9 @@
 #include "intel_npu/utils/logger/logger.hpp"
 #include "mem_usage.hpp"
 #include "openvino/core/model.hpp"
-#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
 #include "vcl_serializer.hpp"
 #include "weightless_graph.hpp"
+#include "weightless_utils.hpp"
 
 namespace {
 
@@ -26,38 +26,6 @@ bool isInitMetadata(const intel_npu::NetworkMetadata& networkMetadata) {
     return networkMetadata.inputs.at(0).isInitInputWeights;
 }
 
-/**
- * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon
- * serialization.
- * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information
- * regarding the offset of the weights within the binary file, as well as the original size and precision. This
- * information is required within the "weights separation" flow, therefore this function is here to store it.
- * @note Not calling this function in the weights separation flow would lead to this information being lost upon
- * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent
- * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be
- * misinformed and lookups of weights offsets could fail.
- *
- * @param model Both source and target.
- */ -void storeWeightlessCacheAttribute(const std::shared_ptr& model) { - size_t constantId = 0; - for (auto&& node : model->get_ordered_ops()) { - if (ov::is_type(node)) { - ov::RTMap& runtimeInfoMap = node->get_rt_info(); - const auto& weightlessCacheAttrIt = - runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static()); - - const std::string constantIdString = std::to_string(constantId++); - if (weightlessCacheAttrIt != runtimeInfoMap.end()) { - auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as(); - model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString); - } - } - } -} - /** * @brief On-going migration from "use_base_model_serializer" to "model_serializer_version". So we have to check both, * depending on which one is supported by the compiler. diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index c0c0b3ddff3d15..62466ab3aae8af 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -7,6 +7,7 @@ #include #include +#include "compiler_impl.hpp" #include "graph.hpp" #include "intel_npu/common/device_helpers.hpp" #include "intel_npu/common/itt.hpp" @@ -22,6 +23,7 @@ #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" #include "weightless_graph.hpp" +#include "weightless_utils.hpp" namespace { @@ -61,6 +63,13 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { return ov::make_tensor(impl); } +bool isInitMetadata(const intel_npu::NetworkMetadata& networkMetadata) { + if (networkMetadata.inputs.size() == 0) { + return false; + } + return networkMetadata.inputs.at(0).isInitInputWeights; +} + } // namespace namespace intel_npu { @@ -70,10 +79,32 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptrgetLinkedLibrary(); + _logger.info("PLUGIN VCL compiler is loading"); + if (vclCompilerPtr && vclLib) { + _compiler = ov::SoPtr(vclCompilerPtr, vclLib); + } else { + throw std::runtime_error("VCL compiler or library is nullptr"); + } + } catch (const std::exception& vcl_exception) { + _logger.warning("VCL compiler load failed: %s. Trying to load MLIR compiler...", vcl_exception.what()); + std::string baseName = "npu_mlir_compiler"; + auto libPath = ov::util::make_plugin_library_name(ov::util::get_ov_lib_path(), baseName + OV_BUILD_POSTFIX); + try { + _compiler = load_compiler(libPath); + if (!_compiler) { + throw std::runtime_error("MLIR compiler load returned nullptr"); + } else { + _logger.info("MLIR compiler loaded successfully. PLUGIN compiler will be used."); + } + } catch (const std::exception& mlir_exception) { + _logger.error("MLIR compiler load failed: %s", mlir_exception.what()); + throw std::runtime_error("Both VCL and MLIR compiler load failed, aborting."); + } + } if (_zeroInitStruct == nullptr) { return; @@ -108,10 +139,17 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); + networkMeta.name = model->get_friendly_name(); + } catch (const std::exception& ex) { + _logger.info("Failed to use the level zero graph handle: %s. 
Inference requests for this model are not " + "allowed. Only exports are available", + ex.what()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } + } else { + _logger.warning("No driver is found, zeGraphExt is nullptr, so metadata is empty. Only exports are available"); } return std::make_shared( @@ -129,23 +167,11 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr> initNetworkDescriptions; - std::shared_ptr mainNetworkDescription; + storeWeightlessCacheAttribute(model); _logger.debug("compile start"); - const auto starts_with = [](const std::string& str, const std::string& prefix) { - return str.substr(0, prefix.size()) == prefix; - }; - const auto isInit = [&](std::string name) { - return starts_with(name, "init"); - }; - - const auto isMain = [&](std::string name) { - return starts_with(name, "main"); - }; - - Config localConfig = config; + FilteredConfig localConfig = config; if (!localConfig.has()) { localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}}); } @@ -157,37 +183,89 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr= ov::log::Level::INFO) { compile_model_mem_start = get_peak_memory_usage(); } + + std::vector initGraphDescriptors; + std::vector tensorsInits; + std::vector initNetworkMetadata; + std::vector> initNetworkDescriptions; + + ov::Tensor tensorMain; + GraphDescriptor mainGraphDesc; + NetworkMetadata mainNetworkMetadata; + std::shared_ptr mainNetworkDescription; + switch (localConfig.get()) { case ov::intel_npu::WSVersion::ONE_SHOT: { std::vector> initMainNetworkDescriptions = _compiler->compileWsOneShot(model, localConfig); -#if 0 // TODO: it is not clear whether we should change the name - OPENVINO_ASSERT(isMain(initMainNetworkDescriptions.back()->metadata.name), - "Unexpected network name for main:", - initMainNetworkDescriptions.back()->metadata.name); -#endif - mainNetworkDescription = initMainNetworkDescriptions.back(); initMainNetworkDescriptions.pop_back(); + OPENVINO_ASSERT(initMainNetworkDescriptions.size() > 0, + "The initMainNetworkDescriptions after getting mainNetworkDescription must not be empty!"); initNetworkDescriptions = std::move(initMainNetworkDescriptions); + + tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork); + if (_zeGraphExt) { + // Depending on the config, we may get an error when trying to + // get the graph handle from the compiled network + try { + mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size()); + mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc); + } catch (...) { + _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " + "allowed. Only exports are available"); + } + } + + initGraphDescriptors.reserve(initNetworkDescriptions.size()); + tensorsInits.reserve(initNetworkDescriptions.size()); + initNetworkMetadata.reserve(initNetworkDescriptions.size()); + for (auto& networkDesc : initNetworkDescriptions) { + ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork); + GraphDescriptor initGraphDesc; + NetworkMetadata initNetworkMeta; + if (_zeGraphExt) { + try { + initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); + initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc); + } catch (...) 
@@ -129,23 +167,11 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr
-    std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions;
-    std::shared_ptr<NetworkDescription> mainNetworkDescription;
+    storeWeightlessCacheAttribute(model);

     _logger.debug("compile start");
-    const auto starts_with = [](const std::string& str, const std::string& prefix) {
-        return str.substr(0, prefix.size()) == prefix;
-    };
-    const auto isInit = [&](std::string name) {
-        return starts_with(name, "init");
-    };
-
-    const auto isMain = [&](std::string name) {
-        return starts_with(name, "main");
-    };
-
-    Config localConfig = config;
+    FilteredConfig localConfig = config;
     if (!localConfig.has()) {
         localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}});
     }
@@ -157,37 +183,89 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr
     if (localConfig.get() >= ov::log::Level::INFO) {
         compile_model_mem_start = get_peak_memory_usage();
     }
+
+    std::vector<GraphDescriptor> initGraphDescriptors;
+    std::vector<ov::Tensor> tensorsInits;
+    std::vector<NetworkMetadata> initNetworkMetadata;
+    std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions;
+
+    ov::Tensor tensorMain;
+    GraphDescriptor mainGraphDesc;
+    NetworkMetadata mainNetworkMetadata;
+    std::shared_ptr<NetworkDescription> mainNetworkDescription;
+
     switch (localConfig.get()) {
     case ov::intel_npu::WSVersion::ONE_SHOT: {
         std::vector<std::shared_ptr<NetworkDescription>> initMainNetworkDescriptions =
             _compiler->compileWsOneShot(model, localConfig);
-#if 0  // TODO: it is not clear whether we should change the name
-        OPENVINO_ASSERT(isMain(initMainNetworkDescriptions.back()->metadata.name),
-                        "Unexpected network name for main:",
-                        initMainNetworkDescriptions.back()->metadata.name);
-#endif
-
         mainNetworkDescription = initMainNetworkDescriptions.back();
         initMainNetworkDescriptions.pop_back();
+        OPENVINO_ASSERT(initMainNetworkDescriptions.size() > 0,
+                        "initMainNetworkDescriptions must not be empty after the main network description has been "
+                        "extracted!");
         initNetworkDescriptions = std::move(initMainNetworkDescriptions);
+
+        tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork);
+        if (_zeGraphExt) {
+            // Depending on the config, we may get an error when trying to
+            // get the graph handle from the compiled network
+            try {
+                mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size());
+                mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc);
+            } catch (...) {
+                _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not "
+                             "allowed. Only exports are available");
+            }
+        }
+
+        initGraphDescriptors.reserve(initNetworkDescriptions.size());
+        tensorsInits.reserve(initNetworkDescriptions.size());
+        initNetworkMetadata.reserve(initNetworkDescriptions.size());
+        for (auto& networkDesc : initNetworkDescriptions) {
+            ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork);
+            GraphDescriptor initGraphDesc;
+            NetworkMetadata initNetworkMeta;
+            if (_zeGraphExt) {
+                try {
+                    initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
+                    initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc);
+                } catch (...) {
+                    // Best effort: leave the descriptor and metadata empty; only exports remain available.
+                }
+            }
+
+            initGraphDescriptors.push_back(initGraphDesc);
+            tensorsInits.push_back(std::move(tensor));
+            initNetworkMetadata.push_back(std::move(initNetworkMeta));
+        }
     } break;
     case ov::intel_npu::WSVersion::ITERATIVE: {
+        OPENVINO_ASSERT(_zeGraphExt,
+                        "The \"iterative\" implementation of the weights separation feature requires a Level Zero "
+                        "graph handle to compile a model.");
+
+        // The state of the model needs to be reset every iteration
         const std::shared_ptr<ov::Model> originalModel = model->clone();
         std::shared_ptr<ov::Model> targetModel = model;
         size_t i = 0;

         while (auto networkDescription = std::make_shared<NetworkDescription>(
                    _compiler->compileWsIterative(targetModel, localConfig, i++))) {
-            if (isInit(networkDescription->metadata.name)) {
-                initNetworkDescriptions.push_back(networkDescription);
+            ov::Tensor tensor = make_tensor_from_vector(networkDescription->compiledNetwork);
+            GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
+            NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc);
+
+            if (isInitMetadata(networkDescription->metadata)) {
                 targetModel = originalModel->clone();
+                initGraphDescriptors.push_back(graphDesc);
+                tensorsInits.push_back(std::move(tensor));
+                initNetworkMetadata.push_back(std::move(networkMetadata));
+                initNetworkDescriptions.push_back(networkDescription);
                 continue;
             }

-            OPENVINO_ASSERT(isMain(networkDescription->metadata.name),
-                            "Unexpected network name: ",
-                            networkDescription->metadata.name);
+            tensorMain = std::move(tensor);
+            mainGraphDesc = graphDesc;
+            mainNetworkMetadata = std::move(networkMetadata);
             mainNetworkDescription = std::move(networkDescription);
             break;
         }
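In the ONE_SHOT branch above, compileWsOneShot() returns the init schedules followed by the main schedule in a single vector, and the code peels the main entry off the back before asserting that at least one init remains. A standalone sketch of that ordering contract — Schedule is a placeholder type, not the plugin's NetworkDescription:

// Sketch: split a batch of compiled schedules into inits + main, assuming the
// convention above that the main schedule is the vector's last element.
#include <cassert>
#include <string>
#include <utility>
#include <vector>

struct Schedule {
    std::string name;  // illustrative payload only
};

std::pair<std::vector<Schedule>, Schedule> splitInitsAndMain(std::vector<Schedule> schedules) {
    assert(!schedules.empty() && "the compiler must return at least the main schedule");
    Schedule main = std::move(schedules.back());
    schedules.pop_back();
    // What remains are the init schedules; the patch above additionally
    // asserts that at least one init is left after the pop.
    return {std::move(schedules), std::move(main)};
}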
@@ -208,44 +286,6 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr
-    ov::Tensor tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork);
-    GraphDescriptor mainGraphDesc;
-    NetworkMetadata mainNetworkMetadata;
-    if (_zeGraphExt) {
-        // Depending on the config, we may get an error when trying to
-        // get the graph handle from the compiled network
-        try {
-            mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size());
-            mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc);
-        } catch (...) {
-            _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not "
-                         "allowed. Only exports are available");
-        }
-    }
-
-    std::vector initGraphDescriptors;
-    std::vector tensorsInits;
-    std::vector initNetworkMetadata;
-    initGraphDescriptors.reserve(initNetworkDescriptions.size());
-    tensorsInits.reserve(initNetworkDescriptions.size());
-    initNetworkMetadata.reserve(initNetworkDescriptions.size());
-    for (auto& networkDesc : initNetworkDescriptions) {
-        ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork);
-        GraphDescriptor initGraphDesc;
-        NetworkMetadata initNetworkMeta;
-        if (_zeGraphExt) {
-            try {
-                initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size());
-                initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc);
-            } catch (...) {
-            }
-        }
-
-        initGraphDescriptors.push_back(initGraphDesc);
-        tensorsInits.push_back(std::move(tensor));
-        initNetworkMetadata.push_back(std::move(initNetworkMeta));
-    }
-
     return std::make_shared(
         _zeGraphExt,
         _zeroInitStruct,
@@ -276,6 +316,13 @@ std::shared_ptr PluginCompilerAdapter::parse(
         mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size());
         mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc);
         _logger.debug("main schedule parse end");
+        if (model) {
+            mainNetworkMetadata.name = model.value()->get_friendly_name();
+        } else {
+            _logger.warning("No model was provided; networkMeta.name stays empty in parse!");
+        }
+    } else {
+        _logger.warning("No zeGraphExt; the metadata coming from the VCL compiler is empty.");
     }

     // exporting the blob when we get it from cache or ov::hint::compiled_blob property
@@ -338,15 +385,55 @@ uint32_t PluginCompilerAdapter::get_version() const {
 }

 std::vector<std::string> PluginCompilerAdapter::get_supported_options() const {
-    // PluginCompiler has all the same options as plugin
-    // Returing empty string to let the plugin fallback to legacy registration
-    return {};
+    // For VCL, we can return the supported options from the compiler
+    VCLCompilerImpl* vclCompiler = dynamic_cast<VCLCompilerImpl*>(_compiler.operator->());
+    if (vclCompiler == nullptr) {
+        // If _compiler cannot be cast to VCLCompilerImpl, it should be using the MLIR library.
+        // PluginCompiler has all the same options as the plugin.
+        // Returning an empty list to let the plugin fall back to legacy registration.
+        _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning empty supported options.");
+        return {};
+    }
+    std::vector<char> options;
+    if (!vclCompiler->get_supported_options(options)) {
+        _logger.warning("VCLCompilerImpl get_supported_options failed. Returning empty supported options.");
+        return {};
+    }
+
+    if (options.empty()) {
+        _logger.warning("get_supported_options returned empty options.");
+        return {};
+    }
+
+    std::string compilerOptionsStr(options.data(), options.size());
+    _logger.debug("VCLCompilerImpl returned supported_options: %s", compilerOptionsStr.c_str());
+    // Split the whitespace-separated option names into a vector
+    std::istringstream suppstream(compilerOptionsStr);
+    std::vector<std::string> compilerOpts = {};
+    std::string option;
+    while (suppstream >> option) {
+        compilerOpts.push_back(option);
+    }
+    return compilerOpts;
 }
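The option list arrives from the compiler as one whitespace-separated character buffer; the code above splits it with istringstream extraction. The same mechanism in isolation, with illustrative option names:

// Sketch: turn a whitespace-separated option buffer, as returned by the
// compiler above, into a vector of option names.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> splitOptions(const std::string& buffer) {
    std::istringstream stream(buffer);
    std::vector<std::string> options;
    std::string option;
    while (stream >> option) {  // operator>> skips any run of whitespace
        options.push_back(option);
    }
    return options;
}

int main() {
    // Illustrative names only; the real list comes from the compiler.
    for (const auto& opt : splitOptions("NPU_PLATFORM NPU_STEPPING LOG_LEVEL")) {
        std::cout << opt << '\n';
    }
}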
 bool PluginCompilerAdapter::is_option_supported(std::string optname) const {
-    // This functions has no utility in PluginCompiler
-    // returning false for any request to avoid the option of spaming the plugin
-    return false;
+    VCLCompilerImpl* vclCompiler = dynamic_cast<VCLCompilerImpl*>(_compiler.operator->());
+    if (vclCompiler == nullptr) {
+        // If _compiler cannot be cast to VCLCompilerImpl, it should be using the MLIR library.
+        // This function has no utility in PluginCompiler;
+        // returning false for any request to avoid spamming the plugin.
+        _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning false for the check.");
+        return false;
+    }
+
+    if (vclCompiler->is_option_supported(optname)) {
+        _logger.debug("Option %s is supported by VCLCompilerImpl", optname.c_str());
+        return true;
+    } else {
+        _logger.debug("Option %s is not supported by VCLCompilerImpl", optname.c_str());
+        return false;
+    }
 }

 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp
index 1d8549f57f24b8..ec74095e410105 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp
@@ -275,6 +275,10 @@ std::pair>> WeightlessGraph::expor
 }

 void WeightlessGraph::initialize(const Config& config) {
+    if (!_zeroInitStruct) {
+        _wgLogger.warning("_zeroInitStruct is nullptr!");
+        return;
+    }
     // Simplified version for init schedules
     const size_t numberOfInits = _initsGraphDesc.size();
     _initsCommandQueueOrdinals.resize(numberOfInits);
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp
new file mode 100644
index 00000000000000..987ff372ea8532
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp
@@ -0,0 +1,42 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "weightless_utils.hpp"
+
+#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
+#include "openvino/op/constant.hpp"
+
+namespace intel_npu {
+/**
+ * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon
+ * serialization.
+ * @details Constant nodes (weights) may contain as metadata the "WeightlessCacheAttribute", i.e. information
+ * regarding the offset of the weights within the binary file, as well as the original size and precision. This
+ * information is required within the "weights separation" flow, therefore this function is here to store it.
+ * @note Not calling this function in the weights separation flow would lead to this information being lost upon
+ * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent
+ * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be
+ * misinformed and lookups of weights offsets could fail.
+ *
+ * @param model Both source and target.
+ */
+void storeWeightlessCacheAttribute(const std::shared_ptr<ov::Model>& model) {
+    size_t constantId = 0;
+    for (auto&& node : model->get_ordered_ops()) {
+        if (ov::is_type<ov::op::v0::Constant>(node)) {
+            ov::RTMap& runtimeInfoMap = node->get_rt_info();
+            const auto& weightlessCacheAttrIt =
+                runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static());
+
+            const std::string constantIdString = std::to_string(constantId++);
+            if (weightlessCacheAttrIt != runtimeInfoMap.end()) {
+                auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as<ov::WeightlessCacheAttribute>();
+                model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString);
+                model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString);
+                model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString);
+            }
+        }
+    }
+}
+}  // namespace intel_npu
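A caller-side sketch of how is_option_supported() above could gate what is forwarded to the compiler versus kept on the plugin's legacy path — the adapter type, wiring, and option names here are illustrative, not the plugin's actual call sites:

// Sketch: forward a config entry only when the compiler reports support,
// mirroring the intent of is_option_supported() above. All names assumed.
#include <map>
#include <string>

struct CompilerAdapter {
    bool is_option_supported(std::string optname) const {
        return optname == "NPU_STEPPING";  // stand-in for the VCL query
    }
};

void forwardIfSupported(const CompilerAdapter& adapter,
                        std::map<std::string, std::string>& compilerConfig,
                        const std::string& name,
                        const std::string& value) {
    if (adapter.is_option_supported(name)) {
        compilerConfig[name] = value;  // compiler understands it: pass through
    }
    // otherwise: keep it plugin-side (legacy registration path)
}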
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
index 52e8e4a3455540..50b042f46f9b26 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
@@ -252,7 +252,7 @@ void ZeGraphExtWrappers::initializeGraphThroughCommandList(ze_graph_handle_t gra
 }

 // Parse the result string of query from format <name_0><name_1><name_2> to unordered_set of string
-static std::unordered_set<std::string> parseQueryResult(std::vector<char>& data) {
+std::unordered_set<std::string> parseQueryResult(std::vector<char>& data) {
     std::string dataString(data.begin(), data.end());
     std::unordered_set<std::string> result;
     size_t i = 0, start = 0;
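parseQueryResult() consumes the "<name_0><name_1>..." format mentioned in the comment above. A standalone rewrite of that parsing for illustration — not the plugin's exact implementation, which scans the buffer by index:

// Sketch: parse a "<name_0><name_1>..." query result into a set of names.
#include <iostream>
#include <string>
#include <unordered_set>

std::unordered_set<std::string> parseAngleBracketList(const std::string& data) {
    std::unordered_set<std::string> result;
    size_t pos = 0;
    while ((pos = data.find('<', pos)) != std::string::npos) {
        const size_t end = data.find('>', pos);
        if (end == std::string::npos) {
            break;  // malformed tail: ignore the unterminated entry
        }
        result.insert(data.substr(pos + 1, end - pos - 1));
        pos = end + 1;
    }
    return result;
}

int main() {
    for (const auto& name : parseAngleBracketList("<Add><MatMul><Softmax>")) {
        std::cout << name << '\n';
    }
}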
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 97c6a7e09ceeac..a9345390a4823c 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -670,20 +670,44 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config");
     auto localConfig = fork_local_config(localPropertiesMap, compiler);

-    const auto set_cache_dir = localConfig.get();
-    if (!set_cache_dir.empty()) {
-        const auto compilerType = localConfig.get();
-        if (compilerType == ov::intel_npu::CompilerType::PLUGIN) {
-            OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type");
-        }
-    }
+    // const auto set_cache_dir = localConfig.get();
+    // if (!set_cache_dir.empty()) {
+    //     const auto compilerType = localConfig.get();
+    //     if (compilerType == ov::intel_npu::CompilerType::PLUGIN) {
+    //         OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type");
+    //     }
+    // }

     const auto platform =
         utils::getCompilationPlatform(localConfig.get(),
                                       localConfig.get(),
                                       _backend == nullptr ? std::vector() : _backend->getDeviceNames());
     auto device = _backend == nullptr ? nullptr : _backend->getDevice(localConfig.get());
-    localConfig.update({{ov::intel_npu::platform.name(), platform}});
+    std::cout << " ==plugin check =0===fin=> the get platform is " << platform << std::endl;
+    std::cout << " ==plugin check =1====> localConfig.get() is " << localConfig.get() << std::endl;
+    std::cout << " ==plugin check =2====> localConfig.get() is " << localConfig.get() << std::endl;
+    auto name_vector = _backend == nullptr ? std::vector() : _backend->getDeviceNames();
+    std::cout << " ===plugin check=4====> name_vector's size is " << name_vector.size() << std::endl;
+    if (name_vector.size() == 0) {
+        std::cout << " =====> _backend->getDeviceNames() size is zero " << std::endl;
+    } else {
+        for (size_t i = 0; i < name_vector.size(); ++i) {
+            std::cout << " =====> _backend->getDeviceNames() name_vector[" << i << "] is " << name_vector[i]
+                      << std::endl;
+        }
+    }
+
+    std::cout << " =====> updating the platform manually: " << platform << std::endl;
+    const auto compilerType = localConfig.get();
+    if (compilerType == ov::intel_npu::CompilerType::PLUGIN) {
+        std::cout << " =====> the compilerType is PLUGIN " << std::endl;
+        auto deviceBeforeCompilerCreate = _backend == nullptr ? nullptr : _backend->getDevice();
+        std::string deviceName = deviceBeforeCompilerCreate != nullptr ? deviceBeforeCompilerCreate->getName() : "";
+        std::cout << " =====> the deviceName before compiler create is " << deviceName << "=====" << std::endl;
+        localConfig.update({{ov::intel_npu::platform.name(), deviceName}});
+    } else if (compilerType == ov::intel_npu::CompilerType::DRIVER) {
+        localConfig.update({{ov::intel_npu::platform.name(), platform}});
+    }

     auto updateBatchMode = [&](ov::intel_npu::BatchMode mode) {
         std::stringstream strStream;
@@ -790,11 +814,13 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     try {
         _logger.debug("performing compile");
-
+        std::cout << " ======> before compile, print the config: " << localConfig.toString() << std::endl;
         // Determine which model to use
-        auto modelToCompile = successfullyDebatched ? batchedModel : model->clone();
+        auto modelToCompile = successfullyDebatched ? batchedModel : model->clone();
+        std::cout << " ==========plugin-check successfullyDebatched is " << successfullyDebatched << std::endl;

         if (successfullyDebatched && localConfig.get() == ov::hint::PerformanceMode::LATENCY) {
+            std::cout << " ====line 805======graph = compileWithConfig(modelToCompile, modifiedConfig); " << std::endl;
             _logger.info("Override performance mode to THROUGHPUT for compilation");
             auto modifiedConfig = localConfig;  // Copy only when needed
@@ -804,10 +830,11 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
             graph = compileWithConfig(modelToCompile, modifiedConfig);
         } else {
+            std::cout << " ====line 809======graph = compileWithConfig(modelToCompile, localConfig); " << std::endl;
             graph = compileWithConfig(modelToCompile, localConfig);  // No copy
         }
     } catch (const std::exception& ex) {
-        OPENVINO_THROW(ex.what());
+        OPENVINO_THROW(ex.what());  // rethrow the compiler exception with its original message
     } catch (...) {
         _logger.error("Unexpected exception");
         OPENVINO_THROW("NPU plugin: got an unexpected exception from compiler");
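The try block above copies localConfig only on the branch that mutates it for the THROUGHPUT override, leaving the common path copy-free. The same copy-only-when-needed pattern in isolation — Config and compileWithConfig here are stand-ins, not the plugin's types:

// Sketch: copy the configuration only on the branch that needs to mutate it,
// as the compile path above does for the performance-mode override.
#include <map>
#include <string>

using Config = std::map<std::string, std::string>;

int compileWithConfig(const Config& config) {
    return static_cast<int>(config.size());  // placeholder for real compilation
}

int compileMaybeOverridden(const Config& localConfig, bool overridePerfMode) {
    if (overridePerfMode) {
        Config modifiedConfig = localConfig;  // copy only when needed
        modifiedConfig["PERFORMANCE_HINT"] = "THROUGHPUT";
        return compileWithConfig(modifiedConfig);
    }
    return compileWithConfig(localConfig);  // no copy on the common path
}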