Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/vpux_compiler/include/vpux/compiler/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,16 @@ class CompilerImpl final : public intel_npu::ICompiler {
intel_npu::NetworkDescription compileWsIterative(const std::shared_ptr<ov::Model>& model,
const intel_npu::Config& config, size_t callIdx) const override;

// WS CiD-specific methods
// WS VCL-specific methods

/// @brief Sequentially compiles Init and Main schedules. The Main schedule is always last.
/// @brief Returns Init schedules and Main in a single call. The blobs are allocated using the provided allocator.
/// There is always exactly one Main schedule, placed at the back of the vector.
std::vector<std::shared_ptr<NetworkDescriptionView>> compileWsOneShot(const std::shared_ptr<ov::Model>& model,
const intel_npu::Config& config,
BlobAllocator& allocator) const;

/// @brief Sequentially compiles Init and Main schedules. The blob is allocated using the provided allocator. The
/// Main schedule is always last.
NetworkDescriptionView compileWsIterative(const std::shared_ptr<ov::Model>& model, const intel_npu::Config& config,
size_t callIdx, BlobAllocator& allocator) const;
};
Expand Down
35 changes: 35 additions & 0 deletions src/vpux_compiler/src/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,41 @@ std::vector<std::shared_ptr<intel_npu::NetworkDescription>> CompilerImpl::compil
return networkDescrs;
}

std::vector<std::shared_ptr<NetworkDescriptionView>> CompilerImpl::compileWsOneShot(
        const std::shared_ptr<ov::Model>& model, const intel_npu::Config& config, BlobAllocator& allocator) const {
    OV_ITT_SCOPED_TASK(itt::domains::VPUXPlugin, "CompilerImpl::compileWsOneShot");
    checkPlatformSupportedForCompilation(config.get<intel_npu::PLATFORM>());

    Logger log("vpux-compiler", getLogLevel(config));
    log.info("Start oneshot WS compilation");

    auto setup = CompilerSetup::create(config);

    // Compile every schedule (Inits + Main) in one shot for the (possibly debatched) model.
    using CompilationReturnType = std::vector<CompilationResult>;
    auto oneShotCompile = [&](const std::shared_ptr<ov::Model>& debatchedModel,
                              const std::vector<std::shared_ptr<const ov::Node>>& originalParameters,
                              const std::vector<std::shared_ptr<const ov::Node>>& originalResults,
                              const intel_npu::Config& debatchedConfig) -> CompilationReturnType {
        return ws::compileImplWsOneShot(setup, originalParameters, originalResults, debatchedModel, debatchedConfig,
                                        log);
    };
    auto [schedules, finalConfig] =
            tryCompileDebatchedModel<CompilationReturnType>(model, config, log, oneShotCompile);

    OV_ITT_TASK_CHAIN(COMPILER_IMPLEMENTATION, itt::domains::VPUXPlugin, "CompilerImpl::compileWsOneShot",
                      "exportNetwork");
    // Export each compiled schedule into an allocator-backed blob view.
    std::vector<std::shared_ptr<NetworkDescriptionView>> descriptionViews;
    descriptionViews.reserve(schedules.size());
    for (const auto& schedule : schedules) {
        descriptionViews.emplace_back(std::make_shared<NetworkDescriptionView>(
                exportNetwork(schedule.moduleOp.get(), finalConfig, log, allocator)));
    }
    OV_ITT_TASK_SKIP(COMPILER_IMPLEMENTATION);

    // Plugin will collect the compilation memory usage
    return descriptionViews;
}

intel_npu::NetworkDescription CompilerImpl::compileWsIterative(const std::shared_ptr<ov::Model>& model,
const intel_npu::Config& config, size_t callIdx) const {
OV_ITT_SCOPED_TASK(itt::domains::VPUXPlugin, "CompilerImpl::compileWsIterative");
Expand Down
6 changes: 6 additions & 0 deletions src/vpux_driver_compiler/CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
Change Log:
-----------
VPUXCompilerL0 7.6.0:
- Added a new weightless compilation API:
    vclAllocatedExecutableCreateWSOneShot
    saveVclAllocatorBlobWS
- Updated compilerTest to support the new weightless compilation API

VPUXCompilerL0 7.5.0:
- Added support for the "USE_BASE_MODEL_SERIALIZER" config option. A new deserializer has been implemented,
one that matches the "no weights copy" serializer. This option can be used to switch between the two implementations.
Expand Down
6 changes: 5 additions & 1 deletion src/vpux_driver_compiler/include/npu_driver_compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ extern "C" {
#endif

#define VCL_COMPILER_VERSION_MAJOR 7
#define VCL_COMPILER_VERSION_MINOR 5
#define VCL_COMPILER_VERSION_MINOR 6
#define VCL_PROFILING_VERSION_MAJOR 2
#define VCL_PROFILING_VERSION_MINOR 0

Expand Down Expand Up @@ -268,6 +268,10 @@ VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate2(vcl_compile
vcl_allocator2_t* allocator, uint8_t** blobBuffer,
uint64_t* blobSize);

VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreateWSOneShot(vcl_compiler_handle_t compiler,
vcl_executable_desc_t desc,
vcl_allocator2_t* allocator);

///////////////////////////////////////////////////////////////////////////////
/// @brief Destroys the executable and releases the cached blob.
VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableDestroy(vcl_executable_handle_t executable);
Expand Down
12 changes: 12 additions & 0 deletions src/vpux_driver_compiler/include/vcl_compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,18 @@ class VPUXCompilerL0 final {
*/
vpux::NetworkDescriptionView importNetwork(BuildInfo& buildInfo, vpux::BlobAllocator& allocator);

/**
* @brief Use VPUX MLIR compiler to create one shot weight-separated blob with user info
* @note Blob storage is allocated via given allocator
*
* @param buildInfo Include the model data, ioInfo, compilation configs
* @param allocator Allocator for blob storage allocation
 * @return std::vector<std::shared_ptr<vpux::NetworkDescriptionView>> Non-owning
 * views into the compiled blobs and their metadata
*/
std::vector<std::shared_ptr<vpux::NetworkDescriptionView>> importNetworkWSOneShot(BuildInfo& buildInfo,
vpux::BlobAllocator& allocator);

/**
* @brief Check if a model can be supported by current compiler
*
Expand Down
50 changes: 50 additions & 0 deletions src/vpux_driver_compiler/src/vpux_compiler_l0/vcl_bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,51 @@ vcl_result_t allocatedExecutableCreate(vcl_compiler_handle_t compiler, vcl_execu
return VCL_RESULT_SUCCESS;
}

vcl_result_t allocatedExecutableCreateWSOneShot(vcl_compiler_handle_t compiler, vcl_executable_desc_t desc,
                                                vcl_allocator2_t* allocator) {
    // Reject null handles and a missing serialized model up front.
    if (compiler == nullptr || allocator == nullptr || desc.modelIRData == nullptr) {
        return VCL_RESULT_ERROR_INVALID_ARGUMENT;
    }

    auto* compilerImpl = reinterpret_cast<VPUXDriverCompiler::VPUXCompilerL0*>(compiler);
    auto* logger = compilerImpl->getLogger();

    /// Copy the options into a std::string to avoid reading past the (possibly non-terminated) buffer
    const std::string descOptions(desc.options, desc.optionsSize);
    logger->info("config: {0}", descOptions);

    /// Build-info parser: holds the input/output settings and compilation configs
    VPUXDriverCompiler::BuildInfo buildInfo(compilerImpl);

    /// Parse the user description string (I/O info + config options)
    const vcl_result_t flagsStatus = buildInfo.prepareBuildFlags(descOptions);
    if (flagsStatus != VCL_RESULT_SUCCESS) {
        logger->outputError(formatv("Failed to prepare io info and config! DescOptions: {0}", descOptions));
        return flagsStatus;
    }

    /// Parse the serialized model data and create the model container for the compiler
    const vcl_result_t modelStatus = buildInfo.prepareModel(desc.modelIRData, desc.modelIRSize);
    if (modelStatus != VCL_RESULT_SUCCESS) {
        logger->outputError("Failed to parse model info! Incorrect format!");
        return modelStatus;
    }

    try {
        // The returned NetworkDescriptionView vector (blob views + metadata) is not
        // consumed at the VCL layer; it is only inspected for emptiness before being
        // destroyed when this scope ends.
        VCLBlobAllocator blobAllocator{allocator};
        const auto descriptions = compilerImpl->importNetworkWSOneShot(buildInfo, blobAllocator);
        if (descriptions.empty()) {
            logger->warning("Compiler successfully returned but the blob list is empty!");
        }
    } catch (const std::exception& error) {
        logger->outputError(formatv("Compiler returned msg:\n{0}", error.what()));
        return VCL_RESULT_ERROR_INVALID_ARGUMENT;
    } catch (...) {
        logger->outputError("Internal exception! Can't compile model!");
        return VCL_RESULT_ERROR_INVALID_ARGUMENT;
    }
    return VCL_RESULT_SUCCESS;
}

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -335,6 +380,11 @@ DLLEXPORT vcl_result_t vclAllocatedExecutableCreate(vcl_compiler_handle_t compil
return allocatedExecutableCreate(compiler, desc, allocator, blob, size);
}

// Exported C-ABI entry point for one-shot weight-separated compilation;
// thin shim that forwards all arguments to the internal implementation.
DLLEXPORT vcl_result_t vclAllocatedExecutableCreateWSOneShot(vcl_compiler_handle_t compiler, vcl_executable_desc_t desc,
                                                             vcl_allocator2_t* allocator) {
    return allocatedExecutableCreateWSOneShot(compiler, desc, allocator);
}

DLLEXPORT vcl_result_t vclExecutableGetSerializableBlob(vcl_executable_handle_t executable, uint8_t* blobBuffer,
uint64_t* blobSize) {
vcl_result_t ret = VCL_RESULT_SUCCESS;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -443,9 +443,12 @@ vcl_result_t BuildInfo::prepareConfig(const std::string& descOptions) {
/// Update maxtiles config with compiler desc
/// - If deviceDesc is valid, compare its tileCount with user config and use the smaller value
/// - If deviceDesc is empty, its tileCount is invalid, then just handle it according to the config
if (!pvc->isDeviceDescEmpty()) {
/// - If deviceDesc's tileCount is invalid, then skip updating maxtiles
if (!pvc->isDeviceDescEmpty() && (deviceDesc.tileCount != static_cast<uint32_t>(-1))) {
config[ov::intel_npu::max_tiles.name()] = getValidTileValue(config, deviceDesc);
logger->debug("NPU_MAX_TILES is updated to {0}", config[ov::intel_npu::max_tiles.name()]);
} else {
logger->debug("DeviceDesc is empty or tileCount is invalid, skip updating NPU_MAX_TILES");
}

/// When we use LOG_INFO, show vcl level profiling log
Expand Down
26 changes: 26 additions & 0 deletions src/vpux_driver_compiler/src/vpux_compiler_l0/vcl_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,32 @@ NetworkDescriptionView VPUXCompilerL0::importNetwork(BuildInfo& buildInfo, BlobA
});
}

std::vector<std::shared_ptr<NetworkDescriptionView>> VPUXCompilerL0::importNetworkWSOneShot(BuildInfo& buildInfo,
                                                                                            BlobAllocator& allocator) {
    StopWatch stopWatch;
    if (buildInfo.enableProfiling) {
        // Output time cost on vcl level
        stopWatch.start();
    }

    // RAII guard: reports the total compile time on every exit path (including exceptions).
    auto scoped = Scoped{[&stopWatch, &buildInfo, this]() {
        if (buildInfo.enableProfiling) {
            stopWatch.stop();
            _logger->info("Compile net time: {0} ms", stopWatch.delta_ms());
        }
    }};

    // Initialize at first use: apply the user-requested precision/layout overrides
    // to the parsed model before handing it to the compiler.
    const auto model = preprocessModel(buildInfo.model, buildInfo.inputPrecisions, buildInfo.outputPrecisions,
                                       buildInfo.inputLayouts, buildInfo.outputLayouts, _logger);

    // Isolate the MLIR thread to safely destroy MLIR thread_local objects before CiD unload
    return run_in_worker_thread_sync([&] {
        return _compiler->compileWsOneShot(model, buildInfo.parsedConfig, allocator);
    });
}

vcl_result_t VPUXCompilerL0::queryNetwork(const BuildInfo& buildInfo, VPUXQueryNetworkL0* pQueryNetwork) {
_logger->info("Start to call query function from compiler to get supported layers!");
ov::SupportedOpsMap queryNetworkResult;
Expand Down
1 change: 1 addition & 0 deletions src/vpux_driver_compiler/test/smoke/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ target_link_libraries(compilerTest
npu_driver_compiler # VCL function
npu_ov_utils # OV core function
npu_llvm_utils # CL command line
openvino::npu_logger_utils # logging utils
)
ov_add_api_validator_post_build_step(TARGET compilerTest)

Expand Down
Loading
Loading