Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
e8120dd
Creating some interfaces
razvanapetroaie Jan 3, 2026
6f21d77
Filling some basic functions
razvanapetroaie Jan 3, 2026
195eaf8
working on the CRE
razvanapetroaie Jan 3, 2026
02424d3
Maybe done CRE evaluation
razvanapetroaie Jan 3, 2026
b2dec5d
sketching BlobWriter::write
razvanapetroaie Jan 3, 2026
d381ab4
adding an optional function for providing the length of the section
razvanapetroaie Jan 4, 2026
1bb745e
added offsets table section
razvanapetroaie Jan 4, 2026
f107a10
registering the predefined capabilities
razvanapetroaie Jan 4, 2026
87a5269
moved to common, also now compilable
razvanapetroaie Jan 4, 2026
c1bddcf
starting to define the compiler schedules sections
razvanapetroaie Jan 5, 2026
3fcc969
working on graph.exports
razvanapetroaie Jan 5, 2026
7485858
Maybe done implementing the schedules sections writers
razvanapetroaie Jan 5, 2026
9349a5f
register the schedules in the blob writer
razvanapetroaie Jan 5, 2026
b690ca0
io layouts section, filled CompiledModel->export(), compilable again
razvanapetroaie Jan 5, 2026
e1e1dfb
added the batch size section
razvanapetroaie Jan 8, 2026
027f1b1
BlobWriter better API: ditched the cursor
razvanapetroaie Jan 8, 2026
9a00f21
centralized the sections IDs
razvanapetroaie Jan 8, 2026
b752c49
Starting the blobreader
razvanapetroaie Jan 8, 2026
98e4287
Almost done BlobReader::read
razvanapetroaie Jan 8, 2026
c00ef9e
Added a new persistent field: the number of sections
razvanapetroaie Jan 9, 2026
49214cc
Filled CRESection::read
razvanapetroaie Jan 9, 2026
8405a16
Filled OffsetsTableSection::read
razvanapetroaie Jan 9, 2026
0877f8c
Replaced the stop condition: NPU regions size instead of number of se…
razvanapetroaie Jan 9, 2026
0cf7206
BlobReader: switch to tensor operations only
razvanapetroaie Jan 9, 2026
ef0859f
Filled the read functions of the compiler schedules sections
razvanapetroaie Jan 9, 2026
28c8913
Filled Plugin::parse - importing the compiled model
razvanapetroaie Jan 9, 2026
eed585e
Moved the location of the blob size
razvanapetroaie Jan 13, 2026
282acd8
Filled the read method for BatchSize & IOLayouts sections
razvanapetroaie Jan 13, 2026
545f540
Registered blob readers
razvanapetroaie Jan 13, 2026
3d14fbf
Compilable and exportable
razvanapetroaie Jan 13, 2026
c86a835
TODO cleanup
razvanapetroaie Jan 14, 2026
902c8e1
Fixing the alignment of the compiler schedules
razvanapetroaie Jan 14, 2026
c5c705e
Solved the repeating export issue
razvanapetroaie Jan 14, 2026
067bcbd
Table of offsets API in the BlobReader
razvanapetroaie Jan 21, 2026
3385082
Moving the CRE near the end of the blob
razvanapetroaie Jan 21, 2026
216e97a
Cleaned up the CRE & OffsetsTable APIs
razvanapetroaie Jan 21, 2026
943bb90
BlobReader: added bound checking for all operations
razvanapetroaie Jan 21, 2026
43d3bc7
Implemented BlobReader::ger_npu_region_size properly
razvanapetroaie Jan 21, 2026
3d64946
Bound checking while evaluating the CRE
razvanapetroaie Jan 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <cinttypes>
#include <functional>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "intel_npu/common/cre.hpp"

namespace intel_npu {

/**
* @brief Reader for blobs stored inside an "ov::Tensor", exposing the content as sections.
*
* NOTE(review): only the declaration is visible in this header. Unless a statement is backed by a signature or by a
* member type, treat it as an assumption that has to be confirmed against the implementation.
*/
class BlobReader {
public:
/**
* @param source Tensor holding the blob. Only a reference is kept ("m_source" is a "std::reference_wrapper"), so the
* tensor presumably has to outlive this object - TODO confirm.
*/
BlobReader(const ov::Tensor& source);

/**
* @brief Presumably parses the source and checks the compatibility requirements found there against the given
* capabilities - TODO confirm.
*
* @param plugin_capabilities_ids Tokens identifying the capabilities supported by the plugin.
*/
void read(const std::unordered_set<CRE::Token>& plugin_capabilities_ids);

/**
* @brief Associates a reader callback with a section ID (the callbacks are stored in "m_readers", keyed by ID).
*
* @param section_id ID of the section the callback is able to parse.
* @param reader Callback receiving this reader and a size (presumably the length of the section - TODO confirm),
* returning the parsed section.
*/
void register_reader(const SectionID section_id,
std::function<std::shared_ptr<ISection>(BlobReader*, const size_t)> reader);

/**
* @brief Returns the section corresponding to the given ID.
*
* NOTE(review): the result for an ID that was not parsed is not visible here (null pointer or exception?) - confirm.
*/
std::shared_ptr<ISection> retrieve_section(const SectionID section_id);

/**
* @brief Copies "size" bytes from the source into "destination".
*
* NOTE(review): presumably reads at the current cursor position and advances the cursor - confirm.
*/
void copy_data_from_source(char* destination, const size_t size);

/**
* @brief Returns a pointer to "size" bytes of data, presumably pointing directly inside the source tensor (no
* copy) - TODO confirm. If so, the pointer is valid only as long as the source tensor is alive.
*/
const void* interpret_data_from_source(const size_t size);

/**
* @brief Returns a tensor covering "size" bytes, presumably a region of interest (ROI) of the source starting at
* the cursor - TODO confirm.
*/
ov::Tensor get_roi_tensor(const size_t size);

/**
* @brief Returns the offset recorded for the given section, if any.
*
* NOTE(review): presumably a lookup in "m_offsets_table", yielding "std::nullopt" for unknown IDs - confirm.
*/
std::optional<uint64_t> get_section_offset(const SectionID section_id) const;

/**
* @brief Returns the position of the cursor. What the position is relative to is not visible in this header.
*/
size_t get_cursor_relative_position();

/**
* @brief Moves the cursor to the given offset. What the offset is relative to is not visible in this header.
*/
void move_cursor_to_relative_position(const size_t offset);

/**
* @brief Determines the size of the NPU region of a blob provided as a stream.
*/
static size_t get_npu_region_size(std::istream& stream);

/**
* @brief Determines the size of the NPU region of a blob provided as a tensor.
*/
static size_t get_npu_region_size(const ov::Tensor& tensor);

private:
// Grants the writer access to the private state of the reader.
friend class BlobWriter;

// Non-owning reference to the tensor holding the blob.
std::reference_wrapper<const ov::Tensor> m_source;
size_t m_npu_region_size;
// Section ID -> offset.
std::unordered_map<SectionID, uint64_t> m_offsets_table;
// Section ID -> parsed section object.
std::unordered_map<SectionID, std::shared_ptr<ISection>> m_parsed_sections;
// Section ID -> callback able to parse that section, see "register_reader".
std::unordered_map<SectionID, std::function<std::shared_ptr<ISection>(BlobReader*, const size_t)>> m_readers;

// Current read position.
size_t m_cursor;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <cinttypes>
#include <memory>
#include <queue>
#include <unordered_map>
#include <unordered_set>

#include "cre.hpp"
#include "intel_npu/utils/logger/logger.hpp"

namespace intel_npu {

/**
* @brief Collects sections and writes them into an output stream.
*
* NOTE(review): only the declaration is visible in this header. Unless a statement is backed by a signature or by a
* member type, treat it as an assumption that has to be confirmed against the implementation.
*/
class BlobWriter {
public:
BlobWriter();

/**
* @brief Builds a writer starting from a reader, presumably in order to re-export an imported blob - TODO confirm.
*/
BlobWriter(const std::shared_ptr<BlobReader>& blob_reader);

/**
* @brief Registers a section to be written.
*
* NOTE(review): "m_registered_sections_ids" suggests that IDs are tracked in order to detect duplicates - confirm.
*/
void register_section(const std::shared_ptr<ISection>& section);

/**
* @brief Records the offset of a section inside the table of offsets.
*
* @param id ID of the section.
* @param offset Offset of the section. What the offset is relative to is not visible in this header.
*/
void register_offset_in_table(const SectionID id, const uint64_t offset);

/**
* @brief Writes the blob into the given stream.
*/
void write(std::ostream& stream);

/**
* @brief Appends a single token to the compatibility requirements expression.
*/
void append_compatibility_requirement(const CRE::Token requirement_token);

/**
* @brief Appends a sequence of tokens to the compatibility requirements expression.
*/
void append_compatibility_requirement(const std::vector<CRE::Token>& requirement_tokens);

/**
* @brief Returns the position of the stream, presumably relative to "m_stream_base" - TODO confirm.
*/
std::streamoff get_stream_relative_position(std::ostream& stream) const;

private:
/**
* @brief Writes a single section into the stream.
*/
void write_section(std::ostream& stream, const std::shared_ptr<ISection>& section);

// IDs of the sections registered so far.
std::unordered_set<SectionID> m_registered_sections_ids;
// Registered sections, stored in a FIFO queue.
std::queue<std::shared_ptr<ISection>> m_registered_sections;
CRE m_cre;
// Section ID -> offset.
std::unordered_map<SectionID, uint64_t> m_offsets_table;

/**
* @brief Presumably the position held by the stream when writing started, used as the reference point for the
* relative positions - TODO confirm. Holds no value by default.
*/
std::optional<std::streampos> m_stream_base = std::nullopt;

Logger m_logger;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <string>

#include "intel_npu/common/isection.hpp"
#include "openvino/core/except.hpp"

namespace intel_npu {

class InvalidCRE : public ov::Exception {};

/**
* @brief Expression made of tokens that can be checked against a set of plugin capabilities.
*
* "CRE" presumably stands for "compatibility requirements expression" - TODO confirm. The expression is stored as a
* flat sequence of tokens: the reserved values are operators/parentheses, the others presumably are capability IDs.
*
* NOTE(review): only the declaration is visible in this header, the evaluation rules (operator precedence, handling
* of malformed expressions) have to be checked against the implementation.
*/
class CRE final {
public:
// Type of a single element of the expression.
using Token = uint16_t;

// TODO should we also add "NOT"?
// Values reserved for the operators and for the parentheses of the expression.
enum ReservedToken : Token { AND = 50000, OR = 50001, OPEN = 50002, CLOSE = 50003 };

// All reserved values, gathered in a set.
static inline const std::unordered_set<Token> RESERVED_TOKENS{ReservedToken::AND,
ReservedToken::OR,
ReservedToken::OPEN,
ReservedToken::CLOSE};

/**
* @brief All capability codes known in advance. Past codes should be recorded here as well, this helps avoid code
* collision.
*/
enum PredefinedCapabilityToken : Token {
CRE_EVALUATION = 100,
ELF_SCHEDULE = 101,
BATCHING = 102,
WEIGHTS_SEPARATION = 103
};

// The set holding every predefined capability token listed above.
static inline const std::unordered_set<Token> DEFAULT_PLUGIN_CAPABILITIES_TOKENS{
PredefinedCapabilityToken::CRE_EVALUATION,
PredefinedCapabilityToken::ELF_SCHEDULE,
PredefinedCapabilityToken::BATCHING,
PredefinedCapabilityToken::WEIGHTS_SEPARATION};

CRE();

/**
* @brief Builds the object starting from an already existing sequence of tokens.
*/
CRE(const std::vector<Token>& expression);

/**
* @brief Appends a single token to the expression.
*/
void append_to_expression(const CRE::Token requirement_token);

/**
* @brief Appends a sequence of tokens to the expression.
*/
void append_to_expression(const std::vector<CRE::Token>& requirement_tokens);

/**
* @brief Returns the length of the expression. The unit (tokens or bytes) is not visible here - TODO confirm.
*/
size_t get_expression_length() const;

/**
* @brief Returns a copy of the expression (returned by value).
*/
std::vector<Token> get_expression() const;

/**
* @brief Checks the expression against the given capabilities.
*
* @param plugin_capabilities Tokens identifying the capabilities supported by the plugin.
* @return Presumably "true" if the requirements are satisfied - TODO confirm, including whether failures may be
* reported through exceptions instead.
*/
bool check_compatibility(const std::unordered_set<CRE::Token>& plugin_capabilities);

private:
// Condition that ends the evaluation of a (sub)expression, see "end_condition" and "evaluate".
// NOTE(review): "PARRENTHESIS" looks like a typo of "PARENTHESIS"; renaming requires updating the definitions too.
enum class Delimiter { PARRENTHESIS, SIZE, NOT_CAPABILITY_ID };

/**
* @brief Moves the iterator forward. The iterator is taken by reference and updated in place.
*/
void advance_iterator(std::vector<Token>::const_iterator& expression_iterator);

/**
* @brief Tells whether or not the given end condition is met at the position of the iterator.
*/
bool end_condition(const std::vector<Token>::const_iterator& expression_iterator, const Delimiter end_delimiter);

/**
* @brief Evaluates the expression starting from the position of the iterator until the end condition is met.
*
* @param expression_iterator Current position, taken by reference and presumably advanced while evaluating.
* @param plugin_capabilities Tokens identifying the capabilities supported by the plugin.
* @param end_delimiter Condition ending the evaluation.
*/
bool evaluate(std::vector<Token>::const_iterator& expression_iterator,
const std::unordered_set<CRE::Token>& plugin_capabilities,
const Delimiter end_delimiter);

// The expression, stored as a flat sequence of tokens.
std::vector<Token> m_expression;
};

/**
* @brief Section wrapping a "CRE" object, allowing it to be written into and read from a blob.
*/
class CRESection final : public ISection {
public:
/**
* @param cre The expression held by the section (stored in "m_cre", a value member).
*/
CRESection(const CRE& cre);

/**
* @brief Writes the content of the section into the stream.
*
* @param stream Destination of the content.
* @param writer The writer handling the blob. How it is used by this section is not visible in this header.
*/
void write(std::ostream& stream, BlobWriter* writer) override;

/**
* @brief Returns the length of the section, if known. The unit is presumably bytes - TODO confirm.
*/
std::optional<uint64_t> get_length() const override;

/**
* @brief Returns a copy of the expression held by the section (returned by value).
*/
CRE get_cre() const;

/**
* @brief Builds a section using the content provided by the reader. The signature matches the callback type
* accepted by the blob reader.
*
* @param blob_reader Source of the content.
* @param section_length Length of the section, presumably in bytes - TODO confirm.
*/
static std::shared_ptr<ISection> read(BlobReader* blob_reader, const size_t section_length);

private:
CRE m_cre;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#pragma once

#include "intel_npu/common/blob_writer.hpp"
#include "intel_npu/common/filtered_config.hpp"
#include "intel_npu/common/igraph.hpp"

Expand All @@ -12,7 +13,8 @@ namespace intel_npu {
class ICompilerAdapter {
public:
virtual std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config) const = 0;
const FilteredConfig& config,
const std::shared_ptr<BlobWriter>& blobWriter) const = 0;

/**
* @brief Compiles the model, weights separation enabled.
Expand All @@ -26,10 +28,12 @@ class ICompilerAdapter {
* @param config Will be passed to the compiler. Additionally, the "SEPARATE_WEIGHTS_VERSION" option will determine
* which weights separation implementation will be used. See the weights separation specific methods within
* "icompiler.hpp".
* @param blobWriter TODO
* @return A "WeightlessGraph" type of object.
*/
virtual std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config) const = 0;
const FilteredConfig& config,
const std::shared_ptr<BlobWriter>& blobWriter) const = 0;

/**
* @brief Parses the provided binary objects and returns a wrapper over the resulted L0 handles. The model may also
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
* later be used for importing the model.
*
* @param stream Where the content is placed
* @return A pair made of the size of the main binary object and an optional variable. The optional variable
* constitues the size of each init binary object if weights separation is enabled.
* @return TODO
*/
virtual std::pair<uint64_t, std::optional<std::vector<uint64_t>>> export_blob(std::ostream& stream) const = 0;
virtual uint64_t export_main_blob(std::ostream& stream) const = 0;

virtual std::vector<ov::ProfilingInfo> process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const = 0;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <cinttypes>
#include <iostream>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "openvino/runtime/tensor.hpp"

namespace intel_npu {

// TODOs:
//  - Fix the circular dependencies.
//  - Move the sections into a dedicated directory.
//  - Unique section IDs (SID): how do we enforce this without compromising modularity? Description matching?

using SectionID = uint16_t;

class BlobWriter;
class BlobReader;

/**
 * @brief IDs of the sections known in advance.
 *
 * The underlying type is fixed to "uint16_t", the type behind the "SectionID" alias, so a value that does not fit a
 * section ID is rejected at compile time. Keep the two in sync.
 */
namespace PredefinedSectionID {
enum : uint16_t {
    CRE = 100,
    OFFSETS_TABLE = 101,
    ELF_MAIN_SCHEDULE = 102,
    ELF_INIT_SCHEDULES = 103,
    IO_LAYOUTS = 104,
    BATCH_SIZE = 105,
};
}  // namespace PredefinedSectionID

/**
* @brief Interface of a blob section: an entity identified by an ID that knows how to write itself into a stream.
*/
class ISection {
public:
/**
* @param section_id The ID identifying the section.
*/
ISection(const SectionID section_id);

virtual ~ISection() = default;

/**
* @brief Writes the content of the section into the stream.
*
* @param stream Destination of the content.
* @param writer The writer handling the blob. How implementations are expected to use it is not visible here.
*/
virtual void write(std::ostream& stream, BlobWriter* writer) = 0;

// Not necessary (overriding is optional), saves some performance if provided.
// NOTE(review): the base implementation presumably returns "std::nullopt" - confirm.
virtual std::optional<uint64_t> get_length() const;

/**
* @brief Returns the ID of the section.
*/
SectionID get_section_id() const;

private:
SectionID m_section_id;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <functional>

#include "intel_npu/common/isection.hpp"

namespace intel_npu {

/**
* @brief Section wrapping a table that maps section IDs to offsets, allowing it to be written into and read from a
* blob.
*/
class OffsetsTableSection final : public ISection {
public:
/**
* @param offsets_table Mapping from section ID to offset (stored in "m_offsets_table", a value member).
*/
OffsetsTableSection(const std::unordered_map<SectionID, uint64_t>& offsets_table);

/**
* @brief Writes the content of the section into the stream.
*
* @param stream Destination of the content.
* @param writer The writer handling the blob. How it is used by this section is not visible in this header.
*/
void write(std::ostream& stream, BlobWriter* writer) override;

/**
* @brief Returns the length of the section, if known. The unit is presumably bytes - TODO confirm.
*/
std::optional<uint64_t> get_length() const override;

/**
* @brief Returns a copy of the table (returned by value).
*/
std::unordered_map<SectionID, uint64_t> get_table() const;

/**
* @brief Builds a section using the content provided by the reader. The signature matches the callback type
* accepted by the blob reader.
*
* @param blob_reader Source of the content.
* @param section_length Length of the section, presumably in bytes - TODO confirm.
*/
static std::shared_ptr<ISection> read(BlobReader* blob_reader, const size_t section_length);

private:
// Section ID -> offset.
std::unordered_map<SectionID, uint64_t> m_offsets_table;
};

} // namespace intel_npu
Loading