35 changes: 30 additions & 5 deletions src/plugins/intel_gpu/src/runtime/memory_pool.cpp
@@ -150,6 +150,18 @@ void memory_pool::release_memory(memory* mem, const size_t& unique_id, primitive
#endif
}

static int get_feature_block_size(const cldnn::format& fmt) {
const auto& order = cldnn::format::internal_order(fmt);
int f_bs = 1;
for (const auto& [dim, bs] : cldnn::format::block_sizes(fmt)) {
if (dim < order.size() && order[dim] == 'f') {
f_bs = static_cast<int>(bs);
break;
}
}
return f_bs;
}

memory::ptr memory_pool::get_from_non_padded_pool(const layout& layout,
const primitive_id& prim_id,
size_t unique_id,
@@ -159,15 +171,22 @@ memory::ptr memory_pool::get_from_non_padded_pool(const layout& layout,
bool reset,
bool is_dynamic) {
const auto layout_bytes_count = layout.bytes_count();
const int f_block_size = get_feature_block_size(layout.format);
auto it = _non_padded_pool.lower_bound(layout_bytes_count);
while (it != _non_padded_pool.end()) {
if ((!is_dynamic || (layout_bytes_count > it->second._memory->get_layout().bytes_count() * _mem_pool_util_threshold)) &&
const auto& mem_layout = it->second._memory->get_layout();
if ((!is_dynamic || (layout_bytes_count > mem_layout.bytes_count() * _mem_pool_util_threshold)) &&
(it->second._network_id == network_id &&
it->second._type == type &&
it->second._memory->get_layout().format != format::fs_b_yx_fsv32 &&
mem_layout.format != format::fs_b_yx_fsv32 &&
layout.format != format::fs_b_yx_fsv32 &&
((layout.format != format::b_fs_yx_fsv32 && layout.format != format::b_fs_zyx_fsv32) ||
(layout.feature() % 32 == 0)) &&
#ifdef ENABLE_ONEDNN_FOR_GPU
(!format::is_blocked(layout.format) || layout.feature() % f_block_size == 0 ||
(mem_layout.format == layout.format &&
mem_layout.feature() % f_block_size == layout.feature() % f_block_size)) &&
#endif // ENABLE_ONEDNN_FOR_GPU
!has_conflict(it->second._users, restrictions))) {
it->second._users.insert(memory_user(MEM_USER(unique_id, network_id, prim_id, layout_bytes_count)));
auto ret_mem = _engine->reinterpret_buffer(*it->second._memory, layout);
@@ -202,17 +221,23 @@ memory::ptr memory_pool::get_from_padded_pool(const layout& layout,
uint32_t network_id,
const memory_restricter<uint32_t>& restrictions,
allocation_type type) {
const int f_block_size = get_feature_block_size(layout.format);
auto first_level_cache = _padded_pool.find(layout);
if (first_level_cache != _padded_pool.end()) {
for (auto& rec_list : first_level_cache->second) {
const auto& mem_layout = rec_list._memory->get_layout();
if (rec_list._network_id == network_id &&
rec_list._type == type &&
((layout.format != format::b_fs_yx_fsv32 && layout.format != format::b_fs_zyx_fsv32) ||
(layout.feature() % 32 == 0)) &&
#ifdef ENABLE_ONEDNN_FOR_GPU
(!format::is_blocked(layout.format) || layout.feature() % f_block_size == 0 ||
mem_layout.feature() % f_block_size == layout.feature() % f_block_size) &&
#endif // ENABLE_ONEDNN_FOR_GPU
// TODO: check if this condition always correct
layout.feature() <= rec_list._memory->get_layout().feature() &&
layout.batch() <= rec_list._memory->get_layout().batch() &&
rec_list._memory->get_layout().format != format::fs_b_yx_fsv32 &&
layout.feature() <= mem_layout.feature() &&
layout.batch() <= mem_layout.batch() &&
mem_layout.format != format::fs_b_yx_fsv32 &&
layout.format != format::fs_b_yx_fsv32 &&
!has_conflict(rec_list._users, restrictions)) {
auto ret_mem = _engine->reinterpret_buffer(*(rec_list._memory), layout);
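Reviewer note, not part of the diff: get_feature_block_size() walks the format's block descriptors and returns the block size along the feature ('f') axis, or 1 for unblocked formats. Both pool lookups now use it so that a blocked layout whose feature count is not a multiple of the block size only reuses pooled memory that leaves the same tail in its last feature block (and, for the non-padded pool, has the same format). A minimal standalone sketch of that condition, with plain integers standing in for cldnn::layout and illustrative names only:

#include <iostream>

// Sketch of the reuse guard: may a blocked layout with req_feature features
// be mapped onto pooled memory that was laid out for pool_feature features?
bool may_reuse_blocked(int req_feature, int pool_feature, int f_block_size, bool same_format) {
    if (f_block_size <= 1)                 // format is not blocked along 'f'
        return true;
    if (req_feature % f_block_size == 0)   // request fills whole blocks, no tail
        return true;
    // Otherwise both layouts must leave the same remainder in the last block.
    return same_format && (pool_feature % f_block_size == req_feature % f_block_size);
}

int main() {
    const int fsv = 16;  // feature block size of b_fs_yx_fsv16
    std::cout << may_reuse_blocked(88, 97, fsv, true) << '\n';   // 0: residues 8 vs 1 differ
    std::cout << may_reuse_blocked(88, 104, fsv, true) << '\n';  // 1: both leave a tail of 8
    std::cout << may_reuse_blocked(96, 97, fsv, true) << '\n';   // 1: 96 is block-aligned
}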
57 changes: 57 additions & 0 deletions src/plugins/intel_gpu/tests/unit/test_cases/memory_test.cpp
@@ -642,6 +642,57 @@ class memory_pool: public ::testing::Test {
std::static_pointer_cast<fully_connected_inst>(network.get_primitive("relu2"))->output_memory_ptr()->buffer_ptr());
}
}

#ifdef ENABLE_ONEDNN_FOR_GPU
void test_static_reuse_unaligned_feature() {
auto& engine = get_test_engine();
if (!engine.get_device_info().supports_immad)
return;

const int32_t x = 64;
const int32_t y = 64;
const int32_t f_can = 97;
const int32_t f_req = 88;

auto l_in = layout{ ov::PartialShape{1, f_can, y, x}, data_types::f16, format::bfyx };
auto l_can_blk = layout{ ov::PartialShape{1, f_can, y, x}, data_types::f16, format::b_fs_yx_fsv16 };
auto l_can_pln = layout{ ov::PartialShape{1, f_can, y, x}, data_types::f16, format::bfyx };
auto l_req_blk = layout{ ov::PartialShape{1, f_req, y, x}, data_types::f16, format::b_fs_yx_fsv16 };
auto m_in = engine.allocate_memory(l_in);

topology topology(
input_layout("input", l_in),
reorder("can_a", input_info("input"), l_can_blk),
reorder("can_b", input_info("input"), l_can_blk),
eltwise("reuse_can", { input_info("can_a"), input_info("can_b") }, eltwise_mode::sum),
reorder("can_planar", input_info("reuse_can"), l_can_pln),
activation("can_consume", input_info("can_planar"), activation_func::relu),
crop("req_crop", input_info("can_consume"),{ 1, f_req, x, y }, { 0, 0, 0, 0 }),
reorder("req_a", input_info("req_crop"), l_req_blk),
reorder("req_b", input_info("req_crop"), l_req_blk),
eltwise("reuse_req", { input_info("req_a"), input_info("req_b") }, eltwise_mode::sum),
activation("req_sink", input_info("reuse_req"), activation_func::relu)
);

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));

network network(engine, topology, config);
network.set_input_data("input", m_in);
network.execute();

auto m_can = network.get_primitive("reuse_can")->output_memory_ptr();
auto m_req = network.get_primitive("reuse_req")->output_memory_ptr();

ASSERT_NE(m_can, nullptr);
ASSERT_NE(m_req, nullptr);
ASSERT_NE(m_can->buffer_ptr(), nullptr);
ASSERT_NE(m_req->buffer_ptr(), nullptr);

EXPECT_NE(m_can->buffer_ptr(), m_req->buffer_ptr());
}
#endif // ENABLE_ONEDNN_FOR_GPU

};

TEST_F(memory_pool, basic_non_padded_relu_pipe) {
@@ -692,6 +743,12 @@ TEST_F(memory_pool, dynamic_mem_reuse_for_null_sel_impl) {
this->test_dynamic_mem_reuse_for_null_sel_impl();
}

#ifdef ENABLE_ONEDNN_FOR_GPU
TEST_F(memory_pool, test_static_reuse_unaligned_feature) {
this->test_static_reuse_unaligned_feature();
}
#endif

#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_F(memory_pool, basic_non_padded_relu_pipe_cached) {
this->test_basic_non_padded_relu_pipe(true);
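Reviewer note on the constants in test_static_reuse_unaligned_feature (my reading, not text from the PR): both reused tensors are b_fs_yx_fsv16, so their feature dimension is padded up to a multiple of 16. The reuse_can output (97 features, padded to 112) is large enough byte-wise to be handed back out for the reuse_req layout (88 features, padded to 96), but the two layouts occupy their last feature block differently (residues 1 vs 8), which is exactly the case the new guard rejects; the final EXPECT_NE asserts the pool keeps them in separate buffers. The arithmetic, as a standalone check:

#include <iostream>

int main() {
    const int fsv = 16;                       // feature block of b_fs_yx_fsv16
    const int f_can = 97, f_req = 88;
    std::cout << f_can % fsv << '\n';                      // 1  -> unaligned
    std::cout << f_req % fsv << '\n';                      // 8  -> unaligned, different residue
    std::cout << ((f_can + fsv - 1) / fsv) * fsv << '\n';  // 112 padded features for f_can
    std::cout << ((f_req + fsv - 1) / fsv) * fsv << '\n';  // 96  padded features for f_req
}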