Skip to content

Commit 0d0dee6

Browse files
e-ddykim and sungeunk authored
[GPU] OneDNN 3.8 fix (#30312)
### Details:
- Updates OneDNN for the GPU plugin to v3.8.
- Updates the quantize post-op to preserve its original layout.
- Updates the code so that the OneDNN engine is not serialized — OneDNN v3.8 has an issue with serialization of the OneDNN engine. This will be reverted after the issue is fixed.

### Tickets:
- 164106

---------

Co-authored-by: Kim, SungEun <[email protected]>
1 parent 2695fc7 commit 0d0dee6

File tree

5 files changed

+19
-53
lines changed

5 files changed

+19
-53
lines changed

src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -92,20 +92,6 @@ struct gemm_onednn : typed_primitive_onednn_impl<gemm> {
9292

9393
bool batched_dims_can_be_removed = false;
9494

95-
if (in0_l.count() != 0 && in1_l.count() != 0) {
96-
size_t in0_batched_size = in0_l.count() / (in0_l.spatial(0) * in0_l.spatial(1));
97-
size_t in1_batched_size = in1_l.count() / (in1_l.spatial(0) * in1_l.spatial(1));
98-
size_t out_batched_size = out_l.count() / (out_l.spatial(0) * out_l.spatial(1));
99-
100-
batched_dims_can_be_removed = in0_batched_size == 1 && in1_batched_size == 1 && out_batched_size == 1;
101-
}
102-
103-
if (gemm_with_bias) {
104-
const auto& bias_l = in_layouts[2];
105-
size_t bias_batched_size = bias_l.count() / (bias_l.spatial(0) * bias_l.spatial(1));
106-
batched_dims_can_be_removed &= bias_batched_size == 1;
107-
}
108-
10995
size_t rank = cldnn::format::dimension(out_l.format);
11096

11197
in0_dt = onednn::convert_data_type(in0_l.data_type);

src/plugins/intel_gpu/src/graph/program_node.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,6 +1521,15 @@ void program_node::create_onednn_primitive_attributes(
15211521
memory_offset++;
15221522
};
15231523

1524+
auto resize_layout_for_fc = [](const program_node *node, layout& in_layout) {
1525+
if (node->is_type<fully_connected>()) {
1526+
auto input_size = node->as<fully_connected>().get_primitive()->input_size;
1527+
auto new_pshape = in_layout.get_partial_shape();
1528+
new_pshape.resize(input_size);
1529+
in_layout.set_partial_shape(new_pshape);
1530+
}
1531+
};
1532+
15241533
int32_t num_sum_post_ops = 0;
15251534
for (size_t idx = 0; idx < cldnn_post_ops.size(); idx++) {
15261535
auto& desc = cldnn_post_ops[idx];
@@ -1582,8 +1591,7 @@ void program_node::create_onednn_primitive_attributes(
15821591
new_layout.set_partial_shape(new_input_pshape);
15831592
in = new_layout;
15841593
}
1585-
size_t in_batched_size = in.count() / (in.spatial(0) * in.spatial(1));
1586-
dnnl::memory::dims dims = onednn::convert_gemm_tensor(in.get_tensor(), rank, in_batched_size == 1);
1594+
dnnl::memory::dims dims = onednn::convert_gemm_tensor(in.get_tensor(), rank, false);
15871595
dnnl::memory::data_type dt = onednn::convert_data_type(in.data_type);
15881596
dnnl::memory::format_tag fmt = onednn::convert_gemm_data_format(dims, in.format);
15891597
post_ops.append_binary(alg, dnnl::memory::desc(dims, dt, fmt));
@@ -1648,6 +1656,7 @@ void program_node::create_onednn_primitive_attributes(
16481656
update_onednn_post_op_list(onednn_post_op_type::eltwise_linear, empty_mem);
16491657
} else {
16501658
auto in_scale = get_input_layout(dep_idx++);
1659+
resize_layout_for_fc(this, in_scale);
16511660
dnnl::memory::desc in_scale_desc = onednn::layout_to_memory_desc(in_scale, onednn::get_default_data_format(in_scale));
16521661
post_ops.append_binary(dnnl::algorithm::binary_mul, in_scale_desc);
16531662
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, onednn::get_default_data_format(in_scale), false,
@@ -1660,6 +1669,7 @@ void program_node::create_onednn_primitive_attributes(
16601669
update_onednn_post_op_list(onednn_post_op_type::eltwise_linear, empty_mem);
16611670
} else {
16621671
auto in_shift = get_input_layout(dep_idx++);
1672+
resize_layout_for_fc(this, in_shift);
16631673
dnnl::memory::desc in_shift_desc = onednn::layout_to_memory_desc(in_shift, onednn::get_default_data_format(in_shift));
16641674
post_ops.append_binary(dnnl::algorithm::binary_add, in_shift_desc);
16651675
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, onednn::get_default_data_format(in_shift), false,
@@ -1692,6 +1702,7 @@ void program_node::create_onednn_primitive_attributes(
16921702
update_onednn_post_op_list(onednn_post_op_type::eltwise_linear, empty_mem);
16931703
} else {
16941704
auto out_scale = get_input_layout(dep_idx++);
1705+
resize_layout_for_fc(this, out_scale);
16951706
dnnl::memory::desc out_scale_desc = onednn::layout_to_memory_desc(out_scale, onednn::get_default_data_format(out_scale));
16961707
post_ops.append_binary(dnnl::algorithm::binary_mul, out_scale_desc);
16971708
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, onednn::get_default_data_format(out_scale), false,
@@ -1705,6 +1716,7 @@ void program_node::create_onednn_primitive_attributes(
17051716
update_onednn_post_op_list(onednn_post_op_type::eltwise_linear, empty_mem);
17061717
} else {
17071718
auto out_shift = get_input_layout(dep_idx++);
1719+
resize_layout_for_fc(this, out_shift);
17081720
dnnl::memory::desc out_shift_desc = onednn::layout_to_memory_desc(out_shift, onednn::get_default_data_format(out_shift));
17091721
post_ops.append_binary(dnnl::algorithm::binary_add, out_shift_desc);
17101722
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, onednn::get_default_data_format(out_shift), false,

src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -322,10 +322,9 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex
322322

323323
#ifdef ENABLE_ONEDNN_FOR_GPU
324324
using namespace dnnl::impl::gpu::intel::jit;
325-
ngen::HW hw = ngen::HW::Unknown;
326-
ngen::Product product = {ngen::ProductFamily::Unknown, 0};
327-
generator_t<ngen::HW::Unknown>::detectHWInfo(context.get(), device.get(), hw, product);
328-
info.arch = convert_ngen_arch(hw);
325+
ngen::Product product = ngen::OpenCLCodeGenerator<ngen::HW::Unknown>::detectHWInfo(context.get(), device.get());
326+
info.arch = convert_ngen_arch(ngen::getCore(product.family));
327+
329328
// We change the value of this flag to avoid OneDNN usage for the platforms unknown to OneDNN
330329
// This is required to guarantee some level of forward compatibility for the new HW generations
331330
// as OneDNN code generators are not generic and typically requires some updates for the new architectures

src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -64,38 +64,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) {
6464
auto casted = std::dynamic_pointer_cast<ocl_device>(_device);
6565
OPENVINO_ASSERT(casted, "[GPU] Invalid device type stored in ocl_engine");
6666

67-
const auto& cache_dir = config.get_cache_dir();
68-
if (cache_dir.empty()) {
69-
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
70-
} else {
71-
// Use cached blob
72-
auto path = cache_dir;
73-
if (path.back() != '/' && path.back() != '\\') {
74-
path += "/";
75-
}
76-
77-
auto blob_id = dnnl::ocl_interop::get_engine_cache_blob_id(casted->get_device().get());
78-
if (blob_id.empty()) {
79-
// Create engine without cache_blob
80-
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
81-
return;
82-
}
83-
84-
std::string id_str(blob_id.begin(), blob_id.end());
85-
size_t hash = std::hash<std::string>()(id_str);
86-
path = path + std::to_string(hash) + ".onednn.cl_cache";
87-
88-
auto onednn_cache_blob = ov::util::load_binary(path);
89-
if (onednn_cache_blob.empty()) {
90-
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
91-
92-
onednn_cache_blob = dnnl::ocl_interop::get_engine_cache_blob(*_onednn_engine);
93-
ov::intel_gpu::save_binary(path, onednn_cache_blob);
94-
} else {
95-
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get(),
96-
onednn_cache_blob));
97-
}
98-
}
67+
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
9968
}
10069
}
10170

0 commit comments

Comments (0)