Skip to content

Commit 682e12d

Browse files
authored
[GPU] Select new impl when propagate constant makes dynamic shape layer to constant (#34115)
### Description of the issue (symptom, root cause, how it was resolved) - Kernel compilation (compile model pass) selected a shape-agnostic kernel (generic_eltiwise_ref_sa) with input0 dynamic and output0 dynamic. But constant folding makes input0 a static shape. - The sa eltwise ref kernel needs shape_info[0:7] for input0 and shape_info[8] for output batch. But only the output shape info was provided during execution, causing an OOB access. - If a dynamic shape node is constant folded in the propagate constants pass, check each user's output layout against impl.is_dynamic(), and try to choose a new impl when they are not aligned. #### The code and line that caused this issue (if it is not changed directly) - https://github.com/openvinotoolkit/openvino/blob/b88f5e789217e164f7dab3ee4ff6e94df801ed19/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp#L113 #### Reproduction step and snapshot (if applicable. Do not attach for customer model) - $ ./benchmark_app -d GPU.1 -m ~/task/cvs173725/ssm_v2_dynamic.xml -hint none -nstreams 1 -nireq 1 -niter 1 #### Checklist - [x] Is it a proper fix? (not a workaround) - [x] Did you include test case for this fix, if necessary? - [x] Did you review existing test that can be extended to cover this scenario? Which test did you review? ### Tickets: - 173725
1 parent e319538 commit 682e12d

File tree

2 files changed

+295
-5
lines changed

2 files changed

+295
-5
lines changed

src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp

Lines changed: 101 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
#include "intel_gpu/graph/program.hpp"
1111
#include "intel_gpu/graph/network.hpp"
1212
#include "data_inst.h"
13+
#include "mutable_data_inst.h"
1314
#include "intel_gpu/runtime/itt.hpp"
15+
#include "registry/implementation_manager.hpp"
1416
#ifdef ENABLE_ONEDNN_FOR_GPU
1517
#include "reorder_inst.h"
1618
#include "graph/impls/onednn/utils.hpp"
@@ -19,9 +21,73 @@
1921
#include <list>
2022
#include <memory>
2123
#include <utility>
24+
#include <unordered_set>
25+
#include <queue>
2226

2327
using namespace cldnn;
2428

29+
namespace {
30+
// Attempts to reselect an appropriate implementation for a node after
31+
// propagate_constants transforms dynamic inputs into static data.
32+
// Refreshes stale output layouts before building kernel params to avoid
33+
// incorrect shape_type classification.
34+
void try_reselect_impl_for_node(program_node* node) {
35+
bool can_select_impl = !node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty());
36+
if (!can_select_impl)
37+
return;
38+
39+
auto selected_impl = node->get_selected_impl();
40+
bool has_selected_impl = selected_impl != nullptr;
41+
bool need_new_impl_selection = !has_selected_impl;
42+
43+
if (has_selected_impl && !node->is_valid_output_layout()) {
44+
bool is_node_dynamic = node->get_output_layout(false).is_dynamic();
45+
bool is_impl_dynamic = selected_impl->is_dynamic();
46+
need_new_impl_selection = (is_node_dynamic != is_impl_dynamic);
47+
}
48+
49+
if (!need_new_impl_selection)
50+
return;
51+
52+
// Refresh stale output layouts before building kernel params.
53+
// After invalidate_users(), cached output_layouts may still reflect
54+
// the old dynamic shape. Recomputing ensures get_kernel_impl_params()
55+
// uses up-to-date layouts for accurate shape_type determination.
56+
if (!node->is_all_valid_output_layouts()) {
57+
node->get_output_layouts(false);
58+
}
59+
60+
auto params = node->get_kernel_impl_params();
61+
auto shape_type = ImplementationManager::get_shape_type(*params);
62+
if (shape_type == shape_types::dynamic_shape)
63+
return;
64+
65+
auto selected_impl_manager = node->type()->choose_impl(*node, shape_type);
66+
std::string fail_reason;
67+
try {
68+
if (selected_impl_manager) {
69+
node->set_selected_impl(selected_impl_manager->create(*node, *params));
70+
} else {
71+
fail_reason = "choose_impl returned nullptr (no matching implementation found)";
72+
}
73+
} catch (const std::exception& e) {
74+
fail_reason = e.what();
75+
}
76+
77+
OPENVINO_ASSERT(node->get_selected_impl() != nullptr,
78+
"[GPU] Failed to select implementation after propagate_constants"
79+
"\nname:",
80+
node->id(),
81+
"\ntype: ",
82+
node->get_primitive()->type_string(),
83+
"\noriginal_type: ",
84+
node->get_primitive()->origin_op_type_name,
85+
" ",
86+
fail_reason);
87+
}
88+
89+
} // namespace
90+
2591
// ToDo remove friendship relation from program_node and program
2692
void propagate_constants::run(program& p) {
2793
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "pass::PropagateConstants");
@@ -74,10 +140,11 @@ void propagate_constants::run(program& p) {
74140

75141
// replace all constant nodes which are relevant for inference (either used by non-const user or marked as output)
76142
// with recomputed cldnn::data
77-
for (auto& cout : to_replace) {
78-
auto& id_to_replace = std::get<0>(cout);
79-
auto mem_impl = std::get<1>(cout);
80-
auto cache_info = std::get<2>(cout);
143+
std::unordered_set<program_node*> reselection_targets;
144+
for (auto& entry : to_replace) {
145+
auto& id_to_replace = std::get<0>(entry);
146+
auto mem_impl = std::get<1>(entry);
147+
auto cache_info = std::get<2>(entry);
81148
auto cache_manager = std::get<0>(cache_info);
82149
auto in_layout = std::get<1>(cache_info);
83150
auto reorder = std::get<2>(cache_info);
@@ -109,8 +176,37 @@ void propagate_constants::run(program& p) {
109176
curr_node.users.end(),
110177
[](program_node* node) { return node->is_constant(); }),
111178
curr_node.users.end());
179+
bool was_dynamic = curr_node.get_output_layout().is_dynamic();
112180
p.replace(curr_node, new_node);
113-
new_node.recalc_output_layout(false);
181+
new_node.recalc_output_layout(was_dynamic);
182+
183+
// Collect transitively affected user nodes when dynamic → static transition occurs.
184+
// Only users of constants that transitioned from dynamic to static need impl reselection.
185+
if (was_dynamic && !new_node.get_output_layout(false).is_dynamic()) {
186+
std::queue<program_node*> queue;
187+
for (auto& user : new_node.get_users()) {
188+
queue.push(user);
189+
}
190+
while (!queue.empty()) {
191+
auto* n = queue.front();
192+
queue.pop();
193+
if (reselection_targets.count(n) > 0)
194+
continue;
195+
reselection_targets.insert(n);
196+
if (!n->is_all_valid_output_layouts()) {
197+
for (auto& user : n->get_users()) {
198+
queue.push(user);
199+
}
200+
}
201+
}
202+
}
203+
}
204+
205+
// propagate_constants is executed after compile_graph pass.
206+
// If some users become static due to propagated constants, they can end up without selected_impl.
207+
// Re-select implementation for affected nodes to avoid runtime _impl-nullptr validation failure.
208+
for (auto* node : reselection_targets) {
209+
try_reselect_impl_for_node(node);
114210
}
115211
}
116212

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
// Copyright (C) 2018-2026 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include "test_utils.h"
6+
#include "program_wrapper.h"
7+
#include "pass_manager.h"
8+
9+
using namespace cldnn;
10+
using namespace ::tests;
11+
12+
// Verifies that when constant propagation transforms a dynamic-layout constant node into
13+
// static data, downstream consumers receive updated static implementations.
14+
//
15+
// In real models (e.g., SSM with dynamic shapes), shape inference may leave intermediate
16+
// constant computation nodes with unresolved dynamic layouts during compile_graph.
17+
// After propagate_constants evaluates and folds these constant subgraphs, the resulting
18+
// data nodes have fully resolved static shapes, causing downstream consumers to transition
19+
// from dynamic to static.
20+
//
21+
// This test uses a non-trivial constant subgraph (eltwise of two weight tensors) whose
22+
// layout is set to dynamic to simulate unresolved shape inference. Creating a genuinely
23+
// dynamic constant subgraph requires model-specific conditions that are impractical to
24+
// reproduce in a unit test, so the manual override is necessary.
25+
//
26+
// Topology:
27+
// data("weights_a") ---+
28+
// eltwise("w_sum") --> eltwise("eltwise", sum)
29+
// data("weights_b") ---+ /
30+
// input_layout("input", static) ---------/
31+
//
32+
// After propagation: w_sum is folded into a static data node (was_dynamic=true),
33+
// triggering impl reselection for eltwise which transitions to static.
34+
TEST(propagate_constants, need_impl_reselection_dynamic_to_static_transition) {
    auto& engine = get_test_engine();

    // One static layout shared by the runtime input and both weight blobs.
    const layout blob_layout{{1, 3, 4, 4}, data_types::f32, format::bfyx};

    topology topology(
        input_layout("input", blob_layout),
        data("weights_a", engine.allocate_memory(blob_layout)),
        data("weights_b", engine.allocate_memory(blob_layout)),
        eltwise("w_sum", input_info("weights_a"), input_info("weights_b"), eltwise_mode::sum),
        eltwise("eltwise", input_info("input"), input_info("w_sum"), eltwise_mode::sum));

    auto cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    auto prog = program::build_program(engine, topology, cfg, false, true);

    // Force an unresolved dynamic shape onto the constant subgraph to mimic
    // models where shape inference cannot resolve it during compile_graph.
    prog->get_node("w_sum").set_output_layout(
        layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}, true);

    program_wrapper::apply_opt_pass<compile_graph>(*prog);

    // One input is dynamic, so the consumer starts with a dynamic impl (or none).
    auto& consumer = prog->get_node("eltwise");
    auto impl_before = consumer.get_selected_impl();
    ASSERT_TRUE(impl_before == nullptr || impl_before->is_dynamic());

    // Folding w_sum into static data triggers impl reselection for its user.
    program_wrapper::apply_opt_pass<propagate_constants>(*prog);

    // The folded subgraph is replaced by a data node reusing the old id
    // (program::replace renames the new node to the old id).
    ASSERT_TRUE(prog->get_node("w_sum").is_type<data>());

    // The consumer became fully static and must now carry a static impl.
    auto impl_after = consumer.get_selected_impl();
    ASSERT_NE(impl_after, nullptr);
    ASSERT_FALSE(impl_after->is_dynamic());
}
80+
81+
// Verifies that when impl reselection is triggered, nodes that remain dynamic
82+
// (due to a dynamic non-constant input) are correctly skipped — they should NOT
83+
// receive a static implementation.
84+
//
85+
// Uses a non-trivial constant subgraph (eltwise of two weight tensors) shared by
86+
// two consumers: one with a static input and one with a dynamic input.
87+
//
88+
// Topology:
89+
// input_layout("input_static", static) ---> eltwise("eltwise_becomes_static", sum)
90+
// input_layout("input_dynamic", dynamic) --> eltwise("eltwise_stays_dynamic", sum)
91+
// data("weights_a") ---+ /
92+
// eltwise("w_sum") --/ (shared constant input for both)
93+
// data("weights_b") ---+
94+
//
95+
// After propagation: eltwise_becomes_static transitions to static (gets static impl),
96+
// while eltwise_stays_dynamic remains dynamic (keeps dynamic impl or nullptr).
97+
TEST(propagate_constants, need_impl_reselection_skips_still_dynamic_nodes) {
    auto& engine = get_test_engine();

    const layout static_in_layout{{1, 3, 4, 4}, data_types::f32, format::bfyx};
    const layout dynamic_in_layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx};
    const layout weights_layout{{1, 3, 4, 4}, data_types::f32, format::bfyx};

    topology topology(
        input_layout("input_static", static_in_layout),
        input_layout("input_dynamic", dynamic_in_layout),
        data("weights_a", engine.allocate_memory(weights_layout)),
        data("weights_b", engine.allocate_memory(weights_layout)),
        eltwise("w_sum", input_info("weights_a"), input_info("weights_b"), eltwise_mode::sum),
        eltwise("eltwise_becomes_static", input_info("input_static"), input_info("w_sum"), eltwise_mode::sum),
        eltwise("eltwise_stays_dynamic", input_info("input_dynamic"), input_info("w_sum"), eltwise_mode::sum));

    auto cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    auto prog = program::build_program(engine, topology, cfg, false, true);

    // Mark the shared constant subgraph as unresolved/dynamic before compilation.
    prog->get_node("w_sum").set_output_layout(
        layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}, true);

    program_wrapper::apply_opt_pass<compile_graph>(*prog);

    auto& becomes_static = prog->get_node("eltwise_becomes_static");
    auto& stays_dynamic = prog->get_node("eltwise_stays_dynamic");

    // Each consumer has at least one dynamic input, so both start out dynamic
    // (a dynamic impl or no impl at all).
    auto static_impl_before = becomes_static.get_selected_impl();
    auto dynamic_impl_before = stays_dynamic.get_selected_impl();
    ASSERT_TRUE(static_impl_before == nullptr || static_impl_before->is_dynamic());
    ASSERT_TRUE(dynamic_impl_before == nullptr || dynamic_impl_before->is_dynamic());

    program_wrapper::apply_opt_pass<propagate_constants>(*prog);

    // All inputs of eltwise_becomes_static are static now -> static impl.
    auto static_impl_after = becomes_static.get_selected_impl();
    ASSERT_NE(static_impl_after, nullptr);
    ASSERT_FALSE(static_impl_after->is_dynamic());

    // eltwise_stays_dynamic still has a dynamic non-constant input -> it must
    // keep a dynamic impl (or legitimately remain without one).
    auto dynamic_impl_after = stays_dynamic.get_selected_impl();
    ASSERT_TRUE(dynamic_impl_after == nullptr || dynamic_impl_after->is_dynamic());
}
147+
148+
// Verifies that when all propagated constants are already static (was_dynamic=false),
149+
// need_impl_reselection is NOT triggered and existing implementations are preserved.
150+
//
151+
// Topology (same structure, but w_reorder keeps its static layout):
152+
// input_layout("input", static) ---> eltwise("eltwise", sum)
153+
// data("weights") --> reorder("w_reorder") --/
154+
//
155+
// Since w_reorder is static before propagation, was_dynamic=false.
156+
// The pass returns early without entering the impl reselection loop.
157+
TEST(propagate_constants, no_reselection_when_constants_are_static) {
    auto& engine = get_test_engine();

    const layout blob_layout{{1, 3, 4, 4}, data_types::f32, format::bfyx};

    topology topology(
        input_layout("input", blob_layout),
        data("weights", engine.allocate_memory(blob_layout)),
        reorder("w_reorder", input_info("weights"), format::bfyx, data_types::f32),
        eltwise("eltwise", input_info("input"), input_info("w_reorder"), eltwise_mode::sum));

    auto cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    auto prog = program::build_program(engine, topology, cfg, false, true);

    // w_reorder is intentionally left with its static layout (no dynamic override).
    program_wrapper::apply_opt_pass<compile_graph>(*prog);

    // With all-static inputs the consumer already has a static impl.
    auto& consumer = prog->get_node("eltwise");
    auto impl_before = consumer.get_selected_impl();
    ASSERT_NE(impl_before, nullptr);
    ASSERT_FALSE(impl_before->is_dynamic());

    program_wrapper::apply_opt_pass<propagate_constants>(*prog);

    // The constant reorder is folded into a data node reusing the same id.
    ASSERT_TRUE(prog->get_node("w_reorder").is_type<data>());

    // was_dynamic == false, so no reselection happens: the very same static
    // implementation object must survive the pass untouched.
    auto impl_after = consumer.get_selected_impl();
    ASSERT_NE(impl_after, nullptr);
    ASSERT_FALSE(impl_after->is_dynamic());
    ASSERT_EQ(impl_before, impl_after);
}

0 commit comments

Comments
 (0)