Skip to content

Commit 65629a3

Browse files
committed
[CANN] Support eager execution mode under ACL graph compilation
Add support for running operators in eager mode while ACL graph compilation is enabled. This allows bypassing graph execution and directly submitting ops, which is useful for debugging and reducing graph build overhead in certain scenarios.

Signed-off-by: noemotiovon <[email protected]>
1 parent 5d804a4 commit 65629a3

File tree

3 files changed

+13
-0
lines changed

3 files changed

+13
-0
lines changed

docs/backend/CANN.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,7 @@ Controls automatic cleanup of the memory pool. This option is only effective whe
314314

315315
Converting the matmul weight format from ND to NZ can significantly improve performance on the 310I DUO NPU.
316316

317+
### GGML_CANN_EAGER_MODE
318+
319+
Enabling eager execution mode bypasses ACL graph execution and submits operators directly.
320+
This is useful for debugging, or in scenarios where graph-building overhead is undesirable.

ggml/src/ggml-cann/common.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ struct ggml_backend_cann_context {
398398
#endif
399399
cann_task_queue task_queue;
400400
bool async_mode;
401+
bool eager_mode; // when true, ACL graph execution is bypassed and operators are submitted directly
401402
// Rope Cache
402403
ggml_cann_rope_cache rope_cache;
403404
// Constant Pool
@@ -419,6 +420,10 @@ struct ggml_backend_cann_context {
419420
async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
420421
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
421422
device, async_mode ? "ON" : "OFF");
423+
424+
eager_mode = parse_bool(get_env("GGML_CANN_EAGER_MODE").value_or(""));
425+
GGML_LOG_INFO("%s: device %d eager execution mode is %s (acl graph disabled)\n",
426+
__func__, device, eager_mode ? "ON" : "OFF");
422427
}
423428

424429
/**

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2252,6 +2252,10 @@ static enum ggml_status ggml_backend_cann_graph_compute(
22522252
bool use_cann_graph = true;
22532253
bool cann_graph_update_required = false;
22542254

2255+
if (cann_ctx->eager_mode) {
2256+
use_cann_graph = false;
2257+
}
2258+
22552259
if (use_cann_graph) {
22562260
if (cann_ctx->cann_graph == nullptr) {
22572261
cann_ctx->cann_graph.reset(new ggml_cann_graph());

0 commit comments

Comments
 (0)