Skip to content

Commit 2f85368

Browse files
authored
CANN: Support eager execution mode under ACL graph compilation (ggml-org#15712)
* [CANN] Support eager execution mode under ACL graph compilation

  Add support for running operators in eager mode while ACL graph compilation is enabled. This allows bypassing graph execution and directly submitting ops, which is useful for debugging and reducing graph build overhead in certain scenarios.

  Signed-off-by: noemotiovon <[email protected]>

* fix typo

  Signed-off-by: noemotiovon <[email protected]>

* rename to acl_graph_mode

  Signed-off-by: noemotiovon <[email protected]>

---------

Signed-off-by: noemotiovon <[email protected]>
1 parent ef2af57 commit 2f85368

File tree

3 files changed

+16
-1
lines changed

3 files changed

+16
-1
lines changed

docs/backend/CANN.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,7 @@ Controls automatic cleanup of the memory pool. This option is only effective whe
314314

315315
Converting the matmul weight format from ND to NZ can significantly improve performance on the 310I DUO NPU.
316316

317+
### GGML_CANN_DISABLE_ACL_GRAPH
318+
319+
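When this variable is set to a true value (it is parsed as a boolean flag), ACL graph execution is disabled and operators are executed one by one in eager mode. The variable is only read in builds compiled with `USE_ACL_GRAPH`, where graph mode is the default.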
320+
This mode is mainly intended for debugging or for cases where the overhead of graph construction and execution is not desirable.

ggml/src/ggml-cann/common.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,7 @@ struct ggml_backend_cann_context {
395395
#ifdef USE_ACL_GRAPH
396396
/// Cached CANN ACL graph used for executing the current ggml computation graph.
397397
std::unique_ptr<ggml_cann_graph> cann_graph;
398+
bool acl_graph_mode = true;
398399
#endif
399400
cann_task_queue task_queue;
400401
bool async_mode;
@@ -404,7 +405,6 @@ struct ggml_backend_cann_context {
404405
ggml_cann_tensor_cache rms_norm_one_tensor_cache;
405406
ggml_cann_tensor_cache rms_norm_zero_tensor_cache;
406407

407-
408408
aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */
409409

410410
/**
@@ -419,6 +419,13 @@ struct ggml_backend_cann_context {
419419
async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
420420
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
421421
device, async_mode ? "ON" : "OFF");
422+
#ifdef USE_ACL_GRAPH
423+
acl_graph_mode = !(parse_bool(get_env("GGML_CANN_DISABLE_ACL_GRAPH").value_or("")));
424+
GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n",
425+
__func__, device,
426+
acl_graph_mode ? "GRAPH" : "EAGER",
427+
acl_graph_mode ? "acl graph enabled" : "acl graph disabled");
428+
#endif
422429
}
423430

424431
/**

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2252,6 +2252,10 @@ static enum ggml_status ggml_backend_cann_graph_compute(
22522252
bool use_cann_graph = true;
22532253
bool cann_graph_update_required = false;
22542254

2255+
if (!cann_ctx->acl_graph_mode) {
2256+
use_cann_graph = false;
2257+
}
2258+
22552259
if (use_cann_graph) {
22562260
if (cann_ctx->cann_graph == nullptr) {
22572261
cann_ctx->cann_graph.reset(new ggml_cann_graph());

0 commit comments

Comments
 (0)