Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/backend/CANN.md
Original file line number Diff line number Diff line change
Expand Up @@ -314,3 +314,7 @@ Controls automatic cleanup of the memory pool. This option is only effective whe

Converting the matmul weight format from ND to NZ can significantly improve performance on the 310I DUO NPU.

### GGML_CANN_DISABLE_ACL_GRAPH

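When this variable is set to a true value (for example `1`), ACL graph execution is disabled and operators are executed one at a time in eager (op-by-op) mode. It only has an effect when the backend is built with ACL graph support (`USE_ACL_GRAPH`); ACL graph execution is enabled by default.
This mode is mainly intended for debugging, or for cases where the overhead of graph construction and execution is undesirable.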
9 changes: 8 additions & 1 deletion ggml/src/ggml-cann/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ struct ggml_backend_cann_context {
#ifdef USE_ACL_GRAPH
/// Cached CANN ACL graph used for executing the current ggml computation graph.
std::unique_ptr<ggml_cann_graph> cann_graph;
bool acl_graph_mode = true;
#endif
cann_task_queue task_queue;
bool async_mode;
Expand All @@ -404,7 +405,6 @@ struct ggml_backend_cann_context {
ggml_cann_tensor_cache rms_norm_one_tensor_cache;
ggml_cann_tensor_cache rms_norm_zero_tensor_cache;


aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */

/**
Expand All @@ -419,6 +419,13 @@ struct ggml_backend_cann_context {
async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
device, async_mode ? "ON" : "OFF");
#ifdef USE_ACL_GRAPH
acl_graph_mode = !(parse_bool(get_env("GGML_CANN_DISABLE_ACL_GRAPH").value_or("")));
GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n",
__func__, device,
acl_graph_mode ? "GRAPH" : "EAGER",
acl_graph_mode ? "acl graph enabled" : "acl graph disabled");
#endif
}

/**
Expand Down
4 changes: 4 additions & 0 deletions ggml/src/ggml-cann/ggml-cann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2252,6 +2252,10 @@ static enum ggml_status ggml_backend_cann_graph_compute(
bool use_cann_graph = true;
bool cann_graph_update_required = false;

if (!cann_ctx->acl_graph_mode) {
use_cann_graph = false;
}

if (use_cann_graph) {
if (cann_ctx->cann_graph == nullptr) {
cann_ctx->cann_graph.reset(new ggml_cann_graph());
Expand Down
Loading