diff --git a/docs/backend/CANN.md b/docs/backend/CANN.md index 325e09bd380..ac0f8375938 100755 --- a/docs/backend/CANN.md +++ b/docs/backend/CANN.md @@ -314,3 +314,10 @@ Controls automatic cleanup of the memory pool. This option is only effective whe Converting the matmul weight format from ND to NZ can significantly improve performance on the 310I DUO NPU. +### GGML_CANN_ACL_GRAPH + +Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default. + +### GGML_CANN_GRAPH_CACHE_CAPACITY + +Maximum number of compiled CANN graphs kept in the LRU cache, default is 12. When the number of cached graphs exceeds this capacity, the least recently used graph will be evicted. diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index 33794062f56..3cfb863c448 100755 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -38,6 +38,7 @@ #include #include #include +#include #include "../include/ggml-cann.h" #include "../include/ggml.h" @@ -106,6 +107,7 @@ int32_t ggml_cann_get_device(); std::optional get_env(const std::string& name); bool parse_bool(const std::string& value); +int parse_integer(const std::string& value); /** * @brief Abstract base class for memory pools used by CANN. @@ -339,24 +341,227 @@ class cann_task_queue { #ifdef USE_ACL_GRAPH struct ggml_graph_node_properties { + // dst tensor void * node_address; - ggml_op node_op; int64_t ne[GGML_MAX_DIMS]; size_t nb[GGML_MAX_DIMS]; + + // src tensor void * src_address[GGML_MAX_SRC]; + int64_t src_ne[GGML_MAX_SRC][GGML_MAX_DIMS]; + size_t src_nb[GGML_MAX_SRC][GGML_MAX_DIMS]; + + // op + ggml_op node_op; int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)]; + + /** + * @brief Check if a ggml tensor node matches this property set. + * + * This function compares all relevant fields (address, op type, shape, source inputs, op params) + * to determine whether the current node matches these previously recorded properties. + * + * @param node The current ggml tensor node. + * @return true if all fields match (excluding GGML_OP_VIEW); false otherwise. + */ + bool has_matching_properties(ggml_tensor * node) { + if (node->data != this->node_address && node->op != GGML_OP_VIEW) { + return false; + } + + if (node->op != this->node_op) { + return false; + } + + for (int i = 0; i < GGML_MAX_DIMS; i++) { + if (node->ne[i] != this->ne[i]) { + return false; + } + if (node->nb[i] != this->nb[i]) { + return false; + } + } + + for (int i = 0; i < GGML_MAX_SRC; i++) { + if (node->src[i]) { + if (node->src[i]->data != this->src_address[i] && node->op != GGML_OP_VIEW) { + return false; + } + + for (int d = 0; d < GGML_MAX_DIMS; d++) { + if (node->src[i]->ne[d] != this->src_ne[i][d]) { + return false; + } + if (node->src[i]->nb[d] != this->src_nb[i][d]) { + return false; + } + } + } else { + if (this->src_address[i] != nullptr) { + return false; + } + } + } + + if (node->op == GGML_OP_SCALE || node->op == GGML_OP_UNARY || node->op == GGML_OP_GLU) { + return memcmp(this->op_params, node->op_params, GGML_MAX_OP_PARAMS) == 0; + } + return true; + } }; struct ggml_cann_graph { ~ggml_cann_graph() { if (graph != nullptr) { - aclmdlRIDestroy(graph); + ACL_CHECK(aclmdlRIDestroy(graph)); } } aclmdlRI graph = nullptr; std::vector ggml_graph_properties; + + /** + * @brief Create a new CANN graph from a ggml computation graph. + * + * This function creates a new ggml_cann_graph object and fills its node properties + * (operation type, dimensions, strides, input sources, and operation parameters) + * based on the current ggml computation graph. + * + * Each node in the ggml graph is mapped to a property entry in the new CANN graph: + * - node address + * - operation type + * - shape (ne) and strides (nb) + * - source tensor addresses + * - operation parameters + * + * @param cgraph The current ggml computation graph. + * @return Pointer to the newly created ggml_cann_graph object. + */ + static ggml_cann_graph * create_from_cgraph(ggml_cgraph * cgraph) { + ggml_cann_graph * new_graph = new ggml_cann_graph(); + new_graph->ggml_graph_properties.resize(cgraph->n_nodes); + + for (int node_idx = 0; node_idx < cgraph->n_nodes; ++node_idx) { + ggml_tensor * node = cgraph->nodes[node_idx]; + auto & prop = new_graph->ggml_graph_properties[node_idx]; + + prop.node_address = node->data; + prop.node_op = node->op; + + std::copy_n(node->ne, GGML_MAX_DIMS, prop.ne); + std::copy_n(node->nb, GGML_MAX_DIMS, prop.nb); + + for (int src = 0; src < GGML_MAX_SRC; ++src) { + if (node->src[src]) { + prop.src_address[src] = node->src[src]->data; + std::copy_n(node->src[src]->ne, GGML_MAX_DIMS, prop.src_ne[src]); + std::copy_n(node->src[src]->nb, GGML_MAX_DIMS, prop.src_nb[src]); + } else { + prop.src_address[src] = nullptr; + std::fill_n(prop.src_ne[src], GGML_MAX_DIMS, 0); + std::fill_n(prop.src_nb[src], GGML_MAX_DIMS, 0); + } + } + + memcpy(prop.op_params, node->op_params, GGML_MAX_OP_PARAMS); + } + + return new_graph; + } + + /** + * @brief Check whether this CANN graph matches the given ggml computation graph. + * + * This function compares the number of nodes and each node's properties + * (operation type, dimensions, strides, inputs, and operation parameters) + * to determine whether this CANN graph matches the given ggml graph. + * + * @param cgraph The current ggml computation graph. + * @return true if this CANN graph matches the ggml graph; false otherwise. + */ + bool matches_cgraph(ggml_cgraph * cgraph) { + if (this->ggml_graph_properties.size() != static_cast(cgraph->n_nodes)) { + return false; + } + + for (int i = 0; i < cgraph->n_nodes; ++i) { + if (!this->ggml_graph_properties[i].has_matching_properties(cgraph->nodes[i])) { + return false; + } + } + + return true; + } +}; + +/** + * @brief LRU cache for managing ggml_cann_graph objects. + * + * This class maintains a list of shared_ptr to ggml_cann_graph objects + * and enforces a maximum capacity. It provides methods to push new graphs, + * move existing graphs to the front (most recently used), and clear the cache. + */ +struct ggml_cann_graph_lru_cache { + size_t capacity; /**< Maximum number of graphs in the cache. */ + + std::list cache_list; /**< List storing cached graphs as raw pointers. */ + + ggml_cann_graph_lru_cache() { + capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); + } + + /** + * @brief Push a new graph to the front of the cache. + * If the cache exceeds capacity, the least recently used graph is deleted. + * @param new_node Pointer to the new ggml_cann_graph to cache. + * Ownership is transferred to the cache (cache will delete it). + */ + void push(ggml_cann_graph* new_node) { + if (cache_list.size() >= capacity) { + ggml_cann_graph* old = cache_list.back(); + cache_list.pop_back(); + delete old; // free the old graph + } + cache_list.push_front(new_node); + } + + /** + * @brief Clear all graphs from the cache (also frees memory). + */ + void clear() { + for (auto ptr : cache_list) { + delete ptr; + } + cache_list.clear(); + } + + /** + * @brief Destructor that clears the cache and frees all cached graphs. + */ + ~ggml_cann_graph_lru_cache() { clear(); } + + /** + * @brief Find a cached CANN graph that matches the given ggml graph and move it to front. + * + * This function iterates through the cached CANN graphs stored in the LRU cache and + * compares them against the given ggml computation graph. If a matching graph is found, + * it is promoted to the front of the LRU cache and returned. Otherwise, the function + * returns nullptr. + * + * @param cgraph The current ggml computation graph. + * @return true if found; false otherwise. + */ + bool find_and_move_to_front(ggml_cgraph * cgraph) { + for (auto & graph_ptr : this->cache_list) { + if (graph_ptr->matches_cgraph(cgraph)) { + cache_list.remove(graph_ptr); + cache_list.push_front(graph_ptr); + return true; + } + } + return false; + } }; #endif // USE_ACL_GRAPH @@ -370,7 +575,8 @@ struct ggml_backend_cann_context { aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */ #ifdef USE_ACL_GRAPH /// Cached CANN ACL graph used for executing the current ggml computation graph. - std::unique_ptr cann_graph; + ggml_cann_graph_lru_cache graph_lru_cache; + bool acl_graph_mode = true; #endif cann_task_queue task_queue; bool async_mode; diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index cb8af42ebf9..09b179bf7df 100755 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -116,6 +116,24 @@ bool parse_bool(const std::string& value) { return valid_values.find(value) != valid_values.end(); } +/** + * @brief Parse a string as an integer, returning 0 if invalid. + * + * This function attempts to convert the input string `value` to an `int`. + * If the string is not a valid integer or is out of the `int` range, + * it returns 0. + * + * @param value The string to parse. + * @return The parsed integer, or 0 if conversion fails. + */ +int parse_integer(const std::string& value) { + try { + return std::stoi(value); + } catch (...) { + return 0; + } +} + /** * @brief Initialize the CANN device information. * @@ -2075,104 +2093,6 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) { ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream())); } -#ifdef USE_ACL_GRAPH -/** - * @brief Populate the internal CANN graph node properties from the ggml computation graph. - * - * This function copies all node attributes (operation type, dimensions, strides, input sources, - * and operation parameters) into the cached CANN graph structure for later reuse or comparison. - * - * @param cann_ctx The CANN backend context. - * @param cgraph The ggml computational graph. - */ -static void set_ggml_graph_node_properties(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) { - for (int node_idx = 0; node_idx < cgraph->n_nodes; node_idx++) { - ggml_tensor * node = cgraph->nodes[node_idx]; - cann_ctx->cann_graph->ggml_graph_properties[node_idx].node_address = node->data; - cann_ctx->cann_graph->ggml_graph_properties[node_idx].node_op = node->op; - - for (int dim = 0; dim < GGML_MAX_DIMS; dim++) { - cann_ctx->cann_graph->ggml_graph_properties[node_idx].ne[dim] = node->ne[dim]; - cann_ctx->cann_graph->ggml_graph_properties[node_idx].nb[dim] = node->nb[dim]; - } - for (int src = 0; src < GGML_MAX_SRC; src++) { - cann_ctx->cann_graph->ggml_graph_properties[node_idx].src_address[src] = - node->src[src] ? node->src[src]->data : nullptr; - } - memcpy(cann_ctx->cann_graph->ggml_graph_properties[node_idx].op_params, node->op_params, GGML_MAX_OP_PARAMS); - } -} - -/** - * @brief Check if a ggml tensor node matches a previously captured CANN graph node. - * - * This function compares all relevant fields (address, op type, shape, source inputs, op params) - * to determine whether the current node matches a previously recorded version. - * - * @param node The current ggml tensor node. - * @param graph_node_properties The stored properties of a CANN graph node. - * @return true if all fields match (excluding GGML_OP_VIEW); false otherwise. - */ -static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) { - if (node->data != graph_node_properties->node_address && - node->op != GGML_OP_VIEW) { - return false; - } - if (node->op != graph_node_properties->node_op) { - return false; - } - for (int i = 0; i < GGML_MAX_DIMS; i++) { - if (node->ne[i] != graph_node_properties->ne[i]) { - return false; - } - if (node->nb[i] != graph_node_properties->nb[i]) { - return false; - } - } - for (int i = 0; i < GGML_MAX_SRC; i++) { - if (node->src[i] && - node->src[i]->data != graph_node_properties->src_address[i] && - node->op != GGML_OP_VIEW - ) { - return false; - } - } - if (node->op == GGML_OP_SCALE && - memcmp(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS) != 0) { - return false; - } - return true; -} - -/** - * @brief Determine if the CANN graph needs to be rebuilt due to graph changes. - * - * This checks whether the number or properties of ggml graph nodes have changed - * compared to the last captured CANN graph. If so, the CANN graph must be re-captured. - * - * @param cann_ctx The CANN backend context. - * @param cgraph The current ggml computation graph. - * @return true if an update is required; false otherwise. - */ -static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) { - // The number of nodes is different, so the graph needs to be reconstructed. - if (cann_ctx->cann_graph->ggml_graph_properties.size() != (size_t)cgraph->n_nodes) { - cann_ctx->cann_graph->ggml_graph_properties.resize(cgraph->n_nodes); - return true; - } - - // The number of nodes is the same; iterate over each node to check whether they match. - for (int i = 0; i < cgraph->n_nodes; i++) { - bool has_matching_properties = ggml_graph_node_has_matching_properties( - cgraph->nodes[i], &cann_ctx->cann_graph->ggml_graph_properties[i]); - if(!has_matching_properties) { - return true; - } - } - return false; -} -#endif // USE_ACL_GRAPH - /** * @brief Evaluate the computation graph and optionally capture or execute it using CANN graph API. * @@ -2181,26 +2101,23 @@ static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx, * * Otherwise, it falls back to op-by-op execution using the CANN compute kernel dispatcher. * - * @param cann_ctx The CANN backend context. - * @param cgraph The ggml computation graph. - * @param use_cann_graph Whether to use CANN graph execution. - * @param cann_graph_update_required Whether graph capture is needed due to graph changes. + * @param cann_ctx The CANN backend context. + * @param cgraph The ggml computation graph. + * @param use_cann_graph Whether to use CANN graph execution. + * @param cann_graph_capture_required Whether graph capture is needed due to graph changes. */ -static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph, - bool & use_cann_graph, bool & cann_graph_update_required) { +static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx, + ggml_cgraph * cgraph, + bool & use_cann_graph, + bool & cann_graph_capture_required) { #ifdef USE_ACL_GRAPH - if (use_cann_graph && cann_graph_update_required) { - if (cann_ctx->cann_graph->graph != nullptr) { - ACL_CHECK(aclmdlRIDestroy(cann_ctx->cann_graph->graph)); - cann_ctx->cann_graph->graph = nullptr; - } + if (use_cann_graph && cann_graph_capture_required) { // Begin CANN graph capture ACL_CHECK(aclmdlRICaptureBegin(cann_ctx->stream(), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL)); } #endif // USE_ACL_GRAPH - // Only perform the graph execution if CANN graphs are not enabled, or we are capturing the graph. // With the use of CANN graphs, the execution will be performed by the graph launch. - if (!use_cann_graph || cann_graph_update_required) { + if (!use_cann_graph || cann_graph_capture_required) { for (int i = 0; i < cgraph->n_nodes; i++) { ggml_tensor * node = cgraph->nodes[i]; @@ -2217,13 +2134,16 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx } #ifdef USE_ACL_GRAPH - if (use_cann_graph && cann_graph_update_required) { // End CANN graph capture - ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &cann_ctx->cann_graph->graph)); - } - if (use_cann_graph) { - // Execute graph - ACL_CHECK(aclmdlRIExecuteAsync(cann_ctx->cann_graph->graph, cann_ctx->stream())); + GGML_ASSERT(!cann_ctx->graph_lru_cache.cache_list.empty()); + ggml_cann_graph * matched_graph = cann_ctx->graph_lru_cache.cache_list.front(); + + if (cann_graph_capture_required) { // End CANN graph capture + ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &matched_graph->graph)); + } + + // Execute CANN graph + ACL_CHECK(aclmdlRIExecuteAsync(matched_graph->graph, cann_ctx->stream())); } #endif // USE_ACL_GRAPH } @@ -2246,10 +2166,11 @@ static enum ggml_status ggml_backend_cann_graph_compute( ggml_backend_cann_context* cann_ctx = (ggml_backend_cann_context*)backend->context; ggml_cann_set_device(cann_ctx->device); - release_nz_workspace(); + g_nz_workspaces[cann_ctx->device].clear(); + + bool graph_capture_required = false; #ifdef USE_ACL_GRAPH bool use_cann_graph = true; - bool cann_graph_update_required = false; // check environment LLAMA_SET_ROWS if (!cann_ctx->support_set_rows) { @@ -2257,25 +2178,18 @@ static enum ggml_status ggml_backend_cann_graph_compute( } if (use_cann_graph) { - if (cann_ctx->cann_graph == nullptr) { - cann_ctx->cann_graph.reset(new ggml_cann_graph()); - cann_graph_update_required = true; + // If no matching graph is found, the graph needs to be recaptured. + graph_capture_required = !cann_ctx->graph_lru_cache.find_and_move_to_front(cgraph); + if (graph_capture_required) { + // If no matching graph is found, add a new ACL graph. + ggml_cann_graph * new_graph = ggml_cann_graph::create_from_cgraph(cgraph); + cann_ctx->graph_lru_cache.push(new_graph); } - - cann_graph_update_required = is_cann_graph_update_required(cann_ctx, cgraph); - set_ggml_graph_node_properties(cann_ctx, cgraph); } #else bool use_cann_graph = false; - bool cann_graph_update_required = false; #endif // USE_ACL_GRAPH - - evaluate_and_capture_cann_graph( - cann_ctx, - cgraph, - use_cann_graph, - cann_graph_update_required - ); + evaluate_and_capture_cann_graph(cann_ctx, cgraph, use_cann_graph, graph_capture_required); return GGML_STATUS_SUCCESS; }