@@ -2075,6 +2075,160 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
20752075 ACL_CHECK (aclrtSynchronizeStream (cann_ctx->stream ()));
20762076}
20772077
2078+ #ifdef USE_ACL_GRAPH
2079+ /* *
2080+ * @brief Populate the internal CANN graph node properties from the ggml computation graph.
2081+ *
2082+ * This function copies all node attributes (operation type, dimensions, strides, input sources,
2083+ * and operation parameters) into the cached CANN graph structure for later reuse or comparison.
2084+ *
2085+ * @param cann_ctx The CANN backend context.
2086+ * @param cgraph The ggml computational graph.
2087+ */
2088+ static void set_ggml_graph_node_properties (ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2089+ for (int node_idx = 0 ; node_idx < cgraph->n_nodes ; node_idx++) {
2090+ ggml_tensor * node = cgraph->nodes [node_idx];
2091+ cann_ctx->cann_graph ->ggml_graph_properties [node_idx].node_address = node->data ;
2092+ cann_ctx->cann_graph ->ggml_graph_properties [node_idx].node_op = node->op ;
2093+
2094+ for (int dim = 0 ; dim < GGML_MAX_DIMS; dim++) {
2095+ cann_ctx->cann_graph ->ggml_graph_properties [node_idx].ne [dim] = node->ne [dim];
2096+ cann_ctx->cann_graph ->ggml_graph_properties [node_idx].nb [dim] = node->nb [dim];
2097+ }
2098+ for (int src = 0 ; src < GGML_MAX_SRC; src++) {
2099+ cann_ctx->cann_graph ->ggml_graph_properties [node_idx].src_address [src] =
2100+ node->src [src] ? node->src [src]->data : nullptr ;
2101+ }
2102+ memcpy (cann_ctx->cann_graph ->ggml_graph_properties [node_idx].op_params , node->op_params , GGML_MAX_OP_PARAMS);
2103+ }
2104+ }
2105+
2106+ /* *
2107+ * @brief Check if a ggml tensor node matches a previously captured CANN graph node.
2108+ *
2109+ * This function compares all relevant fields (address, op type, shape, source inputs, op params)
2110+ * to determine whether the current node matches a previously recorded version.
2111+ *
2112+ * @param node The current ggml tensor node.
2113+ * @param graph_node_properties The stored properties of a CANN graph node.
2114+ * @return true if all fields match (excluding GGML_OP_VIEW); false otherwise.
2115+ */
2116+ static bool ggml_graph_node_has_matching_properties (ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
2117+ if (node->data != graph_node_properties->node_address &&
2118+ node->op != GGML_OP_VIEW) {
2119+ return false ;
2120+ }
2121+ if (node->op != graph_node_properties->node_op ) {
2122+ return false ;
2123+ }
2124+ for (int i = 0 ; i < GGML_MAX_DIMS; i++) {
2125+ if (node->ne [i] != graph_node_properties->ne [i]) {
2126+ return false ;
2127+ }
2128+ if (node->nb [i] != graph_node_properties->nb [i]) {
2129+ return false ;
2130+ }
2131+ }
2132+ for (int i = 0 ; i < GGML_MAX_SRC; i++) {
2133+ if (node->src [i] &&
2134+ node->src [i]->data != graph_node_properties->src_address [i] &&
2135+ node->op != GGML_OP_VIEW
2136+ ) {
2137+ return false ;
2138+ }
2139+ }
2140+ if (node->op == GGML_OP_SCALE &&
2141+ memcmp (graph_node_properties->op_params , node->op_params , GGML_MAX_OP_PARAMS) != 0 ) {
2142+ return false ;
2143+ }
2144+ return true ;
2145+ }
2146+
2147+ /* *
2148+ * @brief Determine if the CANN graph needs to be rebuilt due to graph changes.
2149+ *
2150+ * This checks whether the number or properties of ggml graph nodes have changed
2151+ * compared to the last captured CANN graph. If so, the CANN graph must be re-captured.
2152+ *
2153+ * @param cann_ctx The CANN backend context.
2154+ * @param cgraph The current ggml computation graph.
2155+ * @return true if an update is required; false otherwise.
2156+ */
2157+ static bool is_cann_graph_update_required (ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2158+ // The number of nodes is different, so the graph needs to be reconstructed.
2159+ if (cann_ctx->cann_graph ->ggml_graph_properties .size () != (size_t )cgraph->n_nodes ) {
2160+ cann_ctx->cann_graph ->ggml_graph_properties .resize (cgraph->n_nodes );
2161+ return true ;
2162+ }
2163+
2164+ // The number of nodes is the same; iterate over each node to check whether they match.
2165+ for (int i = 0 ; i < cgraph->n_nodes ; i++) {
2166+ bool has_matching_properties = ggml_graph_node_has_matching_properties (
2167+ cgraph->nodes [i], &cann_ctx->cann_graph ->ggml_graph_properties [i]);
2168+ if (!has_matching_properties) {
2169+ return true ;
2170+ }
2171+ }
2172+ return false ;
2173+ }
2174+ #endif // USE_ACL_GRAPH
2175+
2176+ /* *
2177+ * @brief Evaluate the computation graph and optionally capture or execute it using CANN graph API.
2178+ *
2179+ * If CANN graph execution is enabled and graph capture is required, this function begins
2180+ * graph capture, runs the graph, ends capture, and stores the captured graph.
2181+ *
2182+ * Otherwise, it falls back to op-by-op execution using the CANN compute kernel dispatcher.
2183+ *
2184+ * @param cann_ctx The CANN backend context.
2185+ * @param cgraph The ggml computation graph.
2186+ * @param use_cann_graph Whether to use CANN graph execution.
2187+ * @param cann_graph_update_required Whether graph capture is needed due to graph changes.
2188+ */
2189+ static void evaluate_and_capture_cann_graph (ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph,
2190+ bool & use_cann_graph, bool & cann_graph_update_required) {
2191+ #ifdef USE_ACL_GRAPH
2192+ if (use_cann_graph && cann_graph_update_required) {
2193+ if (cann_ctx->cann_graph ->graph != nullptr ) {
2194+ ACL_CHECK (aclmdlRIDestroy (cann_ctx->cann_graph ->graph ));
2195+ cann_ctx->cann_graph ->graph = nullptr ;
2196+ }
2197+ ACL_CHECK (aclmdlRICaptureBegin (cann_ctx->stream (), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL));
2198+ }
2199+ #endif // USE_ACL_GRAPH
2200+
2201+ // Only perform the graph execution if CANN graphs are not enabled, or we are capturing the graph.
2202+ // With the use of CANN graphs, the execution will be performed by the graph launch.
2203+ if (!use_cann_graph || cann_graph_update_required) {
2204+ for (int i = 0 ; i < cgraph->n_nodes ; i++) {
2205+ ggml_tensor * node = cgraph->nodes [i];
2206+
2207+ if (ggml_is_empty (node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
2208+ continue ;
2209+ }
2210+
2211+ bool ok = ggml_cann_compute_forward (*cann_ctx, node);
2212+ if (!ok) {
2213+ GGML_LOG_ERROR (" %s: op not supported %s (%s)\n " , __func__, node->name , ggml_op_name (node->op ));
2214+ }
2215+ GGML_ASSERT (ok);
2216+ }
2217+ }
2218+
2219+ #ifdef USE_ACL_GRAPH
2220+ if (use_cann_graph && cann_graph_update_required) { // End CANN graph capture
2221+ ACL_CHECK (aclmdlRICaptureEnd (cann_ctx->stream (), &cann_ctx->cann_graph ->graph ));
2222+ }
2223+
2224+ if (use_cann_graph) {
2225+ // Execute graph
2226+ ACL_CHECK (aclmdlRIExecuteAsync (cann_ctx->cann_graph ->graph , cann_ctx->stream ()));
2227+ }
2228+ #endif // USE_ACL_GRAPH
2229+ }
2230+
2231+
20782232/* *
20792233 * @brief Computes a computational graph using a CANN backend.
20802234 *
@@ -2091,26 +2245,37 @@ static enum ggml_status ggml_backend_cann_graph_compute(
20912245 ggml_backend_t backend, ggml_cgraph* cgraph) {
20922246 ggml_backend_cann_context* cann_ctx =
20932247 (ggml_backend_cann_context*)backend->context ;
2094-
20952248 ggml_cann_set_device (cann_ctx->device );
2096- // release temp buffer create by set tensor.
20972249 release_nz_workspace ();
2250+ #ifdef USE_ACL_GRAPH
2251+ bool use_cann_graph = true ;
2252+ bool cann_graph_update_required = false ;
20982253
2099- for (int i = 0 ; i < cgraph->n_nodes ; i++) {
2100- ggml_tensor* node = cgraph->nodes [i];
2254+ // check environment LLAMA_SET_ROWS
2255+ if (!cann_ctx->support_set_rows ) {
2256+ use_cann_graph = false ;
2257+ }
21012258
2102- if (ggml_is_empty (node) || node->op == GGML_OP_NONE) {
2103- continue ;
2259+ if (use_cann_graph) {
2260+ if (cann_ctx->cann_graph == nullptr ) {
2261+ cann_ctx->cann_graph .reset (new ggml_cann_graph ());
2262+ cann_graph_update_required = true ;
21042263 }
21052264
2106- bool ok = ggml_cann_compute_forward (*cann_ctx, node);
2107-
2108- if (!ok) {
2109- GGML_LOG_ERROR (" %s: error: op not supported %s (%s)\n " , __func__,
2110- node->name , ggml_op_name (node->op ));
2111- }
2112- GGML_ASSERT (ok);
2265+ cann_graph_update_required = is_cann_graph_update_required (cann_ctx, cgraph);
2266+ set_ggml_graph_node_properties (cann_ctx, cgraph);
21132267 }
2268+ #else
2269+ bool use_cann_graph = false ;
2270+ bool cann_graph_update_required = false ;
2271+ #endif // USE_ACL_GRAPH
2272+
2273+ evaluate_and_capture_cann_graph (
2274+ cann_ctx,
2275+ cgraph,
2276+ use_cann_graph,
2277+ cann_graph_update_required
2278+ );
21142279
21152280 return GGML_STATUS_SUCCESS;
21162281}
@@ -2226,12 +2391,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
22262391 // only support F32 and F16.
22272392 return false ;
22282393 }
2229-
2230- if (!ggml_are_same_shape (op, src) && !ggml_is_contiguous (op)) {
2231- // unsupport dst is not contiguous.
2232- return false ;
2233- }
2234-
22352394 return true ;
22362395 } break ;
22372396 case GGML_OP_CONT: {
0 commit comments