@@ -2075,6 +2075,160 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
20752075 ACL_CHECK (aclrtSynchronizeStream (cann_ctx->stream ()));
20762076}
20772077
2078+ #ifdef USE_CANN_GRAPH
2079+ /* *
2080+ * @brief Populate the internal CANN graph node properties from the ggml computation graph.
2081+ *
2082+ * This function copies all node attributes (operation type, dimensions, strides, input sources,
2083+ * and operation parameters) into the cached CANN graph structure for later reuse or comparison.
2084+ *
2085+ * @param cann_ctx The CANN backend context.
2086+ * @param cgraph The ggml computational graph.
2087+ */
2088+ static void set_ggml_graph_node_properties (ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2089+ for (int node_idx = 0 ; node_idx < cgraph->n_nodes ; node_idx++) {
2090+ ggml_tensor * node = cgraph->nodes [node_idx];
2091+ cann_ctx->cann_graph ->ggml_graph_properties [node_idx].node_address = node->data ;
2092+ cann_ctx->cann_graph ->ggml_graph_properties [node_idx].node_op = node->op ;
2093+
2094+ for (int dim = 0 ; dim < GGML_MAX_DIMS; dim++) {
2095+ cann_ctx->cann_graph ->ggml_graph_properties [node_idx].ne [dim] = node->ne [dim];
2096+ cann_ctx->cann_graph ->ggml_graph_properties [node_idx].nb [dim] = node->nb [dim];
2097+ }
2098+ for (int src = 0 ; src < GGML_MAX_SRC; src++) {
2099+ cann_ctx->cann_graph ->ggml_graph_properties [node_idx].src_address [src] =
2100+ node->src [src] ? node->src [src]->data : nullptr ;
2101+ }
2102+ memcpy (cann_ctx->cann_graph ->ggml_graph_properties [node_idx].op_params , node->op_params , GGML_MAX_OP_PARAMS);
2103+ }
2104+ }
2105+
2106+ /* *
2107+ * @brief Check if a ggml tensor node matches a previously captured CANN graph node.
2108+ *
2109+ * This function compares all relevant fields (address, op type, shape, source inputs, op params)
2110+ * to determine whether the current node matches a previously recorded version.
2111+ *
2112+ * @param node The current ggml tensor node.
2113+ * @param graph_node_properties The stored properties of a CANN graph node.
2114+ * @return true if all fields match (excluding GGML_OP_VIEW); false otherwise.
2115+ */
2116+ static bool ggml_graph_node_has_matching_properties (ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
2117+ if (node->data != graph_node_properties->node_address &&
2118+ node->op != GGML_OP_VIEW) {
2119+ return false ;
2120+ }
2121+ if (node->op != graph_node_properties->node_op ) {
2122+ return false ;
2123+ }
2124+ for (int i = 0 ; i < GGML_MAX_DIMS; i++) {
2125+ if (node->ne [i] != graph_node_properties->ne [i]) {
2126+ return false ;
2127+ }
2128+ if (node->nb [i] != graph_node_properties->nb [i]) {
2129+ return false ;
2130+ }
2131+ }
2132+ for (int i = 0 ; i < GGML_MAX_SRC; i++) {
2133+ if (node->src [i] &&
2134+ node->src [i]->data != graph_node_properties->src_address [i] &&
2135+ node->op != GGML_OP_VIEW
2136+ ) {
2137+ return false ;
2138+ }
2139+ }
2140+ if (node->op == GGML_OP_SCALE &&
2141+ memcmp (graph_node_properties->op_params , node->op_params , GGML_MAX_OP_PARAMS) != 0 ) {
2142+ return false ;
2143+ }
2144+ return true ;
2145+ }
2146+
2147+ /* *
2148+ * @brief Determine if the CANN graph needs to be rebuilt due to graph changes.
2149+ *
2150+ * This checks whether the number or properties of ggml graph nodes have changed
2151+ * compared to the last captured CANN graph. If so, the CANN graph must be re-captured.
2152+ *
2153+ * @param cann_ctx The CANN backend context.
2154+ * @param cgraph The current ggml computation graph.
2155+ * @return true if an update is required; false otherwise.
2156+ */
2157+ static bool is_cann_graph_update_required (ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2158+ // 节点个数不同,需重新构图
2159+ if (cann_ctx->cann_graph ->ggml_graph_properties .size () != (size_t )cgraph->n_nodes ) {
2160+ cann_ctx->cann_graph ->ggml_graph_properties .resize (cgraph->n_nodes );
2161+ return true ;
2162+ }
2163+
2164+ // 节点个数相同,便利每个节点,比较其是否匹配
2165+ for (int i = 0 ; i < cgraph->n_nodes ; i++) {
2166+ bool has_matching_properties = ggml_graph_node_has_matching_properties (
2167+ cgraph->nodes [i], &cann_ctx->cann_graph ->ggml_graph_properties [i]);
2168+ if (!has_matching_properties) {
2169+ return true ;
2170+ }
2171+ }
2172+ return false ;
2173+ }
2174+ #endif // USE_CANN_GRAPH
2175+
2176+ /* *
2177+ * @brief Evaluate the computation graph and optionally capture or execute it using CANN graph API.
2178+ *
2179+ * If CANN graph execution is enabled and graph capture is required, this function begins
2180+ * graph capture, runs the graph, ends capture, and stores the captured graph.
2181+ *
2182+ * Otherwise, it falls back to op-by-op execution using the CANN compute kernel dispatcher.
2183+ *
2184+ * @param cann_ctx The CANN backend context.
2185+ * @param cgraph The ggml computation graph.
2186+ * @param use_cann_graph Whether to use CANN graph execution.
2187+ * @param cann_graph_update_required Whether graph capture is needed due to graph changes.
2188+ */
2189+ static void evaluate_and_capture_cann_graph (ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph,
2190+ bool & use_cann_graph, bool & cann_graph_update_required) {
2191+ #ifdef USE_CANN_GRAPH
2192+ if (use_cann_graph && cann_graph_update_required) {
2193+ if (cann_ctx->cann_graph ->graph != nullptr ) {
2194+ ACL_CHECK (aclmdlRIDestroy (cann_ctx->cann_graph ->graph ));
2195+ cann_ctx->cann_graph ->graph = nullptr ;
2196+ }
2197+ ACL_CHECK (aclmdlRICaptureBegin (cann_ctx->stream (), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL));
2198+ }
2199+ #endif // USE_CANN_GRAPH
2200+
2201+ // Only perform the graph execution if CANN graphs are not enabled, or we are capturing the graph.
2202+ // With the use of CANN graphs, the execution will be performed by the graph launch.
2203+ if (!use_cann_graph || cann_graph_update_required) {
2204+ for (int i = 0 ; i < cgraph->n_nodes ; i++) {
2205+ ggml_tensor * node = cgraph->nodes [i];
2206+
2207+ if (ggml_is_empty (node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
2208+ continue ;
2209+ }
2210+
2211+ bool ok = ggml_cann_compute_forward (*cann_ctx, node);
2212+ if (!ok) {
2213+ GGML_LOG_ERROR (" %s: op not supported %s (%s)\n " , __func__, node->name , ggml_op_name (node->op ));
2214+ }
2215+ GGML_ASSERT (ok);
2216+ }
2217+ }
2218+
2219+ #ifdef USE_CANN_GRAPH
2220+ if (use_cann_graph && cann_graph_update_required) { // End CANN graph capture
2221+ ACL_CHECK (aclmdlRICaptureEnd (cann_ctx->stream (), &cann_ctx->cann_graph ->graph ));
2222+ }
2223+
2224+ if (use_cann_graph) {
2225+ // Execute graph
2226+ ACL_CHECK (aclmdlRIExecuteAsync (cann_ctx->cann_graph ->graph , cann_ctx->stream ()));
2227+ }
2228+ #endif
2229+ }
2230+
2231+
20782232/* *
20792233 * @brief Computes a computational graph using a CANN backend.
20802234 *
@@ -2089,28 +2243,52 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
20892243 */
20902244static enum ggml_status ggml_backend_cann_graph_compute (
20912245 ggml_backend_t backend, ggml_cgraph* cgraph) {
2246+
20922247 ggml_backend_cann_context* cann_ctx =
20932248 (ggml_backend_cann_context*)backend->context ;
2094-
20952249 ggml_cann_set_device (cann_ctx->device );
2096- // release temp buffer create by set tensor.
20972250 release_nz_workspace ();
2098-
2099- for (int i = 0 ; i < cgraph->n_nodes ; i++) {
2100- ggml_tensor* node = cgraph->nodes [i];
2101-
2102- if (ggml_is_empty (node) || node->op == GGML_OP_NONE) {
2103- continue ;
2251+ #ifdef USE_CANN_GRAPH
2252+ bool use_cann_graph = true ;
2253+ bool cann_graph_update_required = false ;
2254+
2255+ // 检查环境变量 LLAMA_SET_ROWS
2256+ const char * LLAMA_SET_ROWS_ENV = std::getenv (" LLAMA_SET_ROWS" );
2257+ bool supports_set_rows = LLAMA_SET_ROWS_ENV ? (std::atoi (LLAMA_SET_ROWS_ENV) != 0 ) : false ;
2258+
2259+ if (!supports_set_rows) {
2260+ if (cann_ctx->set_row_log ) {
2261+ GGML_LOG_ERROR (
2262+ " %s: CANN Graph disabled — environment variable LLAMA_SET_ROWS not set or invalid. "
2263+ " To enable CANN ACL Graph execution, export LLAMA_SET_ROWS=1. "
2264+ " Falling back to non-graph mode on device %d.\n " ,
2265+ __func__, cann_ctx->device
2266+ );
2267+ cann_ctx->set_row_log = false ;
21042268 }
2105-
2106- bool ok = ggml_cann_compute_forward (*cann_ctx, node);
2107-
2108- if (!ok) {
2109- GGML_LOG_ERROR (" %s: error: op not supported %s (%s)\n " , __func__,
2110- node->name , ggml_op_name (node->op ));
2269+ use_cann_graph = false ;
2270+ }
2271+
2272+ if (use_cann_graph) {
2273+ if (cann_ctx->cann_graph == nullptr ) {
2274+ cann_ctx->cann_graph .reset (new ggml_cann_graph ());
2275+ cann_graph_update_required = true ;
21112276 }
2112- GGML_ASSERT (ok);
2277+
2278+ cann_graph_update_required = is_cann_graph_update_required (cann_ctx, cgraph);
2279+ set_ggml_graph_node_properties (cann_ctx, cgraph);
21132280 }
2281+ #else
2282+ bool use_cann_graph = false ;
2283+ bool cann_graph_update_required = false ;
2284+ #endif // USE_CANN_GRAPH
2285+
2286+ evaluate_and_capture_cann_graph (
2287+ cann_ctx,
2288+ cgraph,
2289+ use_cann_graph,
2290+ cann_graph_update_required
2291+ );
21142292
21152293 return GGML_STATUS_SUCCESS;
21162294}
@@ -2226,12 +2404,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
22262404 // only support F32 and F16.
22272405 return false ;
22282406 }
2229-
2230- if (!ggml_are_same_shape (op, src) && !ggml_is_contiguous (op)) {
2231- // unsupport dst is not contiguous.
2232- return false ;
2233- }
2234-
22352407 return true ;
22362408 } break ;
22372409 case GGML_OP_CONT: {
0 commit comments