Skip to content

Commit 711433a

Browse files
noemotiovon and ggerganov
authored and committed
CANN: implement LRU cache for ACL graphs (llama/15814)
* CANN: implement LRU cache for ACL graphs in CANN backend - Introduce ggml_cann_graph_lru_cache to store multiple ggml_cann_graph objects. - Graphs are loaded on demand and evicted using LRU policy when capacity is exceeded. - Updated push, move_to_front, and clear methods to manage cached graphs efficiently. - Ensures reuse of graphs, reducing graph reconstruction overhead in CANN backend. * fix typo * The LRU cache capacity can be configured via an env variable Signed-off-by: noemotiovon <[email protected]> * refactor acl graph * refactor && fix review comments Signed-off-by: noemotiovon <[email protected]> --------- Signed-off-by: noemotiovon <[email protected]>
1 parent 97ec0e5 commit 711433a

File tree

2 files changed

+160
-51
lines changed

2 files changed

+160
-51
lines changed

src/ggml-cann/common.h

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include <unistd.h>
3939
#include <functional>
4040
#include <optional>
41+
#include <list>
4142

4243
#include "../include/ggml-cann.h"
4344
#include "../include/ggml.h"
@@ -106,6 +107,7 @@ int32_t ggml_cann_get_device();
106107

107108
std::optional<std::string> get_env(const std::string& name);
108109
bool parse_bool(const std::string& value);
110+
int parse_integer(const std::string& value);
109111

110112
/**
111113
* @brief Abstract base class for memory pools used by CANN.
@@ -350,14 +352,72 @@ struct ggml_graph_node_properties {
350352
struct ggml_cann_graph {
351353
~ggml_cann_graph() {
352354
if (graph != nullptr) {
353-
aclmdlRIDestroy(graph);
355+
ACL_CHECK(aclmdlRIDestroy(graph));
354356
}
355357
}
356358

357359
aclmdlRI graph = nullptr;
358360

359361
std::vector<ggml_graph_node_properties> ggml_graph_properties;
360362
};
363+
364+
/**
365+
* @brief LRU cache for managing ggml_cann_graph objects.
366+
*
367+
* This class maintains a list of owning raw pointers to ggml_cann_graph objects
368+
* and enforces a maximum capacity. It provides methods to push new graphs,
369+
* move existing graphs to the front (most recently used), and clear the cache.
370+
*/
371+
struct ggml_cann_graph_lru_cache {
372+
size_t capacity; /**< Maximum number of graphs in the cache. */
373+
374+
std::list<ggml_cann_graph*> cache_list; /**< List storing cached graphs as raw pointers. */
375+
376+
ggml_cann_graph_lru_cache() {
377+
capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12"));
378+
}
379+
380+
/**
381+
* @brief Push a new graph to the front of the cache.
382+
* If the cache is already at capacity, the least recently used graph is deleted first.
383+
* @param new_node Pointer to the new ggml_cann_graph to cache.
384+
* Ownership is transferred to the cache (cache will delete it).
385+
*/
386+
void push(ggml_cann_graph* new_node) {
387+
if (cache_list.size() >= capacity) {
388+
ggml_cann_graph* old = cache_list.back();
389+
cache_list.pop_back();
390+
delete old; // free the old graph
391+
}
392+
cache_list.push_front(new_node);
393+
}
394+
395+
/**
396+
* @brief Move an existing graph to the front of the cache.
397+
* @param node Pointer to the ggml_cann_graph to move.
398+
*/
399+
void move_to_front(ggml_cann_graph* node) {
400+
cache_list.remove(node);
401+
cache_list.push_front(node);
402+
}
403+
404+
/**
405+
* @brief Clear all graphs from the cache (also frees memory).
406+
*/
407+
void clear() {
408+
for (auto ptr : cache_list) {
409+
delete ptr;
410+
}
411+
cache_list.clear();
412+
}
413+
414+
/**
415+
* @brief Destructor that clears the cache and frees all cached graphs.
416+
*/
417+
~ggml_cann_graph_lru_cache() {
418+
clear();
419+
}
420+
};
361421
#endif // USE_ACL_GRAPH
362422

363423
struct ggml_cann_rope_cache {
@@ -394,7 +454,7 @@ struct ggml_backend_cann_context {
394454
aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */
395455
#ifdef USE_ACL_GRAPH
396456
/// Cached CANN ACL graph used for executing the current ggml computation graph.
397-
std::unique_ptr<ggml_cann_graph> cann_graph;
457+
ggml_cann_graph_lru_cache graph_lru_cache;
398458
bool acl_graph_mode = true;
399459
#endif
400460
cann_task_queue task_queue;

src/ggml-cann/ggml-cann.cpp

Lines changed: 98 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,24 @@ bool parse_bool(const std::string& value) {
116116
return valid_values.find(value) != valid_values.end();
117117
}
118118

119+
/**
120+
* @brief Parse a string as an integer, returning 0 if invalid.
121+
*
122+
* This function attempts to convert the input string `value` to an `int`.
123+
* If the string is not a valid integer or is out of the `int` range,
124+
* it returns 0.
125+
*
126+
* @param value The string to parse.
127+
* @return The parsed integer, or 0 if conversion fails.
128+
*/
129+
int parse_integer(const std::string& value) {
130+
try {
131+
return std::stoi(value);
132+
} catch (...) {
133+
return 0;
134+
}
135+
}
136+
119137
/**
120138
* @brief Initialize the CANN device information.
121139
*
@@ -2131,30 +2149,52 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
21312149

21322150
#ifdef USE_ACL_GRAPH
21332151
/**
2134-
* @brief Populate the internal CANN graph node properties from the ggml computation graph.
2152+
* @brief Add a new CANN graph to the LRU cache by populating node properties from the ggml graph.
2153+
*
2154+
* This function creates a new ggml_cann_graph object and fills its node properties
2155+
* (operation type, dimensions, strides, input sources, and operation parameters)
2156+
* based on the current ggml computation graph.
21352157
*
2136-
* This function copies all node attributes (operation type, dimensions, strides, input sources,
2137-
* and operation parameters) into the cached CANN graph structure for later reuse or comparison.
2158+
* Each node in the ggml graph is mapped to a property entry in the new CANN graph:
2159+
* - node address
2160+
* - operation type
2161+
* - shape (ne) and strides (nb)
2162+
* - source tensor addresses
2163+
* - operation parameters
21382164
*
2139-
* @param cann_ctx The CANN backend context.
2140-
* @param cgraph The ggml computational graph.
2165+
* After initialization, the new graph is pushed into the LRU cache owned by the
2166+
* CANN backend context. The cache takes ownership of the graph and manages its
2167+
* lifetime (including deletion upon eviction).
2168+
*
2169+
* @param cann_ctx The CANN backend context containing the graph cache.
2170+
* @param cgraph The current ggml computation graph.
21412171
*/
2142-
static void set_ggml_graph_node_properties(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2143-
for (int node_idx = 0; node_idx < cgraph->n_nodes; node_idx++) {
2172+
static void add_lru_matched_graph_node_properties(
2173+
ggml_backend_cann_context * cann_ctx,
2174+
ggml_cgraph * cgraph) {
2175+
// Create a new ggml_cann_graph object on the heap (its lifetime is managed by the cache).
2176+
ggml_cann_graph * new_graph = new ggml_cann_graph();
2177+
new_graph->ggml_graph_properties.resize(cgraph->n_nodes);
2178+
2179+
for (int node_idx = 0; node_idx < cgraph->n_nodes; ++node_idx) {
21442180
ggml_tensor * node = cgraph->nodes[node_idx];
2145-
cann_ctx->cann_graph->ggml_graph_properties[node_idx].node_address = node->data;
2146-
cann_ctx->cann_graph->ggml_graph_properties[node_idx].node_op = node->op;
2181+
auto & prop = new_graph->ggml_graph_properties[node_idx];
21472182

2148-
for (int dim = 0; dim < GGML_MAX_DIMS; dim++) {
2149-
cann_ctx->cann_graph->ggml_graph_properties[node_idx].ne[dim] = node->ne[dim];
2150-
cann_ctx->cann_graph->ggml_graph_properties[node_idx].nb[dim] = node->nb[dim];
2151-
}
2152-
for (int src = 0; src < GGML_MAX_SRC; src++) {
2153-
cann_ctx->cann_graph->ggml_graph_properties[node_idx].src_address[src] =
2154-
node->src[src] ? node->src[src]->data : nullptr;
2183+
prop.node_address = node->data;
2184+
prop.node_op = node->op;
2185+
2186+
std::copy_n(node->ne, GGML_MAX_DIMS, prop.ne);
2187+
std::copy_n(node->nb, GGML_MAX_DIMS, prop.nb);
2188+
2189+
for (int src = 0; src < GGML_MAX_SRC; ++src) {
2190+
prop.src_address[src] = node->src[src] ? node->src[src]->data : nullptr;
21552191
}
2156-
memcpy(cann_ctx->cann_graph->ggml_graph_properties[node_idx].op_params, node->op_params, GGML_MAX_OP_PARAMS);
2192+
2193+
memcpy(prop.op_params, node->op_params, GGML_MAX_OP_PARAMS);
21572194
}
2195+
2196+
// Insert into the LRU cache (cache takes ownership and will delete it when evicted).
2197+
cann_ctx->graph_lru_cache.push(new_graph);
21582198
}
21592199

21602200
/**
@@ -2199,30 +2239,45 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
21992239
}
22002240

22012241
/**
2202-
* @brief Determine if the CANN graph needs to be rebuilt due to graph changes.
2242+
* @brief Check whether there is a cached CANN graph that matches the current ggml graph.
2243+
*
2244+
* This function iterates through the cached CANN graphs stored in the LRU cache and
2245+
* compares them against the given ggml computation graph. A match requires that the
2246+
* number of nodes is the same and that each node’s properties (operation type,
2247+
* dimensions, strides, inputs, and operation parameters) are identical.
22032248
*
2204-
* This checks whether the number or properties of ggml graph nodes have changed
2205-
* compared to the last captured CANN graph. If so, the CANN graph must be re-captured.
2249+
* If a matching graph is found, it is promoted to the front of the LRU cache and the
2250+
* function returns true. Otherwise, the function returns false, indicating that a new
2251+
* CANN graph needs to be captured.
22062252
*
2207-
* @param cann_ctx The CANN backend context.
2253+
* @param cann_ctx The CANN backend context containing the graph cache.
22082254
* @param cgraph The current ggml computation graph.
2209-
* @return true if an update is required; false otherwise.
2210-
*/
2211-
static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2212-
// The number of nodes is different, so the graph needs to be reconstructed.
2213-
if (cann_ctx->cann_graph->ggml_graph_properties.size() != (size_t)cgraph->n_nodes) {
2214-
cann_ctx->cann_graph->ggml_graph_properties.resize(cgraph->n_nodes);
2215-
return true;
2216-
}
2255+
* @return true if a matching cached graph exists; false otherwise.
2256+
*/
2257+
static bool is_matched_graph(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2258+
ggml_cann_graph_lru_cache &lru_cache = cann_ctx->graph_lru_cache;
2259+
for (auto &graph_ptr : lru_cache.cache_list) {
2260+
// Skip graphs with a different number of nodes.
2261+
if (graph_ptr->ggml_graph_properties.size() != static_cast<size_t>(cgraph->n_nodes)) {
2262+
continue;
2263+
}
22172264

2218-
// The number of nodes is the same; iterate over each node to check whether they match.
2219-
for (int i = 0; i < cgraph->n_nodes; i++) {
2220-
bool has_matching_properties = ggml_graph_node_has_matching_properties(
2221-
cgraph->nodes[i], &cann_ctx->cann_graph->ggml_graph_properties[i]);
2222-
if(!has_matching_properties) {
2265+
// Check if all nodes match.
2266+
bool all_match = true;
2267+
for (int i = 0; i < cgraph->n_nodes; ++i) {
2268+
if (!ggml_graph_node_has_matching_properties(cgraph->nodes[i], &graph_ptr->ggml_graph_properties[i])) {
2269+
all_match = false;
2270+
break;
2271+
}
2272+
}
2273+
2274+
if (all_match) {
2275+
// Promote the matched graph to most-recently-used; the caller then reads it from the front of cache_list.
2276+
lru_cache.move_to_front(graph_ptr);
22232277
return true;
22242278
}
22252279
}
2280+
22262281
return false;
22272282
}
22282283
#endif // USE_ACL_GRAPH
@@ -2241,17 +2296,13 @@ static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx,
22412296
* @param cann_graph_update_required Whether graph capture is needed due to graph changes.
22422297
*/
22432298
static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph,
2244-
bool & use_cann_graph, bool & cann_graph_update_required) {
2299+
bool & use_cann_graph, bool & cann_graph_update_required) {
22452300
#ifdef USE_ACL_GRAPH
2301+
ggml_cann_graph* matched_graph = cann_ctx->graph_lru_cache.cache_list.front();
22462302
if (use_cann_graph && cann_graph_update_required) {
2247-
if (cann_ctx->cann_graph->graph != nullptr) {
2248-
ACL_CHECK(aclmdlRIDestroy(cann_ctx->cann_graph->graph));
2249-
cann_ctx->cann_graph->graph = nullptr;
2250-
}
22512303
ACL_CHECK(aclmdlRICaptureBegin(cann_ctx->stream(), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL));
22522304
}
22532305
#endif // USE_ACL_GRAPH
2254-
22552306
// Only perform the graph execution if CANN graphs are not enabled, or we are capturing the graph.
22562307
// With the use of CANN graphs, the execution will be performed by the graph launch.
22572308
if (!use_cann_graph || cann_graph_update_required) {
@@ -2272,12 +2323,12 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx
22722323

22732324
#ifdef USE_ACL_GRAPH
22742325
if (use_cann_graph && cann_graph_update_required) { // End CANN graph capture
2275-
ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &cann_ctx->cann_graph->graph));
2326+
ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &matched_graph->graph));
22762327
}
22772328

22782329
if (use_cann_graph) {
22792330
// Execute graph
2280-
ACL_CHECK(aclmdlRIExecuteAsync(cann_ctx->cann_graph->graph, cann_ctx->stream()));
2331+
ACL_CHECK(aclmdlRIExecuteAsync(matched_graph->graph, cann_ctx->stream()));
22812332
}
22822333
#endif // USE_ACL_GRAPH
22832334
}
@@ -2311,19 +2362,17 @@ static enum ggml_status ggml_backend_cann_graph_compute(
23112362
}
23122363

23132364
if (use_cann_graph) {
2314-
if (cann_ctx->cann_graph == nullptr) {
2315-
cann_ctx->cann_graph.reset(new ggml_cann_graph());
2316-
cann_graph_update_required = true;
2365+
// If no matching graph is found, the graph needs to be recaptured.
2366+
cann_graph_update_required = !is_matched_graph(cann_ctx, cgraph);
2367+
if (cann_graph_update_required) {
2368+
// If no matching graph is found, add a new ACL graph.
2369+
add_lru_matched_graph_node_properties(cann_ctx, cgraph);
23172370
}
2318-
2319-
cann_graph_update_required = is_cann_graph_update_required(cann_ctx, cgraph);
2320-
set_ggml_graph_node_properties(cann_ctx, cgraph);
23212371
}
23222372
#else
23232373
bool use_cann_graph = false;
23242374
bool cann_graph_update_required = false;
23252375
#endif // USE_ACL_GRAPH
2326-
23272376
evaluate_and_capture_cann_graph(
23282377
cann_ctx,
23292378
cgraph,

0 commit comments

Comments
 (0)