Skip to content

Commit 1c8ddc8

Browse files
committed
cann : refactor ACL graph cache
Move the graph property checking code into methods of LRU cache. Signed-off-by: Wang Weixuan <[email protected]>
1 parent dc476ef commit 1c8ddc8

File tree

2 files changed

+223
-130
lines changed

2 files changed

+223
-130
lines changed

ggml/src/ggml-cann/common.h

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,60 @@ struct ggml_graph_node_properties {
345345
size_t nb[GGML_MAX_DIMS];
346346
void * src_address[GGML_MAX_SRC];
347347
int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
348+
349+
/**
350+
* @brief Check if a ggml tensor node matches this property set.
351+
*
352+
* This function compares all relevant fields (address, op type, shape, source inputs, op params)
353+
* to determine whether the current node matches these previously recorded properties.
354+
*
355+
* @param node The current ggml tensor node.
356+
* @return true if all fields match (excluding GGML_OP_VIEW); false otherwise.
357+
*/
358+
bool has_matching_properties(ggml_tensor * node) {
359+
if (node->data != this->node_address && node->op != GGML_OP_VIEW) {
360+
return false;
361+
}
362+
363+
if (node->op != this->node_op) {
364+
return false;
365+
}
366+
367+
for (int i = 0; i < GGML_MAX_DIMS; i++) {
368+
if (node->ne[i] != this->ne[i]) {
369+
return false;
370+
}
371+
if (node->nb[i] != this->nb[i]) {
372+
return false;
373+
}
374+
}
375+
376+
for (int i = 0; i < GGML_MAX_SRC; i++) {
377+
if (node->src[i]) {
378+
if (node->src[i]->data != this->src_address[i] && node->op != GGML_OP_VIEW) {
379+
return false;
380+
}
381+
382+
for (int d = 0; d < GGML_MAX_DIMS; d++) {
383+
if (node->src[i]->ne[d] != this->src_ne[i][d]) {
384+
return false;
385+
}
386+
if (node->src[i]->nb[d] != this->src_nb[i][d]) {
387+
return false;
388+
}
389+
}
390+
} else {
391+
if (this->src_address[i] != nullptr) {
392+
return false;
393+
}
394+
}
395+
}
396+
397+
if (node->op == GGML_OP_SCALE || node->op == GGML_OP_UNARY || node->op == GGML_OP_GLU) {
398+
return memcmp(this->op_params, node->op_params, GGML_MAX_OP_PARAMS) == 0;
399+
}
400+
return true;
401+
}
348402
};
349403

350404
struct ggml_cann_graph {
@@ -357,6 +411,146 @@ struct ggml_cann_graph {
357411
aclmdlRI graph = nullptr;
358412

359413
std::vector<ggml_graph_node_properties> ggml_graph_properties;
414+
415+
/**
416+
* @brief Create a new CANN graph from a ggml computation graph.
417+
*
418+
* This function creates a new ggml_cann_graph object and fills its node properties
419+
* (operation type, dimensions, strides, input sources, and operation parameters)
420+
* based on the current ggml computation graph.
421+
*
422+
* Each node in the ggml graph is mapped to a property entry in the new CANN graph:
423+
* - node address
424+
* - operation type
425+
* - shape (ne) and strides (nb)
426+
* - source tensor addresses
427+
* - operation parameters
428+
*
429+
* @param cgraph The current ggml computation graph.
430+
* @return Pointer to the newly created ggml_cann_graph object.
431+
*/
432+
static ggml_cann_graph * create_from_cgraph(ggml_cgraph * cgraph) {
433+
ggml_cann_graph * new_graph = new ggml_cann_graph();
434+
new_graph->ggml_graph_properties.resize(cgraph->n_nodes);
435+
436+
for (int node_idx = 0; node_idx < cgraph->n_nodes; ++node_idx) {
437+
ggml_tensor * node = cgraph->nodes[node_idx];
438+
auto & prop = new_graph->ggml_graph_properties[node_idx];
439+
440+
prop.node_address = node->data;
441+
prop.node_op = node->op;
442+
443+
std::copy_n(node->ne, GGML_MAX_DIMS, prop.ne);
444+
std::copy_n(node->nb, GGML_MAX_DIMS, prop.nb);
445+
446+
for (int src = 0; src < GGML_MAX_SRC; ++src) {
447+
if (node->src[src]) {
448+
prop.src_address[src] = node->src[src]->data;
449+
std::copy_n(node->src[src]->ne, GGML_MAX_DIMS, prop.src_ne[src]);
450+
std::copy_n(node->src[src]->nb, GGML_MAX_DIMS, prop.src_nb[src]);
451+
} else {
452+
prop.src_address[src] = nullptr;
453+
std::fill_n(prop.src_ne[src], GGML_MAX_DIMS, 0);
454+
std::fill_n(prop.src_nb[src], GGML_MAX_DIMS, 0);
455+
}
456+
}
457+
458+
memcpy(prop.op_params, node->op_params, GGML_MAX_OP_PARAMS);
459+
}
460+
461+
return new_graph;
462+
}
463+
464+
/**
465+
* @brief Check whether this CANN graph matches the given ggml computation graph.
466+
*
467+
* This function compares the number of nodes and each node's properties
468+
* (operation type, dimensions, strides, inputs, and operation parameters)
469+
* to determine whether this CANN graph matches the given ggml graph.
470+
*
471+
* @param cgraph The current ggml computation graph.
472+
* @return true if this CANN graph matches the ggml graph; false otherwise.
473+
*/
474+
bool matches_cgraph(ggml_cgraph * cgraph) {
475+
if (this->ggml_graph_properties.size() != static_cast<size_t>(cgraph->n_nodes)) {
476+
return false;
477+
}
478+
479+
for (int i = 0; i < cgraph->n_nodes; ++i) {
480+
if (!this->ggml_graph_properties[i].has_matching_properties(cgraph->nodes[i])) {
481+
return false;
482+
}
483+
}
484+
485+
return true;
486+
}
487+
};
488+
489+
/**
490+
* @brief LRU cache for managing ggml_cann_graph objects.
491+
*
492+
* This class maintains a list of shared_ptr to ggml_cann_graph objects
493+
* and enforces a maximum capacity. It provides methods to push new graphs,
494+
* move existing graphs to the front (most recently used), and clear the cache.
495+
*/
496+
struct ggml_cann_graph_lru_cache {
497+
size_t capacity; /**< Maximum number of graphs in the cache. */
498+
499+
std::list<ggml_cann_graph *> cache_list; /**< List storing cached graphs as raw pointers. */
500+
501+
ggml_cann_graph_lru_cache() { capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); }
502+
503+
/**
504+
* @brief Push a new graph to the front of the cache.
505+
* If the cache exceeds capacity, the least recently used graph is deleted.
506+
* @param new_node Pointer to the new ggml_cann_graph to cache.
507+
* Ownership is transferred to the cache (cache will delete it).
508+
*/
509+
void push(ggml_cann_graph * new_node) {
510+
if (cache_list.size() >= capacity) {
511+
ggml_cann_graph * old = cache_list.back();
512+
cache_list.pop_back();
513+
delete old; // free the old graph
514+
}
515+
cache_list.push_front(new_node);
516+
}
517+
518+
/**
519+
* @brief Clear all graphs from the cache (also frees memory).
520+
*/
521+
void clear() {
522+
for (auto ptr : cache_list) {
523+
delete ptr;
524+
}
525+
cache_list.clear();
526+
}
527+
528+
/**
529+
* @brief Destructor that clears the cache and frees all cached graphs.
530+
*/
531+
~ggml_cann_graph_lru_cache() { clear(); }
532+
533+
/**
534+
* @brief Find a cached CANN graph that matches the given ggml graph and move it to front.
535+
*
536+
* This function iterates through the cached CANN graphs stored in the LRU cache and
537+
* compares them against the given ggml computation graph. If a matching graph is found,
538+
* it is promoted to the front of the LRU cache and returned. Otherwise, the function
539+
* returns nullptr.
540+
*
541+
* @param cgraph The current ggml computation graph.
542+
* @return true if found; false otherwise.
543+
*/
544+
bool find_and_move_to_front(ggml_cgraph * cgraph) {
545+
for (auto & graph_ptr : this->cache_list) {
546+
if (graph_ptr->matches_cgraph(cgraph)) {
547+
cache_list.remove(graph_ptr);
548+
cache_list.push_front(graph_ptr);
549+
return true;
550+
}
551+
}
552+
return false;
553+
}
360554
};
361555
#endif // USE_ACL_GRAPH
362556

0 commit comments

Comments
 (0)