-
Notifications
You must be signed in to change notification settings - Fork 13.4k
CANN: implement LRU cache for ACL graphs in CANN backend #15814
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
5ac42c3
4c9b10a
15b4ff7
81aa674
d91cefc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ | |
#include <unistd.h> | ||
#include <functional> | ||
#include <optional> | ||
#include <list> | ||
|
||
#include "../include/ggml-cann.h" | ||
#include "../include/ggml.h" | ||
|
@@ -358,6 +359,66 @@ struct ggml_cann_graph { | |
|
||
std::vector<ggml_graph_node_properties> ggml_graph_properties; | ||
}; | ||
|
||
/** | ||
* @brief LRU cache for managing ggml_cann_graph objects. | ||
* | ||
* This class maintains a list of shared_ptr to ggml_cann_graph objects | ||
* and enforces a maximum capacity. It provides methods to push new graphs, | ||
* move existing graphs to the front (most recently used), and clear the cache. | ||
*/ | ||
struct ggml_cann_graph_lru_cache { | ||
size_t capacity; /**< Maximum number of graphs in the cache. */ | ||
|
||
std::list<std::shared_ptr<ggml_cann_graph>> cache_list; /**< List storing cached graphs. */ | ||
|
||
std::shared_ptr<ggml_cann_graph> matched_graph = nullptr; /**< Pointer to a recently matched graph. */ | ||
|
||
ggml_cann_graph_lru_cache() { | ||
std::string env_val = get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12"); | ||
noemotiovon marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
try { | ||
capacity = std::stoul(env_val); | ||
} catch (...) { | ||
capacity = 12; // fallback to default if invalid | ||
} | ||
} | ||
|
||
/** | ||
* @brief Push a new graph to the front of the cache. | ||
* If the cache exceeds capacity, the least recently used graph is removed. | ||
* @param new_node Shared pointer to the new ggml_cann_graph to cache. | ||
*/ | ||
void push(std::shared_ptr<ggml_cann_graph> new_node) { | ||
if (cache_list.size() >= capacity) { | ||
cache_list.pop_back(); | ||
} | ||
|
||
cache_list.push_front(new_node); | ||
} | ||
|
||
/** | ||
* @brief Move an existing graph to the front of the cache. | ||
* @param node Shared pointer to the ggml_cann_graph to move. | ||
*/ | ||
void move_to_front(std::shared_ptr<ggml_cann_graph> node) { | ||
cache_list.remove(node); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Delete a list in array will go through all elements in array. It's better to use priority queue There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The current implementation has a time complexity of O(n), but even if I switch to a priority queue, it would still require a full traversal. I plan to add a map member variable to reduce the time complexity to O(1). |
||
cache_list.push_front(node); | ||
} | ||
|
||
/** | ||
* @brief Clear all graphs from the cache. | ||
*/ | ||
void clear() { | ||
cache_list.clear(); | ||
noemotiovon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
/** | ||
* @brief Destructor that clears the cache upon object destruction. | ||
*/ | ||
~ggml_cann_graph_lru_cache() { | ||
clear(); | ||
} | ||
}; | ||
#endif // USE_ACL_GRAPH | ||
|
||
struct ggml_cann_rope_cache { | ||
|
@@ -394,7 +455,7 @@ struct ggml_backend_cann_context { | |
aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */ | ||
#ifdef USE_ACL_GRAPH | ||
/// Cached CANN ACL graph used for executing the current ggml computation graph. | ||
std::unique_ptr<ggml_cann_graph> cann_graph; | ||
ggml_cann_graph_lru_cache graph_lru_cache; | ||
bool acl_graph_mode = true; | ||
#endif | ||
cann_task_queue task_queue; | ||
|
Uh oh!
There was an error while loading. Please reload this page.