Skip to content

Commit 04a7307

Browse files
committed
removed ggml_cuda_cpy_fn_ptrs
1 parent 6d7df91 commit 04a7307

File tree

1 file changed

+4
-14
lines changed

1 file changed

+4
-14
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2423,7 +2423,7 @@ static void ggml_backend_cuda_synchronize(ggml_backend_t backend) {
24232423

24242424
#ifdef USE_CUDA_GRAPH
24252425
static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph,
2426-
std::vector<void *> & ggml_cuda_cpy_fn_ptrs, bool use_cuda_graph) {
2426+
bool use_cuda_graph) {
24272427

24282428
// Loop over nodes in GGML graph to obtain info needed for CUDA graph
24292429
cuda_ctx->cuda_graph->cpy_dest_ptrs.clear();
@@ -2471,10 +2471,6 @@ static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cud
24712471
#ifndef NDEBUG
24722472
GGML_LOG_DEBUG("%s: disabling CUDA graphs due to unsupported copy op\n", __func__);
24732473
#endif
2474-
} else {
2475-
if (std::find(ggml_cuda_cpy_fn_ptrs.begin(), ggml_cuda_cpy_fn_ptrs.end(), ptr) == ggml_cuda_cpy_fn_ptrs.end()) {
2476-
ggml_cuda_cpy_fn_ptrs.push_back(ptr);
2477-
}
24782474
}
24792475
}
24802476

@@ -2600,8 +2596,7 @@ static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
26002596
#endif
26012597

26022598
static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph,
2603-
[[maybe_unused]] std::vector<void *> & ggml_cuda_cpy_fn_ptrs, bool & graph_evaluated_or_captured, bool & use_cuda_graph,
2604-
bool & cuda_graph_update_required) {
2599+
bool & graph_evaluated_or_captured, bool & use_cuda_graph, bool & cuda_graph_update_required) {
26052600

26062601
while (!graph_evaluated_or_captured) {
26072602
// Only perform the graph execution if CUDA graphs are not enabled, or we are capturing the graph.
@@ -2667,10 +2662,6 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
26672662

26682663
ggml_cuda_set_device(cuda_ctx->device);
26692664

2670-
// vector of pointers to CUDA cpy kernels, which are required to identify
2671-
// kernel parameters which need updated in the graph for each token
2672-
std::vector<void *> ggml_cuda_cpy_fn_ptrs;
2673-
26742665
#ifdef USE_CUDA_GRAPH
26752666
static const bool disable_cuda_graphs_due_to_env = (getenv("GGML_CUDA_DISABLE_GRAPHS") != nullptr);
26762667

@@ -2704,8 +2695,7 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
27042695
if (use_cuda_graph) {
27052696
cuda_graph_update_required = is_cuda_graph_update_required(cuda_ctx, cgraph);
27062697

2707-
use_cuda_graph = check_node_graph_compatibility_and_refresh_copy_ops(cuda_ctx, cgraph,
2708-
ggml_cuda_cpy_fn_ptrs, use_cuda_graph);
2698+
use_cuda_graph = check_node_graph_compatibility_and_refresh_copy_ops(cuda_ctx, cgraph, use_cuda_graph);
27092699

27102700
// Disable CUDA graphs (from the next token) if the use-case is demanding too many consecutive graph updates.
27112701
if (use_cuda_graph && cuda_graph_update_required) {
@@ -2733,7 +2723,7 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
27332723

27342724
bool graph_evaluated_or_captured = false;
27352725

2736-
evaluate_and_capture_cuda_graph(cuda_ctx, cgraph, ggml_cuda_cpy_fn_ptrs, graph_evaluated_or_captured, use_cuda_graph, cuda_graph_update_required);
2726+
evaluate_and_capture_cuda_graph(cuda_ctx, cgraph, graph_evaluated_or_captured, use_cuda_graph, cuda_graph_update_required);
27372727

27382728
return GGML_STATUS_SUCCESS;
27392729
}

0 commit comments

Comments (0)