Skip to content

Commit 3e23841

Browse files
committed
Re-add Vulkan perf timing (GGML_VULKAN_PERF)
1 parent b775345 commit 3e23841

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9004,6 +9004,10 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
90049004

90059005
vk_context subctx = ctx->tensor_ctxs[tensor_idx].lock();
90069006

9007+
#ifdef GGML_VULKAN_PERF
9008+
std::chrono::steady_clock::time_point start;
9009+
#endif
9010+
90079011
// always wait for the GPU work to be done for the last submit
90089012
if (tensor_idx == subctx->exit_tensor_idx) {
90099013
use_fence = true;
@@ -9013,6 +9017,8 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
90139017
if (!subctx->seqs.empty()) {
90149018
#ifdef GGML_VULKAN_CHECK_RESULTS
90159019
ggml_vk_check_results_0(tensor);
9020+
#endif
9021+
#if defined(GGML_VULKAN_CHECK_RESULTS) || defined(GGML_VULKAN_PERF)
90169022
use_fence = true;
90179023
#endif
90189024

@@ -9021,6 +9027,10 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
90219027
memcpy(cpy.dst, cpy.src, cpy.n);
90229028
}
90239029

9030+
#ifdef GGML_VULKAN_PERF
9031+
start = std::chrono::steady_clock::now();
9032+
#endif
9033+
90249034
if (almost_ready && !ctx->almost_ready_fence_pending && !use_fence) {
90259035
ggml_vk_submit(subctx, ctx->almost_ready_fence);
90269036
ctx->almost_ready_fence_pending = true;
@@ -9037,6 +9047,11 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
90379047
}
90389048

90399049
if (tensor_idx == subctx->exit_tensor_idx) {
9050+
#ifdef GGML_VULKAN_PERF
9051+
auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now() - start);
9052+
ctx->device->perf_logger->log_timing(tensor, duration.count());
9053+
#endif
9054+
90409055
// Do staging buffer copies
90419056
for (auto& cpy : subctx->out_memcpys) {
90429057
memcpy(cpy.dst, cpy.src, cpy.n);
@@ -9534,7 +9549,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
95349549
if (enqueued) {
95359550
++submitted_nodes;
95369551

9537-
#ifndef GGML_VULKAN_CHECK_RESULTS
9552+
#if !defined(GGML_VULKAN_CHECK_RESULTS) && !defined(GGML_VULKAN_PERF)
95389553
if (first_node_in_batch) {
95399554
first_node_in_batch = false;
95409555
}

0 commit comments

Comments
 (0)