@@ -9004,6 +9004,10 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
90049004
90059005 vk_context subctx = ctx->tensor_ctxs[tensor_idx].lock();
90069006
9007+ #ifdef GGML_VULKAN_PERF
9008+ std::chrono::steady_clock::time_point start;
9009+ #endif
9010+
90079011 // always wait for the GPU work to be done for the last submit
90089012 if (tensor_idx == subctx->exit_tensor_idx) {
90099013 use_fence = true;
@@ -9013,6 +9017,8 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
90139017 if (!subctx->seqs.empty()) {
90149018#ifdef GGML_VULKAN_CHECK_RESULTS
90159019 ggml_vk_check_results_0(tensor);
9020+ #endif
9021+ #if defined(GGML_VULKAN_CHECK_RESULTS) || defined(GGML_VULKAN_PERF)
90169022 use_fence = true;
90179023#endif
90189024
@@ -9021,6 +9027,10 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
90219027 memcpy(cpy.dst, cpy.src, cpy.n);
90229028 }
90239029
9030+ #ifdef GGML_VULKAN_PERF
9031+ start = std::chrono::steady_clock::now();
9032+ #endif
9033+
90249034 if (almost_ready && !ctx->almost_ready_fence_pending && !use_fence) {
90259035 ggml_vk_submit(subctx, ctx->almost_ready_fence);
90269036 ctx->almost_ready_fence_pending = true;
@@ -9037,6 +9047,11 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
90379047 }
90389048
90399049 if (tensor_idx == subctx->exit_tensor_idx) {
9050+ #ifdef GGML_VULKAN_PERF
9051+ auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now() - start);
9052+ ctx->device->perf_logger->log_timing(tensor, duration.count());
9053+ #endif
9054+
90409055 // Do staging buffer copies
90419056 for (auto& cpy : subctx->out_memcpys) {
90429057 memcpy(cpy.dst, cpy.src, cpy.n);
@@ -9534,7 +9549,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
95349549 if (enqueued) {
95359550 ++submitted_nodes;
95369551
9537- #ifndef GGML_VULKAN_CHECK_RESULTS
9552+ #if !defined( GGML_VULKAN_CHECK_RESULTS) && !defined(GGML_VULKAN_PERF)
95389553 if (first_node_in_batch) {
95399554 first_node_in_batch = false;
95409555 }
0 commit comments