Skip to content

Commit 3692020

Browse files
committed
vulkan: fix build for GGML_VULKAN_RUN_TESTS, add TFLOPS to logging
1 parent fbac47b commit 3692020

File tree

1 file changed

+16 -6 lines changed

src/ggml-vulkan.cpp

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
14
#include "ggml-vulkan.h"
25
#include <vulkan/vulkan_core.h>
36
#if defined(GGML_VULKAN_RUN_TESTS) || defined(GGML_VULKAN_PERF)
@@ -5005,6 +5008,8 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
50055008
}
50065009
}
50075010

5011+
ggml_pipeline_allocate_descriptor_sets(ctx->device);
5012+
50085013
vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
50095014
vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
50105015
vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
@@ -5121,7 +5126,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
51215126

51225127
avg_err /= m * n;
51235128

5124-
std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms avg_err=" << avg_err << std::endl;
5129+
double tflops = 2.0*m*n*k*batch*num_it / (time / 1000.0) / (1000.0*1000.0*1000.0*1000.0);
5130+
5131+
std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
51255132

51265133
if (avg_err > 0.1) {
51275134
std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
@@ -5243,12 +5250,14 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
52435250

52445251
ggml_pipeline_request_descriptor_sets(ctx->device, p, 1);
52455252

5253+
ggml_pipeline_allocate_descriptor_sets(ctx->device);
5254+
52465255
ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
52475256

52485257
vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
52495258
ggml_vk_ctx_begin(ctx->device, subctx);
52505259
const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
5251-
ggml_vk_dispatch_pipeline(ctx, subctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
5260+
ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
52525261
ggml_vk_ctx_end(subctx);
52535262

52545263
auto begin = std::chrono::high_resolution_clock::now();
@@ -5375,6 +5384,8 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
53755384
}
53765385
}
53775386

5387+
ggml_pipeline_allocate_descriptor_sets(ctx->device);
5388+
53785389
ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
53795390
ggml_vk_buffer_write(y_buf, 0, y, y_sz);
53805391

@@ -5442,7 +5453,9 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
54425453

54435454
avg_err /= m * n;
54445455

5445-
std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms avg_err=" << avg_err << std::endl;
5456+
double tflops = 2.0*m*n*k*batch*num_it / (time_ms / 1000.0) / (1000.0*1000.0*1000.0*1000.0);
5457+
5458+
std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
54465459

54475460
if (avg_err > 0.01 || std::isnan(avg_err)) {
54485461
std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
@@ -5494,9 +5507,6 @@ static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor)
54945507

54955508
static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
54965509
#if defined(GGML_VULKAN_RUN_TESTS)
5497-
ctx->staging = ggml_vk_create_buffer_check(ctx->device, 100ul * 1024ul * 1024ul,
5498-
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
5499-
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
55005510
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32);
55015511
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_0);
55025512
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_1);

0 commit comments

Comments (0)