| 1 | +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 2 | +// SPDX-License-Identifier: MIT |
| 3 | + |
1 | 4 | #include "ggml-vulkan.h" |
2 | 5 | #include <vulkan/vulkan_core.h> |
3 | 6 | #if defined(GGML_VULKAN_RUN_TESTS) || defined(GGML_VULKAN_PERF) |
@@ -5005,6 +5008,8 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t |
5005 | 5008 | } |
5006 | 5009 | } |
5007 | 5010 |
| 5011 | + ggml_pipeline_allocate_descriptor_sets(ctx->device); |
| 5012 | + |
5008 | 5013 | vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); |
5009 | 5014 | vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); |
5010 | 5015 | vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal); |
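
Note: the tests now call the backend-internal helper ggml_pipeline_allocate_descriptor_sets(ctx->device) after requesting descriptor sets for a pipeline and before recording any dispatches. As a rough, hypothetical sketch only (not the backend's actual bookkeeping), allocating a batch of storage-buffer descriptor sets in Vulkan-Hpp looks roughly like this:

// Hypothetical sketch only: allocate `count` descriptor sets (each with
// `bindings_per_set` storage-buffer bindings) from a fresh pool for one layout.
// Names and pool sizing are illustrative, not the backend's actual code.
#include <vulkan/vulkan.hpp>
#include <cstdint>
#include <vector>

static std::vector<vk::DescriptorSet> allocate_sets(vk::Device device,
                                                    vk::DescriptorSetLayout layout,
                                                    uint32_t count,
                                                    uint32_t bindings_per_set) {
    vk::DescriptorPoolSize pool_size(vk::DescriptorType::eStorageBuffer, count * bindings_per_set);
    vk::DescriptorPoolCreateInfo pool_info({}, count, 1, &pool_size);
    vk::DescriptorPool pool = device.createDescriptorPool(pool_info);

    std::vector<vk::DescriptorSetLayout> layouts(count, layout);
    vk::DescriptorSetAllocateInfo alloc_info(pool, count, layouts.data());
    return device.allocateDescriptorSets(alloc_info);
}
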
@@ -5121,7 +5126,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t |
5121 | 5126 |
5122 | 5127 | avg_err /= m * n; |
5123 | 5128 |
5124 | | - std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms avg_err=" << avg_err << std::endl; |
| 5129 | + double tflops = 2.0*m*n*k*batch*num_it / (time / 1000.0) / (1000.0*1000.0*1000.0*1000.0); |
| 5130 | + |
| 5131 | + std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl; |
5125 | 5132 |
5126 | 5133 | if (avg_err > 0.1) { |
5127 | 5134 | std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl; |
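
Note: the TFLOPS figure added to the test output is the standard matrix-multiply cost model: an m-by-n-by-k matmul performs about 2*m*n*k floating-point operations (one multiply and one add per accumulated product), repeated for every batch element and for all num_it iterations, divided by the total elapsed time in seconds and by 10^12. A minimal standalone sketch of the same arithmetic, using an illustrative helper name:

// Minimal sketch of the throughput arithmetic in the test output above.
// matmul_tflops() is an illustrative name, not a function in the backend.
#include <cstdio>

static double matmul_tflops(double m, double n, double k, double batch, double num_it, double time_ms) {
    const double flops = 2.0 * m * n * k * batch * num_it;  // one multiply + one add per accumulated product
    return flops / (time_ms / 1000.0) / 1e12;               // FLOPs per second, scaled to 10^12
}

int main() {
    // e.g. a single 4096x4096x4096 matmul finishing in 100 ms -> ~1.37 TFLOPS
    std::printf("%.2f TFLOPS\n", matmul_tflops(4096, 4096, 4096, 1, 1, 100.0));
    return 0;
}
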
@@ -5243,12 +5250,14 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_ |
5243 | 5250 |
5244 | 5251 | ggml_pipeline_request_descriptor_sets(ctx->device, p, 1); |
5245 | 5252 |
| 5253 | + ggml_pipeline_allocate_descriptor_sets(ctx->device); |
| 5254 | + |
5246 | 5255 | ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz); |
5247 | 5256 |
5248 | 5257 | vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue); |
5249 | 5258 | ggml_vk_ctx_begin(ctx->device, subctx); |
5250 | 5259 | const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne }; |
5251 | | - ggml_vk_dispatch_pipeline(ctx, subctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1}); |
| 5260 | + ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1}); |
5252 | 5261 | ggml_vk_ctx_end(subctx); |
5253 | 5262 |
5254 | 5263 | auto begin = std::chrono::high_resolution_clock::now(); |
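
Note: the ggml_vk_dispatch_pipeline call above now passes explicitly typed vk_subbuffer ranges for the bound buffers, along with the five 32-bit push constants from pc (raw pointer plus byte size) and a three-element dispatch size. A rough, hypothetical sketch of recording such a compute dispatch in plain Vulkan-Hpp (not the backend's actual ggml_vk_dispatch_pipeline) looks like this:

// Hypothetical sketch: record a compute dispatch with a small push-constant
// payload. All handles are assumed to have been created and matched elsewhere;
// gx/gy/gz are workgroup counts in this simplified version.
#include <vulkan/vulkan.hpp>
#include <cstdint>
#include <vector>

static void record_compute_dispatch(vk::CommandBuffer cmd,
                                    vk::Pipeline pipeline,
                                    vk::PipelineLayout layout,
                                    vk::DescriptorSet set,
                                    const std::vector<uint32_t> & pc,
                                    uint32_t gx, uint32_t gy, uint32_t gz) {
    cmd.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline);
    cmd.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, { set }, {});
    cmd.pushConstants(layout, vk::ShaderStageFlagBits::eCompute, 0,
                      (uint32_t)(pc.size() * sizeof(uint32_t)), pc.data());
    cmd.dispatch(gx, gy, gz);
}
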
@@ -5375,6 +5384,8 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m, |
5375 | 5384 | } |
5376 | 5385 | } |
5377 | 5386 |
| 5387 | + ggml_pipeline_allocate_descriptor_sets(ctx->device); |
| 5388 | + |
5378 | 5389 | ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz); |
5379 | 5390 | ggml_vk_buffer_write(y_buf, 0, y, y_sz); |
5380 | 5391 |
@@ -5442,7 +5453,9 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m, |
5442 | 5453 |
5443 | 5454 | avg_err /= m * n; |
5444 | 5455 |
5445 | | - std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms avg_err=" << avg_err << std::endl; |
| 5456 | + double tflops = 2.0*m*n*k*batch*num_it / (time_ms / 1000.0) / (1000.0*1000.0*1000.0*1000.0); |
| 5457 | + |
| 5458 | + std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl; |
5446 | 5459 |
5447 | 5460 | if (avg_err > 0.01 || std::isnan(avg_err)) { |
5448 | 5461 | std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl; |
@@ -5494,9 +5507,6 @@ static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor) |
5494 | 5507 |
5495 | 5508 | static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) { |
5496 | 5509 | #if defined(GGML_VULKAN_RUN_TESTS) |
5497 | | - ctx->staging = ggml_vk_create_buffer_check(ctx->device, 100ul * 1024ul * 1024ul, |
5498 | | - vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached, |
5499 | | - vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); |
5500 | 5510 | ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32); |
5501 | 5511 | ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_0); |
5502 | 5512 | ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_1); |