Skip to content

Commit 4456649

Browse files
committed
Fixes for Intel perf: no shmem padding, placeholder shader core count
1 parent 7d3553f commit 4456649

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3095,6 +3095,10 @@ static void ggml_vk_load_shaders(vk_device& device) {
30953095
uint32_t conv2d_TS_K = 8;
30963096
uint32_t conv2d_SHMEM_PAD = 4;
30973097

3098+
if (device->vendor_id == VK_VENDOR_ID_INTEL) {
3099+
conv2d_SHMEM_PAD = 0;
3100+
}
3101+
30983102
switch (s) {
30993103
default:
31003104
case CONV_SHAPE_128x128:
@@ -7060,9 +7064,14 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
70607064
for (uint32_t i = 0; i < CONV_SHAPE_COUNT; ++i) {
70617065
tiles[i] = CEIL_DIV(elements[0], ctx->device->pipeline_conv2d_f32[i]->wg_denoms[0]) * CEIL_DIV(elements[1], ctx->device->pipeline_conv2d_f32[i]->wg_denoms[1]);
70627066
}
7063-
if (elements[0] > 64 && tiles[CONV_SHAPE_128x128] >= ctx->device->shader_core_count * 2) {
7067+
7068+
// We can't query the number of shader cores on Intel; use 32 as a placeholder
7069+
// so small convolutions will still choose a smaller tile.
7070+
const uint32_t shader_core_count = ctx->device->shader_core_count > 0 ? ctx->device->shader_core_count : 32;
7071+
7072+
if (elements[0] > 64 && tiles[CONV_SHAPE_128x128] >= shader_core_count * 2) {
70647073
shape = CONV_SHAPE_128x128;
7065-
} else if (elements[0] <= 32 && tiles[CONV_SHAPE_32x256] >= ctx->device->shader_core_count * 2) {
7074+
} else if (elements[0] <= 32 && tiles[CONV_SHAPE_32x256] >= shader_core_count * 2) {
70667075
shape = CONV_SHAPE_32x256;
70677076
} else {
70687077
shape = CONV_SHAPE_64x32;

0 commit comments

Comments
 (0)