Skip to content

Commit 2d67196

Browse files
committed
vulkan: Increase workgroup size for GLU, for performance
1 parent ab46d11 commit 2d67196

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2761,8 +2761,8 @@ static void ggml_vk_load_shaders(vk_device& device) {
2761 2761   #undef CREATE_UNARY
2762 2762
2763 2763   #define CREATE_GLU(name) \
2764      - ggml_vk_create_pipeline(device, device->pipeline_ ## name [0], #name "_f32", name ## _f32_len, name ## _f32_data, "main", 3, sizeof(vk_op_glu_push_constants), {1, 1, 1}, { device->subgroup_size }, 1); \
2765      - ggml_vk_create_pipeline(device, device->pipeline_ ## name [1], #name "_f16", name ## _f16_len, name ## _f16_data, "main", 3, sizeof(vk_op_glu_push_constants), {1, 1, 1}, { device->subgroup_size }, 1);
     2764 + ggml_vk_create_pipeline(device, device->pipeline_ ## name [0], #name "_f32", name ## _f32_len, name ## _f32_data, "main", 3, sizeof(vk_op_glu_push_constants), {1, 1, 1}, {}, 1); \
     2765 + ggml_vk_create_pipeline(device, device->pipeline_ ## name [1], #name "_f16", name ## _f16_len, name ## _f16_data, "main", 3, sizeof(vk_op_glu_push_constants), {1, 1, 1}, {}, 1);
2766 2766
2767 2767   CREATE_GLU(geglu)
2768 2768   CREATE_GLU(reglu)

ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
 1  1   #extension GL_EXT_shader_16bit_storage : require
 2  2
 3     - layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
    3 + layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
 4  4
 5  5   layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
 6  6   layout (binding = 1) readonly buffer B {A_TYPE data_b[];};
 7  7   layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
 8  8
 9     - layout (constant_id = 0) const uint BLOCK_SIZE = 32;
    9 + const uint BLOCK_SIZE = 512;
10 10
11 11   layout (push_constant) uniform parameter
12 12   {

0 commit comments

Comments
 (0)