Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@

// Workgroup shape: the x size is taken from specialization constant id 0;
// the y and z sizes are fixed at 1.
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

// Specialization constants. The literals below are only the defaults that
// apply when the corresponding constant_id is not specialized.
// WorkGroupSize uses constant_id 0, the same id that local_size_x_id binds to
// above, so specializing id 0 sets both the workgroup x size and this value.
layout (constant_id = 0) const uint32_t WorkGroupSize = 128;
// NOTE(review): Br and Bc are used further down as the extents of
// masksh[Bc][Br]; presumably row/column tile sizes -- confirm against the host code.
layout (constant_id = 1) const uint32_t Br = 1;
layout (constant_id = 2) const uint32_t Bc = 32;
// NOTE(review): presumably the head dimension -- confirm against the host code.
layout (constant_id = 3) const uint32_t D = 32;

// NOTE(review): constant_id 4 is not declared in the lines visible here.
layout (constant_id = 5) const uint32_t D_split = 16;
// Unsigned integer division: any remainder of D / D_split is discarded.
const uint32_t D_per_thread = D / D_split;

// NOTE(review): the next two lines declare the same name. The page header
// reports this as a diff view, so they appear to be the removed
// (gl_WorkGroupSize.x) and added (WorkGroupSize) sides of one change; a real
// shader source would contain only one of them.
const uint32_t cols_per_iter = gl_WorkGroupSize.x / D_split;
const uint32_t cols_per_iter = WorkGroupSize / D_split;
// Unsigned integer division of Bc by cols_per_iter.
const uint32_t cols_per_thread = Bc / cols_per_iter;

layout (push_constant) uniform parameter {
Expand Down Expand Up @@ -134,8 +135,8 @@ ACC_TYPE perElemOpComputeSlope(const in uint32_t r, const in uint32_t c, const i
return ACC_TYPE(pow(base, ACC_TYPE(exph)));
}

// Workgroup-shared scratch arrays.
// NOTE(review): tmpsh and tmpshv4 are each declared twice below. The page
// header reports this as a diff view, so the gl_WorkGroupSize.x pair appears
// to be the removed side and the WorkGroupSize pair the added side; a real
// shader source would contain only one pair.
shared FLOAT_TYPE tmpsh[gl_WorkGroupSize.x];
shared vec4 tmpshv4[gl_WorkGroupSize.x];
// One FLOAT_TYPE element and one vec4 element per WorkGroupSize entry.
shared FLOAT_TYPE tmpsh[WorkGroupSize];
shared vec4 tmpshv4[WorkGroupSize];

// Shared float array with extents Bc x Br.
shared float masksh[Bc][Br];
// Shared vec4 array with extents Br x (D / 4); D / 4 is unsigned integer division.
// NOTE(review): presumably D is a multiple of 4 so each row holds D scalars -- confirm.
shared vec4 Qf[Br][D / 4];
Expand Down
Loading