We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9ec19e1, commit c9d79db (Copy full SHA for c9d79db)
ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp
@@ -68,6 +68,10 @@ void main() {
68
state[j] = s0[s0_base_idx + j * D_STATE + tid];
69
}
70
71
+ if (tid >= D_STATE) {
72
+ return;
73
+ }
74
+
75
for (int i = 0; i < int(n_tok); i++) {
76
float dt_soft_plus = dt[dt_base_idx + i * stride_dt];
77
dt_soft_plus = softplus(dt_soft_plus);
@@ -119,9 +123,8 @@ void main() {
119
123
barrier();
120
124
121
125
122
- y = warp_sdata[tid & ~(SUBGROUP_SIZE - 1)];
-
126
if (tid % SUBGROUP_SIZE == 0) {
127
+ y = warp_sdata[tid & ~(SUBGROUP_SIZE - 1)];
128
const int k = tid / SUBGROUP_SIZE + j * (D_STATE / SUBGROUP_SIZE);
129
d[y_base_idx + uint(i) * uint(stride_y) + uint(k)] = y;
130
0 commit comments