File tree Expand file tree Collapse file tree 1 file changed +8
-8
lines changed
ggml/src/ggml-webgpu/wgsl-shaders Expand file tree Collapse file tree 1 file changed +8
-8
lines changed Original file line number Diff line number Diff line change @@ -19,20 +19,20 @@ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
1919 let start = params . offset ;
2020 let end = params . offset + params . size ;
2121
22- for (var j : u32 = 0u ; j < bytes_per_thread ; j = j + 1u ) {
22+ for (var j : u32 = 0u ; j < bytes_per_thread ; j += 4 ) {
2323 let byte_index = start + i + j ;
24- if (byte_index + 4u <= end ) {
25- output_buffer [( byte_index >> 2u ) ] = params . value ;
24+ if (byte_index + 4 <= end ) {
25+ output_buffer [byte_index >> 2 ] = params . value ;
2626 } else {
2727 // Handle tail (unaligned)
28- for (var k : u32 = 0u ; k < 4u ; k = k + 1u ) {
28+ for (var k : u32 = 0 ; k < 4 ; k ++ ) {
2929 let idx = byte_index + k ;
3030 if (idx < end ) {
31- let word_idx = idx >> 2u ;
32- let byte_offset = (idx & 3u ) * 8u ;
33- let mask = ~(0xffu << byte_offset );
31+ let word_idx = idx >> 2 ;
32+ let bit_offset = (idx & 3 ) * 8u ;
33+ let mask = ~(0xffu << bit_offset );
3434 let existing = output_buffer [word_idx ];
35- output_buffer [word_idx ] = (existing & mask ) | ((params . value & 0xffu ) << byte_offset );
35+ output_buffer [word_idx ] = (existing & mask ) | ((( params . value >> bit_offset ) & 0xff ) << bit_offset );
3636 }
3737 }
3838 }
You can’t perform that action at this time.
0 commit comments