@@ -16,8 +16,8 @@ layout(std430) buffer;
1616
1717#include "indexing_utils.h"
1818
19- layout (set = 0 , binding = 0 , ${IMAGE_FORMAT[ DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
20- layout (set = 0 , binding = 1 ) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} image_in;
19+ ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} // output tensor binding; takes the place of the explicit writeonly image_out declaration ("w" presumably means write-only access; confirm against the codegen helper)
20+ ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)} // input tensor binding; takes the place of the explicit image_in sampler declaration ("r" presumably means read-only access; confirm against the codegen helper)
2121
2222layout (push_constant) uniform PRECISION restrict Block {
2323 ivec4 out_limits;
@@ -72,7 +72,7 @@ void main() {
7272 fetch_pos[packed_dim] >>= 2 ;
7373
7474 // fetch input texel
75- VEC4_T inval = VEC4_T(texelFetch(image_in , fetch_pos, 0 ));
75+ VEC4_T inval = VEC4_T(load_texel(t_in , fetch_pos));
7676 outval[j] = inval[in_packed_dim_lane_index];
7777
7878 // go to next position in the input, that is mapped to the packed dim in the output
@@ -81,5 +81,5 @@ void main() {
8181
8282 pos[packed_dim] = int (gl_GlobalInvocationID[packed_dim]);
8383
84- imageStore(image_out , pos, outval);
84+ imageStore(t_out , pos, outval);
8585}
0 commit comments