@@ -92,7 +92,7 @@ void main() {
9292
9393#extension  GL_EXT_shader_explicit_arithmetic_types_int16 :  require
9494
95- VEC4_T q_8w_linear(const  u16vec3  out_pos, const  uint16_t K) {
95+ VEC4_T q_8w_linear(const  u16vec2  out_pos, const  uint16_t K) {
9696  const  uint16_t qmat2_pos_y =  out_pos.x *  uint16_t(4 );
9797
9898  VEC4_T outtex =  VEC4_T(0 );
@@ -101,7 +101,7 @@ VEC4_T q_8w_linear(const u16vec3 out_pos, const uint16_t K) {
101101  const  VEC4_T scales =  load_texel(t_scales, scales_pos);
102102
103103  for  (uint16_t i =  uint16_t(0 ), x =  uint16_t(0 ); i <  K; i +=  uint16_t(4 ), x++ ) {
104-     const  VEC4_T mat1_tex =  load_texel(t_mat1, u16vec3(x, out_pos.yz ));
104+     const  VEC4_T mat1_tex =  load_texel(t_mat1, u16vec3(x, out_pos.y,  0 ));
105105    const  VEC4_T sums =  VEC4_T(
106106        dot (mat1_tex, load_texel(t_qmat2, u16vec3(x, qmat2_pos_y, 0 ))),
107107        dot (mat1_tex, load_texel(t_qmat2, u16vec3(x, qmat2_pos_y +  uint16_t(1 ), 0 ))),
@@ -117,16 +117,15 @@ VEC4_T q_8w_linear(const u16vec3 out_pos, const uint16_t K) {
117117}
118118
119119void  main() {
120-   const  u16vec3  out_pos =  u16vec3 (
120+   const  u16vec2  out_pos =  u16vec2 (
121121    gl_GlobalInvocationID.x /  out_limits.y,
122-     gl_GlobalInvocationID.x %  out_limits.y,
123-     0 );
122+     gl_GlobalInvocationID.x %  out_limits.y);
124123  if  (out_pos.x >=  out_limits.x) {
125124    return ;
126125  }
127126
128127  VEC4_T outtex =  q_8w_linear(out_pos, uint16_t(mat1_sizes.x));
129-   write_texel(t_out, out_pos, outtex);
128+   write_texel(t_out, u16vec3( out_pos,  0 ) , outtex);
130129}
131130
132131#endif 
0 commit comments