@@ -124,42 +124,3 @@ void main() {
124124 }
125125 }
126126}
127-
128- #else
/*
 * Computes one output texel of the tiled convolution path.
 *
 * Starting from the bias texel fetched from t_bias, accumulates the products
 * of a TILE_SIZE x TILE_SIZE window of t_in texels with the corresponding
 * t_kernel texels, then writes op(sum, out_min, out_max) to t_out at the
 * output position handled by this invocation.
 */
void main() {
  // Unflatten the 1D global invocation index into an (x, y, z) output
  // position: x varies fastest, then y, then z.
  const uint div_by_x = gl_GlobalInvocationID.x / out_limits.x;
  const ivec3 pos = ivec3(
      gl_GlobalInvocationID.x % out_limits.x,
      div_by_x % out_limits.y,
      div_by_x / out_limits.y);

  // Invocations that land outside the output extents have nothing to write.
  if (any(greaterThanEqual(pos, out_limits))) {
    return;
  }

  // Compute the index of the top-left element of the overlay region. Negative
  // indices indicate that the top-left element is in a region added by padding.
  const ivec2 ipos = pos.xy * stride - padding;

  // First input coordinate to load. The window always spans TILE_SIZE steps of
  // `dilation` in each direction from here.
  // NOTE(review): no explicit range check is made on the input coordinates;
  // positions that fall in the padding region are passed to texelFetch as-is.
  // This presumably relies on out-of-range fetches returning 0 to model
  // constant zero padding -- confirm for the target devices.
  const ivec2 start = ipos;

  // The accumulator is seeded with the bias texel selected by pos.z.
  VEC4_T sum = texelFetch(t_bias, ivec2(pos.z, 0), 0);
  int kx = 0;
  for (int y = start.y, i = 0; i < TILE_SIZE; y += dilation.y, i++) {
    for (int x = start.x, j = 0; j < TILE_SIZE; x += dilation.x, j++) {
      // The weight kernel was rearranged such that every NxN filter is
      // flattened to fit in one row. Each filter was then stacked on top of
      // each other vertically. Hence kx walks along the row selected by pos.z.
      const vec4 in_texel = texelFetch(t_in, ivec3(x, y, pos.z), 0);
      sum = fma(in_texel, texelFetch(t_kernel, ivec2(kx, pos.z), 0), sum);
      kx++;
    }
  }

  imageStore(t_out, pos, op(sum, out_min, out_max));
}
164-
165- #endif
0 commit comments