@@ -46,10 +46,10 @@ $if VARIANCE_MODE:
4646// work group will write into its assigned element in the shared array.
4747#define MAX_NTHREADS 16
4848
49- shared vec4 shared_vecs[MAX_NTHREADS];
49+ shared VEC4_T shared_vecs[MAX_NTHREADS];
5050// Second accumulator for variance mode - used for sum of squares; the
5151// first accumulator (shared_vecs) is used for sum of values
52- shared vec4 shared_sum_sq[MAX_NTHREADS];
52+ shared VEC4_T shared_sum_sq[MAX_NTHREADS];
5353shared int shared_count[MAX_NTHREADS];
5454
5555#include "indexing_utils.h"
@@ -58,9 +58,9 @@ int tid_to_smi(const ivec2 tid) {
5858 return tid.x + tid.y * NWORKERS;
5959}
6060
61- vec4 calculate_variance(vec4 sum, vec4 sum_sq, int count) {
62- vec4 mean = sum / float (count);
63- vec4 variance = (sum_sq / float (count)) - (mean * mean);
61+ VEC4_T calculate_variance(VEC4_T sum, VEC4_T sum_sq, int count) {
62+ VEC4_T mean = sum / float (count);
63+ VEC4_T variance = (sum_sq / float (count)) - (mean * mean);
6464
6565 if ((pc.unbiased != 0 ) && (count > 1 )) {
6666 variance = variance * (float (count) / float (count - 1.0 ));
@@ -111,10 +111,10 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
111111 const int smi = tid_to_smi(tid);
112112
113113 scan_pos[reduce_dim] = 0 ;
114- vec4 accum = INIT_ACCUM(load_texel(tin, scan_pos));
114+ VEC4_T accum = INIT_ACCUM(load_texel(tin, scan_pos));
115115
116116#ifdef VARIANCE_MODE
117- vec4 sum_sq = VEC4_T(0 );
117+ VEC4_T sum_sq = VEC4_T(0 );
118118 int count = 0 ;
119119#endif
120120
@@ -123,7 +123,7 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
123123 // the reduction row
124124 for (int i = tid.x; i < tin_sizes[reduce_dim];
125125 i += NWORKERS, scan_pos[reduce_dim] += NWORKERS) {
126- vec4 val = load_texel(tin, scan_pos);
126+ VEC4_T val = load_texel(tin, scan_pos);
127127 accum = UPDATE_ACCUM(accum, val);
128128#ifdef VARIANCE_MODE
129129 sum_sq += val * val;
@@ -166,7 +166,7 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
166166 scan_pos[packed_dim] == (tin_limits[packed_dim] - 1 );
167167
168168#ifdef VARIANCE_MODE
169- vec4 variance = calculate_variance(accum, sum_sq, count);
169+ VEC4_T variance = calculate_variance(accum, sum_sq, count);
170170#endif
171171
172172 // Explicitly set padding elements to 0
@@ -208,10 +208,10 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
208208 const int reduce_len = tin_sizes[packed_dim] - nspill;
209209
210210 scan_pos[reduce_dim] = 0 ;
211- vec4 accum = INIT_ACCUM(vec4 (load_texel(tin, scan_pos).x));
211+ VEC4_T accum = INIT_ACCUM(VEC4_T (load_texel(tin, scan_pos).x));
212212
213213#ifdef VARIANCE_MODE
214- vec4 sum_sq = VEC4_T(0 );
214+ VEC4_T sum_sq = VEC4_T(0 );
215215 int count = 0 ;
216216#endif
217217
@@ -220,7 +220,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
220220 scan_pos[reduce_dim] = tid.x;
221221 for (int i = tid.x * 4 ; i < reduce_len;
222222 i += NWORKERS * 4 , scan_pos[reduce_dim] += NWORKERS) {
223- vec4 val = load_texel(tin, scan_pos);
223+ VEC4_T val = load_texel(tin, scan_pos);
224224 accum = UPDATE_ACCUM(accum, val);
225225#ifdef VARIANCE_MODE
226226 sum_sq += val * val;
@@ -231,7 +231,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
231231 // element of the texel needs to be processed individually such that the
232232 // padding elements are ignored
233233 if (scan_pos[reduce_dim] == tin_limits[reduce_dim] - 1 && nspill > 0 ) {
234- const vec4 val = load_texel(tin, scan_pos);
234+ const VEC4_T val = load_texel(tin, scan_pos);
235235 for (int i = 0 ; i < nspill; i++ ) {
236236 accum.x = UPDATE_ACCUM(accum.x, val[i]);
237237#ifdef VARIANCE_MODE
@@ -280,7 +280,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
280280 }
281281
282282 scan_pos[reduce_dim] = tid.x;
283- write_texel(tout, scan_pos, vec4 (variance, 0 , 0 , 0 ));
283+ write_texel(tout, scan_pos, VEC4_T (variance, 0 , 0 , 0 ));
284284#else
285285 // Each element of the texel is itself a partial maximum; iterate over the
286286 // texel to find the actual maximum
@@ -290,7 +290,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
290290 }
291291
292292 scan_pos[reduce_dim] = tid.x;
293- write_texel(tout, scan_pos, POSTPROCESS(vec4 (accum_final, 0 , 0 , 0 )));
293+ write_texel(tout, scan_pos, POSTPROCESS(VEC4_T (accum_final, 0 , 0 , 0 )));
294294#endif
295295 }
296296}
0 commit comments