@@ -85,26 +85,24 @@ const Ncv32u NUM_SCAN_THREADS = 256;
85
85
const Ncv32u LOG2_NUM_SCAN_THREADS = 8 ;
86
86
87
87
88
- template <class T_in , class T_out >
88
+ template <class T_in , class T_out , bool tbDoSqr >
89
89
struct _scanElemOp
90
90
{
91
- template <bool tbDoSqr>
92
- static inline __host__ __device__ T_out scanElemOp (T_in elem)
93
- {
94
- return scanElemOp ( elem, Int2Type<(int )tbDoSqr>() );
95
- }
96
-
97
- private:
98
-
99
- template <int v> struct Int2Type { enum { value = v }; };
91
+ static __host__ __device__ T_out scanElemOp (T_in elem);
92
+ };
100
93
101
- static inline __host__ __device__ T_out scanElemOp (T_in elem, Int2Type<0 >)
102
- {
103
- return (T_out)elem;
94
+ template <class T_in , class T_out >
95
+ struct _scanElemOp <T_in, T_out, false >
96
+ {
97
+ static inline __host__ __device__ T_out scanElemOp (T_in elem) {
98
+ return (T_out)(elem);
104
99
}
100
+ };
105
101
106
- static inline __host__ __device__ T_out scanElemOp (T_in elem, Int2Type<1 >)
107
- {
102
+ template <class T_in , class T_out >
103
+ struct _scanElemOp <T_in, T_out, true >
104
+ {
105
+ static inline __host__ __device__ T_out scanElemOp (T_in elem) {
108
106
return (T_out)(elem*elem);
109
107
}
110
108
};
@@ -177,15 +175,15 @@ __global__ void scanRows(cv::cudev::TexturePtr<Ncv8u> tex8u, T_in *d_src, Ncv32u
177
175
Ncv32u curElemOffs = offsetX + threadIdx .x ;
178
176
T_out curScanElem;
179
177
180
- T_in curElem;
178
+ T_in curElem = 0 ;
181
179
T_out curElemMod;
182
180
183
181
if (curElemOffs < srcWidth)
184
182
{
185
183
// load elements
186
184
curElem = readElem<T_in>(tex8u, d_src, texOffs, srcStride, curElemOffs);
187
185
}
188
- curElemMod = _scanElemOp<T_in, T_out>::scanElemOp<tbDoSqr> (curElem);
186
+ curElemMod = _scanElemOp<T_in, T_out, tbDoSqr >::scanElemOp (curElem);
189
187
190
188
// inclusive scan
191
189
curScanElem = cv::cudev::blockScanInclusive<NUM_SCAN_THREADS>(curElemMod, shmem, threadIdx .x );
0 commit comments