Skip to content

Commit e2601be

Browse files
committed
gpu module fixed
1 parent d2d4b4b commit e2601be

File tree

1 file changed

+102
-62
lines changed

1 file changed

+102
-62
lines changed

src/Core/regularisers_GPU/PatchSelect_GPU_core.cu

Lines changed: 102 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@
4040
*/
4141

4242

43-
#define BLKXSIZE 16
44-
#define BLKYSIZE 16
43+
#define BLKXSIZE 8
44+
#define BLKYSIZE 4
4545
#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
4646
#define M_PI 3.14159265358979323846
4747
#define EPS 1.0e-8
@@ -68,46 +68,56 @@ __device__ void swapUS(unsigned short *xp, unsigned short *yp)
6868
__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
6969
{
7070

71-
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
71+
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind;
7272
float normsum;
7373

7474
float Weight_Vec[CONSTVECSIZE5];
7575
unsigned short ind_i[CONSTVECSIZE5];
7676
unsigned short ind_j[CONSTVECSIZE5];
7777

78+
for(ind=0; ind<CONSTVECSIZE5; ind++) {
79+
Weight_Vec[ind] = 0.0;
80+
ind_i[ind] = 0;
81+
ind_j[ind] = 0; }
82+
7883
int i = blockDim.x * blockIdx.x + threadIdx.x;
7984
int j = blockDim.y * blockIdx.y + threadIdx.y;
8085

81-
long index = i*M+j;
86+
long index = i + N*j;
8287

8388
counter = 0;
8489
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
90+
i1 = i+i_m;
91+
if ((i1 >= 0) && (i1 < N)) {
8592
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
86-
i1 = i+i_m;
8793
j1 = j+j_m;
88-
if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
94+
if ((j1 >= 0) && (j1 < M)) {
8995
normsum = 0.0f; counterG = 0;
9096
for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
97+
i2 = i1 + i_c;
98+
i3 = i + i_c;
99+
if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
91100
for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
92-
i2 = i1 + i_c;
93101
j2 = j1 + j_c;
94-
i3 = i + i_c;
95102
j3 = j + j_c;
96-
if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
97-
if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
98-
normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
103+
if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
104+
normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2);
99105
counterG++;
100-
}}
101-
}}
106+
} /*if j2 j3*/
107+
}
108+
} /*if i2 i3*/
109+
}
102110
/* writing temporarily into vectors */
103111
if (normsum > EPS) {
104-
Weight_Vec[counter] = __expf(-normsum/h2);
112+
Weight_Vec[counter] = expf(-normsum/h2);
105113
ind_i[counter] = i1;
106114
ind_j[counter] = j1;
107115
counter++;
108116
}
109-
}
110-
}}
117+
} /*if j1*/
118+
}
119+
} /*if i1*/
120+
}
111121

112122
/* do sorting to choose the most prominent weights [HIGH to LOW] */
113123
/* and re-arrange indices accordingly */
@@ -133,46 +143,56 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne
133143
__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
134144
{
135145

136-
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
146+
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind;
137147
float normsum;
138148

139149
float Weight_Vec[CONSTVECSIZE7];
140150
unsigned short ind_i[CONSTVECSIZE7];
141151
unsigned short ind_j[CONSTVECSIZE7];
142152

153+
for(ind=0; ind<CONSTVECSIZE7; ind++) {
154+
Weight_Vec[ind] = 0.0;
155+
ind_i[ind] = 0;
156+
ind_j[ind] = 0; }
157+
143158
int i = blockDim.x * blockIdx.x + threadIdx.x;
144159
int j = blockDim.y * blockIdx.y + threadIdx.y;
145160

146-
long index = i*M+j;
161+
long index = i + N*j;
147162

148163
counter = 0;
149164
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
165+
i1 = i+i_m;
166+
if ((i1 >= 0) && (i1 < N)) {
150167
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
151-
i1 = i+i_m;
152168
j1 = j+j_m;
153-
if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
169+
if ((j1 >= 0) && (j1 < M)) {
154170
normsum = 0.0f; counterG = 0;
155171
for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
172+
i2 = i1 + i_c;
173+
i3 = i + i_c;
174+
if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
156175
for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
157-
i2 = i1 + i_c;
158176
j2 = j1 + j_c;
159-
i3 = i + i_c;
160177
j3 = j + j_c;
161-
if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
162-
if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
163-
normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
178+
if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
179+
normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2);
164180
counterG++;
165-
}}
166-
}}
181+
} /*if j2 j3*/
182+
}
183+
} /*if i2 i3*/
184+
}
167185
/* writing temporarily into vectors */
168186
if (normsum > EPS) {
169-
Weight_Vec[counter] = __expf(-normsum/h2);
187+
Weight_Vec[counter] = expf(-normsum/h2);
170188
ind_i[counter] = i1;
171189
ind_j[counter] = j1;
172190
counter++;
173191
}
174-
}
175-
}}
192+
} /*if j1*/
193+
}
194+
} /*if i1*/
195+
}
176196

177197
/* do sorting to choose the most prominent weights [HIGH to LOW] */
178198
/* and re-arrange indices accordingly */
@@ -225,16 +245,16 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne
225245
for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
226246
i2 = i1 + i_c;
227247
i3 = i + i_c;
228-
//if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
248+
if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
229249
for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
230250
j2 = j1 + j_c;
231251
j3 = j + j_c;
232-
//if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
252+
if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
233253
normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2);
234254
counterG++;
235-
// } /*if j2 j3*/
255+
} /*if j2 j3*/
236256
}
237-
// } /*if i2 i3*/
257+
} /*if i2 i3*/
238258
}
239259
/* writing temporarily into vectors */
240260
if (normsum > EPS) {
@@ -271,46 +291,56 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne
271291
__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
272292
{
273293

274-
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
294+
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind;
275295
float normsum;
276296

277297
float Weight_Vec[CONSTVECSIZE11];
278298
unsigned short ind_i[CONSTVECSIZE11];
279299
unsigned short ind_j[CONSTVECSIZE11];
280300

301+
for(ind=0; ind<CONSTVECSIZE11; ind++) {
302+
Weight_Vec[ind] = 0.0;
303+
ind_i[ind] = 0;
304+
ind_j[ind] = 0; }
305+
281306
int i = blockDim.x * blockIdx.x + threadIdx.x;
282307
int j = blockDim.y * blockIdx.y + threadIdx.y;
283308

284-
long index = i*M+j;
309+
long index = i + N*j;
285310

286311
counter = 0;
287312
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
313+
i1 = i+i_m;
314+
if ((i1 >= 0) && (i1 < N)) {
288315
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
289-
i1 = i+i_m;
290316
j1 = j+j_m;
291-
if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
317+
if ((j1 >= 0) && (j1 < M)) {
292318
normsum = 0.0f; counterG = 0;
293319
for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
320+
i2 = i1 + i_c;
321+
i3 = i + i_c;
322+
if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
294323
for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
295-
i2 = i1 + i_c;
296324
j2 = j1 + j_c;
297-
i3 = i + i_c;
298325
j3 = j + j_c;
299-
if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
300-
if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
301-
normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
326+
if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
327+
normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2);
302328
counterG++;
303-
}}
304-
}}
329+
} /*if j2 j3*/
330+
}
331+
} /*if i2 i3*/
332+
}
305333
/* writing temporarily into vectors */
306334
if (normsum > EPS) {
307-
Weight_Vec[counter] = __expf(-normsum/h2);
335+
Weight_Vec[counter] = expf(-normsum/h2);
308336
ind_i[counter] = i1;
309337
ind_j[counter] = j1;
310338
counter++;
311339
}
312-
}
313-
}}
340+
} /*if j1*/
341+
}
342+
} /*if i1*/
343+
}
314344

315345
/* do sorting to choose the most prominent weights [HIGH to LOW] */
316346
/* and re-arrange indices accordingly */
@@ -335,46 +365,56 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign
335365
__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
336366
{
337367

338-
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
368+
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind;
339369
float normsum;
340370

341371
float Weight_Vec[CONSTVECSIZE13];
342372
unsigned short ind_i[CONSTVECSIZE13];
343373
unsigned short ind_j[CONSTVECSIZE13];
344374

375+
for(ind=0; ind<CONSTVECSIZE13; ind++) {
376+
Weight_Vec[ind] = 0.0;
377+
ind_i[ind] = 0;
378+
ind_j[ind] = 0; }
379+
345380
int i = blockDim.x * blockIdx.x + threadIdx.x;
346381
int j = blockDim.y * blockIdx.y + threadIdx.y;
347382

348-
long index = i*M+j;
383+
long index = i + N*j;
349384

350385
counter = 0;
351386
for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
387+
i1 = i+i_m;
388+
if ((i1 >= 0) && (i1 < N)) {
352389
for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
353-
i1 = i+i_m;
354390
j1 = j+j_m;
355-
if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
391+
if ((j1 >= 0) && (j1 < M)) {
356392
normsum = 0.0f; counterG = 0;
357393
for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
394+
i2 = i1 + i_c;
395+
i3 = i + i_c;
396+
if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) {
358397
for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
359-
i2 = i1 + i_c;
360398
j2 = j1 + j_c;
361-
i3 = i + i_c;
362399
j3 = j + j_c;
363-
if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
364-
if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
365-
normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
400+
if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) {
401+
normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2);
366402
counterG++;
367-
}}
368-
}}
403+
} /*if j2 j3*/
404+
}
405+
} /*if i2 i3*/
406+
}
369407
/* writing temporarily into vectors */
370408
if (normsum > EPS) {
371-
Weight_Vec[counter] = __expf(-normsum/h2);
409+
Weight_Vec[counter] = expf(-normsum/h2);
372410
ind_i[counter] = i1;
373411
ind_j[counter] = j1;
374412
counter++;
375413
}
376-
}
377-
}}
414+
} /*if j1*/
415+
}
416+
} /*if i1*/
417+
}
378418

379419
/* do sorting to choose the most prominent weights [HIGH to LOW] */
380420
/* and re-arrange indices accordingly */

0 commit comments

Comments
 (0)