@@ -152,7 +152,7 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
152
152
const int ksizeW, const int ksizeH,
153
153
const int strideH, const int strideW,
154
154
const int offsetH, const int offsetW,
155
- real* tgtData) {
155
+ real* tgtData, const int tgtStride ) {
156
156
int index = blockIdx .x * blockDim .x + threadIdx .x ;
157
157
if (index < nthreads) {
158
158
int pw = index % pooledW;
@@ -173,7 +173,9 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
173
173
maxval = inputData[h * width + w];
174
174
}
175
175
}
176
- tgtData[index] = maxval;
176
+ int tgtIndex = index % (pooledW * pooledH * channels) +
177
+ frameNum * tgtStride;
178
+ tgtData[tgtIndex] = maxval;
177
179
}
178
180
}
179
181
@@ -184,7 +186,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
184
186
const int sizeX, const int sizeY,
185
187
const int strideH, const int strideW,
186
188
const int paddingH, const int paddingW,
187
- real* tgtData) {
189
+ real* tgtData, const int tgtStride ) {
188
190
189
191
int num_kernels = pooledH * pooledW * channels * frameCnt;
190
192
int blocks = (num_kernels + 1024 - 1 ) / 1024 ;
@@ -194,7 +196,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
194
196
KeMaxPoolForward<<< grid, threads, 0 , STREAM_DEFAULT >>>
195
197
(num_kernels, inputData, channels, height, width,
196
198
pooledH, pooledW, sizeX, sizeY, strideH, strideW,
197
- paddingH, paddingW, tgtData);
199
+ paddingH, paddingW, tgtData, tgtStride );
198
200
CHECK_SYNC (" hl_maxpool_forward failed" );
199
201
}
200
202
@@ -207,7 +209,7 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
207
209
const int strideH, const int strideW,
208
210
const int padH, const int padW,
209
211
real scaleA, real scaleB,
210
- real* targetGrad) {
212
+ real* targetGrad, const int outStride ) {
211
213
int index = blockIdx .x * blockDim .x + threadIdx .x ;
212
214
if (index < nthreads) {
213
215
// find out the local index
@@ -223,8 +225,8 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
223
225
int pwend = offsetW >= 0 ? min (offsetW / strideW + 1 , pooledW) : 0 ;
224
226
real gradient = 0 ;
225
227
real input = inputData[index];
226
- outData += (frameNum * channels + offsetC) * pooledH * pooledW;
227
- outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
228
+ outData += (frameNum * outStride + offsetC * pooledH * pooledW) ;
229
+ outGrad += (frameNum * outStride + offsetC * pooledH * pooledW) ;
228
230
for (int ph = phstart; ph < phend; ++ph) {
229
231
for (int pw = pwstart; pw < pwend; ++pw) {
230
232
if (input == outData[ph * pooledW + pw]) {
@@ -246,7 +248,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
246
248
const int strideH, const int strideW,
247
249
const int paddingH, const int paddingW,
248
250
real scaleA, real scaleB,
249
- real* targetGrad) {
251
+ real* targetGrad, const int outStride ) {
250
252
251
253
int num_kernels = height * width * channels * frameCnt;
252
254
int blocks = (num_kernels + 1024 - 1 ) / 1024 ;
@@ -257,7 +259,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
257
259
strideH, strideW,
258
260
paddingH, paddingW,
259
261
scaleA, scaleB,
260
- targetGrad);
262
+ targetGrad, outStride );
261
263
CHECK_SYNC (" hl_maxpool_backward" );
262
264
}
263
265
@@ -268,7 +270,7 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
268
270
const int sizeX, const int sizeY,
269
271
const int strideH, const int strideW,
270
272
const int padH, const int padW,
271
- real* tgtData) {
273
+ real* tgtData, const int tgtStride ) {
272
274
int index = blockIdx .x * blockDim .x + threadIdx .x ;
273
275
if (index < nthreads) {
274
276
int pw = index % pooledW;
@@ -293,7 +295,9 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
293
295
aveval += inputData[h * width + w];
294
296
}
295
297
}
296
- tgtData[index] = aveval / pool_size;
298
+ int tgtIndex = index % (pooledW * pooledH * channels) +
299
+ frameNum * tgtStride;
300
+ tgtData[tgtIndex] = aveval / pool_size;
297
301
}
298
302
}
299
303
@@ -303,14 +307,15 @@ void hl_avgpool_forward(const int frameCnt, const real* inputData,
303
307
const int pooledH, const int pooledW,
304
308
const int sizeX, const int sizeY,
305
309
const int strideH, const int strideW,
306
- const int paddingH, const int paddingW, real* tgtData) {
310
+ const int paddingH, const int paddingW,
311
+ real* tgtData, const int tgtStride) {
307
312
int num_kernels = pooledH * pooledW * channels * frameCnt;
308
313
int blocks = (num_kernels + 1024 - 1 ) / 1024 ;
309
314
KeAvgPoolForward<<< blocks, 1024 , 0 , STREAM_DEFAULT >>>
310
315
(num_kernels, inputData, channels,
311
316
height, width, pooledH, pooledW,
312
317
sizeX, sizeY, strideH, strideW,
313
- paddingH, paddingW, tgtData);
318
+ paddingH, paddingW, tgtData, tgtStride );
314
319
CHECK_SYNC (" hl_avgpool_forward failed" );
315
320
}
316
321
@@ -322,7 +327,7 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
322
327
const int strideH, const int strideW,
323
328
const int padH, const int padW,
324
329
real scaleA, real scaleB,
325
- real* tgtGrad) {
330
+ real* tgtGrad, const int outStride ) {
326
331
int index = blockIdx .x * blockDim .x + threadIdx .x ;
327
332
if (index < nthreads) {
328
333
int offsetW = index % width + padW;
@@ -335,7 +340,8 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
335
340
int phend = offsetH >= 0 ? min (offsetH / strideH + 1 , pooledH) : 0 ;
336
341
int pwend = offsetW >= 0 ? min (offsetW / strideW + 1 , pooledW) : 0 ;
337
342
real gradient = 0 ;
338
- outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
343
+ outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
344
+
339
345
340
346
for (int ph = phstart; ph < phend; ++ph) {
341
347
for (int pw = pwstart; pw < pwend; ++pw) {
@@ -360,7 +366,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
360
366
const int strideH, const int strideW,
361
367
const int paddingH, const int paddingW,
362
368
real scaleA, real scaleB,
363
- real* backGrad) {
369
+ real* backGrad, const int outStride ) {
364
370
int num_kernels = height * width * channels * frameCnt;
365
371
int blocks = (num_kernels + 1024 - 1 ) / 1024 ;
366
372
@@ -370,7 +376,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
370
376
strideH, strideW,
371
377
paddingH, paddingW,
372
378
scaleA, scaleB,
373
- backGrad);
379
+ backGrad, outStride );
374
380
CHECK_SYNC (" hl_avgpool_backward failed" );
375
381
}
376
382
0 commit comments