Skip to content

Commit d220217

Browse files
authored
[BugFix] fix naive_conv.cu (#62)
1 parent 969a8e8 commit d220217

File tree

1 file changed

+7 -6 lines changed

1 file changed

+7 -6 lines changed

docs/12_convolution/01_naive_conv/naive_conv.cu

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -133,12 +133,12 @@ int main()
133133
const int k = 5; // 卷积核数量
134134
const int r = 3; // 卷积核高
135135
const int s = 3; // 卷积核宽
136-
const int out_h = (h - r + 2 * 0) / 1 + 1; // 输出高
137-
const int out_w = (w - s + 2 * 0) / 1 + 1; // 输出宽
138136
const int u = 1; // 卷积在高方向上的步长
139137
const int v = 1; // 卷积在宽方向上的步长
140138
const int p = 0; // 卷积在高方向上的补边
141139
const int q = 0; // 卷积在宽方向上的补边
140+
const int out_h = (h - r + 2 * p) / u + 1; // 输出高
141+
const int out_w = (w - s + 2 * q) / v + 1; // 输出宽
142142

143143
// 分配内存并随机生成输入数据和卷积核
144144
float *in, *weight, *out;
@@ -168,8 +168,9 @@ int main()
168168
cudaMemcpy(out_device, out, n * k * out_h * out_w * sizeof(float), cudaMemcpyHostToDevice);
169169

170170
// 定义线程块的大小
171-
const int blockDim_x = 16;
172-
const int blockDim_y = 16;
171+
const int blockDim_x =
172+
(out_h * out_w / k) > 1024/k ? 1024/k : (out_h * out_w / k);
173+
const int blockDim_y = k;
173174

174175
// 计算线程块和网格的数量
175176
const int gridDim_x = (out_h * out_w + blockDim_x - 1) / blockDim_x;
@@ -200,7 +201,7 @@ int main()
200201
{
201202
pass = false;
202203
std::cout << "Verification failed at " << i << "!" << std::endl;
203-
std::cout << "GPU: " << out_cpu[i] << " CPU: " << out[i] << std::endl;
204+
std::cout << "CPU: " << out_cpu[i] << " GPU: " << out[i] << std::endl;
204205
break;
205206
}
206207
}
@@ -236,4 +237,4 @@ int main()
236237
free(out);
237238

238239
return 0;
239-
}
240+
}

0 commit comments

Comments (0)