Skip to content

Commit 0da10cd

Browse files
authored
Merge pull request #407 from denghuilu/master
Fix illegal device memory access bug
2 parents 3c0026f + 7d25997 commit 0da10cd

File tree

2 files changed: 14 additions and 30 deletions.

source/op/descrpt_se_a_multi_device.cc

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -237,32 +237,23 @@ class DescrptSeAOp : public OpKernel {
237237
int *mesh_host = new int[size], *ilist_host = NULL, *jrange_host = NULL, *jlist_host = NULL;
238238
cudaErrcheck(cudaMemcpy(mesh_host, mesh, sizeof(int) * size, cudaMemcpyDeviceToHost));
239239
memcpy (&ilist_host, 4 + mesh_host, sizeof(int *));
240-
memcpy (&jrange_host, 8 + mesh_host, sizeof(int *));
241-
memcpy (&jlist_host, 12 + mesh_host, sizeof(int *));
240+
memcpy (&jrange_host, 8 + mesh_host, sizeof(int *));
241+
memcpy (&jlist_host, 12 + mesh_host, sizeof(int *));
242242
int const ago = mesh_host[0];
243-
if (!init) {
244-
ilist_size = (int)(mesh_host[1] * 1.2);
245-
jrange_size = (int)(mesh_host[2] * 1.2);
246-
jlist_size = (int)(mesh_host[3] * 1.2);
247-
cudaErrcheck(cudaMalloc((void **)&ilist, sizeof(int) * ilist_size));
248-
cudaErrcheck(cudaMalloc((void **)&jrange, sizeof(int) * jrange_size));
249-
cudaErrcheck(cudaMalloc((void **)&jlist, sizeof(int) * jlist_size));
250-
init = true;
251-
}
252-
if (ago == 0) {
243+
if (!init || ago == 0) {
253244
if (ilist_size < mesh_host[1]) {
254245
ilist_size = (int)(mesh_host[1] * 1.2);
255-
cudaErrcheck(cudaFree(ilist));
246+
if (ilist != NULL) {cudaErrcheck(cudaFree(ilist));}
256247
cudaErrcheck(cudaMalloc((void **)&ilist, sizeof(int) * ilist_size));
257248
}
258249
if (jrange_size < mesh_host[2]) {
259250
jrange_size = (int)(mesh_host[2] * 1.2);
260-
cudaErrcheck(cudaFree(jrange));
251+
if (jrange != NULL) {cudaErrcheck(cudaFree(jrange));}
261252
cudaErrcheck(cudaMalloc((void **)&jrange,sizeof(int) * jrange_size));
262253
}
263254
if (jlist_size < mesh_host[3]) {
264255
jlist_size = (int)(mesh_host[3] * 1.2);
265-
cudaErrcheck(cudaFree(jlist));
256+
if (jlist != NULL) {cudaErrcheck(cudaFree(jlist));}
266257
cudaErrcheck(cudaMalloc((void **)&jlist, sizeof(int) * jlist_size));
267258
}
268259
cudaErrcheck(cudaMemcpy(ilist, ilist_host, sizeof(int) * mesh_host[1], cudaMemcpyHostToDevice));
@@ -284,6 +275,7 @@ class DescrptSeAOp : public OpKernel {
284275
max_nbor_size = 4096;
285276
}
286277
}
278+
init = true;
287279
delete [] mesh_host;
288280
}
289281
};

source/op/descrpt_se_r_multi_device.cc

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -226,32 +226,23 @@ class DescrptSeROp : public OpKernel {
226226
int *mesh_host = new int[size], *ilist_host = NULL, *jrange_host = NULL, *jlist_host = NULL;
227227
cudaErrcheck(cudaMemcpy(mesh_host, mesh, sizeof(int) * size, cudaMemcpyDeviceToHost));
228228
memcpy (&ilist_host, 4 + mesh_host, sizeof(int *));
229-
memcpy (&jrange_host, 8 + mesh_host, sizeof(int *));
230-
memcpy (&jlist_host, 12 + mesh_host, sizeof(int *));
229+
memcpy (&jrange_host, 8 + mesh_host, sizeof(int *));
230+
memcpy (&jlist_host, 12 + mesh_host, sizeof(int *));
231231
int const ago = mesh_host[0];
232-
if (!init) {
233-
ilist_size = (int)(mesh_host[1] * 1.2);
234-
jrange_size = (int)(mesh_host[2] * 1.2);
235-
jlist_size = (int)(mesh_host[3] * 1.2);
236-
cudaErrcheck(cudaMalloc((void **)&ilist, sizeof(int) * ilist_size));
237-
cudaErrcheck(cudaMalloc((void **)&jrange, sizeof(int) * jrange_size));
238-
cudaErrcheck(cudaMalloc((void **)&jlist, sizeof(int) * jlist_size));
239-
init = true;
240-
}
241-
if (ago == 0) {
232+
if (!init || ago == 0) {
242233
if (ilist_size < mesh_host[1]) {
243234
ilist_size = (int)(mesh_host[1] * 1.2);
244-
cudaErrcheck(cudaFree(ilist));
235+
if (ilist != NULL) {cudaErrcheck(cudaFree(ilist));}
245236
cudaErrcheck(cudaMalloc((void **)&ilist, sizeof(int) * ilist_size));
246237
}
247238
if (jrange_size < mesh_host[2]) {
248239
jrange_size = (int)(mesh_host[2] * 1.2);
249-
cudaErrcheck(cudaFree(jrange));
240+
if (jrange != NULL) {cudaErrcheck(cudaFree(jrange));}
250241
cudaErrcheck(cudaMalloc((void **)&jrange,sizeof(int) * jrange_size));
251242
}
252243
if (jlist_size < mesh_host[3]) {
253244
jlist_size = (int)(mesh_host[3] * 1.2);
254-
cudaErrcheck(cudaFree(jlist));
245+
if (jlist != NULL) {cudaErrcheck(cudaFree(jlist));}
255246
cudaErrcheck(cudaMalloc((void **)&jlist, sizeof(int) * jlist_size));
256247
}
257248
cudaErrcheck(cudaMemcpy(ilist, ilist_host, sizeof(int) * mesh_host[1], cudaMemcpyHostToDevice));
@@ -273,6 +264,7 @@ class DescrptSeROp : public OpKernel {
273264
max_nbor_size = 4096;
274265
}
275266
}
267+
init = true;
276268
delete [] mesh_host;
277269
}
278270

0 commit comments

Comments
 (0)