Skip to content

Commit 7328be0

Browse files
committed
fix bug of max_nbor_size usage
1 parent 90938c4 commit 7328be0

File tree

2 files changed

+30
-7
lines changed

2 files changed

+30
-7
lines changed

source/op/descrpt_se_a_multi_device.cc

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ REGISTER_OP("DescrptSeA")
2121
.Output("nlist: int32");
2222
// only sel_a and rcut_r are used.
2323

24+
#define GPU_MAX_NBOR_SIZE 4096
25+
2426
struct DeviceFunctor {
2527
void operator()(const CPUDevice& d, std::string& device) {
2628
device = "CPU";
@@ -158,14 +160,14 @@ class DescrptSeAOp : public OpKernel {
158160
OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, int_shape, &int_temp));
159161
Tensor uint64_temp;
160162
TensorShape uint64_shape;
161-
uint64_shape.AddDim(nloc * max_nbor_size * 2);
163+
uint64_shape.AddDim(nloc * GPU_MAX_NBOR_SIZE * 2);
162164
OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape, &uint64_temp));
163165

164166
array_int = int_temp.flat<int>().data();
165167
array_longlong = uint64_temp.flat<unsigned long long>().data();
166168

167169
nbor_update(mesh_tensor.flat<int>().data(), static_cast<int>(mesh_tensor.NumElements()));
168-
OP_REQUIRES (context, (max_nbor_size <= 4096), errors::InvalidArgument ("Assert failed, max neighbor size of atom(lammps) " + std::to_string(max_nbor_size) + " is larger than 4096, which currently is not supported by deepmd-kit."));
170+
OP_REQUIRES (context, (max_nbor_size <= GPU_MAX_NBOR_SIZE), errors::InvalidArgument ("Assert failed, max neighbor size of atom(lammps) " + std::to_string(max_nbor_size) + " is larger than 4096, which currently is not supported by deepmd-kit."));
169171
}
170172
else if (device == "CPU") {
171173
memcpy (&ilist, 4 + mesh_tensor.flat<int>().data(), sizeof(int *));
@@ -267,14 +269,23 @@ class DescrptSeAOp : public OpKernel {
267269
cudaErrcheck(cudaMemcpy(jrange, jrange_host, sizeof(int) * mesh_host[2], cudaMemcpyHostToDevice));
268270
cudaErrcheck(cudaMemcpy(jlist, jlist_host, sizeof(int) * mesh_host[3], cudaMemcpyHostToDevice));
269271

270-
max_nbor_size = 1024;
272+
max_nbor_size = 0;
271273
for(int ii = 0; ii < mesh_host[2]; ii++) {
272274
max_nbor_size = (jrange_host[ii + 1] - jrange_host[ii]) > max_nbor_size ? (jrange_host[ii + 1] - jrange_host[ii]) : max_nbor_size;
273275
}
276+
assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
277+
if (max_nbor_size <= 1024) {
278+
max_nbor_size = 1024;
279+
}
280+
else if (max_nbor_size <= 2048) {
281+
max_nbor_size = 2048;
282+
}
283+
else {
284+
max_nbor_size = 4096;
285+
}
274286
}
275287
delete [] mesh_host;
276288
}
277-
278289
};
279290

280291
// Register the CPU kernels.

source/op/descrpt_se_r_multi_device.cc

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ REGISTER_OP("DescrptSeR")
1818
.Output("rij: T")
1919
.Output("nlist: int32");
2020

21+
#define GPU_MAX_NBOR_SIZE 4096
22+
2123
struct DeviceFunctor {
2224
void operator()(const CPUDevice& d, std::string& device) {
2325
device = "CPU";
@@ -147,14 +149,14 @@ class DescrptSeROp : public OpKernel {
147149
OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, int_shape, &int_temp));
148150
Tensor uint64_temp;
149151
TensorShape uint64_shape;
150-
uint64_shape.AddDim(nloc * max_nbor_size * 2);
152+
uint64_shape.AddDim(nloc * GPU_MAX_NBOR_SIZE * 2);
151153
OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape, &uint64_temp));
152154

153155
array_int = int_temp.flat<int>().data();
154156
array_longlong = uint64_temp.flat<unsigned long long>().data();
155157

156158
nbor_update(mesh_tensor.flat<int>().data(), static_cast<int>(mesh_tensor.NumElements()));
157-
OP_REQUIRES (context, (max_nbor_size <= 4096), errors::InvalidArgument ("Assert failed, max neighbor size of atom(lammps) " + std::to_string(max_nbor_size) + " is larger than 4096, which currently is not supported by deepmd-kit."));
159+
OP_REQUIRES (context, (max_nbor_size <= GPU_MAX_NBOR_SIZE), errors::InvalidArgument ("Assert failed, max neighbor size of atom(lammps) " + std::to_string(max_nbor_size) + " is larger than 4096, which currently is not supported by deepmd-kit."));
158160
}
159161
else if (device == "CPU") {
160162
memcpy (&ilist, 4 + mesh_tensor.flat<int>().data(), sizeof(int *));
@@ -256,10 +258,20 @@ class DescrptSeROp : public OpKernel {
256258
cudaErrcheck(cudaMemcpy(jrange, jrange_host, sizeof(int) * mesh_host[2], cudaMemcpyHostToDevice));
257259
cudaErrcheck(cudaMemcpy(jlist, jlist_host, sizeof(int) * mesh_host[3], cudaMemcpyHostToDevice));
258260

259-
max_nbor_size = 1024;
261+
max_nbor_size = 0;
260262
for(int ii = 0; ii < mesh_host[2]; ii++) {
261263
max_nbor_size = (jrange_host[ii + 1] - jrange_host[ii]) > max_nbor_size ? (jrange_host[ii + 1] - jrange_host[ii]) : max_nbor_size;
262264
}
265+
assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
266+
if (max_nbor_size <= 1024) {
267+
max_nbor_size = 1024;
268+
}
269+
else if (max_nbor_size <= 2048) {
270+
max_nbor_size = 2048;
271+
}
272+
else {
273+
max_nbor_size = 4096;
274+
}
263275
}
264276
delete [] mesh_host;
265277
}

0 commit comments

Comments
 (0)