Skip to content

Commit fe3a7ec

Browse files
committed
Fix a bug in neighbor-list (nbor) sorting
When the number of selected neighbors (sel) is smaller than the number of neighbors provided by LAMMPS, the GPU neighbor sort may produce an error.
1 parent 27854e4 commit fe3a7ec

File tree

4 files changed

+8
-198
lines changed

4 files changed

+8
-198
lines changed

source/op/cuda/descrpt_se_a.cu

Lines changed: 3 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -228,73 +228,6 @@ __global__ void compute_descriptor_se_a (FPTYPE* descript,
228228
}
229229
}
230230

231-
template<typename FPTYPE>
232-
void format_nbor_list_256 (
233-
const FPTYPE* coord,
234-
const int* type,
235-
const int* jrange,
236-
const int* jlist,
237-
const int& nloc,
238-
const float& rcut_r,
239-
int * i_idx,
240-
int_64 * key
241-
)
242-
{
243-
const int LEN = 256;
244-
const int MAGIC_NUMBER = 256;
245-
const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
246-
dim3 block_grid(nloc, nblock);
247-
dim3 thread_grid(1, LEN);
248-
format_nlist_fill_a_se_a
249-
<<<block_grid, thread_grid>>> (
250-
coord,
251-
type,
252-
jrange,
253-
jlist,
254-
rcut_r,
255-
key,
256-
i_idx,
257-
MAGIC_NUMBER
258-
);
259-
const int ITEMS_PER_THREAD = 4;
260-
const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
261-
// BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
262-
BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
263-
}
264-
265-
template<typename FPTYPE>
266-
void format_nbor_list_512 (
267-
const FPTYPE* coord,
268-
const int* type,
269-
const int* jrange,
270-
const int* jlist,
271-
const int& nloc,
272-
const float& rcut_r,
273-
int * i_idx,
274-
int_64 * key
275-
)
276-
{
277-
const int LEN = 256;
278-
const int MAGIC_NUMBER = 512;
279-
const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
280-
dim3 block_grid(nloc, nblock);
281-
dim3 thread_grid(1, LEN);
282-
format_nlist_fill_a_se_a
283-
<<<block_grid, thread_grid>>> (
284-
coord,
285-
type,
286-
jrange,
287-
jlist,
288-
rcut_r,
289-
key,
290-
i_idx,
291-
MAGIC_NUMBER
292-
);
293-
const int ITEMS_PER_THREAD = 4;
294-
const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
295-
// BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
296-
BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
297-
}
298231

299232
template<typename FPTYPE>
300233
void format_nbor_list_1024 (
@@ -419,29 +352,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
419352
// cudaProfilerStart();
420353
get_i_idx_se_a<<<nblock, LEN>>> (nloc, ilist, i_idx);
421354

422-
if (nnei <= 256) {
423-
format_nbor_list_256 (
424-
coord,
425-
type,
426-
jrange,
427-
jlist,
428-
nloc,
429-
rcut_r,
430-
i_idx,
431-
key
432-
);
433-
} else if (nnei <= 512) {
434-
format_nbor_list_512 (
435-
coord,
436-
type,
437-
jrange,
438-
jlist,
439-
nloc,
440-
rcut_r,
441-
i_idx,
442-
key
443-
);
444-
} else if (nnei <= 1024) {
355+
if (MAGIC_NUMBER <= 1024) {
445356
format_nbor_list_1024 (
446357
coord,
447358
type,
@@ -452,7 +363,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
452363
i_idx,
453364
key
454365
);
455-
} else if (nnei <= 2048) {
366+
} else if (MAGIC_NUMBER <= 2048) {
456367
format_nbor_list_2048 (
457368
coord,
458369
type,
@@ -463,7 +374,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
463374
i_idx,
464375
key
465376
);
466-
} else if (nnei <= 4096) {
377+
} else if (MAGIC_NUMBER <= 4096) {
467378
format_nbor_list_4096 (
468379
coord,
469380
type,

source/op/cuda/descrpt_se_r.cu

Lines changed: 3 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -210,73 +210,6 @@ __global__ void compute_descriptor_se_r (FPTYPE* descript,
210210
}
211211
}
212212

213-
template<typename FPTYPE>
214-
void format_nbor_list_256 (
215-
const FPTYPE* coord,
216-
const int* type,
217-
const int* jrange,
218-
const int* jlist,
219-
const int& nloc,
220-
const float& rcut_r,
221-
int * i_idx,
222-
int_64 * key
223-
)
224-
{
225-
const int LEN = 256;
226-
const int MAGIC_NUMBER = 256;
227-
const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
228-
dim3 block_grid(nloc, nblock);
229-
dim3 thread_grid(1, LEN);
230-
format_nlist_fill_a_se_r
231-
<<<block_grid, thread_grid>>> (
232-
coord,
233-
type,
234-
jrange,
235-
jlist,
236-
rcut_r,
237-
key,
238-
i_idx,
239-
MAGIC_NUMBER
240-
);
241-
const int ITEMS_PER_THREAD = 4;
242-
const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
243-
// BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
244-
BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
245-
}
246-
247-
template<typename FPTYPE>
248-
void format_nbor_list_512 (
249-
const FPTYPE* coord,
250-
const int* type,
251-
const int* jrange,
252-
const int* jlist,
253-
const int& nloc,
254-
const float& rcut_r,
255-
int * i_idx,
256-
int_64 * key
257-
)
258-
{
259-
const int LEN = 256;
260-
const int MAGIC_NUMBER = 512;
261-
const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
262-
dim3 block_grid(nloc, nblock);
263-
dim3 thread_grid(1, LEN);
264-
format_nlist_fill_a_se_r
265-
<<<block_grid, thread_grid>>> (
266-
coord,
267-
type,
268-
jrange,
269-
jlist,
270-
rcut_r,
271-
key,
272-
i_idx,
273-
MAGIC_NUMBER
274-
);
275-
const int ITEMS_PER_THREAD = 4;
276-
const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
277-
// BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
278-
BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
279-
}
280213

281214
template<typename FPTYPE>
282215
void format_nbor_list_1024 (
@@ -401,29 +334,7 @@ void DescrptSeRGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
401334
// cudaProfilerStart();
402335
get_i_idx_se_r<<<nblock, LEN>>> (nloc, ilist, i_idx);
403336

404-
if (nnei <= 256) {
405-
format_nbor_list_256 (
406-
coord,
407-
type,
408-
jrange,
409-
jlist,
410-
nloc,
411-
rcut_r,
412-
i_idx,
413-
key
414-
);
415-
} else if (nnei <= 512) {
416-
format_nbor_list_512 (
417-
coord,
418-
type,
419-
jrange,
420-
jlist,
421-
nloc,
422-
rcut_r,
423-
i_idx,
424-
key
425-
);
426-
} else if (nnei <= 1024) {
337+
if (MAGIC_NUMBER <= 1024) {
427338
format_nbor_list_1024 (
428339
coord,
429340
type,
@@ -434,7 +345,7 @@ void DescrptSeRGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
434345
i_idx,
435346
key
436347
);
437-
} else if (nnei <= 2048) {
348+
} else if (MAGIC_NUMBER <= 2048) {
438349
format_nbor_list_2048 (
439350
coord,
440351
type,
@@ -445,7 +356,7 @@ void DescrptSeRGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
445356
i_idx,
446357
key
447358
);
448-
} else if (nnei <= 4096) {
359+
} else if (MAGIC_NUMBER <= 4096) {
449360
format_nbor_list_4096 (
450361
coord,
451362
type,

source/op/descrpt_se_a_multi_device.cc

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -271,13 +271,7 @@ class DescrptSeAOp : public OpKernel {
271271
}
272272

273273
int get_magic_number(int const nnei) {
274-
if (nnei <= 256) {
275-
return 256;
276-
}
277-
else if (nnei <= 512) {
278-
return 512;
279-
}
280-
else if (nnei <= 1024) {
274+
if (nnei <= 1024) {
281275
return 1024;
282276
}
283277
else if (nnei <= 2048) {

source/op/descrpt_se_r_multi_device.cc

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -261,13 +261,7 @@ class DescrptSeROp : public OpKernel {
261261
}
262262

263263
int get_magic_number(int const nnei) {
264-
if (nnei <= 256) {
265-
return 256;
266-
}
267-
else if (nnei <= 512) {
268-
return 512;
269-
}
270-
else if (nnei <= 1024) {
264+
if (nnei <= 1024) {
271265
return 1024;
272266
}
273267
else if (nnei <= 2048) {

0 commit comments

Comments
 (0)