Skip to content

Commit 3c9dfc4

Browse files
committed
use lammps max_nbor_size as the upper boundary of gpu sorting
1 parent fe3a7ec commit 3c9dfc4

File tree

5 files changed

+88
-101
lines changed

5 files changed

+88
-101
lines changed

source/lib/include/CustomeOperation.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ void compute_descriptor_se_a_cpu (
169169
}
170170

171171
template<typename FPTYPE>
172-
void DescrptSeACPULauncher(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descrpt, FPTYPE * descrpt_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ntypes, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int magic_number) {
172+
void DescrptSeACPULauncher(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descrpt, FPTYPE * descrpt_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ntypes, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int max_nbor_size) {
173173
// set & normalize coord
174174
std::vector<FPTYPE> d_coord3(nall * 3);
175175
for (int ii = 0; ii < nall; ++ii) {
@@ -235,8 +235,8 @@ void DescrptSeACPULauncher(const FPTYPE * coord, const int * type, const int * i
235235

236236
#if GOOGLE_CUDA
237237
template<typename FPTYPE>
238-
void DescrptSeAGPULauncher(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, int * array_int, unsigned long long * array_longlong, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descrpt, FPTYPE * descrpt_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int magic_number) {
239-
DescrptSeAGPUExecuteFunctor<FPTYPE>()(coord, type, ilist, jrange, jlist, array_int, array_longlong, avg, std, descrpt, descrpt_deriv, rij, nlist, nloc, nall, nnei, ndescrpt, rcut_r, rcut_r_smth, sec_a, fill_nei_a, magic_number);
238+
void DescrptSeAGPULauncher(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, int * array_int, unsigned long long * array_longlong, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descrpt, FPTYPE * descrpt_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int max_nbor_size) {
239+
DescrptSeAGPUExecuteFunctor<FPTYPE>()(coord, type, ilist, jrange, jlist, array_int, array_longlong, avg, std, descrpt, descrpt_deriv, rij, nlist, nloc, nall, nnei, ndescrpt, rcut_r, rcut_r_smth, sec_a, fill_nei_a, max_nbor_size);
240240
}
241241
#endif // GOOGLE_CUDA
242242
// ******************************************************************************
@@ -432,7 +432,7 @@ void compute_descriptor_se_r_cpu (
432432
}
433433

434434
template<typename FPTYPE>
435-
void DescrptSeRCPULauncher(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descrpt, FPTYPE * descrpt_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ntypes, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int magic_number) {
435+
void DescrptSeRCPULauncher(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descrpt, FPTYPE * descrpt_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ntypes, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int max_nbor_size) {
436436
// set & normalize coord
437437
std::vector<FPTYPE> d_coord3(nall * 3);
438438
for (int ii = 0; ii < nall; ++ii) {
@@ -498,8 +498,8 @@ void DescrptSeRCPULauncher(const FPTYPE * coord, const int * type, const int * i
498498

499499
#if GOOGLE_CUDA
500500
template<typename FPTYPE>
501-
void DescrptSeRGPULauncher(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, int * array_int, unsigned long long * array_longlong, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descrpt, FPTYPE * descrpt_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int magic_number) {
502-
DescrptSeRGPUExecuteFunctor<FPTYPE>()(coord, type, ilist, jrange, jlist, array_int, array_longlong, avg, std, descrpt, descrpt_deriv, rij, nlist, nloc, nall, nnei, ndescrpt, rcut_r, rcut_r_smth, sec_a, fill_nei_a, magic_number);
501+
void DescrptSeRGPULauncher(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, int * array_int, unsigned long long * array_longlong, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descrpt, FPTYPE * descrpt_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int max_nbor_size) {
502+
DescrptSeRGPUExecuteFunctor<FPTYPE>()(coord, type, ilist, jrange, jlist, array_int, array_longlong, avg, std, descrpt, descrpt_deriv, rij, nlist, nloc, nall, nnei, ndescrpt, rcut_r, rcut_r_smth, sec_a, fill_nei_a, max_nbor_size);
503503
}
504504
#endif // GOOGLE_CUDA
505505
// ******************************************************************************

source/op/cuda/descrpt_se_a.cu

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,9 @@ __global__ void format_nlist_fill_a_se_a(const FPTYPE * coord,
8484
const float rcut,
8585
int_64 * key,
8686
int * i_idx,
87-
const int MAGIC_NUMBER)
87+
const int MAX_NBOR_SIZE)
8888
{
89-
// <<<nloc, MAGIC_NUMBER>>>
89+
// <<<nloc, MAX_NBOR_SIZE>>>
9090
const unsigned int idx = blockIdx.x;
9191
const unsigned int idy = blockIdx.y * blockDim.y + threadIdx.y;
9292

@@ -98,7 +98,7 @@ __global__ void format_nlist_fill_a_se_a(const FPTYPE * coord,
9898
const int * nei_idx = jlist + jrange[i_idx[idx]];
9999
// dev_copy(nei_idx, &jlist[jrange[i_idx]], nsize);
100100

101-
int_64 * key_in = key + idx * MAGIC_NUMBER;
101+
int_64 * key_in = key + idx * MAX_NBOR_SIZE;
102102

103103
FPTYPE diff[3];
104104
const int & j_idx = nei_idx[idy];
@@ -121,7 +121,7 @@ __global__ void format_nlist_fill_b_se_a(int * nlist,
121121
const int * sec_a,
122122
const int sec_a_size,
123123
int * nei_iter_dev,
124-
const int MAGIC_NUMBER)
124+
const int MAX_NBOR_SIZE)
125125
{
126126

127127
const unsigned int idy = blockIdx.x * blockDim.x + threadIdx.x;
@@ -132,13 +132,13 @@ __global__ void format_nlist_fill_b_se_a(int * nlist,
132132

133133
int * row_nlist = nlist + idy * nlist_size;
134134
int * nei_iter = nei_iter_dev + idy * sec_a_size;
135-
int_64 * key_out = key + nloc * MAGIC_NUMBER + idy * MAGIC_NUMBER;
135+
int_64 * key_out = key + nloc * MAX_NBOR_SIZE + idy * MAX_NBOR_SIZE;
136136

137137
for (int ii = 0; ii < sec_a_size; ii++) {
138138
nei_iter[ii] = sec_a[ii];
139139
}
140140

141-
for (unsigned int kk = 0; key_out[kk] != key_out[MAGIC_NUMBER - 1]; kk++) {
141+
for (unsigned int kk = 0; key_out[kk] != key_out[MAX_NBOR_SIZE - 1]; kk++) {
142142
const int & nei_type = key_out[kk] / 1E15;
143143
if (nei_iter[nei_type] < sec_a[nei_type + 1]) {
144144
row_nlist[nei_iter[nei_type]++] = key_out[kk] % 100000;
@@ -242,8 +242,8 @@ void format_nbor_list_1024 (
242242
)
243243
{
244244
const int LEN = 256;
245-
const int MAGIC_NUMBER = 1024;
246-
const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
245+
const int MAX_NBOR_SIZE = 1024;
246+
const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
247247
dim3 block_grid(nloc, nblock);
248248
dim3 thread_grid(1, LEN);
249249
format_nlist_fill_a_se_a
@@ -255,12 +255,12 @@ void format_nbor_list_1024 (
255255
rcut_r,
256256
key,
257257
i_idx,
258-
MAGIC_NUMBER
258+
MAX_NBOR_SIZE
259259
);
260260
const int ITEMS_PER_THREAD = 8;
261-
const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
261+
const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
262262
// BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
263-
BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
263+
BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAX_NBOR_SIZE);
264264
}
265265

266266
template<typename FPTYPE>
@@ -276,8 +276,8 @@ void format_nbor_list_2048 (
276276
)
277277
{
278278
const int LEN = 256;
279-
const int MAGIC_NUMBER = 2048;
280-
const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
279+
const int MAX_NBOR_SIZE = 2048;
280+
const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
281281
dim3 block_grid(nloc, nblock);
282282
dim3 thread_grid(1, LEN);
283283
format_nlist_fill_a_se_a
@@ -289,12 +289,12 @@ void format_nbor_list_2048 (
289289
rcut_r,
290290
key,
291291
i_idx,
292-
MAGIC_NUMBER
292+
MAX_NBOR_SIZE
293293
);
294294
const int ITEMS_PER_THREAD = 8;
295-
const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
295+
const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
296296
// BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
297-
BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
297+
BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAX_NBOR_SIZE);
298298
}
299299

300300
template<typename FPTYPE>
@@ -310,8 +310,8 @@ void format_nbor_list_4096 (
310310
)
311311
{
312312
const int LEN = 256;
313-
const int MAGIC_NUMBER = 4096;
314-
const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
313+
const int MAX_NBOR_SIZE = 4096;
314+
const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
315315
dim3 block_grid(nloc, nblock);
316316
dim3 thread_grid(1, LEN);
317317
format_nlist_fill_a_se_a
@@ -323,16 +323,16 @@ void format_nbor_list_4096 (
323323
rcut_r,
324324
key,
325325
i_idx,
326-
MAGIC_NUMBER
326+
MAX_NBOR_SIZE
327327
);
328328
const int ITEMS_PER_THREAD = 16;
329-
const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
329+
const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
330330
// BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
331-
BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
331+
BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAX_NBOR_SIZE);
332332
}
333333

334334
template <typename FPTYPE>
335-
void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, int * array_int, unsigned long long * array_longlong, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descript, FPTYPE * descript_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int MAGIC_NUMBER) {
335+
void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, int * array_int, unsigned long long * array_longlong, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descript, FPTYPE * descript_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int max_nbor_size) {
336336
const int LEN = 256;
337337
int nblock = (nloc + LEN -1) / LEN;
338338
int * sec_a_dev = array_int;
@@ -342,7 +342,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
342342

343343
cudaError_t res = cudaSuccess;
344344
res = cudaMemcpy(sec_a_dev, &sec_a[0], sizeof(int) * sec_a.size(), cudaMemcpyHostToDevice); cudaErrcheck(res);
345-
res = cudaMemset(key, 0xffffffff, sizeof(int_64) * nloc * MAGIC_NUMBER); cudaErrcheck(res);
345+
res = cudaMemset(key, 0xffffffff, sizeof(int_64) * nloc * max_nbor_size); cudaErrcheck(res);
346346
res = cudaMemset(nlist, -1, sizeof(int) * nloc * nnei); cudaErrcheck(res);
347347
res = cudaMemset(descript, 0.0, sizeof(FPTYPE) * nloc * ndescrpt); cudaErrcheck(res);
348348
res = cudaMemset(descript_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3); cudaErrcheck(res);
@@ -352,7 +352,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
352352
// cudaProfilerStart();
353353
get_i_idx_se_a<<<nblock, LEN>>> (nloc, ilist, i_idx);
354354

355-
if (MAGIC_NUMBER <= 1024) {
355+
if (max_nbor_size <= 1024) {
356356
format_nbor_list_1024 (
357357
coord,
358358
type,
@@ -363,7 +363,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
363363
i_idx,
364364
key
365365
);
366-
} else if (MAGIC_NUMBER <= 2048) {
366+
} else if (max_nbor_size <= 2048) {
367367
format_nbor_list_2048 (
368368
coord,
369369
type,
@@ -374,7 +374,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
374374
i_idx,
375375
key
376376
);
377-
} else if (MAGIC_NUMBER <= 4096) {
377+
} else if (max_nbor_size <= 4096) {
378378
format_nbor_list_4096 (
379379
coord,
380380
type,
@@ -397,7 +397,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
397397
sec_a_dev,
398398
sec_a.size(),
399399
nei_iter,
400-
MAGIC_NUMBER
400+
max_nbor_size
401401
);
402402
}
403403

0 commit comments

Comments
 (0)