Fix a bug in nbor sorting

denghuilu · denghuilu · commit fe3a7ec47796 · 2021-01-26T12:46:00.000+08:00
When the value of sel is smaller than the number of LAMMPS neighbors (nbors), the program may encounter a GPU sorting error.
diff --git a/source/op/cuda/descrpt_se_a.cu b/source/op/cuda/descrpt_se_a.cu
@@ -228,73 +228,6 @@ __global__ void compute_descriptor_se_a (FPTYPE* descript,
     }
 }
 
-template<typename FPTYPE>
-void format_nbor_list_256 (
-    const FPTYPE* coord,
-    const int* type,
-    const int* jrange,
-    const int* jlist,
-    const int& nloc,       
-    const float& rcut_r, 
-    int * i_idx, 
-    int_64 * key
-) 
-{   
-    const int LEN = 256;
-    const int MAGIC_NUMBER = 256;
-    const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(1, LEN);
-    format_nlist_fill_a_se_a
-    <<<block_grid, thread_grid>>> (
-        coord,
-        type,
-        jrange,
-        jlist,
-        rcut_r,
-        key,
-        i_idx,
-        MAGIC_NUMBER
-    );
-    const int ITEMS_PER_THREAD = 4;
-    const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
-    // BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
-    BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
-}
-
-template<typename FPTYPE>
-void format_nbor_list_512 (
-    const FPTYPE* coord,
-    const int* type,
-    const int* jrange,
-    const int* jlist,
-    const int& nloc,       
-    const float& rcut_r, 
-    int * i_idx, 
-    int_64 * key
-) 
-{   
-    const int LEN = 256;
-    const int MAGIC_NUMBER = 512;
-    const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(1, LEN);
-    format_nlist_fill_a_se_a
-    <<<block_grid, thread_grid>>> (
-        coord,
-        type,
-        jrange,
-        jlist,
-        rcut_r,
-        key,
-        i_idx,
-        MAGIC_NUMBER
-    );
-    const int ITEMS_PER_THREAD = 4;
-    const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
-    // BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
-    BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
-}
 
 template<typename FPTYPE>
 void format_nbor_list_1024 (
@@ -419,29 +352,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
         // cudaProfilerStart();
         get_i_idx_se_a<<<nblock, LEN>>> (nloc, ilist, i_idx);
 
-        if (nnei <= 256) {
-            format_nbor_list_256 (
-                coord,
-                type,
-                jrange,
-                jlist,
-                nloc,       
-                rcut_r, 
-                i_idx, 
-                key
-            ); 
-        } else if (nnei <= 512) {
-            format_nbor_list_512 (
-                coord,
-                type,
-                jrange,
-                jlist,
-                nloc,       
-                rcut_r, 
-                i_idx, 
-                key
-            ); 
-        } else if (nnei <= 1024) {
+        if (MAGIC_NUMBER <= 1024) {
             format_nbor_list_1024 (
                 coord,
                 type,
@@ -452,7 +363,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
                 i_idx, 
                 key
             ); 
-        } else if (nnei <= 2048) {
+        } else if (MAGIC_NUMBER <= 2048) {
             format_nbor_list_2048 (
                 coord,
                 type,
@@ -463,7 +374,7 @@ void DescrptSeAGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
                 i_idx, 
                 key
             ); 
-        } else if (nnei <= 4096) {
+        } else if (MAGIC_NUMBER <= 4096) {
             format_nbor_list_4096 (
                 coord,
                 type,
diff --git a/source/op/cuda/descrpt_se_r.cu b/source/op/cuda/descrpt_se_r.cu
@@ -210,73 +210,6 @@ __global__ void compute_descriptor_se_r (FPTYPE* descript,
     }
 }
 
-template<typename FPTYPE>
-void format_nbor_list_256 (
-    const FPTYPE* coord,
-    const int* type,
-    const int* jrange,
-    const int* jlist,
-    const int& nloc,       
-    const float& rcut_r, 
-    int * i_idx, 
-    int_64 * key
-) 
-{   
-    const int LEN = 256;
-    const int MAGIC_NUMBER = 256;
-    const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(1, LEN);
-    format_nlist_fill_a_se_r
-    <<<block_grid, thread_grid>>> (
-        coord,
-        type,
-        jrange,
-        jlist,
-        rcut_r,
-        key,
-        i_idx,
-        MAGIC_NUMBER
-    );
-    const int ITEMS_PER_THREAD = 4;
-    const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
-    // BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
-    BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
-}
-
-template<typename FPTYPE>
-void format_nbor_list_512 (
-    const FPTYPE* coord,
-    const int* type,
-    const int* jrange,
-    const int* jlist,
-    const int& nloc,       
-    const float& rcut_r, 
-    int * i_idx, 
-    int_64 * key
-) 
-{   
-    const int LEN = 256;
-    const int MAGIC_NUMBER = 512;
-    const int nblock = (MAGIC_NUMBER + LEN - 1) / LEN;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(1, LEN);
-    format_nlist_fill_a_se_r
-    <<<block_grid, thread_grid>>> (
-        coord,
-        type,
-        jrange,
-        jlist,
-        rcut_r,
-        key,
-        i_idx,
-        MAGIC_NUMBER
-    );
-    const int ITEMS_PER_THREAD = 4;
-    const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
-    // BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
-    BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
-}
 
 template<typename FPTYPE>
 void format_nbor_list_1024 (
@@ -401,29 +334,7 @@ void DescrptSeRGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
         // cudaProfilerStart();
         get_i_idx_se_r<<<nblock, LEN>>> (nloc, ilist, i_idx);
 
-        if (nnei <= 256) {
-            format_nbor_list_256 (
-                coord,
-                type,
-                jrange,
-                jlist,
-                nloc,       
-                rcut_r, 
-                i_idx, 
-                key
-            ); 
-        } else if (nnei <= 512) {
-            format_nbor_list_512 (
-                coord,
-                type,
-                jrange,
-                jlist,
-                nloc,       
-                rcut_r, 
-                i_idx, 
-                key
-            ); 
-        } else if (nnei <= 1024) {
+        if (MAGIC_NUMBER <= 1024) {
             format_nbor_list_1024 (
                 coord,
                 type,
@@ -434,7 +345,7 @@ void DescrptSeRGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
                 i_idx, 
                 key
             ); 
-        } else if (nnei <= 2048) {
+        } else if (MAGIC_NUMBER <= 2048) {
             format_nbor_list_2048 (
                 coord,
                 type,
@@ -445,7 +356,7 @@ void DescrptSeRGPUExecuteFunctor<FPTYPE>::operator()(const FPTYPE * coord, const
                 i_idx, 
                 key
             ); 
-        } else if (nnei <= 4096) {
+        } else if (MAGIC_NUMBER <= 4096) {
             format_nbor_list_4096 (
                 coord,
                 type,
diff --git a/source/op/descrpt_se_a_multi_device.cc b/source/op/descrpt_se_a_multi_device.cc
@@ -271,13 +271,7 @@ class DescrptSeAOp : public OpKernel {
     }
 
     int get_magic_number(int const nnei) {
-        if (nnei <= 256) {
-            return 256;
-        }
-        else if (nnei <= 512) {
-            return 512;
-        }
-        else if (nnei <= 1024) {
+        if (nnei <= 1024) {
             return 1024;
         }
         else if (nnei <= 2048) {
diff --git a/source/op/descrpt_se_r_multi_device.cc b/source/op/descrpt_se_r_multi_device.cc
@@ -261,13 +261,7 @@ class DescrptSeROp : public OpKernel {
     }
 
     int get_magic_number(int const nnei) {
-        if (nnei <= 256) {
-            return 256;
-        }
-        else if (nnei <= 512) {
-            return 512;
-        }
-        else if (nnei <= 1024) {
+        if (nnei <= 1024) {
             return 1024;
         }
         else if (nnei <= 2048) {

Original file line number	Diff line number	Diff line change
`@@ -271,13 +271,7 @@ class DescrptSeAOp : public OpKernel {`
`271`	`271`	`}`
`272`	`272`
`273`	`273`	`int get_magic_number(int const nnei) {`
`274`		`- if (nnei <= 256) {`
`275`		`- return 256;`
`276`		`- }`
`277`		`- else if (nnei <= 512) {`
`278`		`- return 512;`
`279`		`- }`
`280`		`- else if (nnei <= 1024) {`
	`274`	`+ if (nnei <= 1024) {`
`281`	`275`	`return 1024;`
`282`	`276`	`}`
`283`	`277`	`else if (nnei <= 2048) {`
Original file line number	Diff line number	Diff line change
`@@ -261,13 +261,7 @@ class DescrptSeROp : public OpKernel {`
`261`	`261`	`}`
`262`	`262`
`263`	`263`	`int get_magic_number(int const nnei) {`
`264`		`- if (nnei <= 256) {`
`265`		`- return 256;`
`266`		`- }`
`267`		`- else if (nnei <= 512) {`
`268`		`- return 512;`
`269`		`- }`
`270`		`- else if (nnei <= 1024) {`
	`264`	`+ if (nnei <= 1024) {`
`271`	`265`	`return 1024;`
`272`	`266`	`}`
`273`	`267`	`else if (nnei <= 2048) {`