Skip to content

Commit 338018c

Browse files
authored
merge CUDA and ROCm in header files (#2845)
Signed-off-by: Jinzhe Zeng <[email protected]>
1 parent b34e5a3 commit 338018c

File tree

12 files changed

+24
-410
lines changed

12 files changed

+24
-410
lines changed

source/lib/include/coord.h

Lines changed: 2 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void compute_cell_info(int* cell_info,
4444
const float& rcut,
4545
const deepmd::Region<FPTYPE>& region);
4646

47-
#if GOOGLE_CUDA
47+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4848
// normalize coords
4949
// output:
5050
// coord
@@ -83,47 +83,6 @@ int copy_coord_gpu(FPTYPE* out_c,
8383
const int& total_cellnum,
8484
const int* cell_info,
8585
const deepmd::Region<FPTYPE>& region);
86-
#endif // GOOGLE_CUDA
87-
88-
#if TENSORFLOW_USE_ROCM
89-
// normalize coords
90-
// output:
91-
// coord
92-
// input:
93-
// natom, box_info: boxt, rec_boxt
94-
template <typename FPTYPE>
95-
void normalize_coord_gpu(FPTYPE* coord,
96-
const int natom,
97-
const deepmd::Region<FPTYPE>& region);
98-
99-
// copy coordinates
100-
// outputs:
101-
// out_c, out_t, mapping, nall,
102-
// int_data(temp cuda
103-
// memory):idx_map,idx_map_noshift,temp_idx_order,loc_cellnum_map,total_cellnum_map,mask_cellnum_map,
104-
// cell_map,cell_shift_map,sec_loc_cellnum_map,sec_total_cellnum_map,loc_clist
105-
// inputs:
106-
// in_c, in_t, nloc, mem_nall, loc_cellnum, total_cellnum, cell_info,
107-
// box_info mem_nall is the size of allocated memory for out_c, out_t,
108-
// mapping
109-
// returns
110-
// 0: successful
111-
// 1: the memory is not large enough to hold all copied coords and types.
112-
// i.e. nall > mem_nall
113-
template <typename FPTYPE>
114-
int copy_coord_gpu(FPTYPE* out_c,
115-
int* out_t,
116-
int* mapping,
117-
int* nall,
118-
int* int_data,
119-
const FPTYPE* in_c,
120-
const int* in_t,
121-
const int& nloc,
122-
const int& mem_nall,
123-
const int& loc_cellnum,
124-
const int& total_cellnum,
125-
const int* cell_info,
126-
const deepmd::Region<FPTYPE>& region);
127-
#endif // TENSORFLOW_USE_ROCM
86+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
12887

12988
} // namespace deepmd

source/lib/include/fmt_nlist.h

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ void format_nlist_cpu(int* nlist,
1818
const float rcut,
1919
const std::vector<int> sec);
2020

21-
#if GOOGLE_CUDA
21+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
2222
template <typename FPTYPE>
2323
void format_nbor_list_gpu(int* nlist,
2424
const FPTYPE* coord,
@@ -40,31 +40,7 @@ void test_encoding_decoding_nbor_info_gpu(uint_64* key,
4040
const FPTYPE* in_dist,
4141
const int* in_index,
4242
const int size_of_array);
43-
#endif // GOOGLE_CUDA
44-
45-
#if TENSORFLOW_USE_ROCM
46-
template <typename FPTYPE>
47-
void format_nbor_list_gpu(int* nlist,
48-
const FPTYPE* coord,
49-
const int* type,
50-
const deepmd::InputNlist& gpu_inlist,
51-
int* array_int,
52-
uint_64* array_longlong,
53-
const int max_nbor_size,
54-
const int nloc,
55-
const int nall,
56-
const float rcut,
57-
const std::vector<int> sec);
58-
59-
template <typename FPTYPE>
60-
void test_encoding_decoding_nbor_info_gpu(uint_64* key,
61-
int* out_type,
62-
int* out_index,
63-
const int* in_type,
64-
const FPTYPE* in_dist,
65-
const int* in_index,
66-
const int size_of_array);
67-
#endif // TENSORFLOW_USE_ROCM
43+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
6844

6945
} // namespace deepmd
7046

source/lib/include/gelu.h

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ void gelu_grad_grad_cpu(FPTYPE* out,
2020
const FPTYPE* dy_2,
2121
const int_64 size);
2222

23-
#if GOOGLE_CUDA
23+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
2424
template <typename FPTYPE>
2525
void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);
2626

@@ -36,24 +36,5 @@ void gelu_grad_grad_gpu(FPTYPE* out,
3636
const FPTYPE* dy,
3737
const FPTYPE* dy_2,
3838
const int_64 size);
39-
#endif // GOOGLE_CUDA
40-
41-
#if TENSORFLOW_USE_ROCM
42-
template <typename FPTYPE>
43-
void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);
44-
45-
template <typename FPTYPE>
46-
void gelu_grad_gpu(FPTYPE* out,
47-
const FPTYPE* xx,
48-
const FPTYPE* dy,
49-
const int_64 size);
50-
51-
template <typename FPTYPE>
52-
void gelu_grad_grad_gpu(FPTYPE* out,
53-
const FPTYPE* xx,
54-
const FPTYPE* dy,
55-
const FPTYPE* dy_2,
56-
const int_64 size);
57-
58-
#endif // TENSORFLOW_USE_ROCM
39+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
5940
} // namespace deepmd

source/lib/include/neighbor_list.h

Lines changed: 2 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ void use_nlist_map(int* nlist,
121121

122122
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
123123

124-
#if GOOGLE_CUDA
124+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
125125
// build neighbor list.
126126
// outputs
127127
// nlist, max_list_size
@@ -162,49 +162,7 @@ void use_nei_info_gpu(int* nlist,
162162
const int ntypes,
163163
const bool b_nlist_map);
164164

165-
#endif // GOOGLE_CUDA
166-
167-
#if TENSORFLOW_USE_ROCM
168-
// build neighbor list.
169-
// outputs
170-
// nlist, max_list_size
171-
// max_list_size is the maximal size of jlist.
172-
// inputs
173-
// c_cpy, nloc, nall, mem_size, rcut, region
174-
// mem_size is the size of allocated memory for jlist.
175-
// returns
176-
// 0: succssful
177-
// 1: the memory is not large enough to hold all neighbors.
178-
// i.e. max_list_size > mem_nall
179-
template <typename FPTYPE>
180-
int build_nlist_gpu(InputNlist& nlist,
181-
int* max_list_size,
182-
int* nlist_data,
183-
const FPTYPE* c_cpy,
184-
const int& nloc,
185-
const int& nall,
186-
const int& mem_size,
187-
const float& rcut);
188-
/**
189-
* @brief Filter the fake atom type.
190-
* @details If >=0, set to 0; if <0, set to -1.
191-
* @param ftype_out The output filtered atom type.
192-
* @param ftype_in The input atom type.
193-
* @param nloc The number of atoms.
194-
*/
195-
void filter_ftype_gpu(int* ftype_out, const int* ftype_in, const int nloc);
196-
197-
void use_nei_info_gpu(int* nlist,
198-
int* ntype,
199-
bool* nmask,
200-
const int* type,
201-
const int* nlist_map,
202-
const int nloc,
203-
const int nnei,
204-
const int ntypes,
205-
const bool b_nlist_map);
206-
207-
#endif // TENSORFLOW_USE_ROCM
165+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
208166

209167
} // namespace deepmd
210168

source/lib/include/prod_env_mat.h

Lines changed: 2 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void prod_env_mat_r_cpu(FPTYPE *em,
4242
const float rcut_smth,
4343
const std::vector<int> sec);
4444

45-
#if GOOGLE_CUDA
45+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4646
template <typename FPTYPE>
4747
void prod_env_mat_a_gpu(FPTYPE *em,
4848
FPTYPE *em_deriv,
@@ -88,54 +88,6 @@ void env_mat_nbor_update(InputNlist &inlist,
8888
int *&nbor_list_dev,
8989
const int *mesh,
9090
const int size);
91-
#endif // GOOGLE_CUDA
92-
93-
#if TENSORFLOW_USE_ROCM
94-
template <typename FPTYPE>
95-
void prod_env_mat_a_gpu(FPTYPE *em,
96-
FPTYPE *em_deriv,
97-
FPTYPE *rij,
98-
int *nlist,
99-
const FPTYPE *coord,
100-
const int *type,
101-
const InputNlist &gpu_inlist,
102-
int *array_int,
103-
unsigned long long *array_longlong,
104-
const int max_nbor_size,
105-
const FPTYPE *avg,
106-
const FPTYPE *std,
107-
const int nloc,
108-
const int nall,
109-
const float rcut,
110-
const float rcut_smth,
111-
const std::vector<int> sec,
112-
const int *f_type = NULL);
113-
114-
template <typename FPTYPE>
115-
void prod_env_mat_r_gpu(FPTYPE *em,
116-
FPTYPE *em_deriv,
117-
FPTYPE *rij,
118-
int *nlist,
119-
const FPTYPE *coord,
120-
const int *type,
121-
const InputNlist &gpu_inlist,
122-
int *array_int,
123-
unsigned long long *array_longlong,
124-
const int max_nbor_size,
125-
const FPTYPE *avg,
126-
const FPTYPE *std,
127-
const int nloc,
128-
const int nall,
129-
const float rcut,
130-
const float rcut_smth,
131-
const std::vector<int> sec);
132-
133-
void env_mat_nbor_update(InputNlist &inlist,
134-
InputNlist &gpu_inlist,
135-
int &max_nbor_size,
136-
int *&nbor_list_dev,
137-
const int *mesh,
138-
const int size);
139-
#endif // TENSORFLOW_USE_ROCM
91+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
14092

14193
} // namespace deepmd

source/lib/include/prod_env_mat_nvnmd.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,8 @@ void prod_env_mat_a_nvnmd_quantize_cpu(FPTYPE* em,
4545
const std::vector<int> sec,
4646
const int* f_type = NULL);
4747

48-
#if GOOGLE_CUDA
48+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4949
// UNDEFINE
50-
#endif // GOOGLE_CUDA
51-
52-
#if TENSORFLOW_USE_ROCM
53-
// UNDEFINE
54-
#endif // TENSORFLOW_USE_ROCM
50+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
5551

5652
} // namespace deepmd

source/lib/include/prod_force.h

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ void prod_force_r_cpu(FPTYPE* force,
6767
const int nnei,
6868
const int nframes);
6969

70-
#if GOOGLE_CUDA
70+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
7171
template <typename FPTYPE>
7272
void prod_force_a_gpu(FPTYPE* force,
7373
const FPTYPE* net_deriv,
@@ -87,28 +87,6 @@ void prod_force_r_gpu(FPTYPE* force,
8787
const int nall,
8888
const int nnei,
8989
const int nframes);
90-
#endif // GOOGLE_CUDA
91-
92-
#if TENSORFLOW_USE_ROCM
93-
template <typename FPTYPE>
94-
void prod_force_a_gpu(FPTYPE* force,
95-
const FPTYPE* net_deriv,
96-
const FPTYPE* in_deriv,
97-
const int* nlist,
98-
const int nloc,
99-
const int nall,
100-
const int nnei,
101-
const int nframes);
102-
103-
template <typename FPTYPE>
104-
void prod_force_r_gpu(FPTYPE* force,
105-
const FPTYPE* net_deriv,
106-
const FPTYPE* in_deriv,
107-
const int* nlist,
108-
const int nloc,
109-
const int nall,
110-
const int nnei,
111-
const int nframes);
112-
#endif // TENSORFLOW_USE_ROCM
90+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
11391

11492
} // namespace deepmd

source/lib/include/prod_force_grad.h

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ void prod_force_grad_r_cpu(FPTYPE* grad_net,
2121
const int nnei,
2222
const int nframes);
2323

24-
#if GOOGLE_CUDA
24+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
2525
template <typename FPTYPE>
2626
void prod_force_grad_a_gpu(FPTYPE* grad_net,
2727
const FPTYPE* grad,
@@ -39,25 +39,6 @@ void prod_force_grad_r_gpu(FPTYPE* grad_net,
3939
const int nloc,
4040
const int nnei,
4141
const int nframes);
42-
#endif // GOOGLE_CUDA
42+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4343

44-
#if TENSORFLOW_USE_ROCM
45-
template <typename FPTYPE>
46-
void prod_force_grad_a_gpu(FPTYPE* grad_net,
47-
const FPTYPE* grad,
48-
const FPTYPE* env_deriv,
49-
const int* nlist,
50-
const int nloc,
51-
const int nnei,
52-
const int nframes);
53-
54-
template <typename FPTYPE>
55-
void prod_force_grad_r_gpu(FPTYPE* grad_net,
56-
const FPTYPE* grad,
57-
const FPTYPE* env_deriv,
58-
const int* nlist,
59-
const int nloc,
60-
const int nnei,
61-
const int nframes);
62-
#endif // TENSORFLOW_USE_ROCM
6344
} // namespace deepmd

source/lib/include/prod_virial.h

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ void prod_virial_r_cpu(FPTYPE* virial,
2525
const int nall,
2626
const int nnei);
2727

28-
#if GOOGLE_CUDA
28+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
2929
template <typename FPTYPE>
3030
void prod_virial_a_gpu(FPTYPE* virial,
3131
FPTYPE* atom_virial,
@@ -47,30 +47,6 @@ void prod_virial_r_gpu(FPTYPE* virial,
4747
const int nloc,
4848
const int nall,
4949
const int nnei);
50-
#endif // GOOGLE_CUDA
51-
52-
#if TENSORFLOW_USE_ROCM
53-
template <typename FPTYPE>
54-
void prod_virial_a_gpu(FPTYPE* virial,
55-
FPTYPE* atom_virial,
56-
const FPTYPE* net_deriv,
57-
const FPTYPE* env_deriv,
58-
const FPTYPE* rij,
59-
const int* nlist,
60-
const int nloc,
61-
const int nall,
62-
const int nnei);
63-
64-
template <typename FPTYPE>
65-
void prod_virial_r_gpu(FPTYPE* virial,
66-
FPTYPE* atom_virial,
67-
const FPTYPE* net_deriv,
68-
const FPTYPE* env_deriv,
69-
const FPTYPE* rij,
70-
const int* nlist,
71-
const int nloc,
72-
const int nall,
73-
const int nnei);
74-
#endif // TENSORFLOW_USE_ROCM
50+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
7551

7652
} // namespace deepmd

0 commit comments

Comments
 (0)