Skip to content

Commit 338018c

Browse files
authored
merge CUDA and ROCm in header files (#2845)
Signed-off-by: Jinzhe Zeng <[email protected]>
1 parent b34e5a3 commit 338018c

File tree

12 files changed

+24
-410
lines changed

12 files changed

+24
-410
lines changed

source/lib/include/coord.h

Lines changed: 2 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void compute_cell_info(int* cell_info,
4444
const float& rcut,
4545
const deepmd::Region<FPTYPE>& region);
4646

47-
#if GOOGLE_CUDA
47+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4848
// normalize coords
4949
// output:
5050
// coord
@@ -83,47 +83,6 @@ int copy_coord_gpu(FPTYPE* out_c,
8383
const int& total_cellnum,
8484
const int* cell_info,
8585
const deepmd::Region<FPTYPE>& region);
86-
#endif // GOOGLE_CUDA
87-
88-
#if TENSORFLOW_USE_ROCM
89-
// normalize coords
90-
// output:
91-
// coord
92-
// input:
93-
// natom, box_info: boxt, rec_boxt
94-
template <typename FPTYPE>
95-
void normalize_coord_gpu(FPTYPE* coord,
96-
const int natom,
97-
const deepmd::Region<FPTYPE>& region);
98-
99-
// copy coordinates
100-
// outputs:
101-
// out_c, out_t, mapping, nall,
102-
// int_data(temp cuda
103-
// memory):idx_map,idx_map_noshift,temp_idx_order,loc_cellnum_map,total_cellnum_map,mask_cellnum_map,
104-
// cell_map,cell_shift_map,sec_loc_cellnum_map,sec_total_cellnum_map,loc_clist
105-
// inputs:
106-
// in_c, in_t, nloc, mem_nall, loc_cellnum, total_cellnum, cell_info,
107-
// box_info mem_nall is the size of allocated memory for out_c, out_t,
108-
// mapping
109-
// returns
110-
// 0: successful
111-
// 1: the memory is not large enough to hold all copied coords and types.
112-
// i.e. nall > mem_nall
113-
template <typename FPTYPE>
114-
int copy_coord_gpu(FPTYPE* out_c,
115-
int* out_t,
116-
int* mapping,
117-
int* nall,
118-
int* int_data,
119-
const FPTYPE* in_c,
120-
const int* in_t,
121-
const int& nloc,
122-
const int& mem_nall,
123-
const int& loc_cellnum,
124-
const int& total_cellnum,
125-
const int* cell_info,
126-
const deepmd::Region<FPTYPE>& region);
127-
#endif // TENSORFLOW_USE_ROCM
86+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
12887

12988
} // namespace deepmd

source/lib/include/fmt_nlist.h

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ void format_nlist_cpu(int* nlist,
1818
const float rcut,
1919
const std::vector<int> sec);
2020

21-
#if GOOGLE_CUDA
21+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
2222
template <typename FPTYPE>
2323
void format_nbor_list_gpu(int* nlist,
2424
const FPTYPE* coord,
@@ -40,31 +40,7 @@ void test_encoding_decoding_nbor_info_gpu(uint_64* key,
4040
const FPTYPE* in_dist,
4141
const int* in_index,
4242
const int size_of_array);
43-
#endif // GOOGLE_CUDA
44-
45-
#if TENSORFLOW_USE_ROCM
46-
template <typename FPTYPE>
47-
void format_nbor_list_gpu(int* nlist,
48-
const FPTYPE* coord,
49-
const int* type,
50-
const deepmd::InputNlist& gpu_inlist,
51-
int* array_int,
52-
uint_64* array_longlong,
53-
const int max_nbor_size,
54-
const int nloc,
55-
const int nall,
56-
const float rcut,
57-
const std::vector<int> sec);
58-
59-
template <typename FPTYPE>
60-
void test_encoding_decoding_nbor_info_gpu(uint_64* key,
61-
int* out_type,
62-
int* out_index,
63-
const int* in_type,
64-
const FPTYPE* in_dist,
65-
const int* in_index,
66-
const int size_of_array);
67-
#endif // TENSORFLOW_USE_ROCM
43+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
6844

6945
} // namespace deepmd
7046

source/lib/include/gelu.h

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ void gelu_grad_grad_cpu(FPTYPE* out,
2020
const FPTYPE* dy_2,
2121
const int_64 size);
2222

23-
#if GOOGLE_CUDA
23+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
2424
template <typename FPTYPE>
2525
void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);
2626

@@ -36,24 +36,5 @@ void gelu_grad_grad_gpu(FPTYPE* out,
3636
const FPTYPE* dy,
3737
const FPTYPE* dy_2,
3838
const int_64 size);
39-
#endif // GOOGLE_CUDA
40-
41-
#if TENSORFLOW_USE_ROCM
42-
template <typename FPTYPE>
43-
void gelu_gpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);
44-
45-
template <typename FPTYPE>
46-
void gelu_grad_gpu(FPTYPE* out,
47-
const FPTYPE* xx,
48-
const FPTYPE* dy,
49-
const int_64 size);
50-
51-
template <typename FPTYPE>
52-
void gelu_grad_grad_gpu(FPTYPE* out,
53-
const FPTYPE* xx,
54-
const FPTYPE* dy,
55-
const FPTYPE* dy_2,
56-
const int_64 size);
57-
58-
#endif // TENSORFLOW_USE_ROCM
39+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
5940
} // namespace deepmd

source/lib/include/neighbor_list.h

Lines changed: 2 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ void use_nlist_map(int* nlist,
121121

122122
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
123123

124-
#if GOOGLE_CUDA
124+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
125125
// build neighbor list.
126126
// outputs
127127
// nlist, max_list_size
@@ -162,49 +162,7 @@ void use_nei_info_gpu(int* nlist,
162162
const int ntypes,
163163
const bool b_nlist_map);
164164

165-
#endif // GOOGLE_CUDA
166-
167-
#if TENSORFLOW_USE_ROCM
168-
// build neighbor list.
169-
// outputs
170-
// nlist, max_list_size
171-
// max_list_size is the maximal size of jlist.
172-
// inputs
173-
// c_cpy, nloc, nall, mem_size, rcut, region
174-
// mem_size is the size of allocated memory for jlist.
175-
// returns
176-
// 0: succssful
177-
// 1: the memory is not large enough to hold all neighbors.
178-
// i.e. max_list_size > mem_nall
179-
template <typename FPTYPE>
180-
int build_nlist_gpu(InputNlist& nlist,
181-
int* max_list_size,
182-
int* nlist_data,
183-
const FPTYPE* c_cpy,
184-
const int& nloc,
185-
const int& nall,
186-
const int& mem_size,
187-
const float& rcut);
188-
/**
189-
* @brief Filter the fake atom type.
190-
* @details If >=0, set to 0; if <0, set to -1.
191-
* @param ftype_out The output filtered atom type.
192-
* @param ftype_in The input atom type.
193-
* @param nloc The number of atoms.
194-
*/
195-
void filter_ftype_gpu(int* ftype_out, const int* ftype_in, const int nloc);
196-
197-
void use_nei_info_gpu(int* nlist,
198-
int* ntype,
199-
bool* nmask,
200-
const int* type,
201-
const int* nlist_map,
202-
const int nloc,
203-
const int nnei,
204-
const int ntypes,
205-
const bool b_nlist_map);
206-
207-
#endif // TENSORFLOW_USE_ROCM
165+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
208166

209167
} // namespace deepmd
210168

source/lib/include/prod_env_mat.h

Lines changed: 2 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void prod_env_mat_r_cpu(FPTYPE *em,
4242
const float rcut_smth,
4343
const std::vector<int> sec);
4444

45-
#if GOOGLE_CUDA
45+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4646
template <typename FPTYPE>
4747
void prod_env_mat_a_gpu(FPTYPE *em,
4848
FPTYPE *em_deriv,
@@ -88,54 +88,6 @@ void env_mat_nbor_update(InputNlist &inlist,
8888
int *&nbor_list_dev,
8989
const int *mesh,
9090
const int size);
91-
#endif // GOOGLE_CUDA
92-
93-
#if TENSORFLOW_USE_ROCM
94-
template <typename FPTYPE>
95-
void prod_env_mat_a_gpu(FPTYPE *em,
96-
FPTYPE *em_deriv,
97-
FPTYPE *rij,
98-
int *nlist,
99-
const FPTYPE *coord,
100-
const int *type,
101-
const InputNlist &gpu_inlist,
102-
int *array_int,
103-
unsigned long long *array_longlong,
104-
const int max_nbor_size,
105-
const FPTYPE *avg,
106-
const FPTYPE *std,
107-
const int nloc,
108-
const int nall,
109-
const float rcut,
110-
const float rcut_smth,
111-
const std::vector<int> sec,
112-
const int *f_type = NULL);
113-
114-
template <typename FPTYPE>
115-
void prod_env_mat_r_gpu(FPTYPE *em,
116-
FPTYPE *em_deriv,
117-
FPTYPE *rij,
118-
int *nlist,
119-
const FPTYPE *coord,
120-
const int *type,
121-
const InputNlist &gpu_inlist,
122-
int *array_int,
123-
unsigned long long *array_longlong,
124-
const int max_nbor_size,
125-
const FPTYPE *avg,
126-
const FPTYPE *std,
127-
const int nloc,
128-
const int nall,
129-
const float rcut,
130-
const float rcut_smth,
131-
const std::vector<int> sec);
132-
133-
void env_mat_nbor_update(InputNlist &inlist,
134-
InputNlist &gpu_inlist,
135-
int &max_nbor_size,
136-
int *&nbor_list_dev,
137-
const int *mesh,
138-
const int size);
139-
#endif // TENSORFLOW_USE_ROCM
91+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
14092

14193
} // namespace deepmd

source/lib/include/prod_env_mat_nvnmd.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,8 @@ void prod_env_mat_a_nvnmd_quantize_cpu(FPTYPE* em,
4545
const std::vector<int> sec,
4646
const int* f_type = NULL);
4747

48-
#if GOOGLE_CUDA
48+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4949
// UNDEFINE
50-
#endif // GOOGLE_CUDA
51-
52-
#if TENSORFLOW_USE_ROCM
53-
// UNDEFINE
54-
#endif // TENSORFLOW_USE_ROCM
50+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
5551

5652
} // namespace deepmd

source/lib/include/prod_force.h

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ void prod_force_r_cpu(FPTYPE* force,
6767
const int nnei,
6868
const int nframes);
6969

70-
#if GOOGLE_CUDA
70+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
7171
template <typename FPTYPE>
7272
void prod_force_a_gpu(FPTYPE* force,
7373
const FPTYPE* net_deriv,
@@ -87,28 +87,6 @@ void prod_force_r_gpu(FPTYPE* force,
8787
const int nall,
8888
const int nnei,
8989
const int nframes);
90-
#endif // GOOGLE_CUDA
91-
92-
#if TENSORFLOW_USE_ROCM
93-
template <typename FPTYPE>
94-
void prod_force_a_gpu(FPTYPE* force,
95-
const FPTYPE* net_deriv,
96-
const FPTYPE* in_deriv,
97-
const int* nlist,
98-
const int nloc,
99-
const int nall,
100-
const int nnei,
101-
const int nframes);
102-
103-
template <typename FPTYPE>
104-
void prod_force_r_gpu(FPTYPE* force,
105-
const FPTYPE* net_deriv,
106-
const FPTYPE* in_deriv,
107-
const int* nlist,
108-
const int nloc,
109-
const int nall,
110-
const int nnei,
111-
const int nframes);
112-
#endif // TENSORFLOW_USE_ROCM
90+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
11391

11492
} // namespace deepmd

source/lib/include/prod_force_grad.h

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ void prod_force_grad_r_cpu(FPTYPE* grad_net,
2121
const int nnei,
2222
const int nframes);
2323

24-
#if GOOGLE_CUDA
24+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
2525
template <typename FPTYPE>
2626
void prod_force_grad_a_gpu(FPTYPE* grad_net,
2727
const FPTYPE* grad,
@@ -39,25 +39,6 @@ void prod_force_grad_r_gpu(FPTYPE* grad_net,
3939
const int nloc,
4040
const int nnei,
4141
const int nframes);
42-
#endif // GOOGLE_CUDA
42+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4343

44-
#if TENSORFLOW_USE_ROCM
45-
template <typename FPTYPE>
46-
void prod_force_grad_a_gpu(FPTYPE* grad_net,
47-
const FPTYPE* grad,
48-
const FPTYPE* env_deriv,
49-
const int* nlist,
50-
const int nloc,
51-
const int nnei,
52-
const int nframes);
53-
54-
template <typename FPTYPE>
55-
void prod_force_grad_r_gpu(FPTYPE* grad_net,
56-
const FPTYPE* grad,
57-
const FPTYPE* env_deriv,
58-
const int* nlist,
59-
const int nloc,
60-
const int nnei,
61-
const int nframes);
62-
#endif // TENSORFLOW_USE_ROCM
6344
} // namespace deepmd

source/lib/include/prod_virial.h

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ void prod_virial_r_cpu(FPTYPE* virial,
2525
const int nall,
2626
const int nnei);
2727

28-
#if GOOGLE_CUDA
28+
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
2929
template <typename FPTYPE>
3030
void prod_virial_a_gpu(FPTYPE* virial,
3131
FPTYPE* atom_virial,
@@ -47,30 +47,6 @@ void prod_virial_r_gpu(FPTYPE* virial,
4747
const int nloc,
4848
const int nall,
4949
const int nnei);
50-
#endif // GOOGLE_CUDA
51-
52-
#if TENSORFLOW_USE_ROCM
53-
template <typename FPTYPE>
54-
void prod_virial_a_gpu(FPTYPE* virial,
55-
FPTYPE* atom_virial,
56-
const FPTYPE* net_deriv,
57-
const FPTYPE* env_deriv,
58-
const FPTYPE* rij,
59-
const int* nlist,
60-
const int nloc,
61-
const int nall,
62-
const int nnei);
63-
64-
template <typename FPTYPE>
65-
void prod_virial_r_gpu(FPTYPE* virial,
66-
FPTYPE* atom_virial,
67-
const FPTYPE* net_deriv,
68-
const FPTYPE* env_deriv,
69-
const FPTYPE* rij,
70-
const int* nlist,
71-
const int nloc,
72-
const int nall,
73-
const int nnei);
74-
#endif // TENSORFLOW_USE_ROCM
50+
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
7551

7652
} // namespace deepmd

0 commit comments

Comments
 (0)