Skip to content

Commit fccbacd

Browse files
author
Han Wang
committed
Support for distributed training, train with atomic energy, support single precision floating point numbers
1 parent 45d88c3 commit fccbacd

File tree

18 files changed

+1004
-295
lines changed

18 files changed

+1004
-295
lines changed

source/CMakeLists.txt

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
cmake_minimum_required(VERSION 3.0)
22
project(DeePMD)
33

4+
# set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
5+
# set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0")
6+
47
list (APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/)
58
list (APPEND CMAKE_CXX_FLAGS "-std=c++11 -Wno-ignored-attributes")
69

@@ -22,14 +25,17 @@ if ((NOT DEFINED CMAKE_BUILD_TYPE) OR CMAKE_BUILD_TYPE STREQUAL "")
2225
endif ()
2326

2427
# set op prec
25-
if (DEFINED OP_PREC)
26-
string ( TOLOWER ${OP_PREC} lower_op_prec )
27-
if (lower_op_prec STREQUAL "high")
28-
add_definitions (-DHIGH_PREC)
28+
if (DEFINED FLOAT_PREC)
29+
string ( TOLOWER ${FLOAT_PREC} lower_float_prec )
30+
if (lower_float_prec STREQUAL "high")
31+
set(PREC_DEF "-DHIGH_PREC")
32+
else ()
33+
set(PREC_DEF "")
2934
endif ()
3035
else ()
31-
add_definitions (-DHIGH_PREC)
36+
set(PREC_DEF "-DHIGH_PREC")
3237
endif()
38+
add_definitions (${PREC_DEF})
3339

3440
# find openmp
3541
find_package(OpenMP)
@@ -58,7 +64,7 @@ set (LIB_DEEPMD_IPI "deepmd_ipi")
5864
add_subdirectory (lib/)
5965
add_subdirectory (op/)
6066
add_subdirectory (train/)
61-
add_subdirectory (md/)
67+
# add_subdirectory (md/)
6268
add_subdirectory (ipi/)
6369
add_subdirectory (scripts/)
6470
add_subdirectory (lmp/)

source/ipi/driver.cc

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ char *trimwhitespace(char *str)
3838
}
3939

4040
void
41-
normalize_coord (vector<VALUETYPE > & coord,
42-
const SimulationRegion<VALUETYPE > & region)
41+
normalize_coord (vector<double > & coord,
42+
const SimulationRegion<double > & region)
4343
{
4444
int natoms = coord.size() / 3;
4545

4646
for (int ii = 0; ii < natoms; ++ii){
47-
VALUETYPE inter[3];
47+
double inter[3];
4848
region.phys2Inter (inter, &coord[3*ii]);
4949
for (int dd = 0; dd < 3; ++dd){
5050
inter[dd] -= int(floor(inter[dd]));
@@ -182,7 +182,22 @@ int main(int argc, char * argv[])
182182
normalize_coord (dcoord, region);
183183

184184
// nnp overwrites ener, force and virial
185+
#ifdef HIGH_PREC
185186
nnp_inter.compute (dener, dforce_tmp, dvirial, dcoord, dtype, dbox);
187+
#else
188+
// model in float prec
189+
vector<float> dcoord_(dcoord.size());
190+
vector<float> dbox_(dbox.size());
191+
for (unsigned dd = 0; dd < dcoord.size(); ++dd) dcoord_[dd] = dcoord[dd];
192+
for (unsigned dd = 0; dd < dbox.size(); ++dd) dbox_[dd] = dbox[dd];
193+
vector<float> dforce_(dforce.size(), 0);
194+
vector<float> dvirial_(dvirial.size(), 0);
195+
float dener_ = 0;
196+
nnp_inter.compute (dener_, dforce_, dvirial_, dcoord_, dtype, dbox_);
197+
for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce_tmp[dd] = dforce_[dd];
198+
for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd];
199+
dener = dener_;
200+
#endif
186201
cvt.backward (dforce, dforce_tmp, 3);
187202
hasdata = true;
188203
}

source/lib/include/SimulationRegion.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030
public:
3131
void toFaceDistance (double * dd) const;
3232
public:
33-
void phys2Inter (double * i_v, const double * p_v) const;
34-
void inter2Phys (double * p_v, const double * i_v) const;
33+
void phys2Inter (double * i_v, const VALUETYPE * p_v) const;
34+
void inter2Phys (VALUETYPE * p_v, const double * i_v) const;
3535
public:
3636
bool isPeriodic (const int dim) const {return is_periodic[dim];}
3737
static int compactIndex (const int * idx) ;

source/lib/include/SimulationRegion_Impl.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ inline void
8282
SimulationRegion<VALUETYPE>::
8383
affineTransform (const double * affine_map)
8484
{
85-
8685
tensorDotVector (boxt+SPACENDIM*0, affine_map, boxt+SPACENDIM*0);
8786
tensorDotVector (boxt+SPACENDIM*1, affine_map, boxt+SPACENDIM*1);
8887
tensorDotVector (boxt+SPACENDIM*2, affine_map, boxt+SPACENDIM*2);
@@ -129,7 +128,8 @@ computeShiftVec ()
129128
inter_posi[0] = ii;
130129
inter_posi[1] = jj;
131130
inter_posi[2] = kk;
132-
inter2Phys (posi, inter_posi);
131+
// inter2Phys (posi, inter_posi);
132+
tensorTransDotVector (posi, boxt, inter_posi);
133133
}
134134
}
135135
}
@@ -302,7 +302,7 @@ diffNearestNeighbor (const VALUETYPE x0,
302302
// diffNearestNeighbor (0, x0, x1, dx);
303303
// diffNearestNeighbor (1, y0, y1, dy);
304304
// diffNearestNeighbor (2, z0, z1, dz);
305-
double phys [3];
305+
VALUETYPE phys [3];
306306
double inter[3];
307307
phys[0] = x0 - x1;
308308
phys[1] = y0 - y1;
@@ -336,7 +336,7 @@ diffNearestNeighbor (const VALUETYPE x0,
336336
// diffNearestNeighbor (0, x0, x1, dx, shift_x);
337337
// diffNearestNeighbor (1, y0, y1, dy, shift_y);
338338
// diffNearestNeighbor (2, z0, z1, dz, shift_z);
339-
double phys [3];
339+
VALUETYPE phys [3];
340340
double inter[3];
341341
phys[0] = x0 - x1;
342342
phys[1] = y0 - y1;
@@ -370,7 +370,7 @@ diffNearestNeighbor (const VALUETYPE x0,
370370
// diffNearestNeighbor (0, x0, x1, dx, shift_x);
371371
// diffNearestNeighbor (1, y0, y1, dy, shift_y);
372372
// diffNearestNeighbor (2, z0, z1, dz, shift_z);
373-
double phys [3];
373+
VALUETYPE phys [3];
374374
double inter[3];
375375
phys[0] = x0 - x1;
376376
phys[1] = y0 - y1;
@@ -393,17 +393,21 @@ diffNearestNeighbor (const VALUETYPE x0,
393393
template <typename VALUETYPE>
394394
inline void
395395
SimulationRegion<VALUETYPE>::
396-
phys2Inter (double * i_v, const double * p_v) const
396+
phys2Inter (double * i_v, const VALUETYPE * p_v_) const
397397
{
398+
double p_v[3];
399+
for (int dd = 0; dd < 3; ++dd) p_v[dd] = p_v_[dd];
398400
tensorDotVector (i_v, rec_boxt, p_v);
399401
}
400402

401403
template <typename VALUETYPE>
402404
inline void
403405
SimulationRegion<VALUETYPE>::
404-
inter2Phys (double * p_v, const double * i_v) const
406+
inter2Phys (VALUETYPE * p_v_, const double * i_v) const
405407
{
408+
double p_v[3];
406409
tensorTransDotVector (p_v, boxt, i_v);
410+
for (int dd = 0; dd < 3; ++dd) p_v_[dd] = p_v[dd];
407411
}
408412

409413
template <typename VALUETYPE>

source/lib/src/NNPInter.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@ make_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
7979
datype.insert (datype.end(), datype_.begin() + nloc, datype_.end());
8080

8181
SimulationRegion<VALUETYPE> region;
82-
region.reinitBox (&dbox[0]);
82+
vector<double > dbox_(9);
83+
for (int dd = 0; dd < 9; ++dd) dbox_[dd] = dbox[dd];
84+
region.reinitBox (&dbox_[0]);
8385
double box_l[3];
8486
region.toFaceDistance (box_l);
8587

source/lib/src/SimulationRegion.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@
44
#include "SimulationRegion_Impl.h"
55

66
template class SimulationRegion<double>;
7+
template class SimulationRegion<float>;
78

89
#endif

source/lmp/env.sh.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ TF_INCLUDE_DIRS=`echo $TENSORFLOW_INCLUDE_DIRS | sed "s/;/ -I/g"`
66
TF_LIBRARY_PATH=`echo $TENSORFLOW_LIBRARY_PATH | sed "s/;/ -L/g"`
77
TF_RPATH=`echo $TENSORFLOW_LIBRARY_PATH | sed "s/;/ -Wl,-rpath=/g"`
88

9-
NNP_INC=" -std=c++11 -DHIGH_PREC -I$TF_INCLUDE_DIRS -I$DEEPMD_ROOT/include/deepmd "
9+
NNP_INC=" -std=c++11 @PREC_DEF@ -I$TF_INCLUDE_DIRS -I$DEEPMD_ROOT/include/deepmd "
1010
NNP_PATH=" -L$TF_LIBRARY_PATH -L$DEEPMD_ROOT/lib"
1111
NNP_LIB=" -Wl,--no-as-needed -l@LIB_DEEPMD_OP@ -l@LIB_DEEPMD@ -ltensorflow_cc -ltensorflow_framework -Wl,-rpath=$TF_RPATH -Wl,-rpath=$DEEPMD_ROOT/lib"

source/lmp/pair_nnp.cpp

Lines changed: 106 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,13 +106,45 @@ void PairNNP::compute(int eflag, int vflag)
106106
LammpsNeighborList lmp_list (list->inum, list->ilist, list->numneigh, list->firstneigh);
107107
if (numb_models == 1) {
108108
if ( ! (eflag_atom || vflag_atom) ) {
109+
#ifdef HIGH_PREC
109110
nnp_inter.compute (dener, dforce, dvirial, dcoord, dtype, dbox, nghost, lmp_list);
111+
#else
112+
vector<float> dcoord_(dcoord.size());
113+
vector<float> dbox_(dbox.size());
114+
for (unsigned dd = 0; dd < dcoord.size(); ++dd) dcoord_[dd] = dcoord[dd];
115+
for (unsigned dd = 0; dd < dbox.size(); ++dd) dbox_[dd] = dbox[dd];
116+
vector<float> dforce_(dforce.size(), 0);
117+
vector<float> dvirial_(dvirial.size(), 0);
118+
float dener_ = 0;
119+
nnp_inter.compute (dener_, dforce_, dvirial_, dcoord_, dtype, dbox_, nghost, lmp_list);
120+
for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd];
121+
for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd];
122+
dener = dener_;
123+
#endif
110124
}
111125
// do atomic energy and virial
112126
else {
113127
vector<double > deatom (nall * 1, 0);
114128
vector<double > dvatom (nall * 9, 0);
129+
#ifdef HIGH_PREC
115130
nnp_inter.compute (dener, dforce, dvirial, deatom, dvatom, dcoord, dtype, dbox, nghost, lmp_list);
131+
#else
132+
vector<float> dcoord_(dcoord.size());
133+
vector<float> dbox_(dbox.size());
134+
for (unsigned dd = 0; dd < dcoord.size(); ++dd) dcoord_[dd] = dcoord[dd];
135+
for (unsigned dd = 0; dd < dbox.size(); ++dd) dbox_[dd] = dbox[dd];
136+
vector<float> dforce_(dforce.size(), 0);
137+
vector<float> dvirial_(dvirial.size(), 0);
138+
vector<float> deatom_(dforce.size(), 0);
139+
vector<float> dvatom_(dforce.size(), 0);
140+
float dener_ = 0;
141+
nnp_inter.compute (dener_, dforce_, dvirial_, deatom_, dvatom_, dcoord_, dtype, dbox_, nghost, lmp_list);
142+
for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd];
143+
for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd];
144+
for (unsigned dd = 0; dd < deatom.size(); ++dd) deatom[dd] = deatom_[dd];
145+
for (unsigned dd = 0; dd < dvatom.size(); ++dd) dvatom[dd] = dvatom_[dd];
146+
dener = dener_;
147+
#endif
116148
if (eflag_atom) {
117149
for (int ii = 0; ii < nlocal; ++ii) eatom[ii] += deatom[ii];
118150
}
@@ -129,6 +161,9 @@ void PairNNP::compute(int eflag, int vflag)
129161
}
130162
}
131163
else {
164+
vector<double > deatom (nall * 1, 0);
165+
vector<double > dvatom (nall * 9, 0);
166+
#ifdef HIGH_PREC
132167
vector<double> all_energy;
133168
vector<vector<double>> all_virial;
134169
vector<vector<double>> all_atom_energy;
@@ -137,10 +172,42 @@ void PairNNP::compute(int eflag, int vflag)
137172
nnp_inter_model_devi.compute_avg (dener, all_energy);
138173
nnp_inter_model_devi.compute_avg (dforce, all_force);
139174
nnp_inter_model_devi.compute_avg (dvirial, all_virial);
140-
vector<double > deatom (nall * 1, 0);
141-
vector<double > dvatom (nall * 9, 0);
142175
nnp_inter_model_devi.compute_avg (deatom, all_atom_energy);
143176
nnp_inter_model_devi.compute_avg (dvatom, all_atom_virial);
177+
#else
178+
vector<float> dcoord_(dcoord.size());
179+
vector<float> dbox_(dbox.size());
180+
for (unsigned dd = 0; dd < dcoord.size(); ++dd) dcoord_[dd] = dcoord[dd];
181+
for (unsigned dd = 0; dd < dbox.size(); ++dd) dbox_[dd] = dbox[dd];
182+
vector<float> dforce_(dforce.size(), 0);
183+
vector<float> dvirial_(dvirial.size(), 0);
184+
vector<float> deatom_(dforce.size(), 0);
185+
vector<float> dvatom_(dforce.size(), 0);
186+
float dener_ = 0;
187+
vector<float> all_energy_;
188+
vector<vector<float>> all_force_;
189+
vector<vector<float>> all_virial_;
190+
vector<vector<float>> all_atom_energy_;
191+
vector<vector<float>> all_atom_virial_;
192+
nnp_inter_model_devi.compute(all_energy_, all_force_, all_virial_, all_atom_energy_, all_atom_virial_, dcoord_, dtype, dbox_, nghost, lmp_list);
193+
nnp_inter_model_devi.compute_avg (dener_, all_energy_);
194+
nnp_inter_model_devi.compute_avg (dforce_, all_force_);
195+
nnp_inter_model_devi.compute_avg (dvirial_, all_virial_);
196+
nnp_inter_model_devi.compute_avg (deatom_, all_atom_energy_);
197+
nnp_inter_model_devi.compute_avg (dvatom_, all_atom_virial_);
198+
dener = dener_;
199+
for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd];
200+
for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd];
201+
for (unsigned dd = 0; dd < deatom.size(); ++dd) deatom[dd] = deatom_[dd];
202+
for (unsigned dd = 0; dd < dvatom.size(); ++dd) dvatom[dd] = dvatom_[dd];
203+
all_force.resize(all_force_.size());
204+
for (unsigned ii = 0; ii < all_force_.size(); ++ii){
205+
all_force[ii].resize(all_force_[ii].size());
206+
for (unsigned jj = 0; jj < all_force_[ii].size(); ++jj){
207+
all_force[ii][jj] = all_force_[ii][jj];
208+
}
209+
}
210+
#endif
144211
if (eflag_atom) {
145212
for (int ii = 0; ii < nlocal; ++ii) eatom[ii] += deatom[ii];
146213
}
@@ -160,10 +227,23 @@ void PairNNP::compute(int eflag, int vflag)
160227
if (newton_pair) {
161228
comm->reverse_comm_pair(this);
162229
}
163-
vector<double> tmp_avg_f;
164230
vector<double> std_f;
231+
#ifdef HIGH_PREC
232+
vector<double> tmp_avg_f;
165233
nnp_inter_model_devi.compute_avg (tmp_avg_f, all_force);
166234
nnp_inter_model_devi.compute_std_f (std_f, tmp_avg_f, all_force);
235+
#else
236+
vector<float> tmp_avg_f_, std_f_;
237+
for (unsigned ii = 0; ii < all_force_.size(); ++ii){
238+
for (unsigned jj = 0; jj < all_force_[ii].size(); ++jj){
239+
all_force_[ii][jj] = all_force[ii][jj];
240+
}
241+
}
242+
nnp_inter_model_devi.compute_avg (tmp_avg_f_, all_force_);
243+
nnp_inter_model_devi.compute_std_f (std_f_, tmp_avg_f_, all_force_);
244+
std_f.resize(std_f_.size());
245+
for (int dd = 0; dd < std_f_.size(); ++dd) std_f[dd] = std_f_[dd];
246+
#endif
167247
double min = 0, max = 0, avg = 0;
168248
ana_st(max, min, avg, std_f, nlocal);
169249
int all_nlocal = 0;
@@ -174,10 +254,18 @@ void PairNNP::compute(int eflag, int vflag)
174254
MPI_Reduce (&avg, &all_f_avg, 1, MPI_DOUBLE, MPI_SUM, 0, world);
175255
all_f_avg /= double(all_nlocal);
176256
// std energy
177-
vector<double > tmp_avg_e;
178257
vector<double > std_e;
258+
#ifdef HIGH_PREC
259+
vector<double > tmp_avg_e;
179260
nnp_inter_model_devi.compute_avg (tmp_avg_e, all_atom_energy);
180261
nnp_inter_model_devi.compute_std_e (std_e, tmp_avg_e, all_atom_energy);
262+
#else
263+
vector<float> tmp_avg_e_, std_e_;
264+
nnp_inter_model_devi.compute_avg (tmp_avg_e_, all_atom_energy_);
265+
nnp_inter_model_devi.compute_std_e (std_e_, tmp_avg_e_, all_atom_energy_);
266+
std_e.resize(std_e_.size());
267+
for (int dd = 0; dd < std_e_.size(); ++dd) std_e[dd] = std_e_[dd];
268+
#endif
181269
min = max = avg = 0;
182270
ana_st(max, min, avg, std_e, nlocal);
183271
double all_e_min = 0, all_e_max = 0, all_e_avg = 0;
@@ -209,7 +297,21 @@ void PairNNP::compute(int eflag, int vflag)
209297
}
210298
else {
211299
if (numb_models == 1) {
300+
#ifdef HIGH_PREC
212301
nnp_inter.compute (dener, dforce, dvirial, dcoord, dtype, dbox, nghost);
302+
#else
303+
vector<float> dcoord_(dcoord.size());
304+
vector<float> dbox_(dbox.size());
305+
for (unsigned dd = 0; dd < dcoord.size(); ++dd) dcoord_[dd] = dcoord[dd];
306+
for (unsigned dd = 0; dd < dbox.size(); ++dd) dbox_[dd] = dbox[dd];
307+
vector<float> dforce_(dforce.size(), 0);
308+
vector<float> dvirial_(dvirial.size(), 0);
309+
float dener_ = 0;
310+
nnp_inter.compute (dener_, dforce_, dvirial_, dcoord_, dtype, dbox_, nghost);
311+
for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd];
312+
for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd];
313+
dener = dener_;
314+
#endif
213315
}
214316
else {
215317
error->all(FLERR,"Serial version does not support model devi");

source/train/CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,19 @@
11
# train
22

3-
file(GLOB LIB_PY Data.py DataSystem.py Model.py Test.py TestNorot.py RunOptions.py)
3+
configure_file("RunOptions.py.in" "${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py" @ONLY)
4+
5+
file(GLOB LIB_PY Data.py DataSystem.py Model.py Test.py TestNorot.py ${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py)
6+
7+
file(GLOB CLS_PY Local.py Slurm.py)
48

59
install(
610
FILES ${LIB_PY}
711
DESTINATION lib/deepmd
812
)
13+
install(
14+
FILES ${CLS_PY}
15+
DESTINATION lib/deepmd/cluster
16+
)
917
install(
1018
PROGRAMS train.py
1119
DESTINATION bin/

0 commit comments

Comments
 (0)