When running dpgen, the dp command always goes into sleep mode #1203
Unanswered
SEU-NiuWenLong
asked this question in
Q&A
Replies: 2 comments
-
For example: `9969 gcniu 20 0 5275656 252368 106908 R 33.6 0.1 6:47.56 lmp` — the CPU utilization is only about 33% at this time.
Beta Was this translation helpful? Give feedback.
0 replies
-
The |
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
When I run the methane example from the official website with the number of training steps in the parameter file set to tens of thousands, the dp command goes to sleep and stops working. Why does this happen? In addition, the server I use has a 24-core CPU, and I set all 24 cores in the machine settings file, but the CPU utilization is very low.
The parameter file is as follows:
{
"type_map": ["H","C"],
"mass_map": [1,12],
"init_data_prefix": "../",
"init_data_sys": ["init/CH4.POSCAR.01x01x01/02.md/sys-0004-0001/deepmd"],
"sys_configs_prefix": "../",
"sys_configs": [
["init/CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale-1.000/00000*/POSCAR"],
["init/CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale-1.000/00001*/POSCAR"],
["init/CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale-1.000/00002*/POSCAR"],
["init/CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale-1.000/00001*/POSCAR"],
["init/CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale-1.000/00002*/POSCAR"]
],
"_comment": " that's all ",
"numb_models": 4,
"default_training_param": {
"model": {
"type_map": ["H","C"],
"descriptor": {
"type": "se_a",
"sel": [16,4],
"rcut_smth": 0.5,
"rcut": 5.0,
"neuron": [120,120,120],
"resnet_dt": true,
"axis_neuron": 12,
"seed": 1
},
"fitting_net": {
"neuron": [25,50,100],
"resnet_dt": false,
"seed": 1
}
},
"learning_rate": {
"type": "exp",
"start_lr": 0.001,
"decay_steps": 5000
},
"loss": {
"start_pref_e": 0.02,
"limit_pref_e": 2,
"start_pref_f": 1000,
"limit_pref_f": 1,
"start_pref_v": 0.0,
"limit_pref_v": 0.0
},
"training": {
"stop_batch": 20000,
"disp_file": "lcurve.out",
"disp_freq": 1000,
"numb_test": 4,
"save_freq": 1000,
"save_ckpt": "model.ckpt",
"disp_training": true,
"time_training": true,
"profiling": false,
"profiling_file": "timeline.json",
"_comment": "that's all"
}
},
"model_devi_dt": 0.002,
"model_devi_skip": 0,
"model_devi_f_trust_lo": 0.05,
"model_devi_f_trust_hi": 0.15,
"model_devi_e_trust_lo": 10000000000.0,
"model_devi_e_trust_hi": 10000000000.0,
"model_devi_clean_traj": true,
"model_devi_jobs": [
{"sys_idx": [0],"temps": [100],"press": [1.0],"trj_freq": 10,"nsteps": 300,"ensemble": "nvt","_idx": "00"},
{"sys_idx": [1],"temps": [100],"press": [1.0],"trj_freq": 10,"nsteps": 3000,"ensemble": "nvt","_idx": "01"},
{"sys_idx": [2],"temps": [100],"press": [1.0],"trj_freq": 10,"nsteps": 3000,"ensemble": "nvt","_idx": "02"},
{"sys_idx": [3],"temps": [100],"press": [1.0],"trj_freq": 10,"nsteps": 3000,"ensemble": "nvt","_idx": "03"},
{"sys_idx": [4],"temps": [100],"press": [1.0],"trj_freq": 10,"nsteps": 3000,"ensemble": "nvt","_idx": "04"}
],
"fp_style": "vasp",
"shuffle_poscar": false,
"fp_task_max": 30,
"fp_task_min": 5,
"fp_pp_path": "./",
"fp_pp_files": ["POTCAR_H","POTCAR_C"],
"fp_incar": "./INCAR_methane"
}
The machine settings file is as follows:
{
"api_version": "1.0",
"deepmd_version": "2.2.1",
"train": [
{
"command": "dp",
"machine": {
"batch_type": "Shell",
"context_type": "local",
"local_root": "./",
"remote_root": "/home/gcniu/work/deepmd/dpgen_example/run/temp"
},
"resources": {
"number_node": 1,
"cpu_per_node": 24,
"gpu_per_node": 0,
"group_size": 4
}
}
],
"model_devi": [
{
"command": "mpirun -np 24 lmp -i input.lammps",
"machine": {
"batch_type": "Shell",
"context_type": "local",
"local_root": "./",
"remote_root": "/home/gcniu/work/deepmd/dpgen_example/run/temp"
},
"resources": {
"number_node": 1,
"cpu_per_node": 24,
"gpu_per_node": 0,
"group_size": 4
}
}
],
"fp": [
{
"command": "mpirun -np 24 vasp_std >& log",
"machine": {
"batch_type": "Shell",
"context_type": "local",
"local_root": "./",
"remote_root": "/home/gcniu/work/deepmd/dpgen_example/run/temp"
},
"resources": {
"number_node": 1,
"cpu_per_node": 24,
"gpu_per_node": 0,
"group_size": 1
}
}
]
}
Beta Was this translation helpful? Give feedback.
All reactions