Skip to content

Commit fa6c21f

Browse files
authored
Merge pull request brucefan1983#798 from zhyan0603/zhyan0603-GPUMD
clean up tools
2 parents c5ed706 + e16cf76 commit fa6c21f

File tree

7 files changed

+249
-29
lines changed

7 files changed

+249
-29
lines changed

tools/cp2k2xyz/xyz2cp2k.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ export PATH=$PATH:/home/chen/software/cp2k-2024.1/exe/local
99
xyz_file="trj.xyz"
1010
template_inp="test.inp"
1111

12+
dos2unix ${xyz_file} ${template_inp}
13+
1214
# Set the number of cores
1315
num_cores=48 # You can change this value as needed
1416

tools/exyz2pdb/exyz2pdb.tcl

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
proc read_exyz {filename} {
2+
set file [open $filename "r"]
3+
set structures {}
4+
5+
while {[gets $file line] >= 0} {
6+
set natoms [string trim $line]
7+
if {![gets $file line]} break
8+
9+
set lattice {1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0}
10+
11+
if {[regexp {Lattice="([^"]+)"} $line -> lattice_values]} {
12+
set lattice [split $lattice_values]
13+
}
14+
15+
# Extract lattice vectors
16+
set a1 [lindex $lattice 0]; set a2 [lindex $lattice 1]; set a3 [lindex $lattice 2]
17+
set b1 [lindex $lattice 3]; set b2 [lindex $lattice 4]; set b3 [lindex $lattice 5]
18+
set c1 [lindex $lattice 6]; set c2 [lindex $lattice 7]; set c3 [lindex $lattice 8]
19+
20+
# Calculate cell lengths
21+
set a [expr {sqrt($a1*$a1 + $a2*$a2 + $a3*$a3)}]
22+
set b [expr {sqrt($b1*$b1 + $b2*$b2 + $b3*$b3)}]
23+
set c [expr {sqrt($c1*$c1 + $c2*$c2 + $c3*$c3)}]
24+
25+
# Calculate angles
26+
set alpha [expr {acos(($b1*$c1 + $b2*$c2 + $b3*$c3) / ($b*$c)) * 180 / 3.1415926535}]
27+
set beta [expr {acos(($a1*$c1 + $a2*$c2 + $a3*$c3) / ($a*$c)) * 180 / 3.1415926535}]
28+
set gamma [expr {acos(($a1*$b1 + $a2*$b2 + $a3*$b3) / ($a*$b)) * 180 / 3.1415926535}]
29+
30+
set atoms {}
31+
for {set i 0} {$i < $natoms} {incr i} {
32+
gets $file line
33+
lappend atoms $line
34+
}
35+
36+
lappend structures [list $a $b $c $alpha $beta $gamma $atoms]
37+
}
38+
close $file
39+
return $structures
40+
}
41+
42+
proc write_pdb {structures output_filename} {
43+
set pdb_file [open $output_filename "w"]
44+
45+
foreach structure $structures {
46+
lassign $structure a b c alpha beta gamma atoms
47+
48+
puts $pdb_file [format "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1 1" $a $b $c $alpha $beta $gamma]
49+
50+
set atom_index 1
51+
foreach atom $atoms {
52+
lassign [split $atom] element x y z
53+
puts $pdb_file [format "ATOM %5d %-4s MOL 1 %8.3f%8.3f%8.3f 1.00 0.00 %-2s" $atom_index $element $x $y $z $element]
54+
incr atom_index
55+
}
56+
57+
puts $pdb_file "END"
58+
}
59+
close $pdb_file
60+
}
61+
62+
# 使用示例
63+
set structures [read_exyz "2.xyz"]
64+
set pdb_filename "output_file.pdb"
65+
write_pdb $structures $pdb_filename
66+
67+
# 加载生成的 PDB 文件并在 VMD 中显示
68+
mol new $pdb_filename

tools/gmx2exyz/gmx2exyz.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#Convert the trr trajectory of gmx to the exyz trajectory
2+
#write in the potential energy (eV) and atomic forces (eV/A)
3+
#Before use, run the gmx energy command to obtain the energy. xvg file that records potential energy
4+
5+
#Author:Zherui Chen ([email protected])
6+
7+
import MDAnalysis as mda
8+
9+
u = mda.Universe('nvt.tpr', 'test.trr')
10+
11+
sel = u.select_atoms('all', updating=False)
12+
13+
# Load the energy file
14+
with open('energy.xvg', 'r') as f:
15+
for i in range(24):
16+
f.readline() # Skip the first 24 lines of comments
17+
energies = [float(line.split()[1]) / 96.48533212331 for line in f] # Convert from kJ/mol to eV
18+
19+
with open('your_exyz_file.xyz', 'w') as f:
20+
for i, ts in enumerate(u.trajectory):
21+
22+
f.write('{}\n'.format(sel.n_atoms))
23+
24+
box = ts.dimensions[:3]
25+
f.write('energy={:.8f} config_type=gmx2xyz pbc="T T T" Lattice="{} 0.0 0.0 0.0 {} 0.0 0.0 0.0 {}" Properties=species:S:1:pos:R:3:force:R:3\n'.format(energies[i], box[0], box[1], box[2]))
26+
27+
for atom in sel:
28+
# Get the force on the atom
29+
force = atom.force / 96.48533212331 # Convert from kJ/(mol*A) to eV/A
30+
31+
# Write the atom element, position, and force to the output file
32+
f.write('{} {} {} {} {} {} {}\n'.format(atom.element, atom.position[0], atom.position[1], atom.position[2], force[0], force[1], force[2]))
33+
34+
35+
36+

tools/readme.md

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,34 @@
55
* If you have questions on a tool, you can try to contact the creator.
66

77

8-
| folder | creator | brief description |
9-
| -------------------- | ------------ | ------------------------------------------------------------ |
10-
| abacus2xyz | Benrui Tang | Get `train.xyz` from `ABACUS` outputs. |
11-
| add_groups | Yuwen Zhang | Generate grouping method(s) for `model.xyz`. |
12-
| castep2exyz | Yanzhou Wang | Get `train.xyz` from `CASTEP` outputs. |
13-
| cp2k2xyz | Zherui Chen | Get `train.xyz` from `CP2K` outputs or vice versa. |
14-
| deep2nep | Ke Xu | Oudated? |
15-
| doc_3.3.1 | Zheyong Fan | Documentation for some parts of GPUMD-v3.3.1. |
16-
| dp2xyz | Ke Xu | Convert `DP` training data to xyz format. |
17-
| for_coding | Zheyong Fan | Something useful for Zheyong Fan only. |
18-
| get_max_rmse_xyz | Who? | Identify sturctures with the largest force errors. |
19-
| gpumdkit | Zihan Yan | A shell toolkit for GPUMD |
20-
| md_tersoff | Zheyong Fan | Already in MD book; can be removed later. |
21-
| mtp2nep | Junjie Wang? | Outdated? |
22-
| mtp2xyz | Junjie Wang? | Convert `MTP` training data to xyz format. |
23-
| nep2xyz | Ke Xu | Outdated? |
24-
| pca_sampling | Penghua Ying | farthest-point sampling based on `calorine` |
25-
| perturbed2poscar | Who? | What? |
26-
| rdf_adf | Ke Xu | Calculate RDF and ADF using `OVITO`. |
27-
| runner2xyz | Ke Xu | Convert `RUNNER` training data to xyz format. |
28-
| shift_energy_to_zero | Nan Xu | Shift the average energy of each species to zero for a dataset. |
29-
| split_xyz | Yong Wang | Some functionalities for trainnig/test data. |
30-
| vasp2xyz | Yanzhou Wang | Get `train.xyz` from `VASP` outputs. |
31-
| vim | Ke Xu | Highlight GPUMD grammar in `vim`. |
32-
| xyz2gro | Zherui Chen | Convert `xyz` file to `gro` file. |
33-
| [NepTrainKit](https://github.com/aboys-cb/NepTrainKit) | Chengbing Chen| NEP data visualization interface program |
34-
8+
9+
10+
| Folder | Creator | Email | Brief Description |
11+
| -------------------- | ------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
12+
| abacus2xyz | Benrui Tang | [email protected] | Get `train.xyz` from `ABACUS` outputs. |
13+
| add_groups | Yuwen Zhang | [email protected] | Generate grouping method(s) for `model.xyz`. |
14+
| castep2exyz | Yanzhou Wang | [email protected] | Get `train.xyz` from `CASTEP` outputs. |
15+
| cp2k2xyz | Zherui Chen | [email protected] | Get `train.xyz` from `CP2K` outputs or vice versa. |
16+
| deep2nep | Ke Xu | [email protected] | Oudated? |
17+
| doc_3.3.1 | Zheyong Fan | [email protected] | Documentation for some parts of GPUMD-v3.3.1. |
18+
| dp2xyz | Ke Xu | [email protected] | Convert `DP` training data to `xyz` format. |
19+
| exyz2pdb | Zherui Chem | [email protected] | Convert `exyz` to `pdb`. |
20+
| for_coding | Zheyong Fan | [email protected] | Something useful for Zheyong Fan only. |
21+
| get_max_rmse_xyz | Ke Xu | [email protected] | Identify structures with the largest errors. |
22+
| gmx2exyz | Zherui Chen | [email protected] | Convert the `trr` trajectory of `gmx` to the `exyz` trajectory. |
23+
| gpumdkit | Zihan Yan | [email protected] | A shell toolkit for GPUMD. |
24+
| md_tersoff | Zheyong Fan | [email protected] | Already in MD book; can be removed later. |
25+
| mtp2nep | Who? | | Outdated? |
26+
| mtp2xyz | Ke Xu | [email protected] | Convert `MTP` training data to xyz format. |
27+
| nep2xyz | Ke Xu | [email protected] | Outdated? |
28+
| pca_sampling | Penghua Ying | [email protected] | Farthest-point sampling based on `calorine`. |
29+
| perturbed2poscar | Who? | | What? |
30+
| rdf_adf | Ke Xu | [email protected] | Calculate RDF and ADF using `OVITO`. |
31+
| runner2xyz | Ke Xu | [email protected] | Convert `RUNNER` training data to `xyz` format. |
32+
| select_xyz_frames | Zherui Chen | [email protected] | Select frames from the `exyz` file. |
33+
| shift_energy_to_zero | Nan Xu | [email protected] | Shift the average energy of each species to zero for a dataset. |
34+
| split_xyz | Yong Wang | [email protected] | Some functionalities for training/test data. |
35+
| vasp2xyz | Yanzhou Wang | [email protected] | Get `train.xyz` from `VASP` outputs. |
36+
| vim | Ke Xu | [email protected] | Highlight GPUMD grammar in `vim`. |
37+
| xyz2gro | Who? | | Convert `xyz` file to `gro` file. |
38+
| [NepTrainKit](https://github.com/aboys-cb/NepTrainKit) | Chengbing Chen| [email protected] | NEP data visualization interface program. |
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#Select frames from the exyz trajectory file.
2+
#When an atom in a frame experiences a force greater than the force_threshold in any direction, the frame is removed. eV/A
3+
#When the total energy difference between adjacent frames is less than energy_threshold, only one frame is retained. eV
4+
#When the RMSD difference between adjacent frames is less than rmsd_threshold, only one frame is retained. A^2
5+
#If set to 'not', do not select based on this condition.
6+
7+
#Author:Zherui Chen ([email protected])
8+
9+
import numpy as np
10+
11+
def parse_xyz_file(filename):
12+
with open(filename, 'r') as file:
13+
lines = file.readlines()
14+
15+
frames = []
16+
i = 0
17+
while i < len(lines):
18+
num_atoms = int(lines[i].strip())
19+
frame_info = lines[i + 1].strip()
20+
21+
22+
energy_str = frame_info.split('energy=')[1].split()[0]
23+
energy = float(energy_str)
24+
25+
26+
lattice_str = frame_info.split('Lattice="')[1].split('"')[0]
27+
lattice = np.array(list(map(float, lattice_str.split()))).reshape(3, 3)
28+
29+
atoms = lines[i + 2:i + 2 + num_atoms]
30+
frames.append((num_atoms, frame_info, energy, lattice, atoms))
31+
i += 2 + num_atoms
32+
return frames
33+
34+
def force_exceeds_threshold(atom_line, threshold):
35+
if threshold == "not":
36+
return False
37+
forces = list(map(float, atom_line.split()[4:7]))
38+
return any(abs(force) > threshold for force in forces)
39+
40+
def calculate_rmsd(frame1_atoms, frame2_atoms, lattice):
41+
num_atoms = len(frame1_atoms)
42+
rmsd_sum = 0.0
43+
44+
for atom1, atom2 in zip(frame1_atoms, frame2_atoms):
45+
pos1 = np.array(list(map(float, atom1.split()[1:4])))
46+
pos2 = np.array(list(map(float, atom2.split()[1:4])))
47+
48+
# Calculate the minimum image distance
49+
diff = pos2 - pos1
50+
diff -= np.round(diff @ np.linalg.inv(lattice)) @ lattice
51+
52+
rmsd_sum += np.dot(diff, diff)
53+
54+
return np.sqrt(rmsd_sum / num_atoms)
55+
56+
def filter_frames(frames, force_threshold, energy_threshold, rmsd_threshold):
57+
filtered_frames = []
58+
59+
if frames:
60+
filtered_frames.append(frames[0])
61+
62+
for j in range(1, len(frames)):
63+
prev_frame = filtered_frames[-1]
64+
current_frame = frames[j]
65+
66+
num_atoms, frame_info, energy, lattice, atoms = current_frame
67+
68+
69+
if force_threshold != "not" and any(force_exceeds_threshold(atom, force_threshold) for atom in atoms):
70+
continue
71+
72+
73+
if energy_threshold != "not":
74+
prev_energy = prev_frame[2]
75+
if abs(energy - prev_energy) < energy_threshold:
76+
continue
77+
78+
79+
if rmsd_threshold != "not":
80+
prev_atoms = prev_frame[4]
81+
rmsd = calculate_rmsd(prev_atoms, atoms, lattice)
82+
if rmsd < rmsd_threshold:
83+
continue
84+
85+
filtered_frames.append(current_frame)
86+
87+
return filtered_frames
88+
89+
def write_xyz_file(frames, output_filename):
90+
with open(output_filename, 'w') as file:
91+
for num_atoms, frame_info, energy, lattice, atoms in frames:
92+
file.write(f"{num_atoms}\n")
93+
file.write(f"{frame_info}\n")
94+
for atom in atoms:
95+
file.write(f"{atom.strip()}\n")
96+
97+
98+
force_threshold = 20.0
99+
energy_threshold = "not"
100+
rmsd_threshold = "not"
101+
input_filename = 'coal-nep.xyz'
102+
output_filename = 'output.xyz'
103+
104+
frames = parse_xyz_file(input_filename)
105+
filtered_frames = filter_frames(frames, force_threshold, energy_threshold, rmsd_threshold)
106+
write_xyz_file(filtered_frames, output_filename)
107+
108+
print(f"Filtered frames written to {output_filename}")

tools/vasp2xyz/outcar2xyz/multipleFrames-outcars2nep-exyz.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ total_outcar=$(find -L "$read_dire" -name "OUTCAR" | wc -l)
3333
converged_files=()
3434
non_converged_files=()
3535

36+
echo "Checking the convergence of OUTCARs ..."
37+
3638
for file in $(find "$read_dire" -name "OUTCAR"); do
3739
NSW=$(grep "number of steps for IOM" "$file" | awk '{print $3}')
3840

tools/vasp2xyz/outcar2xyz/singleFrame-outcars2nep-exyz.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ do
2929
if [[ $viri_logi -eq 1 ]]
3030
then
3131
viri=$(grep -A 20 "FORCE on cell =-STRESS" $i | grep "Total " | tail -n 1 | awk '{print $2,$5,$7,$5,$3,$6,$7,$6,$4}')
32-
echo "Config_type=$configuration Weight=1.0 Lattice=\"$latt\" Energy=$ener Virial=\"$viri\" Properties=species:S:1:pos:R:3:forces:R:3" >> $writ_dire/$writ_file
32+
echo "Config_type=$configuration Weight=1.0 Lattice=\"$latt\" Energy=$ener Virial=\"$viri\" pbc=\"T T T\" Properties=species:S:1:pos:R:3:forces:R:3" >> $writ_dire/$writ_file
3333
else
34-
echo "Config_type=$configuration Weight=1.0 Lattice=\"$latt\" Energy=$ener Properties=species:S:1:pos:R:3:forces:R:3" >> $writ_dire/$writ_file
34+
echo "Config_type=$configuration Weight=1.0 Lattice=\"$latt\" Energy=$ener pbc=\"T T T\" Properties=species:S:1:pos:R:3:forces:R:3" >> $writ_dire/$writ_file
3535
fi
3636
ion_numb_arra=($(grep "ions per type" $i | tail -n 1 | awk -F"=" '{print $2}'))
3737
ion_symb_arra=($(grep "POTCAR:" $i | awk '{print $3}' | awk -F"_" '{print $1}' | awk '!seen[$0]++'))

0 commit comments

Comments
 (0)