Merge pull request brucefan1983#798 from zhyan0603/zhyan0603-GPUMD

brucefan1983 · web-flow · commit fa6c21ffc67b · 2024-11-23T16:13:12.000+02:00
clean up tools
diff --git a/tools/cp2k2xyz/xyz2cp2k.sh b/tools/cp2k2xyz/xyz2cp2k.sh
@@ -9,6 +9,8 @@ export PATH=$PATH:/home/chen/software/cp2k-2024.1/exe/local
 xyz_file="trj.xyz"
 template_inp="test.inp"
 
+dos2unix ${xyz_file} ${template_inp}
+
 # Set the number of cores
 num_cores=48  # You can change this value as needed
 
diff --git a/tools/exyz2pdb/exyz2pdb.tcl b/tools/exyz2pdb/exyz2pdb.tcl
@@ -0,0 +1,68 @@
+# Read all frames of an extended-XYZ file.
+#
+# Arguments:
+#   filename  path of the extended-XYZ file
+#
+# Returns a list with one element per frame, each of the form
+#   {a b c alpha beta gamma atoms}
+# (cell lengths, cell angles in degrees, list of raw atom lines).  A frame
+# whose comment line has no Lattice="..." entry gets a unit cubic cell.
+# Raises an error if an atom count is not a non-negative integer, if a
+# Lattice entry does not hold nine values, or if a frame has fewer atom
+# lines than announced.
+proc read_exyz {filename} {
+    set file [open $filename "r"]
+    set structures {}
+    set rad2deg [expr {180.0 / acos(-1.0)}]
+
+    # Parse inside catch so the channel is closed even when parsing fails.
+    set status [catch {
+        while {[gets $file line] >= 0} {
+            set natoms [string trim $line]
+            # Tolerate blank lines, e.g. trailing newlines at the end of the file.
+            if {$natoms eq ""} continue
+            if {![string is integer -strict $natoms] || $natoms < 0} {
+                error "invalid atom count \"$natoms\" in $filename"
+            }
+
+            # gets returns -1 at end of file; 0 only means an empty comment line.
+            if {[gets $file line] < 0} break
+
+            set lattice {1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0}
+
+            if {[regexp -nocase {Lattice="([^"]+)"} $line -> lattice_values]} {
+                # Collect whitespace-separated fields; a plain split would
+                # produce empty elements for repeated spaces.
+                set lattice [regexp -all -inline {\S+} $lattice_values]
+                if {[llength $lattice] != 9} {
+                    error "Lattice must hold 9 values, got [llength $lattice] in $filename"
+                }
+            }
+
+            # Extract lattice vectors
+            lassign $lattice a1 a2 a3 b1 b2 b3 c1 c2 c3
+
+            # Calculate cell lengths
+            set a [expr {sqrt($a1*$a1 + $a2*$a2 + $a3*$a3)}]
+            set b [expr {sqrt($b1*$b1 + $b2*$b2 + $b3*$b3)}]
+            set c [expr {sqrt($c1*$c1 + $c2*$c2 + $c3*$c3)}]
+
+            # Calculate angles (degrees) between the lattice vectors
+            set alpha [expr {acos(($b1*$c1 + $b2*$c2 + $b3*$c3) / ($b*$c)) * $rad2deg}]
+            set beta [expr {acos(($a1*$c1 + $a2*$c2 + $a3*$c3) / ($a*$c)) * $rad2deg}]
+            set gamma [expr {acos(($a1*$b1 + $a2*$b2 + $a3*$b3) / ($a*$b)) * $rad2deg}]
+
+            set atoms {}
+            for {set i 0} {$i < $natoms} {incr i} {
+                if {[gets $file line] < 0} {
+                    error "frame truncated: expected $natoms atoms, found $i in $filename"
+                }
+                lappend atoms $line
+            }
+
+            lappend structures [list $a $b $c $alpha $beta $gamma $atoms]
+        }
+    } message options]
+    close $file
+    if {$status != 0} {
+        # Re-raise the original error now that the channel is closed.
+        return -options $options $message
+    }
+    return $structures
+}
+
+# Write frames to a PDB file, one CRYST1 record plus ATOM records per frame,
+# each frame terminated by an END record.
+#
+# Arguments:
+#   structures       list of {a b c alpha beta gamma atoms} entries, the
+#                    layout produced by read_exyz
+#   output_filename  path of the PDB file to create (overwritten if present)
+#
+# The first four whitespace-separated fields of every atom line are taken as
+# element, x, y and z; any further columns are ignored.
+proc write_pdb {structures output_filename} {
+    set pdb_file [open $output_filename "w"]
+
+    foreach structure $structures {
+        lassign $structure a b c alpha beta gamma atoms
+
+        puts $pdb_file [format "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1" $a $b $c $alpha $beta $gamma]
+
+        set atom_index 1
+        foreach atom $atoms {
+            # Take whitespace-separated fields; a plain split would yield
+            # empty elements for aligned columns or leading blanks.
+            set fields [regexp -all -inline {\S+} $atom]
+            # Skip blank lines; malformed non-blank lines still fail in format.
+            if {[llength $fields] == 0} continue
+            lassign $fields element x y z
+            puts $pdb_file [format "ATOM  %5d %-4s MOL     1    %8.3f%8.3f%8.3f  1.00  0.00          %-2s" $atom_index $element $x $y $z $element]
+            incr atom_index
+        }
+
+        puts $pdb_file "END"
+    }
+    close $pdb_file
+}
+
+# Example usage: convert every frame of "2.xyz" to "output_file.pdb"
+set structures [read_exyz "2.xyz"]
+set pdb_filename "output_file.pdb"
+write_pdb $structures $pdb_filename
+
+# Load the generated PDB file into VMD for display
+mol new $pdb_filename
diff --git a/tools/gmx2exyz/gmx2exyz.py b/tools/gmx2exyz/gmx2exyz.py
@@ -0,0 +1,36 @@
+#Convert the trr trajectory of gmx to the exyz trajectory
+#write in the potential energy (eV) and atomic forces (eV/A)
+#Before use, run the gmx energy command to obtain the energy.xvg file that records the potential energy
+
+#Author:Zherui Chen (chenzherui0124@foxmail.com)
+
+import MDAnalysis as mda
+
+u = mda.Universe('nvt.tpr', 'test.trr')
+
+sel = u.select_atoms('all', updating=False)
+
+# Load the energy file
+with open('energy.xvg', 'r') as f:
+    for i in range(24):
+        f.readline()  # Skip the first 24 lines of comments
+    energies = [float(line.split()[1]) / 96.48533212331 for line in f]  # Convert from kJ/mol to eV
+
+with open('your_exyz_file.xyz', 'w') as f:
+    for i, ts in enumerate(u.trajectory):
+
+        f.write('{}\n'.format(sel.n_atoms))
+
+        box = ts.dimensions[:3]
+        f.write('energy={:.8f} config_type=gmx2xyz pbc="T T T" Lattice="{} 0.0 0.0 0.0 {} 0.0 0.0 0.0 {}" Properties=species:S:1:pos:R:3:force:R:3\n'.format(energies[i], box[0], box[1], box[2]))
+
+        for atom in sel:
+            # Get the force on the atom
+            force = atom.force / 96.48533212331 # Convert from kJ/(mol*A) to eV/A
+
+            # Write the atom element, position, and force to the output file
+            f.write('{} {} {} {} {} {} {}\n'.format(atom.element, atom.position[0], atom.position[1], atom.position[2], force[0], force[1], force[2]))
+
+
+
+
diff --git a/tools/readme.md b/tools/readme.md
@@ -5,30 +5,34 @@
 * If you have questions on a tool, you can try to contact the creator.
 
 
-| folder               | creator      | brief description                                            |
-| -------------------- | ------------ | ------------------------------------------------------------ |
-| abacus2xyz           | Benrui Tang  | Get `train.xyz` from `ABACUS` outputs.                       |
-| add_groups           | Yuwen Zhang  | Generate grouping method(s) for `model.xyz`.                 |
-| castep2exyz          | Yanzhou Wang | Get `train.xyz` from `CASTEP` outputs.                       |
-| cp2k2xyz             | Zherui Chen  | Get `train.xyz` from `CP2K` outputs or vice versa.           |
-| deep2nep             | Ke Xu        | Oudated?                                                     |
-| doc_3.3.1            | Zheyong Fan  | Documentation for some parts of GPUMD-v3.3.1.                |
-| dp2xyz               | Ke Xu        | Convert `DP` training data to xyz format.                    |
-| for_coding           | Zheyong Fan  | Something useful for Zheyong Fan only.                       |
-| get_max_rmse_xyz     | Who?         | Identify sturctures with the largest force errors.           |
-| gpumdkit             | Zihan Yan    | A shell toolkit for GPUMD                                    |
-| md_tersoff           | Zheyong Fan  | Already in MD book; can be removed later.                    |
-| mtp2nep              | Junjie Wang? | Outdated?                                                    |
-| mtp2xyz              | Junjie Wang? | Convert `MTP` training data to xyz format.                   |
-| nep2xyz              | Ke Xu        | Outdated?                                                    |
-| pca_sampling         | Penghua Ying | farthest-point sampling based on `calorine`                  |
-| perturbed2poscar     | Who?         | What?                                                        |
-| rdf_adf              | Ke Xu        | Calculate RDF and ADF using `OVITO`.                         |
-| runner2xyz           | Ke Xu        | Convert `RUNNER` training data to xyz format.                |
-| shift_energy_to_zero | Nan Xu       | Shift the average energy of each species to zero for a dataset. |
-| split_xyz            | Yong Wang    | Some functionalities for trainnig/test data.                 |
-| vasp2xyz             | Yanzhou Wang | Get `train.xyz` from `VASP` outputs.                         |
-| vim                  | Ke Xu        | Highlight GPUMD grammar in `vim`.                            |
-| xyz2gro              | Zherui Chen  | Convert `xyz` file to `gro` file.                            |
-| [NepTrainKit](https://github.com/aboys-cb/NepTrainKit)           | Chengbing Chen| NEP data visualization interface program |
- 
+
+
+| Folder               | Creator      | Email                                       | Brief Description                                            |
+| -------------------- | ------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| abacus2xyz           | Benrui Tang  | tang070205@proton.me   | Get `train.xyz` from `ABACUS` outputs.                       |
+| add_groups           | Yuwen Zhang  | 984307703@qq.com | Generate grouping method(s) for `model.xyz`.                 |
+| castep2exyz          | Yanzhou Wang | yanzhowang@gmail.com   | Get `train.xyz` from `CASTEP` outputs.                       |
+| cp2k2xyz             | Zherui Chen  | chenzherui0124@foxmail.com | Get `train.xyz` from `CP2K` outputs or vice versa.           |
+| deep2nep             | Ke Xu        | twtdq@qq.com                                     | Outdated?                                                    |
+| doc_3.3.1            | Zheyong Fan  | brucenju@gmail.com | Documentation for some parts of GPUMD-v3.3.1.                |
+| dp2xyz               | Ke Xu        | twtdq@qq.com        | Convert `DP` training data to `xyz` format.                  |
+| exyz2pdb             | Zherui Chen  | chenzherui0124@foxmail.com  | Convert `exyz` to `pdb`. |
+| for_coding           | Zheyong Fan  | brucenju@gmail.com     | Something useful for Zheyong Fan only.                       |
+| get_max_rmse_xyz     | Ke Xu  | twtdq@qq.com | Identify structures with the largest errors.          |
+| gmx2exyz             | Zherui Chen  | chenzherui0124@foxmail.com  | Convert the `trr` trajectory of `gmx` to the `exyz` trajectory. |
+| gpumdkit             | Zihan Yan    | yanzihan@westlake.edu.cn            | A shell toolkit for GPUMD.                                   |
+| md_tersoff           | Zheyong Fan  | brucenju@gmail.com  | Already in MD book; can be removed later.                    |
+| mtp2nep              | Who? |                                                     | Outdated?                                                    |
+| mtp2xyz              | Ke Xu | twtdq@qq.com       | Convert `MTP` training data to xyz format.                   |
+| nep2xyz              | Ke Xu        | twtdq@qq.com                                        | Outdated?                                                    |
+| pca_sampling         | Penghua Ying | hityingph@163.com | Farthest-point sampling based on `calorine`.                 |
+| perturbed2poscar     | Who?         |                                                         | What?                                                        |
+| rdf_adf              | Ke Xu        | twtdq@qq.com             | Calculate RDF and ADF using `OVITO`.                         |
+| runner2xyz           | Ke Xu        | twtdq@qq.com    | Convert `RUNNER` training data to `xyz` format.             |
+| select_xyz_frames    | Zherui Chen  | chenzherui0124@foxmail.com | Select frames from the `exyz` file. |
+| shift_energy_to_zero | Nan Xu       | tamas@zju.edu.cn | Shift the average energy of each species to zero for a dataset. |
+| split_xyz            | Yong Wang    | yongw@princeton.edu | Some functionalities for training/test data.         |
+| vasp2xyz             | Yanzhou Wang | yanzhowang@gmail.com     | Get `train.xyz` from `VASP` outputs.                         |
+| vim                  | Ke Xu        | twtdq@qq.com                | Highlight GPUMD grammar in `vim`.                            |
+| xyz2gro              | Who? |                             | Convert `xyz` file to `gro` file.                            |
+| [NepTrainKit](https://github.com/aboys-cb/NepTrainKit)           | Chengbing Chen| 1747193328@qq.com | NEP data visualization interface program. |
diff --git a/tools/select_xyz_frames/select_xyz_frames.py b/tools/select_xyz_frames/select_xyz_frames.py
@@ -0,0 +1,108 @@
+#Select frames from the exyz trajectory file.
+#When an atom in a frame experiences a force greater than the force_threshold in any direction, the frame is removed.   eV/A
+#When the total energy difference between adjacent frames is less than energy_threshold, only one frame is retained.    eV
+#When the RMSD between adjacent frames is less than rmsd_threshold, only one frame is retained.                         A
+#If set to 'not', do not select based on this condition.
+
+#Author:Zherui Chen (chenzherui0124@foxmail.com)
+
+import numpy as np
+
+def parse_xyz_file(filename):
+    with open(filename, 'r') as file:
+        lines = file.readlines()
+
+    frames = []
+    i = 0
+    while i < len(lines):
+        num_atoms = int(lines[i].strip())
+        frame_info = lines[i + 1].strip()
+
+
+        energy_str = frame_info.split('energy=')[1].split()[0]
+        energy = float(energy_str)
+
+
+        lattice_str = frame_info.split('Lattice="')[1].split('"')[0]
+        lattice = np.array(list(map(float, lattice_str.split()))).reshape(3, 3)
+
+        atoms = lines[i + 2:i + 2 + num_atoms]
+        frames.append((num_atoms, frame_info, energy, lattice, atoms))
+        i += 2 + num_atoms
+    return frames
+
+def force_exceeds_threshold(atom_line, threshold):
+    if threshold == "not":
+        return False
+    forces = list(map(float, atom_line.split()[4:7]))  
+    return any(abs(force) > threshold for force in forces)
+
+def calculate_rmsd(frame1_atoms, frame2_atoms, lattice):
+    num_atoms = len(frame1_atoms)
+    rmsd_sum = 0.0
+
+    for atom1, atom2 in zip(frame1_atoms, frame2_atoms):
+        pos1 = np.array(list(map(float, atom1.split()[1:4])))  
+        pos2 = np.array(list(map(float, atom2.split()[1:4])))
+
+        # Calculate the minimum image distance
+        diff = pos2 - pos1
+        diff -= np.round(diff @ np.linalg.inv(lattice)) @ lattice
+
+        rmsd_sum += np.dot(diff, diff)
+
+    return np.sqrt(rmsd_sum / num_atoms)
+
+def filter_frames(frames, force_threshold, energy_threshold, rmsd_threshold):
+    filtered_frames = []
+
+    if frames:
+        filtered_frames.append(frames[0])  
+
+    for j in range(1, len(frames)):
+        prev_frame = filtered_frames[-1]
+        current_frame = frames[j]
+
+        num_atoms, frame_info, energy, lattice, atoms = current_frame
+
+
+        if force_threshold != "not" and any(force_exceeds_threshold(atom, force_threshold) for atom in atoms):
+            continue
+
+
+        if energy_threshold != "not":
+            prev_energy = prev_frame[2]
+            if abs(energy - prev_energy) < energy_threshold:
+                continue
+
+
+        if rmsd_threshold != "not":
+            prev_atoms = prev_frame[4]
+            rmsd = calculate_rmsd(prev_atoms, atoms, lattice)
+            if rmsd < rmsd_threshold:
+                continue
+
+        filtered_frames.append(current_frame)
+
+    return filtered_frames
+
+def write_xyz_file(frames, output_filename):
+    with open(output_filename, 'w') as file:
+        for num_atoms, frame_info, energy, lattice, atoms in frames:
+            file.write(f"{num_atoms}\n")
+            file.write(f"{frame_info}\n")
+            for atom in atoms:
+                file.write(f"{atom.strip()}\n")
+
+
+# User settings. Each threshold is either a number or the string "not",
+# which disables the corresponding selection criterion.
+# Frames with any force component larger than this value are removed.
+force_threshold = 20.0  
+# Minimum energy difference to the previously kept frame (disabled here).
+energy_threshold = "not"  
+# Minimum RMSD to the previously kept frame (disabled here).
+rmsd_threshold = "not"  
+# Trajectory to read and file to write the selected frames to.
+input_filename = 'coal-nep.xyz'  
+output_filename = 'output.xyz'  
+
+# Read all frames, apply the selection criteria and write the kept frames.
+frames = parse_xyz_file(input_filename)
+filtered_frames = filter_frames(frames, force_threshold, energy_threshold, rmsd_threshold)
+write_xyz_file(filtered_frames, output_filename)
+
+print(f"Filtered frames written to {output_filename}")
diff --git a/tools/vasp2xyz/outcar2xyz/multipleFrames-outcars2nep-exyz.sh b/tools/vasp2xyz/outcar2xyz/multipleFrames-outcars2nep-exyz.sh
@@ -33,6 +33,8 @@ total_outcar=$(find -L "$read_dire" -name "OUTCAR" | wc -l)
 converged_files=()
 non_converged_files=()
 
+echo "Checking the convergence of OUTCARs ..."
+
 for file in $(find "$read_dire" -name "OUTCAR"); do
     NSW=$(grep "number of steps for IOM" "$file" | awk '{print $3}')
     
diff --git a/tools/vasp2xyz/outcar2xyz/singleFrame-outcars2nep-exyz.sh b/tools/vasp2xyz/outcar2xyz/singleFrame-outcars2nep-exyz.sh
@@ -29,9 +29,9 @@ do
              if [[ $viri_logi -eq 1 ]]
              then
                    viri=$(grep -A 20 "FORCE on cell =-STRESS" $i | grep "Total " | tail -n 1 | awk '{print $2,$5,$7,$5,$3,$6,$7,$6,$4}')
-                   echo "Config_type=$configuration Weight=1.0 Lattice=\"$latt\" Energy=$ener Virial=\"$viri\" Properties=species:S:1:pos:R:3:forces:R:3" >> $writ_dire/$writ_file
+                   echo "Config_type=$configuration Weight=1.0 Lattice=\"$latt\" Energy=$ener Virial=\"$viri\" pbc=\"T T T\" Properties=species:S:1:pos:R:3:forces:R:3" >> $writ_dire/$writ_file
              else
-                   echo "Config_type=$configuration Weight=1.0 Lattice=\"$latt\" Energy=$ener Properties=species:S:1:pos:R:3:forces:R:3" >> $writ_dire/$writ_file
+                   echo "Config_type=$configuration Weight=1.0 Lattice=\"$latt\" Energy=$ener pbc=\"T T T\" Properties=species:S:1:pos:R:3:forces:R:3" >> $writ_dire/$writ_file
              fi
              ion_numb_arra=($(grep "ions per type"  $i | tail -n 1 | awk -F"=" '{print $2}'))
              ion_symb_arra=($(grep "POTCAR:" $i  | awk '{print $3}' | awk -F"_" '{print $1}' | awk '!seen[$0]++'))