Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 171 additions & 52 deletions dpdata/xyz/quip_gap_xyz.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@

import numpy as np

from ..unit import EnergyConversion, ForceConversion

e_conv_kcalpermol2eV = EnergyConversion("kcal_mol", "eV").value()
e_conv_au2eV = EnergyConversion("hartree", "eV").value()
f_conv_kcalpermolperang2eVperang = ForceConversion(
"kcal_mol/angstrom", "eV/angstrom"
).value()
f_conv_auperang2eVperang = ForceConversion("hartree/angstrom", "eV/angstrom").value()
f_conv_kcalpermolperbohr2eVperang = ForceConversion(
"kcal_mol/bohr", "eV/angstrom"
).value()
f_conv_au2eVperang = ForceConversion("hartree/bohr", "eV/angstrom").value()


class QuipGapxyzSystems:
"""deal with QuipGapxyzFile."""
Expand Down Expand Up @@ -82,56 +95,72 @@ def handle_single_xyz_frame(lines):
force_array = None
virials = None
for kv_dict in prop_list:
if kv_dict["key"] == "species":
if kv_dict["datatype"] != "S":
raise RuntimeError(
"datatype for species must be 'S' instead of {}".format(
kv_dict["datatype"]
try:
if kv_dict["key"] == "species":
if kv_dict["datatype"] != "S":
raise RuntimeError(
"datatype for species must be 'S' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
type_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "pos":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
type_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "pos":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
coords_array = data_array[:, used_colomn : used_colomn + field_length]
used_colomn += field_length
continue
elif kv_dict["key"] == "Z":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
coords_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
elif kv_dict["key"] == "Z":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
Z_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "force":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
Z_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "tags":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for tags must be 'I' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
force_array = data_array[:, used_colomn : used_colomn + field_length]
used_colomn += field_length
field_length = int(kv_dict["value"])
used_colomn += field_length
continue
elif kv_dict["key"] == "force" or kv_dict["key"] == "forces":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
force_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
except Exception as e:
Comment on lines +98 to +161
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improved field handling and error management.

The changes improve handling of fields like species, pos, Z, tags, and forces. The logic is sound, but avoid using bare except statements for better debugging.

-  except Exception as e:
+  except KeyError as e:
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
if kv_dict["key"] == "species":
if kv_dict["datatype"] != "S":
raise RuntimeError(
"datatype for species must be 'S' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
type_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "pos":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
type_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "pos":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
coords_array = data_array[:, used_colomn : used_colomn + field_length]
used_colomn += field_length
continue
elif kv_dict["key"] == "Z":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
coords_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
elif kv_dict["key"] == "Z":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
Z_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "force":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
Z_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "tags":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for tags must be 'I' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
force_array = data_array[:, used_colomn : used_colomn + field_length]
used_colomn += field_length
field_length = int(kv_dict["value"])
used_colomn += field_length
continue
elif kv_dict["key"] == "force" or kv_dict["key"] == "forces":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
force_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
except Exception as e:
try:
if kv_dict["key"] == "species":
if kv_dict["datatype"] != "S":
raise RuntimeError(
"datatype for species must be 'S' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
type_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "pos":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
coords_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
elif kv_dict["key"] == "Z":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
Z_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "tags":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for tags must be 'I' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
used_colomn += field_length
continue
elif kv_dict["key"] == "force" or kv_dict["key"] == "forces":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
force_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
except KeyError as e:
Tools
Ruff

133-133: Local variable Z_array is assigned to but never used

Remove assignment to unused variable Z_array

(F841)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the error it is expected to catch?

print("unknown field {}".format(kv_dict["key"]), e)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason not to raise an error?

continue
else:
raise RuntimeError("unknown field {}".format(kv_dict["key"]))

type_num_dict = OrderedDict()
atom_type_list = []
Expand Down Expand Up @@ -165,21 +194,111 @@ def handle_single_xyz_frame(lines):
else:
virials = None

try:
e_units = np.array([field_dict["energy-unit"].lower()])
f_units = np.array([field_dict["force-unit"].lower()])
except Exception:
pass
# print('No units information contained.')

Comment on lines +197 to +203
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Proper handling of missing unit information.

The try-except block for handling missing unit information should raise an exception or log a warning if units are not found, rather than passing silently.

-  except Exception:
-    pass
-    # print('No units information contained.')
+  except KeyError as e:
+    raise ValueError("No units information contained.") from e
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
e_units = np.array([field_dict["energy-unit"].lower()])
f_units = np.array([field_dict["force-unit"].lower()])
except Exception:
pass
# print('No units information contained.')
try:
e_units = np.array([field_dict["energy-unit"].lower()])
f_units = np.array([field_dict["force-unit"].lower()])
except KeyError as e:
raise ValueError("No units information contained.") from e

info_dict = {}
info_dict["nopbc"] = False
info_dict["atom_names"] = list(type_num_array[:, 0])
info_dict["atom_numbs"] = list(type_num_array[:, 1].astype(int))
info_dict["atom_types"] = np.array(atom_type_list).astype(int)
info_dict["cells"] = np.array(
[
np.array(list(filter(bool, field_dict["Lattice"].split(" ")))).reshape(
3, 3
)
]
).astype("float32")

info_dict["coords"] = np.array([coords_array]).astype("float32")
"""
info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32")
info_dict["forces"] = np.array([force_array]).astype("float32")
"""
Comment on lines +211 to +214
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it is not used anymore, it should be removed.


try:
if e_units == "kcal/mol":
info_dict["energies"] = (
np.array([field_dict["energy"]]).astype("float32")
* e_conv_kcalpermol2eV
)
elif e_units in ["hartree", "au", "a.u."]:
info_dict["energies"] = (
np.array([field_dict["energy"]]).astype("float32") * e_conv_au2eV
)
elif e_units == "ev":
info_dict["energies"] = np.array([field_dict["energy"]]).astype(
"float32"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it converted to float32?

)
else:
info_dict["energies"] = np.array([field_dict["energy"]]).astype(
"float32"
)
except Exception:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the expected error to catch here? It's hard to read.

try:
possible_fields = [
"energy",
"energies",
"Energies",
"potential-energy.energy",
"Energy",
]
for key in possible_fields:
if key in field_dict:
info_dict["energies"] = np.array(
[field_dict[key]], dtype="float32"
)
break
else:
raise ValueError("No valid energy field found in field_dict.")
except KeyError:
raise ValueError("Error while accessing energy fields in field_dict.")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raise exceptions with raise ... from None.

Within an except clause, raise exceptions with raise ... from None to distinguish them from errors in exception handling.

-  raise ValueError("Error while accessing energy fields in field_dict.")
+  raise ValueError("Error while accessing energy fields in field_dict.") from None
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
raise ValueError("Error while accessing energy fields in field_dict.")
raise ValueError("Error while accessing energy fields in field_dict.") from None
Tools
Ruff

252-252: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)


Comment on lines +235 to +253
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improved handling of possible energy fields and raising exceptions.

The changes improve handling of possible energy fields and raising exceptions. Use raise ... from None to distinguish exceptions from errors in exception handling.

-  raise ValueError("Error while accessing energy fields in field_dict.")
+  raise ValueError("Error while accessing energy fields in field_dict.") from None
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
possible_fields = [
"energy",
"energies",
"Energies",
"potential-energy.energy",
"Energy",
]
for key in possible_fields:
if key in field_dict:
info_dict["energies"] = np.array(
[field_dict[key]], dtype="float32"
)
break
else:
raise ValueError("No valid energy field found in field_dict.")
except KeyError:
raise ValueError("Error while accessing energy fields in field_dict.")
try:
possible_fields = [
"energy",
"energies",
"Energies",
"potential-energy.energy",
"Energy",
]
for key in possible_fields:
if key in field_dict:
info_dict["energies"] = np.array(
[field_dict[key]], dtype="float32"
)
break
else:
raise ValueError("No valid energy field found in field_dict.")
except KeyError:
raise ValueError("Error while accessing energy fields in field_dict.") from None
Tools
Ruff

252-252: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)

try:
if f_units == "kcal/mol/angstrom":
info_dict["forces"] = (
np.array([force_array]).astype("float32")
* f_conv_kcalpermolperang2eVperang
)
elif (
f_units == "hartree/angstrom"
or f_units == "hartree/ang"
or f_units == "hartree/ang."
):
info_dict["forces"] = (
np.array([force_array]).astype("float32") * f_conv_auperang2eVperang
)
elif f_units == "kcal/mol/bohr":
info_dict["forces"] = (
np.array([force_array]).astype("float32")
* f_conv_kcalpermolperbohr2eVperang
)
elif f_units == "kcal/mol/bohr":
info_dict["forces"] = (
np.array([force_array]).astype("float32") * f_conv_au2eVperang
Comment on lines +273 to +275
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove redundant conditional block for kcal/mol/bohr.

The conditional block for kcal/mol/bohr is redundant and should be removed.

-  elif f_units == "kcal/mol/bohr":
-    info_dict["forces"] = (
-      np.array([force_array]).astype("float32") * f_conv_au2eVperang
-    )

Committable suggestion was skipped due to low confidence.

)
elif (
f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang."
):
info_dict["forces"] = np.array([force_array]).astype("float32")
else:
info_dict["forces"] = np.array([force_array]).astype("float32")
except Exception:
Comment on lines +254 to +283
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unit conversion for forces and redundant conditional block.

The new logic for force unit conversion is correctly implemented but contains a redundant conditional block for kcal/mol/bohr and uses a bare except statement.

-  elif f_units == "kcal/mol/bohr":
-    info_dict["forces"] = (
-      np.array([force_array]).astype("float32") * f_conv_au2eVperang
-    )
-  except:
-    info_dict["forces"] = np.array([force_array]).astype("float32")
+  except KeyError as e:
+    raise ValueError("Error while accessing force fields in field_dict.") from e
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
if f_units == "kcal/mol/angstrom":
info_dict["forces"] = (
np.array([force_array]).astype("float32")
* f_conv_kcalpermolperang2eVperang
)
elif (
f_units == "hartree/angstrom"
or f_units == "hartree/ang"
or f_units == "hartree/ang."
):
info_dict["forces"] = (
np.array([force_array]).astype("float32") * f_conv_auperang2eVperang
)
elif f_units == "kcal/mol/bohr":
info_dict["forces"] = (
np.array([force_array]).astype("float32")
* f_conv_kcalpermolperbohr2eVperang
)
elif f_units == "kcal/mol/bohr":
info_dict["forces"] = (
np.array([force_array]).astype("float32") * f_conv_au2eVperang
)
elif (
f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang."
):
info_dict["forces"] = np.array([force_array]).astype("float32")
else:
info_dict["forces"] = np.array([force_array]).astype("float32")
except Exception:
try:
if f_units == "kcal/mol/angstrom":
info_dict["forces"] = (
np.array([force_array]).astype("float32")
* f_conv_kcalpermolperang2eVperang
)
elif (
f_units == "hartree/angstrom"
or f_units == "hartree/ang"
or f_units == "hartree/ang."
):
info_dict["forces"] = (
np.array([force_array]).astype("float32") * f_conv_auperang2eVperang
)
elif f_units == "kcal/mol/bohr":
info_dict["forces"] = (
np.array([force_array]).astype("float32")
* f_conv_kcalpermolperbohr2eVperang
)
elif (
f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang."
):
info_dict["forces"] = np.array([force_array]).astype("float32")
else:
info_dict["forces"] = np.array([force_array]).astype("float32")
except KeyError as e:
raise ValueError("Error while accessing force fields in field_dict.") from e

info_dict["forces"] = np.array([force_array]).astype("float32")

if virials is not None:
info_dict["virials"] = virials
info_dict["orig"] = np.zeros(3)
if "Lattice" in field_dict and field_dict["Lattice"].strip():
lattice_values = list(filter(bool, field_dict["Lattice"].split(" ")))
info_dict["cells"] = np.array(
[np.array(lattice_values).reshape(3, 3)]
).astype("float32")
else:
lattice_values = np.array(
[[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]]
)

info_dict["nopbc"] = True
info_dict["cells"] = np.array(
[np.array(lattice_values).reshape(3, 3)]
).astype("float32")

return info_dict