Remove LOBSTER output file trailing line sensitivity (#4448)

DanielYang59 · naik-aakash · web-flow · commit fc5286e6cb2d · 2025-07-29T09:20:58.000-07:00
* clean up tests

* remove trailing line sensitivity

* simplify temp file

* lint fix

* add test

* fix filename

Co-authored-by: Aakash Ashok Naik <91958822+naik-aakash@users.noreply.github.com>
Signed-off-by: Haoyu (Daniel) YANG 杨浩宇 <yanghaoyu97@outlook.com>

* also test default filename

* test `_get_lines`

---------

Signed-off-by: Haoyu (Daniel) YANG 杨浩宇 <yanghaoyu97@outlook.com>
Co-authored-by: Aakash Ashok Naik <91958822+naik-aakash@users.noreply.github.com>
diff --git a/src/pymatgen/io/lobster/outputs.py b/src/pymatgen/io/lobster/outputs.py
@@ -57,7 +57,7 @@
 
 def _get_lines(filename) -> list[str]:
     with zopen(filename, mode="rt", encoding="utf-8") as file:
-        return file.read().split("\n")  # type:ignore[return-value,arg-type]
+        return cast("list[str]", file.read().splitlines())
 
 
 class Cohpcar:
@@ -109,7 +109,7 @@ def __init__(
             or (are_coops and are_multi_center_cobis)
             or (are_cobis and are_multi_center_cobis)
         ):
-            raise ValueError("You cannot have info about COOPs, COBIs and/or multi-center COBIS in the same file.")
+            raise ValueError("You cannot have info about COOPs, COBIs and/or multi-center COBIs in the same file.")
 
         self.are_coops = are_coops
         self.are_cobis = are_cobis
@@ -125,7 +125,7 @@ def __init__(
             else:
                 self._filename = "COHPCAR.lobster"
 
-        lines = _get_lines(filename)
+        lines: list[str] = _get_lines(self._filename)
 
         # The parameters line is the second line in a COHPCAR file.
         # It contains all parameters that are needed to map the file.
@@ -136,24 +136,23 @@ def __init__(
         self.is_spin_polarized = int(parameters[1]) == 2
         spins = [Spin.up, Spin.down] if int(parameters[1]) == 2 else [Spin.up]
         cohp_data: dict[str, dict[str, Any]] = {}
+
+        # The COHP/COBI data start from line num_bonds + 3
+        data = np.array([np.array(line.split(), dtype=float) for line in lines[num_bonds + 3 :]]).transpose()
+
         if not self.are_multi_center_cobis:
-            # The COHP data start in line num_bonds + 3
-            data = np.array([np.array(line.split(), dtype=float) for line in lines[num_bonds + 3 :]]).transpose()
             cohp_data = {
                 "average": {
                     "COHP": {spin: data[1 + 2 * s * (num_bonds + 1)] for s, spin in enumerate(spins)},
                     "ICOHP": {spin: data[2 + 2 * s * (num_bonds + 1)] for s, spin in enumerate(spins)},
                 }
             }
-        else:
-            # The COBI data start in line num_bonds + 3 if multi-center cobis exist
-            data = np.array([np.array(line.split(), dtype=float) for line in lines[num_bonds + 3 :]]).transpose()
 
         self.energies = data[0]
 
         orb_cohp: dict[str, Any] = {}
         # Present for LOBSTER versions older than 2.2.0
-        very_old = False
+        older_than_2_2_0: bool = False
 
         # The label has to be changed: there are more than one COHP for each atom combination
         # this is done to make the labeling consistent with ICOHPLIST.lobster
@@ -192,8 +191,8 @@ def __init__(
                 else:
                     # Present for LOBSTER versions older than 2.2.0
                     if bond_num == 0:
-                        very_old = True
-                    if very_old:
+                        older_than_2_2_0 = True
+                    if older_than_2_2_0:
                         bond_num += 1
                         label = str(bond_num)
 
@@ -245,8 +244,8 @@ def __init__(
                 else:
                     # Present for LOBSTER versions older than 2.2.0
                     if bond_num == 0:
-                        very_old = True
-                    if very_old:
+                        older_than_2_2_0 = True
+                    if older_than_2_2_0:
                         bond_num += 1
                         label = str(bond_num)
 
@@ -261,7 +260,7 @@ def __init__(
                     }
 
         # Present for LOBSTER older than 2.2.0
-        if very_old:
+        if older_than_2_2_0:
             for bond_str in orb_cohp:
                 cohp_data[bond_str] = {
                     "COHP": None,
@@ -405,14 +404,10 @@ def __init__(
             else:
                 self._filename = "ICOHPLIST.lobster"
 
-        # LOBSTER list files have an extra trailing blank line
-        # and we don't need the header.
         if self._icohpcollection is None:
             with zopen(self._filename, mode="rt", encoding="utf-8") as file:
-                all_lines: list[str] = file.read().splitlines()  # type:ignore[assignment]
+                all_lines: list[str] = cast("list[str]", file.read().splitlines())
 
-                # strip *trailing* blank lines only
-                all_lines = [line for line in all_lines if line.strip()]
                 # --- detect header length robustly ---
                 header_len = 0
                 try:
@@ -442,7 +437,7 @@ def __init__(
             # If the calculation is spin polarized, the line in the middle
             # of the file will be another header line.
             # TODO: adapt this for orbital-wise stuff
-            if version in ("3.1.1", "2.2.1"):
+            if version in {"3.1.1", "2.2.1"}:
                 self.is_spin_polarized = "distance" in lines[len(lines) // 2]
             else:  # if version == "5.1.0":
                 self.is_spin_polarized = len(lines[0].split()) == 9
@@ -637,10 +632,8 @@ def __init__(self, filename: PathLike = "NcICOBILIST.lobster") -> None:
         Args:
             filename: Name of the NcICOBILIST file.
         """
-
-        # LOBSTER list files have an extra trailing blank line
-        # and we don't need the header
-        lines = _get_lines(filename)[1:-1]
+        # We don't need the header
+        lines = _get_lines(filename)[1:]
         if len(lines) == 0:
             raise RuntimeError("NcICOBILIST file contains no data.")
 
@@ -930,7 +923,7 @@ def __init__(
         self.loewdin = [] if loewdin is None else loewdin
 
         if self.num_atoms is None:
-            lines = _get_lines(filename)[3:-3]  # type:ignore[arg-type,assignment]
+            lines = _get_lines(filename)[3:-2]
             if len(lines) == 0:
                 raise RuntimeError("CHARGES file contains no data.")
 
@@ -1105,10 +1098,12 @@ def __init__(self, filename: PathLike | None, **kwargs) -> None:
             self.has_doscar_lso = (
                 "writing DOSCAR.LSO.lobster..." in lines and "SKIPPING writing DOSCAR.LSO.lobster..." not in lines
             )
+
             try:
                 version_number = float(".".join(self.lobster_version.strip("v").split(".")[:2]))
             except ValueError:
                 version_number = 0.0
+
             if version_number < 5.1:
                 self.has_cohpcar = (
                     "writing COOPCAR.lobster and ICOOPLIST.lobster..." in lines
@@ -1452,9 +1447,7 @@ def __init__(
             for name in os.listdir(filenames):
                 if fnmatch.fnmatch(name, "FATBAND_*.lobster"):
                     filenames_new.append(os.path.join(filenames, name))
-            filenames = filenames_new  # type: ignore[assignment]
-
-        filenames = cast("list[PathLike]", filenames)
+            filenames = cast("list[PathLike]", filenames_new)
 
         if len(filenames) == 0:
             raise ValueError("No FATBAND files in folder or given")
@@ -1546,7 +1539,7 @@ def __init__(
 
             idx_kpt = -1
             linenumber = iband = 0
-            for line in lines[1:-1]:
+            for line in lines[1:]:
                 if line.split()[0] == "#":
                     KPOINT = np.array(
                         [
@@ -1600,7 +1593,7 @@ def get_bandstructure(self) -> LobsterBandStructureSymmLine:
             lattice=self.lattice,
             efermi=self.efermi,  # type: ignore[arg-type]
             labels_dict=self.label_dict,
-            structure=self.structure,  # type:ignore[arg-type]
+            structure=self.structure,  # type: ignore[arg-type]
             projections=self.p_eigenvals,
         )
 
@@ -2159,7 +2152,7 @@ def __init__(
             self._filename = filename
             self.ewald_splitting = float(lines[0].split()[9])
 
-            lines = lines[5:-1]
+            lines = lines[5:]
             self.num_atoms = len(lines) - 2
             for atom in range(self.num_atoms):
                 line_parts = lines[atom].split()
@@ -2305,7 +2298,7 @@ def __init__(
 
         self._filename = str(filename)
         with zopen(self._filename, mode="rt", encoding="utf-8") as file:
-            lines: list[str] = file.readlines()  # type:ignore[assignment]
+            lines: list[str] = cast("list[str]", file.readlines())
         if len(lines) == 0:
             raise RuntimeError("Please check provided input file, it seems to be empty")
 
diff --git a/tests/io/lobster/test_outputs.py b/tests/io/lobster/test_outputs.py
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import copy
+import gzip
 import os
-import tempfile
 
 import numpy as np
 import orjson
@@ -30,6 +30,7 @@
     SitePotential,
     Wavefunction,
 )
+from pymatgen.io.lobster.outputs import _get_lines
 from pymatgen.io.vasp import Vasprun
 from pymatgen.util.testing import TEST_FILES_DIR, VASP_IN_DIR, VASP_OUT_DIR, MatSciTest
 
@@ -64,7 +65,17 @@ def setup_method(self):
             filename=f"{TEST_DIR}/COOPCAR.lobster.BiSe.gz",
             are_coops=True,
         )
-        self.cohp_fe = Cohpcar(filename=f"{TEST_DIR}/COOPCAR.lobster.gz")
+
+        # Make sure Cohpcar also works with terminating line ending char
+        gz_path = f"{TEST_DIR}/COOPCAR.lobster.gz"
+        with gzip.open(gz_path, "rt", encoding="utf-8") as f:
+            content = f.read() + "\n"
+
+        # Test default filename (None should be redirected to "COHPCAR.lobster")
+        with open("COHPCAR.lobster", "w", encoding="utf-8") as f:
+            f.write(content)
+
+        self.cohp_fe = Cohpcar(filename=None)
         self.coop_fe = Cohpcar(
             filename=f"{TEST_DIR}/COOPCAR.lobster.gz",
             are_coops=True,
@@ -645,16 +656,11 @@ def setup_method(self):
         self.charge_lcfo = Charge(filename=f"{TEST_DIR}/CHARGE.LCFO.lobster.ALN.gz", is_lcfo=True)
 
     def test_attributes(self):
-        charge_Loewdin = [-1.25, 1.25]
-        charge_Mulliken = [-1.30, 1.30]
-        atomlist = ["O1", "Mn2"]
-        types = ["O", "Mn"]
-        num_atoms = 2
-        assert charge_Mulliken == self.charge2.mulliken
-        assert charge_Loewdin == self.charge2.loewdin
-        assert atomlist == self.charge2.atomlist
-        assert types == self.charge2.types
-        assert num_atoms == self.charge2.num_atoms
+        assert self.charge2.mulliken == approx([-1.30, 1.30])
+        assert self.charge2.loewdin == approx([-1.25, 1.25])
+        assert self.charge2.atomlist == ["O1", "Mn2"]
+        assert self.charge2.types == ["O", "Mn"]
+        assert self.charge2.num_atoms == 2
 
         # test with CHARG.LCFO.lobster file
         assert self.charge_lcfo.is_lcfo
@@ -1866,7 +1872,7 @@ def test_msonable(self):
             assert getattr(grosspop_from_dict, attr_name) == attr_value
 
 
-class TestIcohplist:
+class TestIcohplist(MatSciTest):
     def setup_method(self):
         self.icohp_bise = Icohplist(filename=f"{TEST_DIR}/ICOHPLIST.lobster.BiSe")
         self.icoop_bise = Icohplist(
@@ -2173,21 +2179,16 @@ def test_msonable(self):
                 assert getattr(icohplist_from_dict, attr_name) == attr_value
 
     def test_missing_trailing_newline(self):
-        content = (
-            "1   Co1   O1   1.00000   0   0   0   -0.50000   -1.00000\n"
-            "2   Co2   O2   1.10000   0   0   0   -0.60000   -1.10000"
-        )
+        fname = f"{self.tmp_path}/icohplist"
+        with open(fname, mode="w", encoding="utf-8") as f:
+            f.write(
+                "1   Co1   O1   1.00000   0   0   0   -0.50000   -1.00000\n"
+                "2   Co2   O2   1.10000   0   0   0   -0.60000   -1.10000"
+            )
 
-        with tempfile.NamedTemporaryFile("w+", delete=False) as tmp:
-            tmp.write(content)
-            tmp.flush()
-            fname = tmp.name
-        try:
-            ip = Icohplist(filename=fname)
-            assert len(ip.icohplist) == 2
-            assert ip.icohplist["1"]["icohp"][Spin.up] == approx(-0.5)
-        finally:
-            os.remove(fname)
+        ip = Icohplist(filename=fname)
+        assert len(ip.icohplist) == 2
+        assert ip.icohplist["1"]["icohp"][Spin.up] == approx(-0.5)
 
 
 class TestNciCobiList:
@@ -2531,3 +2532,17 @@ def test_attributes(self):
             "abs": 56.14,
             "unit": "uC/cm2",
         }
+
+
+def test_get_lines():
+    """Ensure `_get_lines` is not trailing end char sensitive."""
+    with open("without-end-char", mode="wb") as f:
+        f.write(b"first line\nsecond line")
+
+    with open("with-end-char", mode="wb") as f:
+        f.write(b"first line\nsecond line\n")
+
+    without_end_char = _get_lines("without-end-char")
+    with_end_char = _get_lines("with-end-char")
+
+    assert len(with_end_char) == len(without_end_char) == 2