State before a significant refactor that refocuses on the main points of interest to avoid long computation times

MoritzNeuberger · MoritzNeuberger · commit 6a82dd0a99e7 · 2026-03-26T17:10:40.000+01:00
diff --git a/python/remage/utils.py b/python/remage/utils.py
@@ -18,6 +18,99 @@
 from collections.abc import Iterable
 
 
+def write_process_table(file_path: str, proc_name_to_id: dict[str, int]) -> None:
+    """Write an LH5 ``processes`` struct from a process-name to id mapping."""
+    if not proc_name_to_id:
+        return
+
+    import numpy as np
+    from lgdo import Scalar, Struct, lh5
+
+    process_struct = Struct(
+        {name: Scalar(np.int64(pid)) for name, pid in proc_name_to_id.items()}
+    )
+    lh5.write(process_struct, "processes", file_path, wo_mode="overwrite")
+
+
+def canonicalize_process_ids_for_concat(
+    lh5_files: list[str],
+    *,
+    suffix: str = "-canon",
+) -> tuple[list[str], dict[str, int]]:
+    """Create remapped LH5 copies with canonical process ids for safe concatenation.
+    This is useful when concatenating simulation outputs generated by different remage instances with different process-name to id mappings.
+
+    Parameters
+    ----------
+    lh5_files
+        Input files to canonicalize.
+    suffix
+        Suffix appended to each copied filename stem.
+
+    Returns
+    -------
+    remapped_files, canonical_proc_ids
+        Paths to remapped copies and canonical process-name to id mapping.
+    """
+    if not lh5_files:
+        return [], {}
+
+    import shutil
+    from pathlib import Path
+
+    from lgdo import Array, lh5
+
+    def _read_process_map(file_path: str) -> dict[str, int]:
+        if lh5.ls(file_path, "processes") == []:
+            return {}
+
+        procs = lh5.read("processes", file_path)
+        return {name: int(proc.value) for name, proc in procs.items()}
+
+    ordered_names = []
+    shard_proc_maps = {}
+    for lh5_file in lh5_files:
+        proc_map = _read_process_map(lh5_file)
+        shard_proc_maps[lh5_file] = proc_map
+        for name in proc_map:
+            if name not in ordered_names:
+                ordered_names.append(name)
+
+    canonical_proc_ids = {name: idx for idx, name in enumerate(ordered_names)}
+    shard_id_maps = {
+        lh5_file: {
+            old_id: canonical_proc_ids[name] for name, old_id in proc_map.items()
+        }
+        for lh5_file, proc_map in shard_proc_maps.items()
+    }
+
+    remapped_files = []
+    for lh5_file in lh5_files:
+        src_path = Path(lh5_file)
+        remapped_file = str(
+            src_path.with_name(f"{src_path.stem}{suffix}{src_path.suffix}")
+        )
+        shutil.copy2(lh5_file, remapped_file)
+
+        if lh5.ls(remapped_file, "tracks") != []:
+            tracks = lh5.read("tracks", remapped_file)
+            old_procid = tracks["procid"].view_as("np")
+            new_procid = old_procid.copy()
+            for old_id, new_id in shard_id_maps.get(lh5_file, {}).items():
+                new_procid[old_procid == old_id] = new_id
+
+            tracks["procid"] = Array(
+                new_procid.astype(old_procid.dtype, copy=False),
+                attrs=tracks["procid"].attrs,
+            )
+            lh5.write(tracks, "tracks", remapped_file, wo_mode="overwrite")
+
+        write_process_table(remapped_file, canonical_proc_ids)
+        remapped_files.append(remapped_file)
+
+    return remapped_files, canonical_proc_ids
+
+
 def _to_list(thing):
     if not isinstance(thing, tuple | list):
         return [thing]
diff --git a/tests/cosmogenics/muon/CMakeLists.txt b/tests/cosmogenics/muon/CMakeLists.txt
@@ -1,7 +1,7 @@
 file(
   GLOB _file_list
   RELATIVE ${PROJECT_SOURCE_DIR}
-  macros/*.mac gdml/*.gdml *.py)
+  macros/*.mac gdml/*.gdml misc/*.yaml *.py)
 
 # copy them to the build area
 foreach(_file ${_file_list})
@@ -11,4 +11,4 @@ endforeach()
 # run python tests using pytest
 add_test(NAME cosmogenics/muon/pytest-all COMMAND ${PYTHONPATH} -m pytest -s -vvv .)
 set_tests_properties(cosmogenics/muon/pytest-all PROPERTIES LABELS "extra;val;val-only" TIMEOUT
-                                                            7200)
+                                                            36000)
diff --git a/tests/cosmogenics/muon/test_energy_loss.py b/tests/cosmogenics/muon/test_energy_loss.py
@@ -16,7 +16,7 @@
 
 u = pint.get_application_registry()
 
-g_world_size = 200 * u.cm
+g_world_size = 50 * u.cm
 g_world_size_cm = g_world_size.to(u.cm).magnitude
 
 macro = """
@@ -102,9 +102,14 @@ def simulate(
     em_physics: str = "Livermore",
     max_threads: int = 1,
 ) -> str:
-    output = (
-        f"output-energy_loss-{energy:.0f}-{material}-{had_physics}-{em_physics}.lh5"
-    )
+    if energy > 1:
+        output = (
+            f"output-energy_loss-{energy:.0f}-{material}-{had_physics}-{em_physics}.lh5"
+        )
+    else:
+        output = (
+            f"output-energy_loss-{energy:.2f}-{material}-{had_physics}-{em_physics}.lh5"
+        )
 
     events = 100000 * int(os.environ.get("RMG_STATS_FACTOR", "1"))
 
@@ -136,7 +141,10 @@ def calculate_dEdx(remage_output: str):
     mask = tracks["parent_trackid"] == 0
 
     # read in event data
-    stp = lh5.read("stp/", remage_output)
+    try:
+        stp = lh5.read("stp/", remage_output)
+    except lh5.exceptions.LH5DecodeError:
+        return np.array([0])
 
     c = sp.constants.physical_constants["speed of light in vacuum"][0]
     m = sp.constants.physical_constants["muon mass energy equivalent in MeV"][0]
@@ -363,7 +371,10 @@ def plot(energies, materials, had_physics, em_physics, outfiles):
             for had_physic in had_physics:
                 for em_physic in em_physics:
                     remage_output = outfiles[(energy, material, had_physic, em_physic)]
-                    dEdx_sims[(had_physic, em_physic)] = calculate_dEdx(remage_output)
+                    out = calculate_dEdx(remage_output)
+                    if isinstance(out, float):
+                        out = np.array([out])
+                    dEdx_sims[(had_physic, em_physic)] = out
 
             fig, ax = plt.subplots()
 
@@ -438,6 +449,48 @@ def plot(energies, materials, had_physics, em_physics, outfiles):
                 )
 
 
+def plot_energy_range(energies, materials, had_physics, em_physics, outfiles):
+
+    for material in materials:
+        dEdx_sims = {}
+        for energy in energies:
+            for had_physic in had_physics:
+                for em_physic in em_physics:
+                    remage_output = outfiles[(energy, material, had_physic, em_physic)]
+                    dEdx_sims[energy] = calculate_dEdx(remage_output)
+
+        x = np.array(energies)
+        y = np.array([np.mean(dEdx_sim) for dEdx_sim in dEdx_sims.values()])
+        mask = y > 0
+        x = x[mask]
+        y = y[mask]
+        y_unc = np.array(
+            [
+                np.std(dEdx_sim) / np.sqrt(len(dEdx_sim))
+                for dEdx_sim in dEdx_sims.values()
+            ]
+        )
+
+        x_exp = np.array(lookup_tables["energy_loss"]["total"][material])[:, 0] / 1e3
+        y_exp = (
+            np.array(lookup_tables["energy_loss"]["total"][material])[:, 1]
+            * lookup_tables["densities"][material]
+        )
+
+        fig, ax = plt.subplots()
+        ax.errorbar(x, y, yerr=y_unc, fmt="o", label="simulation")
+        ax.plot(x_exp, y_exp, label="expected")
+        ax.set_xscale("log")
+        ax.set_xlabel("muon energy [GeV]")
+        ax.set_ylabel("mean energy loss dE/dx [MeV/cm]")
+        ax.legend()
+        ax.set_title(
+            f"Energy loss of muons in {material}",
+            size=8,
+        )
+        fig.savefig(f"energy_loss_{material}_energy_range.output.png")
+
+
 def _simulate_case(
     case: tuple[float, str, str, str], max_threads: int = 1
 ) -> tuple[tuple[float, str, str, str], str]:
@@ -521,3 +574,37 @@ def test_energy_loss():
                 outfiles[key] = output
 
     plot(energies, materials, had_physics_list, em_physics_list, outfiles)
+
+    energies = np.array(lookup_tables["energy_loss"]["total"]["lar"])[:, 0][10::2] / 1e3
+    materials = ["lar", "water"]
+    had_physics_list = ["Shielding"]
+    em_physics_list = ["Livermore"]
+
+    cases = [
+        (energy, material, had_physics, em_physics)
+        for material in materials
+        for had_physics in had_physics_list
+        for em_physics in em_physics_list
+        for energy in energies
+    ]
+
+    max_workers = min(len(cases), os.cpu_count() // 2)
+    max_threads = os.cpu_count() // 2 // max_workers
+    max_threads = max(1, max_threads)
+
+    outfiles = {}
+    if max_workers == 1:
+        for case in cases:
+            key, output = _simulate_case(case, max_threads=max_threads)
+            outfiles[key] = output
+    else:
+        with ProcessPoolExecutor(max_workers=max_workers) as ex:
+            futures = [
+                ex.submit(_simulate_case, case, max_threads=max_threads)
+                for case in cases
+            ]
+            for fut in as_completed(futures):
+                key, output = fut.result()
+                outfiles[key] = output
+
+    plot_energy_range(energies, materials, had_physics_list, em_physics_list, outfiles)
diff --git a/tests/cosmogenics/muon/test_shower_simulation.py b/tests/cosmogenics/muon/test_shower_simulation.py