Skip to content

Commit 6d9cf95

Browse files
Complete end-to-end integration for VENUS (#135)
* Fix division by zero errors and improve transmission calculation in normalization workflow * Enhance normalization export functionality to handle multiple sample runs and calculate combined transmission results * fix incorrect data clipping * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add twenty formatter * add new json manager for multi-fit route * update json manager and expand test coverage * fix endf and json pairing logic * update runner to support multi mode * add multi-isotope configuration support in FitOptions and InpManager * add multi-isotope INP generation and number density calculation functions * add plot file options and enhance physical constants generation in InpManager * add isotope section generation and update tests for multi-isotope handling * add integration notebook * add end-to-end notebook * fix lpt parser bugs sneaked in previous PR * enhance isotope section generation to use Card Set 2 format and update tests accordingly * fix missing new line issue * adjust default alphanumerics * refactor FitOptions to use Optional types for several attributes to improve flexibility --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent b354b38 commit 6d9cf95

File tree

22 files changed

+3116
-157
lines changed

22 files changed

+3116
-157
lines changed

examples/Notebooks/pleiades_venus_demo.ipynb

Lines changed: 944 additions & 0 deletions
Large diffs are not rendered by default.

src/pleiades/processing/normalization.py

Lines changed: 101 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
from pathlib import Path
3434
from typing import Any, Dict, List, Optional, Union
3535

36+
import numpy as np
37+
3638
from pleiades.processing import DataType, Facility, MasterDictKeys, NormalizationStatus, Roi
3739
from pleiades.processing.normalization_handler import (
3840
combine_data,
@@ -375,8 +377,12 @@ def normalization(
375377

376378
# Format and export results
377379
if output_folder:
378-
for folder in normalization_dict[MasterDictKeys.sample_data].keys():
379-
logger.info(f"Exporting data for folder: {folder}")
380+
sample_folders = list(normalization_dict[MasterDictKeys.sample_data].keys())
381+
382+
if len(sample_folders) == 1:
383+
# Single run - use original logic
384+
folder = sample_folders[0]
385+
logger.info(f"Exporting data for single folder: {folder}")
380386

381387
# Get time spectra for energy conversion
382388
spectra_array = sample_master_dict[MasterDictKeys.list_folders][folder][MasterDictKeys.list_spectra]
@@ -408,6 +414,99 @@ def normalization(
408414
output_file_name = Path(output_folder) / f"{Path(folder).name}_transmission.txt"
409415
export_ascii(data_dict, str(output_file_name))
410416

417+
else:
418+
# Multiple runs - export individual runs AND combined result
419+
logger.info(f"Processing {len(sample_folders)} sample runs individually and combining")
420+
421+
# Get energy array from first folder (all should be the same)
422+
first_folder = sample_folders[0]
423+
spectra_array = sample_master_dict[MasterDictKeys.list_folders][first_folder][MasterDictKeys.list_spectra]
424+
425+
# Convert time-of-flight to energy
426+
energy_array = convert_array_from_time_to_energy(
427+
spectra_array,
428+
time_unit=TimeUnitOptions.s,
429+
distance_source_detector=distance_source_detector_m,
430+
distance_source_detector_unit=DistanceUnitOptions.m,
431+
detector_offset=detector_offset_micros,
432+
detector_offset_unit=TimeUnitOptions.us,
433+
energy_unit=EnergyUnitOptions.eV,
434+
)
435+
436+
# Step 1: Export individual runs
437+
run_transmissions = []
438+
run_uncertainties = []
439+
440+
for folder in sample_folders:
441+
logger.info(f"Processing and exporting individual run: {Path(folder).name}")
442+
443+
# Extract transmission and uncertainties for this run
444+
counts_array, uncertainties = get_counts_from_normalized_data(
445+
normalization_dict[MasterDictKeys.sample_data][folder]
446+
)
447+
448+
# Export individual run
449+
individual_data_dict = {
450+
"energy_eV": energy_array[::-1],
451+
"transmission": counts_array[::-1],
452+
"uncertainties": uncertainties[::-1],
453+
}
454+
455+
individual_output_file = Path(output_folder) / f"{Path(folder).name}_transmission.txt"
456+
export_ascii(individual_data_dict, str(individual_output_file))
457+
logger.info(f" Individual run exported to: {individual_output_file}")
458+
459+
# Collect for combination
460+
run_transmissions.append(counts_array)
461+
run_uncertainties.append(uncertainties)
462+
463+
# Step 2: Create combined result
464+
logger.info("Creating weighted combination of all runs...")
465+
466+
# Convert to numpy arrays for easier calculation
467+
run_transmissions = np.array(run_transmissions) # Shape: (n_runs, n_energy_bins)
468+
run_uncertainties = np.array(run_uncertainties)
469+
470+
# Calculate weights: w_i = 1/σ_i²
471+
with np.errstate(divide="ignore", invalid="ignore"):
472+
weights = 1.0 / (run_uncertainties**2)
473+
weights[~np.isfinite(weights)] = 0 # Set invalid weights to 0
474+
475+
# Weighted average: T_combined = Σ(w_i * T_i) / Σ(w_i)
476+
total_weights = np.sum(weights, axis=0)
477+
478+
with np.errstate(divide="ignore", invalid="ignore"):
479+
combined_transmission = np.sum(weights * run_transmissions, axis=0) / total_weights
480+
combined_uncertainties = 1.0 / np.sqrt(total_weights)
481+
482+
# Handle edge cases
483+
combined_transmission[total_weights == 0] = 0.001
484+
combined_uncertainties[total_weights == 0] = 0.1
485+
486+
# Log statistics
487+
avg_improvement = np.sqrt(len(sample_folders))
488+
actual_improvement = np.mean(run_uncertainties.mean(axis=0) / combined_uncertainties)
489+
logger.info(
490+
f"Combined transmission range: {combined_transmission.min():.3f} to {combined_transmission.max():.3f}"
491+
)
492+
logger.info(f"Expected uncertainty improvement: {avg_improvement:.2f}x")
493+
logger.info(f"Actual uncertainty improvement: {actual_improvement:.2f}x")
494+
495+
# Export combined result
496+
combined_data_dict = {
497+
"energy_eV": energy_array[::-1],
498+
"transmission": combined_transmission[::-1],
499+
"uncertainties": combined_uncertainties[::-1],
500+
}
501+
502+
# Generate combined output filename
503+
run_numbers = [Path(folder).name.split("_")[1] for folder in sample_folders]
504+
combined_output_file = Path(output_folder) / f"Combined_Runs_{'_'.join(run_numbers)}_transmission.txt"
505+
export_ascii(combined_data_dict, str(combined_output_file))
506+
507+
logger.info(f"Combined transmission data exported to: {combined_output_file}")
508+
logger.info(f"Summary: {len(sample_folders)} individual files + 1 combined file exported")
509+
411510

412511
if __name__ == "__main__":
413512
# Example usage

src/pleiades/processing/normalization_handler.py

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,11 @@ def combine_data(
312312

313313
logger.debug(f"2. {np.shape(_uncertainty) = }")
314314

315-
_uncertainty += 1 / ob_data
315+
# VENUS fix: avoid division by zero in open beam data
316+
with np.errstate(divide="ignore", invalid="ignore"):
317+
ob_reciprocal = 1 / ob_data
318+
ob_reciprocal[~np.isfinite(ob_reciprocal)] = 0 # Set inf/nan to 0
319+
_uncertainty += ob_reciprocal
316320
full_ob_data_corrected.append(ob_data)
317321
# uncertainties_ob_data_corrected.append(ob_data * np.sqrt(_uncertainty))
318322

@@ -469,7 +473,11 @@ def performing_normalization(
469473
median_roi_of_sample = np.median(_sample[y0:y1, x0:x1])
470474
coeff = median_roi_of_ob / median_roi_of_sample
471475

472-
normalized_sample[_index] = (_sample / _ob) * coeff
476+
# VENUS fix: avoid division by zero in transmission calculation
477+
with np.errstate(divide="ignore", invalid="ignore"):
478+
transmission = (_sample / _ob) * coeff
479+
transmission[~np.isfinite(transmission)] = 0 # Set inf/nan to 0
480+
normalized_sample[_index] = transmission
473481

474482
normalization_dict[MasterDictKeys.sample_data][sample_folder] = normalized_sample
475483

@@ -517,9 +525,47 @@ def get_counts_from_normalized_data(normalized_data: np.ndarray) -> Tuple[np.nda
517525
if normalized_data.ndim != 3:
518526
raise ValueError(f"Expected 3D array, got {normalized_data.ndim}D array")
519527

520-
# Assuming normalized_data is a 3D array with shape (num_images, height, width)
521-
counts_array = np.sum(normalized_data, axis=(1, 2))
528+
# VENUS FIX: Extract transmission values by averaging over spatial pixels
529+
# The old approach summed ~262k pixels per time bin, giving transmission values of ~355k
530+
# The correct approach averages transmission over detector area
522531

523-
uncertainties = np.sqrt(counts_array) # Assuming Poisson statistics for counts
532+
# Configuration constants
533+
MIN_TRANSMISSION_THRESHOLD = 0.01 # Below this likely indicates bad pixels/no sample
534+
MIN_VALID_PIXELS = 1000 # Minimum pixels required for robust statistics
535+
536+
# Calculate spatial average for each time bin using all pixels
537+
counts_array = np.nanmean(normalized_data, axis=(1, 2))
538+
539+
# For uncertainty calculation, identify potentially problematic pixels
540+
# but DO NOT modify the data - only use for uncertainty estimation
541+
valid_mask = normalized_data >= MIN_TRANSMISSION_THRESHOLD
542+
valid_pixel_count = np.sum(valid_mask, axis=(1, 2))
543+
544+
# Calculate uncertainties based on pixel-to-pixel variation (standard error)
545+
# Use all pixels for standard deviation calculation
546+
pixel_std = np.nanstd(normalized_data, axis=(1, 2))
547+
total_pixel_count = np.sum(np.isfinite(normalized_data), axis=(1, 2))
548+
uncertainties = pixel_std / np.sqrt(np.maximum(total_pixel_count, 1))
549+
550+
# Warn about potential data quality issues without modifying data
551+
outlier_count = np.sum(normalized_data > 2.0)
552+
low_transmission_count = np.sum((normalized_data > 0) & (normalized_data < MIN_TRANSMISSION_THRESHOLD))
553+
554+
if outlier_count > 0:
555+
logger.warning(f"Found {outlier_count} pixels with transmission > 2.0 (potential outliers)")
556+
557+
if low_transmission_count > normalized_data.size * 0.1: # More than 10% low transmission
558+
logger.warning(
559+
f"Found {low_transmission_count} pixels with very low transmission < {MIN_TRANSMISSION_THRESHOLD}"
560+
)
561+
562+
# Warn about time bins with insufficient valid pixels for uncertainty calculation
563+
problematic_bins = np.sum(valid_pixel_count < MIN_VALID_PIXELS)
564+
if problematic_bins > 0:
565+
logger.warning(f"{problematic_bins} time bins have fewer than {MIN_VALID_PIXELS} valid pixels")
566+
567+
# Replace NaN/inf values in final results only (preserve outliers)
568+
counts_array = np.nan_to_num(counts_array, nan=0.0, posinf=np.inf, neginf=0.0)
569+
uncertainties = np.nan_to_num(uncertainties, nan=0.1, posinf=0.1, neginf=0.1)
524570

525571
return (counts_array, uncertainties)

src/pleiades/sammy/backends/local.py

Lines changed: 73 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import textwrap
77
from datetime import datetime
88
from pathlib import Path
9-
from typing import List
9+
from typing import List, Union
1010
from uuid import uuid4
1111

1212
from pleiades.sammy.config import LocalSammyConfig
@@ -15,6 +15,7 @@
1515
SammyExecutionError,
1616
SammyExecutionResult,
1717
SammyFiles,
18+
SammyFilesMultiMode,
1819
SammyRunner,
1920
)
2021
from pleiades.utils.logger import loguru_logger
@@ -38,13 +39,18 @@ def __init__(self, config: LocalSammyConfig):
3839
self.config: LocalSammyConfig = config
3940
self._moved_files: List[Path] = []
4041

41-
def prepare_environment(self, files: SammyFiles) -> None:
42+
def prepare_environment(self, files: Union[SammyFiles, SammyFilesMultiMode]) -> None:
4243
"""Prepare environment for local SAMMY execution."""
4344
try:
4445
logger.debug("Validating input files")
4546
files.validate()
4647

47-
# Move files to working directory - copy input and parameter files, symlink data file
48+
# Additional validation for JSON mode
49+
if isinstance(files, SammyFilesMultiMode):
50+
logger.debug("Performing JSON-ENDF mapping validation")
51+
self._validate_json_endf_mapping(files)
52+
53+
# Move files to working directory
4854
logger.debug("Moving files to working directory")
4955
files.move_to_working_dir(self.config.working_dir)
5056

@@ -54,20 +60,77 @@ def prepare_environment(self, files: SammyFiles) -> None:
5460
except Exception as e:
5561
raise EnvironmentPreparationError(f"Environment preparation failed: {str(e)}")
5662

57-
def execute_sammy(self, files: SammyFiles) -> SammyExecutionResult:
63+
def _validate_json_endf_mapping(self, files: SammyFilesMultiMode) -> None:
64+
"""
65+
Validate that JSON configuration references existing ENDF files.
66+
67+
Args:
68+
files: SammyFilesMultiMode containing JSON config and ENDF directory
69+
70+
Raises:
71+
ValueError: If JSON references missing ENDF files
72+
"""
73+
import json
74+
75+
try:
76+
# Parse JSON to find referenced ENDF files
77+
with open(files.json_config_file, "r") as f:
78+
json_data = json.load(f)
79+
80+
# Find isotope entries (lists in JSON) - keys are ENDF filenames
81+
endf_files_referenced = []
82+
for key, value in json_data.items():
83+
if isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict):
84+
# Key is the ENDF filename (e.g., "079-Au-197.B-VIII.0.par")
85+
endf_files_referenced.append(key)
86+
87+
# Check each referenced ENDF file exists in ENDF directory
88+
missing_files = []
89+
for endf_filename in endf_files_referenced:
90+
endf_path = files.endf_directory / endf_filename
91+
if not endf_path.exists():
92+
missing_files.append(endf_filename)
93+
94+
if missing_files:
95+
raise ValueError(
96+
f"JSON references missing ENDF files: {missing_files}. "
97+
f"Expected in directory: {files.endf_directory}"
98+
)
99+
100+
logger.debug(f"JSON-ENDF validation passed: {len(endf_files_referenced)} ENDF files verified")
101+
102+
except json.JSONDecodeError as e:
103+
raise ValueError(f"Invalid JSON configuration file: {e}")
104+
105+
def execute_sammy(self, files: Union[SammyFiles, SammyFilesMultiMode]) -> SammyExecutionResult:
58106
"""Execute SAMMY using local installation."""
59107
execution_id = str(uuid4())
60108
start_time = datetime.now()
61109

62110
logger.info(f"Starting SAMMY execution {execution_id}")
63111
logger.debug(f"Working directory: {self.config.working_dir}")
64112

65-
sammy_command = textwrap.dedent(f"""\
66-
{self.config.sammy_executable} <<EOF
67-
{shlex.quote(files.input_file.name)}
68-
{shlex.quote(files.parameter_file.name)}
69-
{shlex.quote(files.data_file.name)}
70-
EOF""")
113+
# Generate command based on file type
114+
if isinstance(files, SammyFilesMultiMode):
115+
# JSON mode command format
116+
sammy_command = textwrap.dedent(f"""\
117+
{self.config.sammy_executable} <<EOF
118+
{shlex.quote(files.input_file.name)}
119+
#file {shlex.quote(files.json_config_file.name)}
120+
{shlex.quote(files.data_file.name)}
121+
122+
EOF""")
123+
logger.debug("Using JSON mode command format")
124+
else:
125+
# Traditional mode command format
126+
sammy_command = textwrap.dedent(f"""\
127+
{self.config.sammy_executable} <<EOF
128+
{shlex.quote(files.input_file.name)}
129+
{shlex.quote(files.parameter_file.name)}
130+
{shlex.quote(files.data_file.name)}
131+
132+
EOF""")
133+
logger.debug("Using traditional mode command format")
71134

72135
try:
73136
process = subprocess.run(

src/pleiades/sammy/data/options.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -264,16 +264,16 @@ def energy(self):
264264

265265
@property
266266
def experimental_cross_section(self):
267-
return self.data.get("Experimental cross section")
267+
return self.data.get("Experimental cross section (barns)")
268268

269269
@property
270270
def theoretical_cross_section(self):
271-
return self.data.get("Final theoretical cross section")
271+
return self.data.get("Final theoretical cross section as evaluated by SAMMY (barns)")
272272

273273
@property
274274
def experimental_transmission(self):
275-
return self.data.get("Experimental transmission")
275+
return self.data.get("Experimental transmission (dimensionless)")
276276

277277
@property
278278
def theoretical_transmission(self):
279-
return self.data.get("Final theoretical transmission")
279+
return self.data.get("Final theoretical transmission as evaluated by SAMMY (dimensionless)")

src/pleiades/sammy/factory.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,16 @@ def create_runner(
145145

146146
# Check backend availability
147147
available_backends = cls.list_available_backends()
148-
if not available_backends[backend]:
148+
149+
# For local backend, also check if explicit sammy_executable was provided
150+
if backend == BackendType.LOCAL and not available_backends[backend]:
151+
explicit_sammy = kwargs.get("sammy_executable")
152+
if explicit_sammy and Path(explicit_sammy).exists():
153+
# Explicit executable provided and exists - allow local backend
154+
pass
155+
else:
156+
raise BackendNotAvailableError(f"Backend {backend.value} is not available")
157+
elif not available_backends[backend]:
149158
raise BackendNotAvailableError(f"Backend {backend.value} is not available")
150159

151160
# Set default output directory if not specified

0 commit comments

Comments
 (0)