Skip to content

Commit 6d9cf95

Browse files
Complete end-to-end integration for VENUS (#135)
* Fix division by zero errors and improve transmission calculation in normalization workflow * Enhance normalization export functionality to handle multiple sample runs and calculate combined transmission results * fix incorrect data clipping * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add twenty formatter * add new json manager for multi-fit route * update json manager and expand test coverage * fix endf and json pairing logic * update runner to support multi mode * add multi-isotope configuration support in FitOptions and InpManager * add multi-isotope INP generation and number density calculation functions * add plot file options and enhance physical constants generation in InpManager * add isotope section generation and update tests for multi-isotope handling * add integration notebook * add end-to-end notebook * fix lpt parser bugs sneaked in previous PR * enhance isotope section generation to use Card Set 2 format and update tests accordingly * fix missing new line issue * adjust default alphanumerics * refactor FitOptions to use Optional types for several attributes to improve flexibility --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent b354b38 commit 6d9cf95

File tree

22 files changed

+3116
-157
lines changed

22 files changed

+3116
-157
lines changed

examples/Notebooks/pleiades_venus_demo.ipynb

Lines changed: 944 additions & 0 deletions
Large diffs are not rendered by default.

src/pleiades/processing/normalization.py

Lines changed: 101 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
from pathlib import Path
3434
from typing import Any, Dict, List, Optional, Union
3535

36+
import numpy as np
37+
3638
from pleiades.processing import DataType, Facility, MasterDictKeys, NormalizationStatus, Roi
3739
from pleiades.processing.normalization_handler import (
3840
combine_data,
@@ -375,8 +377,12 @@ def normalization(
375377

376378
# Format and export results
377379
if output_folder:
378-
for folder in normalization_dict[MasterDictKeys.sample_data].keys():
379-
logger.info(f"Exporting data for folder: {folder}")
380+
sample_folders = list(normalization_dict[MasterDictKeys.sample_data].keys())
381+
382+
if len(sample_folders) == 1:
383+
# Single run - use original logic
384+
folder = sample_folders[0]
385+
logger.info(f"Exporting data for single folder: {folder}")
380386

381387
# Get time spectra for energy conversion
382388
spectra_array = sample_master_dict[MasterDictKeys.list_folders][folder][MasterDictKeys.list_spectra]
@@ -408,6 +414,99 @@ def normalization(
408414
output_file_name = Path(output_folder) / f"{Path(folder).name}_transmission.txt"
409415
export_ascii(data_dict, str(output_file_name))
410416

417+
else:
418+
# Multiple runs - export individual runs AND combined result
419+
logger.info(f"Processing {len(sample_folders)} sample runs individually and combining")
420+
421+
# Get energy array from first folder (all should be the same)
422+
first_folder = sample_folders[0]
423+
spectra_array = sample_master_dict[MasterDictKeys.list_folders][first_folder][MasterDictKeys.list_spectra]
424+
425+
# Convert time-of-flight to energy
426+
energy_array = convert_array_from_time_to_energy(
427+
spectra_array,
428+
time_unit=TimeUnitOptions.s,
429+
distance_source_detector=distance_source_detector_m,
430+
distance_source_detector_unit=DistanceUnitOptions.m,
431+
detector_offset=detector_offset_micros,
432+
detector_offset_unit=TimeUnitOptions.us,
433+
energy_unit=EnergyUnitOptions.eV,
434+
)
435+
436+
# Step 1: Export individual runs
437+
run_transmissions = []
438+
run_uncertainties = []
439+
440+
for folder in sample_folders:
441+
logger.info(f"Processing and exporting individual run: {Path(folder).name}")
442+
443+
# Extract transmission and uncertainties for this run
444+
counts_array, uncertainties = get_counts_from_normalized_data(
445+
normalization_dict[MasterDictKeys.sample_data][folder]
446+
)
447+
448+
# Export individual run
449+
individual_data_dict = {
450+
"energy_eV": energy_array[::-1],
451+
"transmission": counts_array[::-1],
452+
"uncertainties": uncertainties[::-1],
453+
}
454+
455+
individual_output_file = Path(output_folder) / f"{Path(folder).name}_transmission.txt"
456+
export_ascii(individual_data_dict, str(individual_output_file))
457+
logger.info(f" Individual run exported to: {individual_output_file}")
458+
459+
# Collect for combination
460+
run_transmissions.append(counts_array)
461+
run_uncertainties.append(uncertainties)
462+
463+
# Step 2: Create combined result
464+
logger.info("Creating weighted combination of all runs...")
465+
466+
# Convert to numpy arrays for easier calculation
467+
run_transmissions = np.array(run_transmissions) # Shape: (n_runs, n_energy_bins)
468+
run_uncertainties = np.array(run_uncertainties)
469+
470+
# Calculate weights: w_i = 1/σ_i²
471+
with np.errstate(divide="ignore", invalid="ignore"):
472+
weights = 1.0 / (run_uncertainties**2)
473+
weights[~np.isfinite(weights)] = 0 # Set invalid weights to 0
474+
475+
# Weighted average: T_combined = Σ(w_i * T_i) / Σ(w_i)
476+
total_weights = np.sum(weights, axis=0)
477+
478+
with np.errstate(divide="ignore", invalid="ignore"):
479+
combined_transmission = np.sum(weights * run_transmissions, axis=0) / total_weights
480+
combined_uncertainties = 1.0 / np.sqrt(total_weights)
481+
482+
# Handle edge cases
483+
combined_transmission[total_weights == 0] = 0.001
484+
combined_uncertainties[total_weights == 0] = 0.1
485+
486+
# Log statistics
487+
avg_improvement = np.sqrt(len(sample_folders))
488+
actual_improvement = np.mean(run_uncertainties.mean(axis=0) / combined_uncertainties)
489+
logger.info(
490+
f"Combined transmission range: {combined_transmission.min():.3f} to {combined_transmission.max():.3f}"
491+
)
492+
logger.info(f"Expected uncertainty improvement: {avg_improvement:.2f}x")
493+
logger.info(f"Actual uncertainty improvement: {actual_improvement:.2f}x")
494+
495+
# Export combined result
496+
combined_data_dict = {
497+
"energy_eV": energy_array[::-1],
498+
"transmission": combined_transmission[::-1],
499+
"uncertainties": combined_uncertainties[::-1],
500+
}
501+
502+
# Generate combined output filename
503+
run_numbers = [Path(folder).name.split("_")[1] for folder in sample_folders]
504+
combined_output_file = Path(output_folder) / f"Combined_Runs_{'_'.join(run_numbers)}_transmission.txt"
505+
export_ascii(combined_data_dict, str(combined_output_file))
506+
507+
logger.info(f"Combined transmission data exported to: {combined_output_file}")
508+
logger.info(f"Summary: {len(sample_folders)} individual files + 1 combined file exported")
509+
411510

412511
if __name__ == "__main__":
413512
# Example usage

src/pleiades/processing/normalization_handler.py

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,11 @@ def combine_data(
312312

313313
logger.debug(f"2. {np.shape(_uncertainty) = }")
314314

315-
_uncertainty += 1 / ob_data
315+
# VENUS fix: avoid division by zero in open beam data
316+
with np.errstate(divide="ignore", invalid="ignore"):
317+
ob_reciprocal = 1 / ob_data
318+
ob_reciprocal[~np.isfinite(ob_reciprocal)] = 0 # Set inf/nan to 0
319+
_uncertainty += ob_reciprocal
316320
full_ob_data_corrected.append(ob_data)
317321
# uncertainties_ob_data_corrected.append(ob_data * np.sqrt(_uncertainty))
318322

@@ -469,7 +473,11 @@ def performing_normalization(
469473
median_roi_of_sample = np.median(_sample[y0:y1, x0:x1])
470474
coeff = median_roi_of_ob / median_roi_of_sample
471475

472-
normalized_sample[_index] = (_sample / _ob) * coeff
476+
# VENUS fix: avoid division by zero in transmission calculation
477+
with np.errstate(divide="ignore", invalid="ignore"):
478+
transmission = (_sample / _ob) * coeff
479+
transmission[~np.isfinite(transmission)] = 0 # Set inf/nan to 0
480+
normalized_sample[_index] = transmission
473481

474482
normalization_dict[MasterDictKeys.sample_data][sample_folder] = normalized_sample
475483

@@ -517,9 +525,47 @@ def get_counts_from_normalized_data(normalized_data: np.ndarray) -> Tuple[np.nda
517525
if normalized_data.ndim != 3:
518526
raise ValueError(f"Expected 3D array, got {normalized_data.ndim}D array")
519527

520-
# Assuming normalized_data is a 3D array with shape (num_images, height, width)
521-
counts_array = np.sum(normalized_data, axis=(1, 2))
528+
# VENUS FIX: Extract transmission values by averaging over spatial pixels
529+
# The old approach summed ~262k pixels per time bin, giving transmission values of ~355k
530+
# The correct approach averages transmission over detector area
522531

523-
uncertainties = np.sqrt(counts_array) # Assuming Poisson statistics for counts
532+
# Configuration constants
533+
MIN_TRANSMISSION_THRESHOLD = 0.01 # Below this likely indicates bad pixels/no sample
534+
MIN_VALID_PIXELS = 1000 # Minimum pixels required for robust statistics
535+
536+
# Calculate spatial average for each time bin using all pixels
537+
counts_array = np.nanmean(normalized_data, axis=(1, 2))
538+
539+
# For uncertainty calculation, identify potentially problematic pixels
540+
# but DO NOT modify the data - only use for uncertainty estimation
541+
valid_mask = normalized_data >= MIN_TRANSMISSION_THRESHOLD
542+
valid_pixel_count = np.sum(valid_mask, axis=(1, 2))
543+
544+
# Calculate uncertainties based on pixel-to-pixel variation (standard error)
545+
# Use all pixels for standard deviation calculation
546+
pixel_std = np.nanstd(normalized_data, axis=(1, 2))
547+
total_pixel_count = np.sum(np.isfinite(normalized_data), axis=(1, 2))
548+
uncertainties = pixel_std / np.sqrt(np.maximum(total_pixel_count, 1))
549+
550+
# Warn about potential data quality issues without modifying data
551+
outlier_count = np.sum(normalized_data > 2.0)
552+
low_transmission_count = np.sum((normalized_data > 0) & (normalized_data < MIN_TRANSMISSION_THRESHOLD))
553+
554+
if outlier_count > 0:
555+
logger.warning(f"Found {outlier_count} pixels with transmission > 2.0 (potential outliers)")
556+
557+
if low_transmission_count > normalized_data.size * 0.1: # More than 10% low transmission
558+
logger.warning(
559+
f"Found {low_transmission_count} pixels with very low transmission < {MIN_TRANSMISSION_THRESHOLD}"
560+
)
561+
562+
# Warn about time bins with insufficient valid pixels for uncertainty calculation
563+
problematic_bins = np.sum(valid_pixel_count < MIN_VALID_PIXELS)
564+
if problematic_bins > 0:
565+
logger.warning(f"{problematic_bins} time bins have fewer than {MIN_VALID_PIXELS} valid pixels")
566+
567+
# Replace NaN/inf values in final results only (preserve outliers)
568+
counts_array = np.nan_to_num(counts_array, nan=0.0, posinf=np.inf, neginf=0.0)
569+
uncertainties = np.nan_to_num(uncertainties, nan=0.1, posinf=0.1, neginf=0.1)
524570

525571
return (counts_array, uncertainties)

src/pleiades/sammy/backends/local.py

Lines changed: 73 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import textwrap
77
from datetime import datetime
88
from pathlib import Path
9-
from typing import List
9+
from typing import List, Union
1010
from uuid import uuid4
1111

1212
from pleiades.sammy.config import LocalSammyConfig
@@ -15,6 +15,7 @@
1515
SammyExecutionError,
1616
SammyExecutionResult,
1717
SammyFiles,
18+
SammyFilesMultiMode,
1819
SammyRunner,
1920
)
2021
from pleiades.utils.logger import loguru_logger
@@ -38,13 +39,18 @@ def __init__(self, config: LocalSammyConfig):
3839
self.config: LocalSammyConfig = config
3940
self._moved_files: List[Path] = []
4041

41-
def prepare_environment(self, files: SammyFiles) -> None:
42+
def prepare_environment(self, files: Union[SammyFiles, SammyFilesMultiMode]) -> None:
4243
"""Prepare environment for local SAMMY execution."""
4344
try:
4445
logger.debug("Validating input files")
4546
files.validate()
4647

47-
# Move files to working directory - copy input and parameter files, symlink data file
48+
# Additional validation for JSON mode
49+
if isinstance(files, SammyFilesMultiMode):
50+
logger.debug("Performing JSON-ENDF mapping validation")
51+
self._validate_json_endf_mapping(files)
52+
53+
# Move files to working directory
4854
logger.debug("Moving files to working directory")
4955
files.move_to_working_dir(self.config.working_dir)
5056

@@ -54,20 +60,77 @@ def prepare_environment(self, files: SammyFiles) -> None:
5460
except Exception as e:
5561
raise EnvironmentPreparationError(f"Environment preparation failed: {str(e)}")
5662

57-
def execute_sammy(self, files: SammyFiles) -> SammyExecutionResult:
63+
def _validate_json_endf_mapping(self, files: SammyFilesMultiMode) -> None:
64+
"""
65+
Validate that JSON configuration references existing ENDF files.
66+
67+
Args:
68+
files: SammyFilesMultiMode containing JSON config and ENDF directory
69+
70+
Raises:
71+
ValueError: If JSON references missing ENDF files
72+
"""
73+
import json
74+
75+
try:
76+
# Parse JSON to find referenced ENDF files
77+
with open(files.json_config_file, "r") as f:
78+
json_data = json.load(f)
79+
80+
# Find isotope entries (lists in JSON) - keys are ENDF filenames
81+
endf_files_referenced = []
82+
for key, value in json_data.items():
83+
if isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict):
84+
# Key is the ENDF filename (e.g., "079-Au-197.B-VIII.0.par")
85+
endf_files_referenced.append(key)
86+
87+
# Check each referenced ENDF file exists in ENDF directory
88+
missing_files = []
89+
for endf_filename in endf_files_referenced:
90+
endf_path = files.endf_directory / endf_filename
91+
if not endf_path.exists():
92+
missing_files.append(endf_filename)
93+
94+
if missing_files:
95+
raise ValueError(
96+
f"JSON references missing ENDF files: {missing_files}. "
97+
f"Expected in directory: {files.endf_directory}"
98+
)
99+
100+
logger.debug(f"JSON-ENDF validation passed: {len(endf_files_referenced)} ENDF files verified")
101+
102+
except json.JSONDecodeError as e:
103+
raise ValueError(f"Invalid JSON configuration file: {e}")
104+
105+
def execute_sammy(self, files: Union[SammyFiles, SammyFilesMultiMode]) -> SammyExecutionResult:
58106
"""Execute SAMMY using local installation."""
59107
execution_id = str(uuid4())
60108
start_time = datetime.now()
61109

62110
logger.info(f"Starting SAMMY execution {execution_id}")
63111
logger.debug(f"Working directory: {self.config.working_dir}")
64112

65-
sammy_command = textwrap.dedent(f"""\
66-
{self.config.sammy_executable} <<EOF
67-
{shlex.quote(files.input_file.name)}
68-
{shlex.quote(files.parameter_file.name)}
69-
{shlex.quote(files.data_file.name)}
70-
EOF""")
113+
# Generate command based on file type
114+
if isinstance(files, SammyFilesMultiMode):
115+
# JSON mode command format
116+
sammy_command = textwrap.dedent(f"""\
117+
{self.config.sammy_executable} <<EOF
118+
{shlex.quote(files.input_file.name)}
119+
#file {shlex.quote(files.json_config_file.name)}
120+
{shlex.quote(files.data_file.name)}
121+
122+
EOF""")
123+
logger.debug("Using JSON mode command format")
124+
else:
125+
# Traditional mode command format
126+
sammy_command = textwrap.dedent(f"""\
127+
{self.config.sammy_executable} <<EOF
128+
{shlex.quote(files.input_file.name)}
129+
{shlex.quote(files.parameter_file.name)}
130+
{shlex.quote(files.data_file.name)}
131+
132+
EOF""")
133+
logger.debug("Using traditional mode command format")
71134

72135
try:
73136
process = subprocess.run(

src/pleiades/sammy/data/options.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -264,16 +264,16 @@ def energy(self):
264264

265265
@property
266266
def experimental_cross_section(self):
267-
return self.data.get("Experimental cross section")
267+
return self.data.get("Experimental cross section (barns)")
268268

269269
@property
270270
def theoretical_cross_section(self):
271-
return self.data.get("Final theoretical cross section")
271+
return self.data.get("Final theoretical cross section as evaluated by SAMMY (barns)")
272272

273273
@property
274274
def experimental_transmission(self):
275-
return self.data.get("Experimental transmission")
275+
return self.data.get("Experimental transmission (dimensionless)")
276276

277277
@property
278278
def theoretical_transmission(self):
279-
return self.data.get("Final theoretical transmission")
279+
return self.data.get("Final theoretical transmission as evaluated by SAMMY (dimensionless)")

src/pleiades/sammy/factory.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,16 @@ def create_runner(
145145

146146
# Check backend availability
147147
available_backends = cls.list_available_backends()
148-
if not available_backends[backend]:
148+
149+
# For local backend, also check if explicit sammy_executable was provided
150+
if backend == BackendType.LOCAL and not available_backends[backend]:
151+
explicit_sammy = kwargs.get("sammy_executable")
152+
if explicit_sammy and Path(explicit_sammy).exists():
153+
# Explicit executable provided and exists - allow local backend
154+
pass
155+
else:
156+
raise BackendNotAvailableError(f"Backend {backend.value} is not available")
157+
elif not available_backends[backend]:
149158
raise BackendNotAvailableError(f"Backend {backend.value} is not available")
150159

151160
# Set default output directory if not specified

0 commit comments

Comments
 (0)