Skip to content

Commit 77a3672

Browse files
author
Kyle Beyer
committed
just parse all errors and handle semantics after parsing
1 parent a096e9a commit 77a3672

File tree

1 file changed

+130
-77
lines changed

1 file changed

+130
-77
lines changed

src/exfor_tools/exfor_tools.py

Lines changed: 130 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,8 @@ def categorize_measurements_by_energy(all_entries, min_num_pts=5, Einc_tol=0.1):
286286

287287

288288
def parse_differential_data(
289-
data_set, data_error_columns=["DATA-ERR"], err_treatment="independent"
289+
data_set,
290+
data_error_columns=["DATA-ERR"],
290291
):
291292
r"""
292293
Extract differential cross section (potentially as ratio to Rutherford)
@@ -307,7 +308,7 @@ def parse_differential_data(
307308
xs = np.array(data_column[2:], dtype=np.float64)
308309

309310
# parse errors
310-
err_col_match = []
311+
xs_err = []
311312
for label in data_error_columns:
312313

313314
# parse error column
@@ -337,29 +338,13 @@ def parse_differential_data(
337338
f"{err_units} for data column {data_column[0]} with units {xs_units}"
338339
)
339340

340-
err_col_match.append(err_data)
341-
342-
if not err_col_match:
343-
xs_err = np.zeros_like(xs)
344-
elif err_treatment == "independent":
345-
# sum errors in quadrature
346-
xs_err = np.sqrt(np.sum(np.array(err_col_match) ** 2, axis=0))
347-
elif err_treatment == "cumulative":
348-
# add errors
349-
xs_err = np.sum(np.array(err_col_match), axis=0)
350-
elif err_treatment == "difference":
351-
# subtract second error column from first
352-
if len(err_col_match) > 2:
353-
raise ValueError(
354-
f"Cannot only take difference of 2 error columns, but {len(err_col_match)} were found!"
355-
)
356-
xs_err = err_col_match[0] - err_col_match[1]
341+
xs_err.append(err_data)
357342

358343
return xs, xs_err, xs_units
359344

360345

361-
# TODO handle Q-value and level number
362346
def parse_ex_energy(data_set):
347+
# TODO handle Q-value and level number
363348
Ex = reduce(condenseColumn, [c.getValue(data_set) for c in energyExParserList])
364349

365350
missing_Ex = np.all([a is None for a in Ex[2:]])
@@ -447,7 +432,6 @@ def parse_angular_distribution(
447432
subentry,
448433
data_set,
449434
data_error_columns=None,
450-
err_treatment="independent",
451435
vocal=False,
452436
):
453437
r"""
@@ -475,13 +459,15 @@ def parse_angular_distribution(
475459

476460
# parse diff xs
477461
xs, xs_err, xs_units = parse_differential_data(
478-
data_set, data_error_columns=data_error_columns, err_treatment=err_treatment
462+
data_set, data_error_columns=data_error_columns
479463
)
480464
except Exception as e:
481465
new_exception = type(e)(f"Error while parsing {subentry}: {e}")
482466
raise new_exception from e
483467

484468
N = data_set.numrows()
469+
n_err_cols = len(xs_err)
470+
data_err = np.zeros((n_err_cols, N))
485471
data = np.zeros((8, N))
486472

487473
data[:, :] = [
@@ -492,10 +478,16 @@ def parse_angular_distribution(
492478
angle,
493479
np.nan_to_num(angle_err),
494480
xs,
495-
np.nan_to_num(xs_err),
496481
]
497482

498-
return data, (angle_units, Einc_units, Ex_units, xs_units)
483+
data_err[:, :] = (np.nan_to_num(xs_err),)
484+
485+
return (
486+
data,
487+
data_err,
488+
data_error_columns,
489+
(angle_units, Einc_units, Ex_units, xs_units),
490+
)
499491

500492

501493
def attempt_parse_subentry(
@@ -505,8 +497,6 @@ def attempt_parse_subentry(
505497
Ex_range=(0, np.inf),
506498
elastic_only=False,
507499
vocal=True,
508-
err_labels=None,
509-
err_treatment=None,
510500
):
511501
failed_parses = {}
512502
measurements = []
@@ -518,8 +508,6 @@ def attempt_parse_subentry(
518508
Ex_range=Ex_range,
519509
elastic_only=elastic_only,
520510
vocal=vocal,
521-
err_labels=err_labels,
522-
err_treatment=err_treatment,
523511
)
524512
except Exception as e:
525513
print(f"Failed to parse subentry {subentry}:\n\t{e}")
@@ -535,8 +523,6 @@ def get_measurements_from_subentry(
535523
Ex_range=(0, np.inf),
536524
elastic_only=False,
537525
vocal=False,
538-
err_labels=None,
539-
err_treatment=None,
540526
):
541527
r"""unrolls subentry into individual arrays for each energy"""
542528

@@ -551,65 +537,52 @@ def get_measurements_from_subentry(
551537
):
552538
return []
553539

554-
if err_labels is None:
555-
lbl_frags_to_skip = ["ANG", "EN", "E-LVL", "E-EXC"]
556-
err_labels = [
557-
label
558-
for label in data_set.labels
559-
if "ERR" in label
560-
and np.all([frag not in label for frag in lbl_frags_to_skip])
561-
]
562-
563-
err_labels_set = set(err_labels)
564-
systematic_and_statistical_labels = set(["ERR-S", "ERR-SYS"])
565-
data_and_systematic_labels = set(["DATA-ERR", "ERR-SYS"])
566-
567-
if err_labels == []:
568-
err_treatment = "independent"
569-
elif err_labels == ["DATA-ERR"]:
570-
err_treatment = "independent"
571-
elif err_labels == ["ERR-DIG"]:
572-
err_treatment = "independent"
573-
elif err_labels == ["ERR-T"]:
574-
err_treatment = "independent"
575-
elif err_labels == ["ERR-S"]:
576-
err_treatment = "independent"
577-
elif err_labels_set == systematic_and_statistical_labels:
578-
err_treatment = "independent"
579-
elif err_labels_set == data_and_systematic_labels:
580-
err_treatment = "independent"
581-
else:
582-
raise NotImplementedError(
583-
f"Subentry {subentry} has an ambiguous set of error labels:\n\t\t"
584-
+ "".join([f"{l}, " for l in err_labels])
585-
)
586-
else:
587-
assert err_treatment is not None
540+
lbl_frags_to_skip = ["ANG", "EN", "E-LVL", "E-EXC"]
541+
err_labels = [
542+
label
543+
for label in data_set.labels
544+
if "ERR" in label and np.all([frag not in label for frag in lbl_frags_to_skip])
545+
]
588546

589-
data, units = parse_angular_distribution(
547+
data, data_err, error_columns, units = parse_angular_distribution(
590548
subentry,
591549
data_set,
592550
data_error_columns=err_labels,
593-
err_treatment=err_treatment,
594551
vocal=vocal,
595552
)
596553

597554
measurements = sort_subentry_data_by_energy(
598-
subentry, data, Einc_range, Ex_range, elastic_only, units
555+
subentry,
556+
data,
557+
data_err,
558+
error_columns,
559+
Einc_range,
560+
Ex_range,
561+
elastic_only,
562+
units,
599563
)
600564
return measurements
601565

602566

603567
def sort_subentry_data_by_energy(
604-
subentry, data, Einc_range, Ex_range, elastic_only, units
568+
subentry,
569+
data,
570+
data_err,
571+
error_columns,
572+
Einc_range,
573+
Ex_range,
574+
elastic_only,
575+
units,
605576
):
606577
angle_units, Einc_units, Ex_units, xs_units = units
607578
Einc_mask = np.logical_and(data[0, :] >= Einc_range[0], data[0, :] <= Einc_range[1])
608579
data = data[:, Einc_mask]
580+
data_err = data[:, Einc_mask]
609581

610582
if not elastic_only:
611583
Ex_mask = np.logical_and(data[2, :] >= Ex_range[0], data[2, :] <= Ex_range[1])
612584
data = data[:, Ex_mask]
585+
data_err = data_err[:, Ex_mask]
613586

614587
# AngularDistribution objects sorted by incident energy, then excitation energy
615588
# or just incident enrgy if elastic_only is True
@@ -627,7 +600,11 @@ def sort_subentry_data_by_energy(
627600
measurements.append(
628601
AngularDistribution(
629602
subentry,
630-
data[4:, mask],
603+
data[4, mask],
604+
data[5, mask],
605+
data[6, mask],
606+
[data_err[i, mask] for i in range(data_err.shape[0])],
607+
error_columns,
631608
Einc,
632609
Einc_err,
633610
Einc_units,
@@ -640,6 +617,7 @@ def sort_subentry_data_by_energy(
640617
)
641618
else:
642619
subset = data[2:, mask]
620+
subset_err = data_err[:, mask]
643621

644622
# find set of unique residual excitation energies
645623
unique_Ex = np.unique(subset[0, :])
@@ -651,7 +629,11 @@ def sort_subentry_data_by_energy(
651629
measurements.append(
652630
AngularDistribution(
653631
subentry,
654-
subset[2:, mask],
632+
subset[2, mask],
633+
subset[3, mask],
634+
subset[4, mask],
635+
[subset_err[i, mask] for i in range(data_err.shape[0])],
636+
error_columns,
655637
Einc,
656638
Einc_err,
657639
Einc_units,
@@ -666,12 +648,17 @@ def sort_subentry_data_by_energy(
666648

667649

668650
class AngularDistribution:
669-
r"""for a given incident and residual excitation energy stores angular distribution with x and y errors. x is angle in degrees. data is [x, x_err, y, y_err]. All energies in MeV."""
651+
r"""for a given incident and residual excitation energy stores angular
652+
distribution with x and y errors. x is angle in degrees."""
670653

671654
def __init__(
672655
self,
673656
subentry: str,
674-
data: np.array,
657+
x: np.ndarray,
658+
x_err: np.ndarray,
659+
y: np.ndarray,
660+
y_errs: list,
661+
y_err_labels: list,
675662
Einc: float,
676663
Einc_err: float,
677664
Einc_units: str,
@@ -682,7 +669,6 @@ def __init__(
682669
y_units: str,
683670
):
684671
self.subentry = subentry
685-
self.data = data[:, data[0, :].argsort()]
686672
self.Einc = Einc
687673
self.Einc_err = Einc_err
688674
self.Einc_units = Einc_units
@@ -692,10 +678,13 @@ def __init__(
692678
self.x_units = x_units
693679
self.y_units = y_units
694680

695-
self.x = self.data[0, :]
696-
self.y = self.data[2, :]
697-
self.x_err = self.data[1, :]
698-
self.y_err = self.data[3, :]
681+
sort_by_angle = x.argsort()
682+
self.x = x[sort_by_angle]
683+
self.x_err = x_err[sort_by_angle]
684+
self.y = y[sort_by_angle]
685+
self.y_errs = [y_err[sort_by_angle] for y_err in y_errs]
686+
self.y_err_labels = y_err_labels
687+
self.rows = self.x.shape[0]
699688

700689
assert (
701690
np.all(self.x[1:] - self.x[:-1] >= 0)
@@ -704,6 +693,70 @@ def __init__(
704693
)
705694

706695

696+
class AngularDistributionStdErr(AngularDistribution):
697+
r"""Specific case of AngularDistribution with one statistical and one systematic uncertainty"""
698+
699+
def __init__(
700+
self,
701+
*args,
702+
statistical_err_labels=["DATA-ERR"],
703+
statistical_err_treatment="independent",
704+
systematic_err_labels=["ERR-SYS"],
705+
systematic_err_treatment="independent",
706+
):
707+
self.super().__init__(*args)
708+
709+
self.systematic_err = np.zeros(
710+
(len(statistical_err_labels),),
711+
dtype=np.float64,
712+
)
713+
self.statistical_err = np.zeros(
714+
(
715+
len(statistical_err_labels),
716+
self.rows,
717+
),
718+
dtype=np.float64,
719+
)
720+
for i, l in enumerate(statistical_err_labels):
721+
if l not in self.y_err_labels:
722+
raise ValueError(
723+
f"Did not find error column label {l} in subentry {self.subentry}"
724+
)
725+
else:
726+
j = self.y_err_labels.index(l)
727+
self.statistical_err[i, :] = self.y_errs[j]
728+
729+
if statistical_err_treatment == "independent":
730+
self.statistical_err = np.sqrt(np.sum(self.statistical_err**2, axis=0))
731+
elif statistical_err_treatment == "difference":
732+
self.statistical_err = np.diff(self.statistical_err, axis=0)
733+
else:
734+
raise ValueError(
735+
f"Unknown statistical_err_treatment option: {statistical_err_treatment}"
736+
)
737+
738+
for i, l in enumerate(systematic_err_labels):
739+
if l not in self.y_err_labels:
740+
raise ValueError(
741+
f"Did not find error column label {l} in subentry {self.subentry}"
742+
)
743+
else:
744+
j = self.y_err_labels.index(l)
745+
err = self.y_errs[j]
746+
if not np.all(err == err[0]):
747+
raise ValueError(
748+
f"Systematic error must be the same for all data points, but was not for column {l}"
749+
)
750+
self.systematic_err[i] = err
751+
752+
if systematic_err_treatment == "independent":
753+
self.statistical_err = np.sqrt(np.sum(self.statistical_err**2, axis=0))
754+
else:
755+
raise ValueError(
756+
f"Unknown systematic_err_treatment option: {systematic_err_treatment}"
757+
)
758+
759+
707760
def get_symbol(A, Z, Ex=None):
708761
if (A, Z) == (1, 0):
709762
return "n"

0 commit comments

Comments
 (0)