Skip to content

Commit 27dbfc4

Browse files
authored
Add img-angres as feature (#51)
* Add img-angres as feature * add multiplicity cut
1 parent a2f1bac commit 27dbfc4

File tree

9 files changed

+98
-18
lines changed

9 files changed

+98
-18
lines changed

docs/changes/51.feature.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Improve stereo reconstruction by adding the geometrical feature img2_ang.
2+
Change clipping min for size to '1' (applicable for small images in SSTs).
3+
Add preview_rows as command line parameter to allow flexible printout for debugging.

src/eventdisplay_ml/config.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,21 @@ def configure_training(analysis_type):
9191
help="Maximum number of telescopes to keep per mirror area type (for feature reduction).",
9292
default=None,
9393
)
94+
parser.add_argument(
95+
"--preview_rows",
96+
type=int,
97+
help=(
98+
"Number of events to include in the sorted telescope preview log (set to 0 to disable)."
99+
),
100+
default=20,
101+
)
102+
if analysis_type == "stereo_analysis":
103+
parser.add_argument(
104+
"--min_images",
105+
type=int,
106+
help="Minimum number of images (DispNImages) for quality cut (default: 2).",
107+
default=2,
108+
)
94109

95110
model_configs = vars(parser.parse_args())
96111

@@ -102,8 +117,11 @@ def configure_training(analysis_type):
102117
_logger.info(f"Random state: {model_configs['random_state']}")
103118
_logger.info(f"Max events: {model_configs['max_events']}")
104119
_logger.info(f"Max CPU cores: {model_configs['max_cores']}")
120+
_logger.info(f"Preview rows: {model_configs['preview_rows']}")
105121
if model_configs.get("max_tel_per_type") is not None:
106122
_logger.info(f"Max telescopes per mirror area type: {model_configs['max_tel_per_type']}")
123+
if analysis_type == "stereo_analysis":
124+
_logger.info(f"Minimum images (DispNImages): {model_configs.get('min_images')}")
107125

108126
model_configs["models"] = hyper_parameters(
109127
analysis_type, model_configs.get("hyperparameter_config")
@@ -112,7 +130,9 @@ def configure_training(analysis_type):
112130
model_configs["targets"] = target_features(analysis_type)
113131

114132
if analysis_type == "stereo_analysis":
115-
model_configs["pre_cuts"] = pre_cuts_regression(model_configs.get("n_tel"))
133+
model_configs["pre_cuts"] = pre_cuts_regression(
134+
model_configs.get("n_tel"), min_images=model_configs.get("min_images", 2)
135+
)
116136
elif analysis_type == "classification":
117137
_logger.info(f"Energy bin {model_configs['energy_bin_number']}")
118138
model_parameters = utils.load_model_parameters(
@@ -193,6 +213,14 @@ def configure_apply(analysis_type):
193213
help="Observatory/site name for geomagnetic field (default: VERITAS).",
194214
default="VERITAS",
195215
)
216+
parser.add_argument(
217+
"--preview_rows",
218+
type=int,
219+
help=(
220+
"Number of events to include in the sorted telescope preview log (set to 0 to disable)."
221+
),
222+
default=20,
223+
)
196224

197225
model_configs = vars(parser.parse_args())
198226

@@ -204,6 +232,7 @@ def configure_apply(analysis_type):
204232
_logger.info(f"Image selection: {model_configs.get('image_selection')}")
205233
_logger.info(f"Max events: {model_configs.get('max_events')}")
206234
_logger.info(f"Max cores: {model_configs.get('max_cores')}")
235+
_logger.info(f"Preview rows: {model_configs['preview_rows']}")
207236

208237
model_configs["models"], par = load_models(
209238
analysis_type, model_configs["model_prefix"], model_configs["model_name"]

src/eventdisplay_ml/data_processing.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ def flatten_telescope_data_vectorized(
426426
tel_config=None,
427427
observatory="veritas",
428428
max_tel_per_type=None,
429+
preview_rows=20,
429430
):
430431
"""
431432
Vectorized flattening of telescope array columns.
@@ -451,6 +452,8 @@ def flatten_telescope_data_vectorized(
451452
Observatory name for indexing mode detection. Default is "veritas".
452453
max_tel_per_type : int, optional
453454
Maximum number of telescopes to keep per mirror area type. If None, keep all.
455+
preview_rows : int, optional
456+
Number of events to include in the sorting preview log. Set to 0 to disable.
454457
455458
Returns
456459
-------
@@ -563,7 +566,14 @@ def flatten_telescope_data_vectorized(
563566
flat_features = filtered_features
564567

565568
index = _get_index(df, n_evt)
566-
df_flat = flatten_telescope_variables(n_tel, flat_features, index, tel_config, analysis_type)
569+
df_flat = flatten_telescope_variables(
570+
n_tel,
571+
flat_features,
572+
index,
573+
tel_config=tel_config,
574+
analysis_type=analysis_type,
575+
preview_rows=preview_rows,
576+
)
567577
return pd.concat(
568578
[df_flat, extra_columns(df, analysis_type, training, index, tel_config, observatory)],
569579
axis=1,
@@ -706,6 +716,7 @@ def flatten_feature_data(
706716
tel_config=None,
707717
observatory="veritas",
708718
max_tel_per_type=None,
719+
preview_rows=20,
709720
):
710721
"""
711722
Get flattened features for events.
@@ -728,6 +739,8 @@ def flatten_feature_data(
728739
Observatory name for indexing mode detection.
729740
max_tel_per_type : int, optional
730741
Maximum number of telescopes to keep per mirror area type. If None, keep all.
742+
preview_rows : int, optional
743+
Number of events to include in the sorting preview log. Set to 0 to disable.
731744
"""
732745
df_flat = flatten_telescope_data_vectorized(
733746
group_df,
@@ -738,6 +751,7 @@ def flatten_feature_data(
738751
tel_config=tel_config,
739752
observatory=observatory,
740753
max_tel_per_type=max_tel_per_type,
754+
preview_rows=preview_rows,
741755
)
742756
max_tel_id = tel_config["max_tel_id"] if tel_config else ntel - 1
743757
excluded_columns = set(features_module.target_features(analysis_type)) | set(
@@ -855,6 +869,7 @@ def load_training_data(model_configs, file_list, analysis_type):
855869
tel_config=tel_config,
856870
observatory=model_configs.get("observatory", "veritas"),
857871
max_tel_per_type=model_configs.get("max_tel_per_type", None),
872+
preview_rows=model_configs.get("preview_rows", 20),
858873
)
859874
if analysis_type == "stereo_analysis":
860875
new_cols = {
@@ -941,7 +956,14 @@ def apply_clip_intervals(df, n_tel=None, apply_log10=None):
941956
df.loc[mask_to_log, var_base] = np.log10(df.loc[mask_to_log, var_base])
942957

943958

944-
def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None, analysis_type=None):
959+
def flatten_telescope_variables(
960+
n_tel,
961+
flat_features,
962+
index,
963+
tel_config=None,
964+
analysis_type=None,
965+
preview_rows=20,
966+
):
945967
"""Generate dataframe for telescope variables flattened for all telescopes.
946968
947969
Creates features for all telescope IDs, using NaN as default value for missing data.
@@ -958,6 +980,8 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None, an
958980
Telescope configuration with 'max_tel_id' key.
959981
analysis_type : str, optional
960982
Type of analysis, e.g. "classification" or "stereo_analysis".
983+
preview_rows : int, optional
984+
Number of events to include in the sorting preview log. Set to 0 to disable.
961985
"""
962986
df_flat = pd.DataFrame(flat_features, index=index)
963987
df_flat = df_flat.astype(np.float32)
@@ -988,11 +1012,13 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None, an
9881012
size_cols = [c for c in df_flat.columns if c.startswith("size_")][: max_tel_id + 1]
9891013
area_cols = [c for c in df_flat.columns if c.startswith("mirror_area_")][: max_tel_id + 1]
9901014
disp_cols = [c for c in df_flat.columns if c.startswith("Disp_T_")][: max_tel_id + 1]
991-
preview = df_flat[size_cols + area_cols + disp_cols].head(20)
992-
_logger.info(
993-
"Sorted telescope sizes (pre-clip/log10), first 20 events: \n"
994-
f"{preview.to_string(index=False)}"
995-
)
1015+
if preview_rows and preview_rows > 0:
1016+
preview = df_flat[size_cols + area_cols + disp_cols].head(preview_rows)
1017+
_logger.info(
1018+
"Sorted telescope sizes (pre-clip/log10), first %d events: \n%s",
1019+
preview_rows,
1020+
preview.to_string(index=False),
1021+
)
9961022

9971023
apply_clip_intervals(
9981024
df_flat,
@@ -1098,6 +1124,7 @@ def extra_columns(df, analysis_type, training, index, tel_config=None, observato
10981124
- _to_numpy_1d(df["Yoff_intersect"], np.float32)
10991125
).astype(np.float32),
11001126
"DispNImages": _to_numpy_1d(df["DispNImages"], np.int32),
1127+
"img2_ang": _to_numpy_1d(df["img2_ang"], np.float32),
11011128
# These may be absent in some datasets; if missing, fill with NaN
11021129
"Erec": (
11031130
_to_numpy_1d(df["Erec"], np.float32)

src/eventdisplay_ml/features.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ def _regression_features(training):
116116
"DispNImages",
117117
"DispTelList_T",
118118
"ImgSel_list",
119+
"img2_ang",
119120
"Xoff",
120121
"Yoff",
121122
"Xoff_intersect",
@@ -182,8 +183,9 @@ def clip_intervals():
182183
"ErecS": (energy_min, None),
183184
"EChi2S": (energy_min, None),
184185
"EmissionHeightChi2": (1e-6, None),
186+
"img2_ang": (0.0, 360.0),
185187
# Per-telescope energy and size variables - log10 transformation with lower bound
186-
"size": (10, None),
188+
"size": (1, None),
187189
"E": (energy_min, None),
188190
"ES": (energy_min, None),
189191
"ntubes": (1, None),

src/eventdisplay_ml/hyper_parameters.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,26 @@ def _load_hyper_parameters_from_file(config_file):
8888
return hyperparameters
8989

9090

91-
def pre_cuts_regression(n_tel):
92-
"""Get pre-cuts for regression analysis."""
93-
event_cut = "DispNImages >=2"
91+
def pre_cuts_regression(n_tel, min_images=2):
92+
"""
93+
Get pre-cuts for regression analysis.
94+
95+
Parameters
96+
----------
97+
n_tel : int or None
98+
Number of telescopes (not currently used).
99+
min_images : int
100+
Minimum number of images (DispNImages) for quality cut (default: 2).
101+
102+
Returns
103+
-------
104+
str or None
105+
Pre-cut string for filtering events.
106+
"""
107+
cuts = [f"DispNImages >={min_images}"]
94108
if PRE_CUTS_REGRESSION:
95-
event_cut = " & ".join(f"({c})" for c in PRE_CUTS_REGRESSION)
109+
cuts.extend(PRE_CUTS_REGRESSION)
110+
event_cut = " & ".join(f"({c})" for c in cuts)
96111
_logger.info(f"Pre-cuts (regression): {event_cut if event_cut else 'None'}")
97112
return event_cut if event_cut else None
98113

src/eventdisplay_ml/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ def apply_regression_models(df, model_configs):
252252
training=False,
253253
tel_config=tel_config,
254254
observatory=model_configs.get("observatory", "veritas"),
255+
preview_rows=model_configs.get("preview_rows", 20),
255256
)
256257

257258
models = model_configs["models"]
@@ -313,6 +314,7 @@ def apply_classification_models(df, model_configs, threshold_keys):
313314
training=False,
314315
tel_config=tel_config,
315316
observatory=model_configs.get("observatory", "veritas"),
317+
preview_rows=model_configs.get("preview_rows", 20),
316318
)
317319
model = models[e_bin]["model"]
318320
flatten_data = flatten_data.reindex(columns=models[e_bin]["features"])

tests/conftest.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def create_base_df(n_rows=2, n_tel=2):
4343
"Erec": np.arange(n_rows, dtype=float) * 10.0 + 10.0,
4444
"ErecS": np.arange(n_rows, dtype=float) * 5.0 + 5.0,
4545
"EmissionHeight": np.arange(n_rows, dtype=float) * 100.0 + 100.0,
46+
"img2_ang": np.arange(n_rows, dtype=float) * 15.0 + 30.0,
4647
}
4748
)
4849

@@ -94,6 +95,7 @@ def df_three_tel_missing():
9495
"Erec": [10.0],
9596
"ErecS": [5.0],
9697
"EmissionHeight": [100.0],
98+
"img2_ang": [45.0],
9799
}
98100
)
99101

@@ -146,6 +148,7 @@ def sample_df():
146148
"Erec": [100.0, 200.0, 300.0, 400.0],
147149
"ErecS": [90.0, 180.0, 270.0, 360.0],
148150
"EmissionHeight": [10.0, 11.0, 12.0, 13.0],
151+
"img2_ang": [45.0, 50.0, 55.0, 60.0],
149152
}
150153
)
151154

tests/test_imgsel_debug.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def test_imgsel_sorting_and_alignment():
4747
"Erec": [1.0],
4848
"ErecS": [1.0],
4949
"EmissionHeight": [10.0],
50+
"img2_ang": [45.0],
5051
"cosphi": [np.array([0.0, 0.0], dtype=float)],
5152
"sinphi": [np.array([1.0, 1.0], dtype=float)],
5253
"loss": [np.array([0.0, 0.0], dtype=float)],

tests/test_size_area_sort.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def test_mirror_area_then_size_sorting():
3636
"Erec": [1.0],
3737
"ErecS": [1.0],
3838
"EmissionHeight": [10.0],
39+
"img2_ang": [45.0],
3940
# Supply base telescope arrays referenced in feature list; others will default to NaN
4041
"Disp_T": [np.array([0.0, 0.0, 0.0], dtype=float)],
4142
"cosphi": [np.array([1.0, 1.0, 1.0], dtype=float)],
@@ -59,12 +60,9 @@ def test_mirror_area_then_size_sorting():
5960
)
6061

6162
# Expected telescope order: TelID 1 (area 100), TelID 2 (area 50, size 20), TelID 0 (area 50, size 10)
62-
# Note: sizes are clipped to minimum 10 before sorting, so size 8→10
63-
# After clipping: [10, 10, 20] at TelIDs [0, 1, 2]
64-
# After sort by area (desc) then size (desc): [1, 2, 0]
65-
# Result: [clipped[1]=10, clipped[2]=20, clipped[0]=10]
63+
# Note: sizes are not clipped before sorting.
6664
expected_areas = [100.0, 50.0, 50.0]
67-
expected_sizes = [np.log10(10.0), np.log10(20.0), np.log10(10.0)] # After clipping
65+
expected_sizes = [np.log10(8.0), np.log10(20.0), np.log10(10.0)]
6866
expected_rel_x = [1000.0, 2000.0, 0.0]
6967

7068
np.testing.assert_allclose(

0 commit comments

Comments
 (0)