Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/eventdisplay_ml/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,14 @@ def configure_training(analysis_type):
help="Maximum number of telescopes to keep per mirror area type (for feature reduction).",
default=None,
)
parser.add_argument(
"--preview_rows",
type=int,
help=(
"Number of events to include in the sorted telescope preview log (set to 0 to disable)."
),
default=20,
)

model_configs = vars(parser.parse_args())

Expand All @@ -102,6 +110,7 @@ def configure_training(analysis_type):
_logger.info(f"Random state: {model_configs['random_state']}")
_logger.info(f"Max events: {model_configs['max_events']}")
_logger.info(f"Max CPU cores: {model_configs['max_cores']}")
_logger.info(f"Preview rows: {model_configs['preview_rows']}")
if model_configs.get("max_tel_per_type") is not None:
_logger.info(f"Max telescopes per mirror area type: {model_configs['max_tel_per_type']}")

Expand Down Expand Up @@ -193,6 +202,14 @@ def configure_apply(analysis_type):
help="Observatory/site name for geomagnetic field (default: VERITAS).",
default="VERITAS",
)
parser.add_argument(
"--preview_rows",
type=int,
help=(
"Number of events to include in the sorted telescope preview log (set to 0 to disable)."
),
default=20,
)

model_configs = vars(parser.parse_args())

Expand All @@ -204,6 +221,7 @@ def configure_apply(analysis_type):
_logger.info(f"Image selection: {model_configs.get('image_selection')}")
_logger.info(f"Max events: {model_configs.get('max_events')}")
_logger.info(f"Max cores: {model_configs.get('max_cores')}")
_logger.info(f"Preview rows: {model_configs['preview_rows']}")

model_configs["models"], par = load_models(
analysis_type, model_configs["model_prefix"], model_configs["model_name"]
Expand Down
41 changes: 34 additions & 7 deletions src/eventdisplay_ml/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ def flatten_telescope_data_vectorized(
tel_config=None,
observatory="veritas",
max_tel_per_type=None,
preview_rows=20,
):
"""
Vectorized flattening of telescope array columns.
Expand All @@ -451,6 +452,8 @@ def flatten_telescope_data_vectorized(
Observatory name for indexing mode detection. Default is "veritas".
max_tel_per_type : int, optional
Maximum number of telescopes to keep per mirror area type. If None, keep all.
preview_rows : int, optional
Number of events to include in the sorting preview log. Set to 0 to disable.

Returns
-------
Expand Down Expand Up @@ -563,7 +566,14 @@ def flatten_telescope_data_vectorized(
flat_features = filtered_features

index = _get_index(df, n_evt)
df_flat = flatten_telescope_variables(n_tel, flat_features, index, tel_config, analysis_type)
df_flat = flatten_telescope_variables(
n_tel,
flat_features,
index,
tel_config=tel_config,
analysis_type=analysis_type,
preview_rows=preview_rows,
)
return pd.concat(
[df_flat, extra_columns(df, analysis_type, training, index, tel_config, observatory)],
axis=1,
Expand Down Expand Up @@ -706,6 +716,7 @@ def flatten_feature_data(
tel_config=None,
observatory="veritas",
max_tel_per_type=None,
preview_rows=20,
):
"""
Get flattened features for events.
Expand All @@ -728,6 +739,8 @@ def flatten_feature_data(
Observatory name for indexing mode detection.
max_tel_per_type : int, optional
Maximum number of telescopes to keep per mirror area type. If None, keep all.
preview_rows : int, optional
Number of events to include in the sorting preview log. Set to 0 to disable.
"""
df_flat = flatten_telescope_data_vectorized(
group_df,
Expand All @@ -738,6 +751,7 @@ def flatten_feature_data(
tel_config=tel_config,
observatory=observatory,
max_tel_per_type=max_tel_per_type,
preview_rows=preview_rows,
)
max_tel_id = tel_config["max_tel_id"] if tel_config else ntel - 1
excluded_columns = set(features_module.target_features(analysis_type)) | set(
Expand Down Expand Up @@ -855,6 +869,7 @@ def load_training_data(model_configs, file_list, analysis_type):
tel_config=tel_config,
observatory=model_configs.get("observatory", "veritas"),
max_tel_per_type=model_configs.get("max_tel_per_type", None),
preview_rows=model_configs.get("preview_rows", 20),
)
if analysis_type == "stereo_analysis":
new_cols = {
Expand Down Expand Up @@ -941,7 +956,14 @@ def apply_clip_intervals(df, n_tel=None, apply_log10=None):
df.loc[mask_to_log, var_base] = np.log10(df.loc[mask_to_log, var_base])


def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None, analysis_type=None):
def flatten_telescope_variables(
n_tel,
flat_features,
index,
tel_config=None,
analysis_type=None,
preview_rows=20,
):
"""Generate dataframe for telescope variables flattened for all telescopes.

Creates features for all telescope IDs, using NaN as default value for missing data.
Expand All @@ -958,6 +980,8 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None, an
Telescope configuration with 'max_tel_id' key.
analysis_type : str, optional
Type of analysis, e.g. "classification" or "stereo_analysis".
preview_rows : int, optional
Number of events to include in the sorting preview log. Set to 0 to disable.
"""
df_flat = pd.DataFrame(flat_features, index=index)
df_flat = df_flat.astype(np.float32)
Expand Down Expand Up @@ -988,11 +1012,13 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None, an
size_cols = [c for c in df_flat.columns if c.startswith("size_")][: max_tel_id + 1]
area_cols = [c for c in df_flat.columns if c.startswith("mirror_area_")][: max_tel_id + 1]
disp_cols = [c for c in df_flat.columns if c.startswith("Disp_T_")][: max_tel_id + 1]
preview = df_flat[size_cols + area_cols + disp_cols].head(20)
_logger.info(
"Sorted telescope sizes (pre-clip/log10), first 20 events: \n"
f"{preview.to_string(index=False)}"
)
if preview_rows and preview_rows > 0:
preview = df_flat[size_cols + area_cols + disp_cols].head(preview_rows)
_logger.info(
"Sorted telescope sizes (pre-clip/log10), first %d events: \n%s",
preview_rows,
preview.to_string(index=False),
)

apply_clip_intervals(
df_flat,
Expand Down Expand Up @@ -1098,6 +1124,7 @@ def extra_columns(df, analysis_type, training, index, tel_config=None, observato
- _to_numpy_1d(df["Yoff_intersect"], np.float32)
).astype(np.float32),
"DispNImages": _to_numpy_1d(df["DispNImages"], np.int32),
"img2_ang": _to_numpy_1d(df["img2_ang"], np.float32),
Comment thread
GernotMaier marked this conversation as resolved.
# These may be absent in some datasets; if missing, fill with NaN
"Erec": (
_to_numpy_1d(df["Erec"], np.float32)
Expand Down
4 changes: 3 additions & 1 deletion src/eventdisplay_ml/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def _regression_features(training):
"DispNImages",
"DispTelList_T",
"ImgSel_list",
"img2_ang",
"Xoff",
"Yoff",
"Xoff_intersect",
Expand Down Expand Up @@ -182,8 +183,9 @@ def clip_intervals():
"ErecS": (energy_min, None),
"EChi2S": (energy_min, None),
"EmissionHeightChi2": (1e-6, None),
"img2_ang": (0.0, 360.0),
# Per-telescope energy and size variables - log10 transformation with lower bound
"size": (10, None),
"size": (1, None),
"E": (energy_min, None),
"ES": (energy_min, None),
"ntubes": (1, None),
Expand Down
2 changes: 2 additions & 0 deletions src/eventdisplay_ml/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def apply_regression_models(df, model_configs):
training=False,
tel_config=tel_config,
observatory=model_configs.get("observatory", "veritas"),
preview_rows=model_configs.get("preview_rows", 20),
)

models = model_configs["models"]
Expand Down Expand Up @@ -313,6 +314,7 @@ def apply_classification_models(df, model_configs, threshold_keys):
training=False,
tel_config=tel_config,
observatory=model_configs.get("observatory", "veritas"),
preview_rows=model_configs.get("preview_rows", 20),
)
model = models[e_bin]["model"]
flatten_data = flatten_data.reindex(columns=models[e_bin]["features"])
Expand Down
Loading