Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion src/eventdisplay_ml/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,15 +391,27 @@ def flatten_telescope_data_vectorized(
_logger.info("Detected CTAO ImgSel_list variable-length indexing mode.")
index_list_for_remapping = _to_dense_array(df["ImgSel_list"])

# Determine the maximum width needed for padded arrays
# This ensures all arrays are padded to the same width
max_array_width = tel_list_matrix.shape[1]
Comment thread
GernotMaier marked this conversation as resolved.
Outdated
if index_list_for_remapping is not None:
max_array_width = max(max_array_width, index_list_for_remapping.shape[1])

Comment thread
GernotMaier marked this conversation as resolved.
Outdated
active_mask = np.zeros((n_evt, max_tel_id + 1), dtype=bool)
row_indices, col_indices = np.where(~np.isnan(tel_list_matrix))
tel_ids = tel_list_matrix[row_indices, col_indices].astype(int)
valid_tel_mask = tel_ids <= max_tel_id
active_mask[row_indices[valid_tel_mask], tel_ids[valid_tel_mask]] = True

# Pre-load and normalize size to telescope-ID space for sorting
size_raw = _to_dense_array(df["size"])
# Ensure size array is padded to the same width as the index lists
if size_raw.shape[1] < max_array_width:
padded_size = np.full((n_evt, max_array_width), np.nan, dtype=np.float32)
padded_size[:, : size_raw.shape[1]] = size_raw
size_raw = padded_size
size_data = _normalize_telescope_variable_to_tel_id_space(
_to_dense_array(df["size"]), index_list_for_remapping, max_tel_id, n_evt
size_raw, index_list_for_remapping, max_tel_id, n_evt
)
size_data = _clip_size_array(size_data)

Expand Down Expand Up @@ -441,6 +453,12 @@ def flatten_telescope_data_vectorized(

data = _to_dense_array(df[var]) if _has_field(df, var) else np.full((n_evt, n_tel), np.nan)

# Ensure data array is padded to the same width as the index lists
if data.shape[1] < max_array_width:
padded_data = np.full((n_evt, max_array_width), np.nan, dtype=np.float32)
padded_data[:, : data.shape[1]] = data
data = padded_data

Comment thread
GernotMaier marked this conversation as resolved.
Outdated
# Disp_* variables always use DispTelList_T, regardless of mode
if var.startswith("Disp") and "_T" in var:
data_normalized = _normalize_telescope_variable_to_tel_id_space(
Expand Down Expand Up @@ -691,6 +709,16 @@ def load_training_data(model_configs, file_list, analysis_type):
if tel_config is None:
tel_config = read_telescope_config(root_file)
model_configs["tel_config"] = tel_config
else:
# Check if current file has a larger max_tel_id and update if needed
current_tel_config = read_telescope_config(root_file)
if current_tel_config["max_tel_id"] > tel_config["max_tel_id"]:
_logger.info(
f"Updating max_tel_id from {tel_config['max_tel_id']} "
f"to {current_tel_config['max_tel_id']} (file: {f})"
)
tel_config["max_tel_id"] = current_tel_config["max_tel_id"]
Comment thread
GernotMaier marked this conversation as resolved.
Outdated
model_configs["tel_config"] = tel_config
Comment thread
GernotMaier marked this conversation as resolved.

_logger.info(f"Processing file: {f} (file {file_idx}/{total_files})")
tree = root_file["data"]
Expand Down