Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions ncpi/EphysDatasetParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ class ParseConfig:
# Post-processing
# ---------------------------
zscore: bool = False
zscore_after_epoch: bool = False # If True, apply z-score AFTER epoching (per-epoch)
exclude_last_epoch: bool = False # If True, exclude the last epoch from each time series

# Aggregation: aggregate over one or more categorical columns.
# Examples:
Expand Down Expand Up @@ -235,8 +237,8 @@ def parse(self, source: Union[str, Path, Any]) -> "Any":
rows = self._parse_object(obj, source_file=source_file)
df = self._rows_to_df(rows)

# 2) Optional z-scoring (sensor-wise, pre-aggregation)
if self.config.zscore:
# 2) Optional z-scoring BEFORE epoching (default behavior)
if self.config.zscore and not self.config.zscore_after_epoch:
df = self._apply_zscore(df)

# 3) Aggregate FIRST (e.g. collapse sensors)
Expand All @@ -249,6 +251,14 @@ def parse(self, source: Union[str, Path, Any]) -> "Any":
rows = self._apply_epoching_rows(rows)
df = self._rows_to_df(rows)

# 4b) Exclude last epoch if requested
if self.config.exclude_last_epoch:
df = self._exclude_last_epoch(df)

# 5) Optional z-scoring AFTER epoching (per-epoch normalization)
if self.config.zscore and self.config.zscore_after_epoch:
df = self._apply_zscore(df)

return df

# -------------------------
Expand Down Expand Up @@ -946,6 +956,28 @@ def z(x):
df["data"] = df["data"].apply(z)
return df

def _exclude_last_epoch(self, df: "Any") -> "Any":
"""Exclude the last epoch for each unique combination of grouping columns.

This is useful for discarding the last (potentially incomplete) epoch
from each time series when the signal length is not perfectly divisible
by the epoch length.
"""
if "epoch" not in df.columns:
return df

# Group by columns that identify unique time series (e.g., subject_id, sensor)
group_cols = [c for c in ["subject_id", "sensor"] if c in df.columns]

if not group_cols:
# No grouping columns, just exclude the global max epoch
max_epoch = df["epoch"].max()
return df[df["epoch"] < max_epoch].reset_index(drop=True)

# For each group, find the max epoch and exclude it
max_epochs = df.groupby(group_cols)["epoch"].transform("max")
return df[df["epoch"] < max_epochs].reset_index(drop=True)

def _apply_aggregation(self, df):
over = list(self.config.aggregate_over or [])
method = self.config.aggregate_method
Expand Down
5 changes: 3 additions & 2 deletions ncpi/Features.py
Original file line number Diff line number Diff line change
Expand Up @@ -1343,8 +1343,9 @@ def specparam(
if select_peak == "all":
all_peaks = peaks
elif select_peak == "max_pw":
idx = int(np.nanargmax(pws))
selected_peaks = peaks[idx:idx + 1]
if not np.all(np.isnan(pws)):
idx = int(np.nanargmax(pws))
selected_peaks = peaks[idx:idx + 1]
elif select_peak == "max_cf_in_range":
mask = (cfs >= fmin) & (cfs <= fmax)
if np.any(mask):
Expand Down