Skip to content

Commit debe1bb

Browse files
authored
Merge pull request #49 from KumarLabJax/KLAUS-65-extend-summaries-table
Extending new metrics to summaries table too
2 parents 6b0d36f + 61892aa commit debe1bb

File tree

5 files changed

+372
-12
lines changed

5 files changed

+372
-12
lines changed

README.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ Lots of the functions used in generating these behavior tables were designed for
121121

122122
There are two behavior tables generated. Both contain a header line to store parameters used while calling the script.
123123

124-
Some features are optional, because calculating them can be expensive. These options are noted with an asterisk (\*). While default behavior is to include them, they are not guaranteed.
124+
Some features are optional, either because calculating them can be expensive or because they are controlled via optional arguments. These options are noted with an asterisk (\*). While default behavior is to include them, they are not guaranteed.
125125

126126
## Header Data
127127

@@ -150,6 +150,11 @@ The bout table contains a compressed RLE encoded format for each bout (post-filt
150150
* `0` : Not behavior prediction
151151
* `1` : Behavior prediction
152152
* `distance`\* : Distance traveled during bout
153+
* `total_bout_count`\* : Number of behavior bouts per animal
154+
* `avg_bout_duration`\* : Average duration of all bouts per animal
155+
* `bout_duration_std`\* : Standard deviation of all bout durations
156+
* `bout_duration_var`\* : Variance of all bout durations
157+
* `latency_to_first_bout`\* : Frame number of first behavior bout
153158

154159
## Binned Table
155160

@@ -168,6 +173,13 @@ Summaries included:
168173
* If a bout spans multiple time bins, it will be divided into both via the proportion of time
169174
* Sum of bouts across bins produces the correct total count
170175
* Note that bouts cannot span between video files
176+
* `_stats_sample_count` : Sample count used in stats calculation (count of whole and partial bouts in time bin)
177+
* `avg_bout_duration` : Average bout duration per animal (in time bin)
178+
* `bout_duration_std` : Standard deviation of bout durations (in time bin)
179+
* `bout_duration_var` : Variance of bout durations (in time bin)
180+
* `latency_to_first_prediction` : Frame number of first behavior prediction in the time bin
181+
* Frame is relative to the experiment start, not the time bin
182+
* `latency_to_last_prediction` : Frame number of last behavior prediction in the time bin
171183
* `not_behavior_dist`\* : Total distance traveled during not behavior bouts
172184
* `behavior_dist`\* : Total distance traveled during behavior bouts
173185

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "jabs-postprocess"
3-
version = "0.4.2"
3+
version = "0.5.0"
44
description = "A python library for JABS postprocessing utilities."
55
readme = "README.md"
66
license = "LicenseRef-PLATFORM-LICENSE-AGREEMENT-FOR-NON-COMMERCIAL-USE"

src/jabs_postprocess/utils/project_utils.py

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -802,7 +802,6 @@ def add_bout_statistics(self):
802802
- bout_duration_var: Variance of bout durations for this animal
803803
- latency_to_first_bout: Frame number of first behavior bout (if any)
804804
"""
805-
806805
# Group by animal and calculate statistics for behavior bouts only
807806
behavior_bouts = self._data[self._data["is_behavior"] == 1]
808807

@@ -902,7 +901,9 @@ def bouts_to_bins(
902901
Binned event data describing the event data.
903902
904903
Notes:
905-
Binned data describes event data as summaries. For each state, total time and distance travelled are provided. Additionally, the number of behavior events are counted.
904+
Binned data describes event data as summaries.
905+
For each state, total time and distance travelled are provided.
906+
Additionally, the number of behavior events is counted.
906907
Events that span multiple bins are split between them based on the percent in each, allowing fractional bout counts.
907908
"""
908909
# Get the range that the experiment spans
@@ -1013,12 +1014,57 @@ def bouts_to_bins(
10131014
results["time_not_behavior"] = bins_to_summarize.loc[
10141015
bins_to_summarize["is_behavior"] == 0, "duration"
10151016
].sum()
1016-
results["time_behavior"] = bins_to_summarize.loc[
1017-
bins_to_summarize["is_behavior"] == 1, "duration"
1018-
].sum()
1019-
results["bout_behavior"] = len(
1020-
bins_to_summarize.loc[bins_to_summarize["is_behavior"] == 1]
1021-
)
1017+
1018+
# Lots of "behavior" stats are run, so separate them for convenience
1019+
behavior_bins = bins_to_summarize.loc[bins_to_summarize["is_behavior"] == 1]
1020+
1021+
results["time_behavior"] = behavior_bins["duration"].sum()
1022+
results["bout_behavior"] = behavior_bins["percent_bout"].sum()
1023+
results["_stats_sample_count"] = len(behavior_bins)
1024+
# We use weighted statistic definitions here
1025+
# Weights are the proportion of bout contained in the bin (percent_bout)
1026+
if results["bout_behavior"] > 0:
1027+
results["avg_bout_duration"] = (
1028+
np.sum(
1029+
behavior_bins["duration"].values
1030+
* behavior_bins["percent_bout"].values
1031+
)
1032+
/ results["bout_behavior"]
1033+
)
1034+
results["latency_to_first_prediction"] = behavior_bins["start"].min()
1035+
results["latency_to_last_prediction"] = (
1036+
behavior_bins["start"] + behavior_bins["duration"]
1037+
).max()
1038+
1039+
# Variance requires more than one effective bout
1040+
if len(behavior_bins) > 1:
1041+
denom = (
1042+
(len(behavior_bins) - 1)
1043+
* results["bout_behavior"]
1044+
/ len(behavior_bins)
1045+
)
1046+
results["bout_duration_var"] = (
1047+
np.sum(
1048+
behavior_bins["percent_bout"].values
1049+
* np.square(
1050+
behavior_bins["duration"].values
1051+
/ behavior_bins["percent_bout"].values
1052+
- results["avg_bout_duration"]
1053+
)
1054+
)
1055+
/ denom
1056+
)
1057+
results["bout_duration_std"] = np.sqrt(results["bout_duration_var"])
1058+
else:
1059+
results["bout_duration_var"] = np.nan
1060+
results["bout_duration_std"] = np.nan
1061+
else:
1062+
# No behavior data - set all defaults
1063+
results["avg_bout_duration"] = np.nan
1064+
results["bout_duration_var"] = np.nan
1065+
results["bout_duration_std"] = np.nan
1066+
results["latency_to_first_prediction"] = np.nan
1067+
results["latency_to_last_prediction"] = np.nan
10221068
if "distance" in bins_to_summarize.keys():
10231069
results["not_behavior_dist"] = bins_to_summarize.loc[
10241070
bins_to_summarize["is_behavior"] == 0, "calc_dist"
@@ -1107,6 +1153,12 @@ def __init__(self, settings: ClassifierSettings, data: pd.DataFrame):
11071153
"time",
11081154
"not_behavior_dist",
11091155
"behavior_dist",
1156+
"avg_bout_duration",
1157+
"_stats_sample_count",
1158+
"bout_duration_std",
1159+
"bout_duration_var",
1160+
"latency_to_first_prediction",
1161+
"latency_to_last_prediction",
11101162
]
11111163
self._check_fields()
11121164

0 commit comments

Comments
 (0)