Skip to content

Commit 5dea0dd

Browse files
authored
Merge pull request #35 from KumarLabJax/KLAUS-118-add-bout-table-outputs-to-jabs-features
Add behavior table merging functionality for JABS-postprocess
2 parents 8225c78 + d2bc81e commit 5dea0dd

File tree

6 files changed

+537
-34
lines changed

6 files changed

+537
-34
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "jabs-postprocess"
3-
version = "0.3.0"
3+
version = "0.4.0"
44
description = "A python library for JABS postprocessing utilities."
55
readme = "README.md"
66
license = "LicenseRef-PLATFORM-LICENSE-AGREEMENT-FOR-NON-COMMERCIAL-USE"

src/jabs_postprocess/cli/main.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from pathlib import Path
33
from typing import Annotated, List, Optional
44

5+
import pandas as pd
56
import numpy as np
67
import typer
78

@@ -329,5 +330,144 @@ def heuristic_classify(
329330
)
330331

331332

333+
@app.command()
def merge_tables(
    input_tables: Annotated[
        List[Path],
        typer.Option(
            help="Paths to behavior table files to merge (must be same behavior and table type)"
        ),
    ],
    output_prefix: Annotated[
        str,
        typer.Option(help="File prefix for merged output table"),
    ] = "merged_behavior",
    overwrite: Annotated[bool, typer.Option(help="Overwrites output files")] = False,
):
    """Merge multiple behavior tables of the same type and behavior.

    This command merges behavior tables that contain the same behavior data,
    combining them into a single consolidated table while preserving header information.
    """
    # NOTE(review): the docstring above is presumably rendered as the
    # command's --help text, so it is user-facing; edit it with that in mind.

    # Nothing to merge: report and exit with a non-zero status.
    if not input_tables:
        typer.echo("Error: No input tables provided.")
        raise typer.Exit(1)

    # Report the first missing path (in the order given) and stop there.
    missing_path = next(
        (candidate for candidate in input_tables if not candidate.exists()),
        None,
    )
    if missing_path is not None:
        typer.echo(f"Error: Input table not found: {missing_path}")
        raise typer.Exit(1)

    try:
        # The library call returns a pair; only its first element is reported.
        merged_path, _ = generate_behavior_tables.merge_behavior_tables(
            input_tables=input_tables,
            output_prefix=output_prefix,
            overwrite=overwrite,
        )

        typer.echo(f"Successfully merged {len(input_tables)} tables:")
        for merged_input in input_tables:
            typer.echo(f" - {merged_input}")
        typer.echo(f"Output saved to: {merged_path}")

    except FileExistsError as err:
        # Output already present and --overwrite was not given.
        typer.echo(f"Error: {str(err)}")
        typer.echo("Use --overwrite to force overwrite.")
        raise typer.Exit(1)
    except ValueError as err:
        # Raised by the merge for incompatible or invalid inputs.
        typer.echo(f"Error: {str(err)}")
        raise typer.Exit(1)
    except Exception as err:
        # Top-level CLI boundary: convert anything else into exit status 1.
        typer.echo(f"Unexpected error: {str(err)}")
        raise typer.Exit(1)
384+
385+
386+
@app.command()
def merge_multiple_tables(
    table_folder: Annotated[
        Path,
        typer.Option(
            help="Folder containing behavior table files to merge, grouped by behavior"
        ),
    ],
    behaviors: Annotated[
        Optional[List[str]],
        typer.Option(help="Specific behaviors to merge (default: auto-detect all)"),
    ] = None,
    table_pattern: Annotated[
        str,
        typer.Option(help="File pattern to match behavior tables"),
    ] = "*.csv",
    output_prefix: Annotated[
        str,
        typer.Option(help="File prefix for merged output tables"),
    ] = "merged_behavior",
    overwrite: Annotated[bool, typer.Option(help="Overwrites output files")] = False,
):
    """Merge multiple sets of behavior tables, automatically grouping by behavior.

    This command scans a folder for behavior table files, groups them by behavior name,
    and merges each group separately. Useful for combining results from multiple experiments.
    """
    # NOTE(review): the docstring above is presumably rendered as the
    # command's --help text, so it is user-facing; edit it with that in mind.
    if not table_folder.exists():
        typer.echo(f"Error: Table folder not found: {table_folder}")
        raise typer.Exit(1)

    # Sort the matches: glob order depends on the filesystem, and this order is
    # the order in which each behavior's tables are handed to the merge step.
    table_files = sorted(table_folder.glob(table_pattern))
    if not table_files:
        typer.echo(
            f"Error: No table files found matching pattern '{table_pattern}' in {table_folder}"
        )
        raise typer.Exit(1)

    # An empty/absent --behaviors list means "accept every behavior found".
    requested = set(behaviors) if behaviors else None

    # Group table paths by the behavior named in each file's header row.
    table_groups = {}
    for table_file in table_files:
        try:
            # Only the first data row is needed to read the "Behavior" field.
            header_data = pd.read_csv(table_file, nrows=1)
            behavior_name = header_data["Behavior"].iloc[0]
            if pd.isna(behavior_name):
                # A blank cell parses as NaN, which would otherwise become a
                # bogus group key; treat it like any other unreadable header.
                raise ValueError("empty Behavior field")
        except Exception:
            # Deliberate best-effort: any unreadable file is reported and
            # skipped so one bad file does not abort the whole scan.
            typer.echo(f"Warning: Could not read behavior from {table_file}, skipping.")
            continue

        # Filter by requested behaviors if specified.
        if requested is not None and behavior_name not in requested:
            continue

        table_groups.setdefault(behavior_name, []).append(table_file)

    if not table_groups:
        typer.echo("Error: No valid behavior tables found to merge.")
        raise typer.Exit(1)

    try:
        results = generate_behavior_tables.merge_multiple_behavior_tables(
            table_groups=table_groups,
            output_prefix=output_prefix,
            overwrite=overwrite,
        )

        typer.echo(f"Successfully merged tables for {len(results)} behaviors:")
        for behavior_name, (bout_file, bin_file) in results.items():
            typer.echo(f" {behavior_name}:")
            # Either entry is falsy when no table of that type was merged.
            if bout_file:
                typer.echo(f" Bout table: {bout_file}")
            if bin_file:
                typer.echo(f" Bin table: {bin_file}")

    except FileExistsError as e:
        typer.echo(f"Error: {e}")
        typer.echo("Use --overwrite to force overwrite.")
        raise typer.Exit(1)
    except Exception as e:
        # Top-level CLI boundary: convert anything else into exit status 1.
        typer.echo(f"Unexpected error: {e}")
        raise typer.Exit(1)
470+
471+
332472
if __name__ == "__main__":
333473
app()

src/jabs_postprocess/generate_behavior_tables.py

Lines changed: 141 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,13 @@
33
from typing import Dict, List, Optional, Tuple
44

55
from pathlib import Path
6-
from jabs_postprocess.utils.project_utils import ClassifierSettings, JabsProject
6+
import pandas as pd
7+
from jabs_postprocess.utils.project_utils import (
8+
ClassifierSettings,
9+
JabsProject,
10+
BoutTable,
11+
BinTable,
12+
)
713

814

915
def process_behavior_tables(
@@ -111,3 +117,137 @@ def process_multiple_behaviors(
111117
results.append((bout_path, bin_path))
112118

113119
return results
120+
121+
122+
def merge_behavior_tables(
    input_tables: List[Path],
    output_prefix: str = "merged_behavior",
    overwrite: bool = False,
) -> Tuple[str, str]:
    """Merge multiple behavior tables for the same behavior.

    The table type (bout vs. bin) is detected from the first input file and
    every input is then loaded as that type.

    Args:
        input_tables: List of paths to behavior table files to merge
        output_prefix: Prefix for output filenames
        overwrite: Whether to overwrite existing files

    Returns:
        Tuple[str, str]: The path of the merged output file, repeated twice.
        Only one table type is merged per call, so both elements are the same
        path; the pair shape is kept for callers that unpack two values.

    Raises:
        FileNotFoundError: If any input table file doesn't exist
        ValueError: If no tables are given or tables have different behaviors
        FileExistsError: If output files exist and overwrite is False
            (NOTE(review): presumably raised by ``to_file`` - confirm)
    """
    if not input_tables:
        raise ValueError("No input tables provided")

    # Validate all input files exist before loading anything.
    for table_path in input_tables:
        if not Path(table_path).exists():
            raise FileNotFoundError(f"Input table not found: {table_path}")

    # Probe the first file as a bout table to obtain the behavior name and to
    # inspect its columns.
    first_table = BoutTable.from_file(input_tables[0])
    behavior_name = first_table.settings.behavior

    # A "bout_behavior" column is taken to mean the file is a bin (summary)
    # table; in that case re-read the first file with the matching class.
    if "bout_behavior" in first_table.data.columns:
        table_cls, type_suffix = BinTable, "summaries"
        first_table = table_cls.from_file(input_tables[0])
    else:
        table_cls, type_suffix = BoutTable, "bouts"

    # Load the remaining tables (the first is already in memory) and make sure
    # every one of them describes the same behavior.
    tables = []
    for index, table_path in enumerate(input_tables):
        table = first_table if index == 0 else table_cls.from_file(table_path)
        if table.settings.behavior != behavior_name:
            raise ValueError(
                f"Incompatible behaviors: {behavior_name} vs {table.settings.behavior} in {table_path}"
            )
        tables.append(table)

    # Merge using the table class's own combine_data, then write the result.
    merged_table = table_cls.combine_data(tables)
    output_file = f"{output_prefix}_{behavior_name}_{type_suffix}_merged.csv"
    merged_table.to_file(output_file, overwrite)

    # Same file for both slots since only one table type was merged.
    return output_file, output_file
192+
193+
194+
def merge_multiple_behavior_tables(
    table_groups: Dict[str, List[Path]],
    output_prefix: str = "merged_behavior",
    overwrite: bool = False,
) -> Dict[str, Tuple[Optional[str], Optional[str]]]:
    """Merge multiple sets of behavior tables grouped by behavior.

    For each behavior, the given files are split into bout tables and bin
    tables, and each non-empty set is merged with ``merge_behavior_tables``.

    Args:
        table_groups: Dictionary mapping behavior names to lists of table file paths
        output_prefix: Prefix for output filenames
        overwrite: Whether to overwrite existing files

    Returns:
        Dictionary mapping behavior names to (bout_table_path, bin_table_path)
        tuples. An element is None when no table of that type was merged for
        the behavior.

    Raises:
        ValueError: If any behavior group is empty
        FileNotFoundError: If any input table file doesn't exist
        FileExistsError: If output files exist and overwrite is False
    """
    results = {}

    for behavior_name, table_paths in table_groups.items():
        if not table_paths:
            raise ValueError(f"No tables provided for behavior: {behavior_name}")

        # Split this behavior's files by table type (bout vs bin).
        bout_tables = []
        bin_tables = []

        for table_path in table_paths:
            # Skip the two leading header lines and read a single data row:
            # enough to tell whether the table has data and which columns
            # it carries, without parsing the whole file.
            data_sample = pd.read_csv(table_path, skiprows=2, nrows=1)

            if data_sample.empty:
                continue  # Skip tables without any data rows

            # A "bout_behavior" column marks a bin table; anything else is
            # treated as a bout table.
            if "bout_behavior" in data_sample.columns:
                bin_tables.append(table_path)
            else:
                bout_tables.append(table_path)

        # NOTE(review): merge_behavior_tables appends the behavior name and
        # table type to the prefix as well, so the resulting filenames repeat
        # both. Kept as-is because output names may be relied upon - confirm.
        bout_output = None
        if bout_tables:
            bout_output, _ = merge_behavior_tables(
                bout_tables, f"{output_prefix}_{behavior_name}_bouts", overwrite
            )

        bin_output = None
        if bin_tables:
            bin_output, _ = merge_behavior_tables(
                bin_tables, f"{output_prefix}_{behavior_name}_summaries", overwrite
            )

        results[behavior_name] = (bout_output, bin_output)

    return results

src/jabs_postprocess/utils/project_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,7 @@ def data(self):
526526
return self._data
527527

528528
@classmethod
529-
def combine_data(cls, data_list: List(Table)):
529+
def combine_data(cls, data_list: List[Table]):
530530
"""Combines multiple data tables together.
531531
532532
Args:
@@ -1138,7 +1138,7 @@ def from_no_prediction(
11381138
return cls(settings, bout_df, video_metadata)
11391139

11401140
@classmethod
1141-
def combine_data(cls, data_list: List(Table)):
1141+
def combine_data(cls, data_list: List[Table]):
11421142
"""Combines multiple prediction tables together.
11431143
11441144
Args:

0 commit comments

Comments
 (0)