|
3 | 3 | from typing import Dict, List, Optional, Tuple |
4 | 4 |
|
5 | 5 | from pathlib import Path |
6 | | -from jabs_postprocess.utils.project_utils import ClassifierSettings, JabsProject |
| 6 | +import pandas as pd |
| 7 | +from jabs_postprocess.utils.project_utils import ( |
| 8 | + ClassifierSettings, |
| 9 | + JabsProject, |
| 10 | + BoutTable, |
| 11 | + BinTable, |
| 12 | +) |
7 | 13 |
|
8 | 14 |
|
9 | 15 | def process_behavior_tables( |
@@ -111,3 +117,137 @@ def process_multiple_behaviors( |
111 | 117 | results.append((bout_path, bin_path)) |
112 | 118 |
|
113 | 119 | return results |
| 120 | + |
| 121 | + |
def merge_behavior_tables(
    input_tables: List[Path],
    output_prefix: str = "merged_behavior",
    overwrite: bool = False,
) -> Tuple[str, str]:
    """Merge several behavior tables of a single type for the same behavior.

    The table type (bout vs. bin) is detected from the first input file: a
    table that exposes a ``bout_behavior`` column is treated as a bin table,
    anything else as a bout table. Every input is then loaded as that type,
    checked to describe the same behavior, and combined into one output file
    named ``{output_prefix}_{behavior}_bouts_merged.csv`` or
    ``{output_prefix}_{behavior}_summaries_merged.csv``.

    Args:
        input_tables: Paths to the behavior table files to merge. All files
            are loaded as the type detected from the first one.
        output_prefix: Prefix for the output filename.
        overwrite: Whether to overwrite an existing output file.

    Returns:
        Tuple[str, str]: The path of the merged file, repeated twice. Only one
        table type is merged per call, so both elements are the same path;
        the two-element shape is kept for callers that unpack the result.

    Raises:
        ValueError: If no input tables are given or the tables describe
            different behaviors.
        FileNotFoundError: If any input table file doesn't exist.
        FileExistsError: If the output file exists and overwrite is False.
    """
    if not input_tables:
        raise ValueError("No input tables provided")

    # Validate all input files exist before parsing anything.
    for table_path in input_tables:
        if not Path(table_path).exists():
            raise FileNotFoundError(f"Input table not found: {table_path}")

    # Load the first table as a bout table to read its behavior name and to
    # inspect its columns for type detection.
    first_table = BoutTable.from_file(input_tables[0])
    behavior_name = first_table.settings.behavior
    table_cls = BoutTable
    type_suffix = "bouts"

    if "bout_behavior" in first_table.data.columns:
        # This is likely a bin table; reload it with the matching class.
        table_cls = BinTable
        type_suffix = "summaries"
        first_table = BinTable.from_file(input_tables[0])

    # The first table is already loaded with the right class, so only the
    # remaining files need to be parsed. Its behavior trivially matches
    # behavior_name, so it needs no compatibility check.
    tables = [first_table]
    for table_path in input_tables[1:]:
        table = table_cls.from_file(table_path)

        # Validate same behavior
        if table.settings.behavior != behavior_name:
            raise ValueError(
                f"Incompatible behaviors: {behavior_name} vs {table.settings.behavior} in {table_path}"
            )

        tables.append(table)

    output_file = f"{output_prefix}_{behavior_name}_{type_suffix}_merged.csv"

    # Enforce the documented FileExistsError contract here, before the merge
    # is computed, instead of relying solely on to_file() to refuse.
    if not overwrite and Path(output_file).exists():
        raise FileExistsError(
            f"Output file already exists: {output_file} (pass overwrite=True to replace it)"
        )

    # Merge the tables using the existing combine_data method
    merged_table = table_cls.combine_data(tables)

    # Write the merged table
    merged_table.to_file(output_file, overwrite)

    # Same file for both slots since only one table type was merged.
    return (output_file, output_file)
| 192 | + |
| 193 | + |
def merge_multiple_behavior_tables(
    table_groups: Dict[str, List[Path]],
    output_prefix: str = "merged_behavior",
    overwrite: bool = False,
) -> Dict[str, Tuple[Optional[str], Optional[str]]]:
    """Merge multiple sets of behavior tables grouped by behavior.

    For each behavior, the input files are split into bout tables and bin
    tables (a file with a ``bout_behavior`` column is treated as a bin
    table), files without any data rows are skipped, and each non-empty set
    is merged with ``merge_behavior_tables``.

    Args:
        table_groups: Dictionary mapping behavior names to lists of table file paths
        output_prefix: Prefix for output filenames
        overwrite: Whether to overwrite existing files

    Returns:
        Dictionary mapping behavior names to (bout_table_path, bin_table_path)
        tuples. An element is None when the group contained no non-empty
        table of that type.

    Raises:
        ValueError: If any behavior group is empty
        FileNotFoundError: If any input table file doesn't exist
        FileExistsError: If output files exist and overwrite is False
    """
    results = {}

    for behavior_name, table_paths in table_groups.items():
        if not table_paths:
            raise ValueError(f"No tables provided for behavior: {behavior_name}")

        # Group tables by type (bout vs bin) for this behavior
        bout_tables = []
        bin_tables = []

        for table_path in table_paths:
            # Skip the first two lines of the file (presumably the settings
            # header -- confirm against the table writer) and read one data
            # row. That is enough both to detect an empty table and to see
            # the column names, so the file is never read in full here.
            data_sample = pd.read_csv(table_path, skiprows=2, nrows=1)

            if data_sample.empty:
                continue  # Skip empty tables

            # Check if it's a bin table (has bout_behavior column) or bout table
            if "bout_behavior" in data_sample.columns:
                bin_tables.append(table_path)
            else:
                bout_tables.append(table_path)

        # NOTE(review): merge_behavior_tables appends the behavior name and
        # a type suffix of its own, so the names built below end up with both
        # repeated. Kept as-is so existing output filenames do not change.

        # Merge bout tables if any exist
        bout_output = None
        if bout_tables:
            bout_output, _ = merge_behavior_tables(
                bout_tables, f"{output_prefix}_{behavior_name}_bouts", overwrite
            )

        # Merge bin tables if any exist
        bin_output = None
        if bin_tables:
            bin_output, _ = merge_behavior_tables(
                bin_tables, f"{output_prefix}_{behavior_name}_summaries", overwrite
            )

        results[behavior_name] = (bout_output, bin_output)

    return results
0 commit comments