|
4 | 4 | import json
|
5 | 5 | import os
|
6 | 6 | import sys
|
| 7 | +import warnings |
7 | 8 | from dataclasses import dataclass, field
|
8 | 9 | from pathlib import Path
|
9 |
| -from typing import IO, List, Union |
| 10 | +from typing import IO, List, Literal, Optional, Union |
10 | 11 |
|
11 | 12 |
|
12 | 13 | @dataclass
|
@@ -262,3 +263,72 @@ def collect_precomputed_derivatives(layout, subject_id, derivatives_filters=None
|
262 | 263 | )
|
263 | 264 | derivatives[deriv] = res[0]
|
264 | 265 | return derivatives
|
| 266 | + |
| 267 | + |
def parse_bids_for_age_months(
    bids_root: Union[str, Path],
    subject_id: str,
    session_id: Optional[str] = None,
) -> Optional[int]:
    """
    Given a BIDS root, query the BIDS metadata files for participant age, in months.

    The heuristic followed is:
    1) Check `sub-<subject_id>/sub-<subject_id>_sessions.tsv`
       (only when a session is given and the file exists)
    2) Check `<root>/participants.tsv`

    The first source that yields an age wins; later sources are only
    consulted as a fallback.

    Parameters
    ----------
    bids_root
        Path to the root of the BIDS dataset.
    subject_id
        Subject label, with or without the ``sub-`` prefix.
    session_id
        Session label, with or without the ``ses-`` prefix.

    Returns
    -------
    The age found by the lookup helper, or ``None`` if no metadata file
    provided one.
    """
    if subject_id.startswith('sub-'):
        subject_id = subject_id[4:]
    if session_id and session_id.startswith('ses-'):
        session_id = session_id[4:]

    root = Path(bids_root)
    subject = f'sub-{subject_id}'

    sessions_tsv = root / subject / f'{subject}_sessions.tsv'
    if session_id is not None and sessions_tsv.exists():
        age = _get_age_from_tsv(sessions_tsv, level='session', key=f'ses-{session_id}')
        if age is not None:
            # Session-level age is the most specific; do not let the
            # participant-level lookup below overwrite it.
            return age

    participants_tsv = root / 'participants.tsv'
    if participants_tsv.exists():
        return _get_age_from_tsv(participants_tsv, level='participant', key=subject)

    return None
| 295 | + |
| 296 | + |
| 297 | +def _get_age_from_tsv(bids_tsv: Path, level: Literal['session', 'participant'], key: str): |
| 298 | + import pandas as pd |
| 299 | + |
| 300 | + df = pd.read_csv(str(bids_tsv), sep='\t') |
| 301 | + age_col = None |
| 302 | + # prefer explicit "age_months" over "age" |
| 303 | + for c in ('age_months', 'age'): |
| 304 | + if c in df.columns: |
| 305 | + age_col = c |
| 306 | + break |
| 307 | + |
| 308 | + if age_col == 'age': |
| 309 | + # verify age is in months |
| 310 | + bids_json = bids_tsv.with_suffix('.json') |
| 311 | + if not _verify_age_json(bids_json): |
| 312 | + warnings.warn(f'Could not verify age column is in months for file: {bids_tsv}') |
| 313 | + |
| 314 | + # find the relevant row |
| 315 | + if level == 'session': |
| 316 | + mask = df.session_id == key |
| 317 | + elif level == 'participant': |
| 318 | + mask = df.participant_id == key |
| 319 | + |
| 320 | + try: |
| 321 | + # extract age value from row |
| 322 | + age = int(df.loc[mask, age_col].values[0]) |
| 323 | + except Exception: |
| 324 | + age = None |
| 325 | + return age |
| 326 | + |
| 327 | + |
| 328 | +def _verify_age_json(bids_json: Path) -> bool: |
| 329 | + try: |
| 330 | + data = json.loads(bids_json.read_text()) |
| 331 | + assert data['age']['Units'] == 'months' |
| 332 | + except Exception: |
| 333 | + return False |
| 334 | + return True |
0 commit comments