Skip to content

Commit f7a7ee8

Browse files
committed
ENH: Add utility function to grab age from potential BIDS sources
1 parent 6e2778a commit f7a7ee8

File tree

1 file changed

+71
-1
lines changed

1 file changed

+71
-1
lines changed

nibabies/utils/bids.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
import json
55
import os
66
import sys
7+
import warnings
78
from dataclasses import dataclass, field
89
from pathlib import Path
9-
from typing import IO, List, Union
10+
from typing import IO, List, Literal, Optional, Union
1011

1112

1213
@dataclass
@@ -262,3 +263,72 @@ def collect_precomputed_derivatives(layout, subject_id, derivatives_filters=None
262263
)
263264
derivatives[deriv] = res[0]
264265
return derivatives
266+
267+
268+
def parse_bids_for_age_months(
    bids_root: Union[str, Path],
    subject_id: str,
    session_id: Optional[str] = None,
) -> Optional[int]:
    """
    Given a BIDS root, query the BIDS metadata files for participant age, in months.

    The heuristic followed is:
    1) Check `sub-<subject_id>/sub-<subject_id>_sessions.tsv`
    2) Check `<root>/participants.tsv`

    The session-level value takes precedence: `participants.tsv` is only consulted
    when no age could be extracted from the sessions file.

    Parameters
    ----------
    bids_root
        Path to the root of the BIDS dataset.
    subject_id
        Subject label, with or without the ``sub-`` prefix.
    session_id
        Session label, with or without the ``ses-`` prefix. When ``None``, the
        sessions file is not queried.

    Returns
    -------
    The age found as an integer, or ``None`` if neither file yields a value.
    """
    # Normalize labels so both "01" and "sub-01" style inputs are accepted
    if subject_id.startswith('sub-'):
        subject_id = subject_id[4:]
    if session_id and session_id.startswith('ses-'):
        session_id = session_id[4:]

    bids_root = Path(bids_root)
    age = None

    sessions_tsv = bids_root / f'sub-{subject_id}' / f'sub-{subject_id}_sessions.tsv'
    if session_id is not None and sessions_tsv.exists():
        age = _get_age_from_tsv(sessions_tsv, level='session', key=f'ses-{session_id}')

    # Fall back to the participant-level file only when the (more specific)
    # session-level lookup produced nothing, so it is never overwritten.
    if age is None:
        participants_tsv = bids_root / 'participants.tsv'
        if participants_tsv.exists():
            age = _get_age_from_tsv(
                participants_tsv, level='participant', key=f'sub-{subject_id}'
            )

    return age
295+
296+
297+
def _get_age_from_tsv(
    bids_tsv: Path, level: Literal['session', 'participant'], key: str
) -> Optional[int]:
    """
    Extract the age recorded for a single row of a BIDS TSV file.

    Parameters
    ----------
    bids_tsv
        Path to a tab-separated file (sessions or participants level).
    level
        Which identifier column to match against: ``'session'`` uses the
        ``session_id`` column, ``'participant'`` uses ``participant_id``.
    key
        Full identifier of the row to look up (e.g., ``ses-1`` or ``sub-01``).

    Returns
    -------
    The age of the first matching row converted with ``int()``, or ``None`` when
    the file has no age column, no identifier column, no matching row, or the
    value cannot be converted to an integer.

    Notes
    -----
    An ``age_months`` column is preferred over ``age``. When only ``age`` is
    present, a warning is emitted if the sidecar JSON does not declare the
    units to be months; the value is still returned in that case.
    """
    import pandas as pd

    df = pd.read_csv(str(bids_tsv), sep='\t')

    # prefer explicit "age_months" over "age"
    age_col = next((c for c in ('age_months', 'age') if c in df.columns), None)
    if age_col is None:
        return None

    if age_col == 'age':
        # verify age is in months
        bids_json = bids_tsv.with_suffix('.json')
        if not _verify_age_json(bids_json):
            warnings.warn(f'Could not verify age column is in months for file: {bids_tsv}')

    # find the relevant row; a missing identifier column means no row can match
    id_col = {'session': 'session_id', 'participant': 'participant_id'}.get(level)
    if id_col is None or id_col not in df.columns:
        return None

    matches = df.loc[df[id_col] == key, age_col]
    try:
        # extract age value from the first matching row
        return int(matches.values[0])
    except (IndexError, TypeError, ValueError, OverflowError):
        # no matching row, or a missing / non-numeric age value
        return None
326+
327+
328+
def _verify_age_json(bids_json: Path) -> bool:
    """
    Check whether a JSON sidecar declares the ``age`` column units as months.

    Parameters
    ----------
    bids_json
        Path to the JSON sidecar accompanying a BIDS TSV file.

    Returns
    -------
    ``True`` only if the file can be read and parsed and its
    ``["age"]["Units"]`` entry equals ``"months"``; ``False`` otherwise
    (including a missing file, invalid JSON, or an absent entry).
    """
    try:
        data = json.loads(bids_json.read_text())
        # Explicit comparison rather than ``assert``: assertions are removed
        # when Python runs with -O, which would make every sidecar "verify".
        return data['age']['Units'] == 'months'
    except (OSError, ValueError, KeyError, TypeError):
        # unreadable file, invalid JSON / encoding, or unexpected structure
        return False

0 commit comments

Comments
 (0)