
Commit 5e368f6

Merge pull request #154 from sccn/doc-comprehensive-documentation
Add Comprehensive Codebase Documentation
2 parents bb94115 + 7298932 commit 5e368f6

19 files changed, +1877 -632 lines changed

eegdash/api.py

Lines changed: 180 additions & 86 deletions
Large diffs are not rendered by default.

eegdash/bids_eeg_metadata.py

Lines changed: 139 additions & 39 deletions
@@ -33,12 +33,30 @@


 def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
-    """Build and validate a MongoDB query from user-friendly keyword arguments.
+    """Build and validate a MongoDB query from keyword arguments.
+
+    This function converts user-friendly keyword arguments into a valid
+    MongoDB query dictionary. It handles scalar values as exact matches and
+    list-like values as ``$in`` queries. It also performs validation to
+    reject unsupported fields and empty values.
+
+    Parameters
+    ----------
+    **kwargs
+        Keyword arguments representing query filters. Allowed keys are defined
+        in ``eegdash.const.ALLOWED_QUERY_FIELDS``.
+
+    Returns
+    -------
+    dict
+        A MongoDB query dictionary.
+
+    Raises
+    ------
+    ValueError
+        If an unsupported query field is provided, or if a value is None or
+        an empty string/list.

-    Improvements:
-    - Reject None values and empty/whitespace-only strings
-    - For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
-    - Preserve scalars as exact matches
     """
     # 1. Validate that all provided keys are allowed for querying
     unknown_fields = set(kwargs.keys()) - ALLOWED_QUERY_FIELDS
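
A minimal usage sketch of the query-building rules documented above. The import path follows the file shown in this diff; "nchans" appears in ALLOWED_QUERY_FIELDS in the const.py hunk below, while "dataset" is assumed here to be an allowed field as well.

from eegdash.bids_eeg_metadata import build_query_from_kwargs

# Scalars become exact matches; list-like values are deduplicated into ``$in`` clauses.
query = build_query_from_kwargs(dataset=["ds005505", "ds005506"], nchans=129)
# Expected shape: {"dataset": {"$in": ["ds005505", "ds005506"]}, "nchans": 129}
# Unsupported fields, None values, and empty strings/lists raise ValueError.
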
@@ -89,24 +107,29 @@ def build_query_from_kwargs(**kwargs) -> dict[str, Any]:


 def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any]:
-    """Build the metadata record for a given BIDS file (single recording) in a BIDS dataset.
+    """Build a metadata record for a BIDS file.

-    Attributes are at least the ones defined in data_config attributes (set to None if missing),
-    but are typically a superset, and include, among others, the paths to relevant
-    meta-data files needed to load and interpret the file in question.
+    Extracts metadata attributes from a single BIDS EEG file within a given
+    BIDS dataset. The extracted attributes include BIDS entities, file paths,
+    and technical metadata required for database indexing.

     Parameters
     ----------
     bids_dataset : EEGBIDSDataset
         The BIDS dataset object containing the file.
     bids_file : str
-        The path to the BIDS file within the dataset.
+        The path to the BIDS file to process.

     Returns
     -------
-    dict:
-        A dictionary representing the metadata record for the given file. This is the
-        same format as the records stored in the database.
+    dict
+        A dictionary of metadata attributes for the file, suitable for
+        insertion into the database.
+
+    Raises
+    ------
+    ValueError
+        If ``bids_file`` is not found in the ``bids_dataset``.

     """
     if bids_file not in bids_dataset.files:
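
A hedged sketch of how this function might be driven to index a whole dataset. The ``index_dataset`` helper is hypothetical; only the function signature and the ``bids_dataset.files`` attribute are taken from the hunk above.

from typing import Any

from eegdash.bids_eeg_metadata import load_eeg_attrs_from_bids_file

def index_dataset(bids_dataset) -> list[dict[str, Any]]:
    """Hypothetical helper: build one metadata record per file listed by the dataset object."""
    return [load_eeg_attrs_from_bids_file(bids_dataset, f) for f in bids_dataset.files]
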
@@ -198,11 +221,23 @@ def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any]:


 def normalize_key(key: str) -> str:
-    """Normalize a metadata key for robust matching.
+    """Normalize a string key for robust matching.
+
+    Converts the key to lowercase, replaces non-alphanumeric characters with
+    underscores, and removes leading/trailing underscores. This allows for
+    tolerant matching of keys that may have different capitalization or
+    separators (e.g., "p-factor" becomes "p_factor").
+
+    Parameters
+    ----------
+    key : str
+        The key to normalize.
+
+    Returns
+    -------
+    str
+        The normalized key.

-    Lowercase and replace non-alphanumeric characters with underscores, then strip
-    leading/trailing underscores. This allows tolerant matching such as
-    "p-factor" ≈ "p_factor" ≈ "P Factor".
     """
     return re.sub(r"[^a-z0-9]+", "_", str(key).lower()).strip("_")
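
The normalization rule can be checked directly against the one-line implementation shown in this hunk:

from eegdash.bids_eeg_metadata import normalize_key

# All three spellings collapse onto the same normalized key, as the docstring example states.
assert normalize_key("p-factor") == "p_factor"
assert normalize_key("P Factor") == "p_factor"
assert normalize_key("p_factor") == "p_factor"
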

@@ -212,27 +247,27 @@ def merge_participants_fields(
     participants_row: dict[str, Any] | None,
     description_fields: list[str] | None = None,
 ) -> dict[str, Any]:
-    """Merge participants.tsv fields into a dataset description dictionary.
+    """Merge fields from a participants.tsv row into a description dict.

-    - Preserves existing entries in ``description`` (no overwrites).
-    - Fills requested ``description_fields`` first, preserving their original names.
-    - Adds all remaining participants columns generically using normalized keys
-      unless a matching requested field already captured them.
+    Enriches a description dictionary with data from a subject's row in
+    ``participants.tsv``. It avoids overwriting existing keys in the
+    description.

     Parameters
     ----------
     description : dict
-        Current description to be enriched in-place and returned.
-    participants_row : dict | None
-        A mapping of participants.tsv columns for the current subject.
-    description_fields : list[str] | None
-        Optional list of requested description fields. When provided, matching is
-        performed by normalized names; the original requested field names are kept.
+        The description dictionary to enrich.
+    participants_row : dict or None
+        A dictionary representing a row from ``participants.tsv``. If None,
+        the original description is returned unchanged.
+    description_fields : list of str, optional
+        A list of specific fields to include in the description. Matching is
+        done using normalized keys.

     Returns
     -------
     dict
-        The enriched description (same object as input for convenience).
+        The enriched description dictionary.

     """
     if not isinstance(description, dict) or not isinstance(participants_row, dict):
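
A small sketch of the documented no-overwrite merge; the column names and values are purely illustrative.

from eegdash.bids_eeg_metadata import merge_participants_fields

description = {"subject": "01", "age": 12}  # pre-existing keys are preserved
row = {"participant_id": "sub-01", "age": 99, "p-factor": 0.3}

merged = merge_participants_fields(description, row, description_fields=["p_factor"])
# Per the docstring: "age" keeps its original value (12), and "p-factor" is captured
# under the requested name "p_factor" via normalized-key matching.
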
@@ -272,10 +307,26 @@ def participants_row_for_subject(
     subject: str,
     id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
 ) -> pd.Series | None:
-    """Load participants.tsv and return the row for a subject.
+    """Load participants.tsv and return the row for a specific subject.
+
+    Searches for a subject's data in the ``participants.tsv`` file within a
+    BIDS dataset. It can identify the subject with or without the "sub-"
+    prefix.
+
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    subject : str
+        The subject identifier (e.g., "01" or "sub-01").
+    id_columns : tuple of str, default ("participant_id", "participant", "subject")
+        A tuple of column names to search for the subject identifier.
+
+    Returns
+    -------
+    pandas.Series or None
+        A pandas Series containing the subject's data if found, otherwise None.

-    - Accepts either "01" or "sub-01" as the subject identifier.
-    - Returns a pandas Series for the first matching row, or None if not found.
     """
     try:
         participants_tsv = Path(bids_root) / "participants.tsv"
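
A usage sketch assuming a local BIDS dataset at a hypothetical path; the "age" column is likewise illustrative.

from eegdash.bids_eeg_metadata import participants_row_for_subject

row = participants_row_for_subject("/data/ds005505", "01")  # "sub-01" is accepted too
if row is not None:
    print(row.get("age", "n/a"))  # ``row`` is a pandas Series
else:
    print("subject not found in participants.tsv")
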
@@ -311,9 +362,28 @@ def participants_extras_from_tsv(
     id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
     na_like: tuple[str, ...] = ("", "n/a", "na", "nan", "unknown", "none"),
 ) -> dict[str, Any]:
-    """Return non-identifier, non-empty participants.tsv fields for a subject.
+    """Extract additional participant information from participants.tsv.
+
+    Retrieves all non-identifier and non-empty fields for a subject from
+    the ``participants.tsv`` file.
+
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    subject : str
+        The subject identifier.
+    id_columns : tuple of str, default ("participant_id", "participant", "subject")
+        Column names to be treated as identifiers and excluded from the
+        output.
+    na_like : tuple of str, default ("", "n/a", "na", "nan", "unknown", "none")
+        Values to be considered as "Not Available" and excluded.
+
+    Returns
+    -------
+    dict
+        A dictionary of extra participant information.

-    Uses vectorized pandas operations to drop id columns and NA-like values.
     """
     row = participants_row_for_subject(bids_root, subject, id_columns=id_columns)
     if row is None:
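
A sketch of the extras extraction; the path and the resulting keys are assumptions for illustration.

from eegdash.bids_eeg_metadata import participants_extras_from_tsv

extras = participants_extras_from_tsv("/data/ds005505", "sub-01")
# Identifier columns and NA-like values ("", "n/a", "nan", ...) are dropped, so only
# informative columns remain, e.g. {"age": 12, "sex": "F"} (illustrative).
print(extras)
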
@@ -331,10 +401,21 @@ def attach_participants_extras(
     description: Any,
     extras: dict[str, Any],
 ) -> None:
-    """Attach extras to Raw.info and dataset description without overwriting.
+    """Attach extra participant data to a raw object and its description.
+
+    Updates the ``raw.info['subject_info']`` and the description object
+    (dict or pandas Series) with extra data from ``participants.tsv``.
+    It does not overwrite existing keys.
+
+    Parameters
+    ----------
+    raw : mne.io.Raw
+        The MNE Raw object to be updated.
+    description : dict or pandas.Series
+        The description object to be updated.
+    extras : dict
+        A dictionary of extra participant information to attach.

-    - Adds to ``raw.info['subject_info']['participants_extras']``.
-    - Adds to ``description`` if dict or pandas Series (only missing keys).
     """
     if not extras:
         return
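
A hedged sketch combining the two helpers above into the documented two-step flow; the ``enrich_manually`` wrapper is hypothetical.

from typing import Any

import mne

from eegdash.bids_eeg_metadata import (
    attach_participants_extras,
    participants_extras_from_tsv,
)

def enrich_manually(bids_root: str, subject: str, raw: mne.io.Raw, description: dict[str, Any]) -> None:
    """Hypothetical helper: fetch participants.tsv extras and attach them without overwriting."""
    extras = participants_extras_from_tsv(bids_root, subject)
    # Per the docstrings: lands under raw.info["subject_info"]["participants_extras"]
    # and fills only missing keys of ``description``.
    attach_participants_extras(raw, description, extras)
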
@@ -375,9 +456,28 @@ def enrich_from_participants(
     raw: Any,
     description: Any,
 ) -> dict[str, Any]:
-    """Convenience wrapper: read participants.tsv and attach extras for this subject.
+    """Read participants.tsv and attach extra info for the subject.
+
+    This is a convenience function that finds the subject from the
+    ``bidspath``, retrieves extra information from ``participants.tsv``,
+    and attaches it to the raw object and its description.
+
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    bidspath : mne_bids.BIDSPath
+        The BIDSPath object for the current data file.
+    raw : mne.io.Raw
+        The MNE Raw object to be updated.
+    description : dict or pandas.Series
+        The description object to be updated.
+
+    Returns
+    -------
+    dict
+        The dictionary of extras that were attached.

-    Returns the extras dictionary for further use if needed.
     """
     subject = getattr(bidspath, "subject", None)
     if not subject:
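
The convenience wrapper can replace the manual flow sketched earlier. The BIDS entities and path below are illustrative assumptions.

from mne_bids import BIDSPath, read_raw_bids

from eegdash.bids_eeg_metadata import enrich_from_participants

bids_root = "/data/ds005505"  # hypothetical local copy of the dataset
bp = BIDSPath(root=bids_root, subject="01", task="RestingState", datatype="eeg")
raw = read_raw_bids(bp, verbose=False)

description = {}
extras = enrich_from_participants(bids_root, bp, raw, description)
# ``extras`` is returned and also attached to ``raw`` and ``description``.
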

eegdash/const.py

Lines changed: 25 additions & 0 deletions
@@ -28,6 +28,8 @@
     "nchans",
     "ntimes",
 }
+"""set: A set of field names that are permitted in database queries constructed
+via :func:`~eegdash.api.EEGDash.find` with keyword arguments."""

 RELEASE_TO_OPENNEURO_DATASET_MAP = {
     "R11": "ds005516",
@@ -42,6 +44,8 @@
     "R2": "ds005506",
     "R1": "ds005505",
 }
+"""dict: A mapping from Healthy Brain Network (HBN) release identifiers (e.g., "R11")
+to their corresponding OpenNeuro dataset identifiers (e.g., "ds005516")."""

 SUBJECT_MINI_RELEASE_MAP = {
     "R11": [
@@ -287,6 +291,9 @@
         "NDARFW972KFQ",
     ],
 }
+"""dict: A mapping from HBN release identifiers to a list of subject IDs.
+This is used to select a small, representative subset of subjects for creating
+"mini" datasets for testing and demonstration purposes."""

 config = {
     "required_fields": ["data_name"],
@@ -322,3 +329,21 @@
     ],
     "accepted_query_fields": ["data_name", "dataset"],
 }
+"""dict: A global configuration dictionary for the EEGDash package.
+
+Keys
+----
+required_fields : list
+    Fields that must be present in every database record.
+attributes : dict
+    A schema defining the expected primary attributes and their types for a
+    database record.
+description_fields : list
+    A list of fields considered to be descriptive metadata for a recording,
+    which can be used for filtering and display.
+bids_dependencies_files : list
+    A list of BIDS metadata filenames that are relevant for interpreting an
+    EEG recording.
+accepted_query_fields : list
+    Fields that are accepted for lightweight existence checks in the database.
+"""
