3333
3434
3535def build_query_from_kwargs (** kwargs ) -> dict [str , Any ]:
36- """Build and validate a MongoDB query from user-friendly keyword arguments.
36+ """Build and validate a MongoDB query from keyword arguments.
37+
38+ This function converts user-friendly keyword arguments into a valid
39+ MongoDB query dictionary. It handles scalar values as exact matches and
40+ list-like values as ``$in`` queries. It also performs validation to
41+ reject unsupported fields and empty values.
42+
43+ Parameters
44+ ----------
45+ **kwargs
46+ Keyword arguments representing query filters. Allowed keys are defined
47+ in ``eegdash.const.ALLOWED_QUERY_FIELDS``.
48+
49+ Returns
50+ -------
51+ dict
52+ A MongoDB query dictionary.
53+
54+ Raises
55+ ------
56+ ValueError
57+ If an unsupported query field is provided, or if a value is None or
58+ an empty string/list.
3759
38- Improvements:
39- - Reject None values and empty/whitespace-only strings
40- - For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
41- - Preserve scalars as exact matches
4260 """
4361 # 1. Validate that all provided keys are allowed for querying
4462 unknown_fields = set (kwargs .keys ()) - ALLOWED_QUERY_FIELDS
@@ -89,24 +107,29 @@ def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
89107
90108
91109def load_eeg_attrs_from_bids_file (bids_dataset , bids_file : str ) -> dict [str , Any ]:
92- """Build the metadata record for a given BIDS file (single recording) in a BIDS dataset .
110+ """Build a metadata record for a BIDS file.
93111
94- Attributes are at least the ones defined in data_config attributes (set to None if missing),
95- but are typically a superset, and include, among others, the paths to relevant
96- meta-data files needed to load and interpret the file in question .
112+ Extracts metadata attributes from a single BIDS EEG file within a given
113+ BIDS dataset. The extracted attributes include BIDS entities, file paths,
114+ and technical metadata required for database indexing .
97115
98116 Parameters
99117 ----------
100118 bids_dataset : EEGBIDSDataset
101119 The BIDS dataset object containing the file.
102120 bids_file : str
103- The path to the BIDS file within the dataset .
121+ The path to the BIDS file to process .
104122
105123 Returns
106124 -------
107- dict:
108- A dictionary representing the metadata record for the given file. This is the
109- same format as the records stored in the database.
125+ dict
126+ A dictionary of metadata attributes for the file, suitable for
127+ insertion into the database.
128+
129+ Raises
130+ ------
131+ ValueError
132+ If ``bids_file`` is not found in the ``bids_dataset``.
110133
111134 """
112135 if bids_file not in bids_dataset .files :
@@ -198,11 +221,23 @@ def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any
198221
199222
def normalize_key(key: str) -> str:
    """Normalize a string key for robust, tolerant matching.

    The key is coerced to ``str``, lowercased, every run of characters
    outside ``a-z`` / ``0-9`` is collapsed into a single underscore, and
    leading/trailing underscores are stripped. Keys that differ only in
    capitalization or separators therefore normalize to the same value,
    e.g. ``"p-factor"``, ``"p_factor"`` and ``"P Factor"`` all become
    ``"p_factor"``.

    Parameters
    ----------
    key : str
        The key to normalize. Non-string values are converted with
        ``str()`` before normalization.

    Returns
    -------
    str
        The normalized key. This is an empty string when ``key`` contains
        no ASCII letters or digits.

    Notes
    -----
    The character class is ASCII-only, so non-ASCII letters (for example
    accented characters) are treated as separators and replaced as well.

    """
    # Lowercase first so the ASCII-only class below also covers uppercase input.
    lowered = str(key).lower()
    # "+" collapses each run of separators into one underscore.
    collapsed = re.sub(r"[^a-z0-9]+", "_", lowered)
    return collapsed.strip("_")
208243
@@ -212,27 +247,27 @@ def merge_participants_fields(
212247 participants_row : dict [str , Any ] | None ,
213248 description_fields : list [str ] | None = None ,
214249) -> dict [str , Any ]:
215- """Merge participants.tsv fields into a dataset description dictionary .
250+ """Merge fields from a participants.tsv row into a description dict .
216251
217- - Preserves existing entries in ``description`` (no overwrites).
218- - Fills requested ``description_fields`` first, preserving their original names.
219- - Adds all remaining participants columns generically using normalized keys
220- unless a matching requested field already captured them.
252+ Enriches a description dictionary with data from a subject's row in
253+ ``participants.tsv``. It avoids overwriting existing keys in the
254+ description.
221255
222256 Parameters
223257 ----------
224258 description : dict
225- Current description to be enriched in-place and returned.
226- participants_row : dict | None
227- A mapping of participants.tsv columns for the current subject.
228- description_fields : list[str] | None
229- Optional list of requested description fields. When provided, matching is
230- performed by normalized names; the original requested field names are kept.
259+ The description dictionary to enrich.
260+ participants_row : dict or None
261+ A dictionary representing a row from ``participants.tsv``. If None,
262+ the original description is returned unchanged.
263+ description_fields : list of str, optional
264+ A list of specific fields to include in the description. Matching is
265+ done using normalized keys.
231266
232267 Returns
233268 -------
234269 dict
235- The enriched description (same object as input for convenience) .
270+ The enriched description dictionary .
236271
237272 """
238273 if not isinstance (description , dict ) or not isinstance (participants_row , dict ):
@@ -272,10 +307,26 @@ def participants_row_for_subject(
272307 subject : str ,
273308 id_columns : tuple [str , ...] = ("participant_id" , "participant" , "subject" ),
274309) -> pd .Series | None :
275- """Load participants.tsv and return the row for a subject.
310+ """Load participants.tsv and return the row for a specific subject.
311+
312+ Searches for a subject's data in the ``participants.tsv`` file within a
313+ BIDS dataset. It can identify the subject with or without the "sub-"
314+ prefix.
315+
316+ Parameters
317+ ----------
318+ bids_root : str or Path
319+ The root directory of the BIDS dataset.
320+ subject : str
321+ The subject identifier (e.g., "01" or "sub-01").
322+ id_columns : tuple of str, default ("participant_id", "participant", "subject")
323+ A tuple of column names to search for the subject identifier.
324+
325+ Returns
326+ -------
327+ pandas.Series or None
328+ A pandas Series containing the subject's data if found, otherwise None.
276329
277- - Accepts either "01" or "sub-01" as the subject identifier.
278- - Returns a pandas Series for the first matching row, or None if not found.
279330 """
280331 try :
281332 participants_tsv = Path (bids_root ) / "participants.tsv"
@@ -311,9 +362,28 @@ def participants_extras_from_tsv(
311362 id_columns : tuple [str , ...] = ("participant_id" , "participant" , "subject" ),
312363 na_like : tuple [str , ...] = ("" , "n/a" , "na" , "nan" , "unknown" , "none" ),
313364) -> dict [str , Any ]:
314- """Return non-identifier, non-empty participants.tsv fields for a subject.
365+ """Extract additional participant information from participants.tsv.
366+
367+ Retrieves all non-identifier and non-empty fields for a subject from
368+ the ``participants.tsv`` file.
369+
370+ Parameters
371+ ----------
372+ bids_root : str or Path
373+ The root directory of the BIDS dataset.
374+ subject : str
375+ The subject identifier.
376+ id_columns : tuple of str, default ("participant_id", "participant", "subject")
377+ Column names to be treated as identifiers and excluded from the
378+ output.
379+ na_like : tuple of str, default ("", "n/a", "na", "nan", "unknown", "none")
380+ Values to be considered as "Not Available" and excluded.
381+
382+ Returns
383+ -------
384+ dict
385+ A dictionary of extra participant information.
315386
316- Uses vectorized pandas operations to drop id columns and NA-like values.
317387 """
318388 row = participants_row_for_subject (bids_root , subject , id_columns = id_columns )
319389 if row is None :
@@ -331,10 +401,21 @@ def attach_participants_extras(
331401 description : Any ,
332402 extras : dict [str , Any ],
333403) -> None :
334- """Attach extras to Raw.info and dataset description without overwriting.
404+ """Attach extra participant data to a raw object and its description.
405+
406+ Updates the ``raw.info['subject_info']`` and the description object
407+ (dict or pandas Series) with extra data from ``participants.tsv``.
408+ It does not overwrite existing keys.
409+
410+ Parameters
411+ ----------
412+ raw : mne.io.Raw
413+ The MNE Raw object to be updated.
414+ description : dict or pandas.Series
415+ The description object to be updated.
416+ extras : dict
417+ A dictionary of extra participant information to attach.
335418
336- - Adds to ``raw.info['subject_info']['participants_extras']``.
337- - Adds to ``description`` if dict or pandas Series (only missing keys).
338419 """
339420 if not extras :
340421 return
@@ -375,9 +456,28 @@ def enrich_from_participants(
375456 raw : Any ,
376457 description : Any ,
377458) -> dict [str , Any ]:
378- """Convenience wrapper: read participants.tsv and attach extras for this subject.
459+ """Read participants.tsv and attach extra info for the subject.
460+
461+ This is a convenience function that finds the subject from the
462+ ``bidspath``, retrieves extra information from ``participants.tsv``,
463+ and attaches it to the raw object and its description.
464+
465+ Parameters
466+ ----------
467+ bids_root : str or Path
468+ The root directory of the BIDS dataset.
469+ bidspath : mne_bids.BIDSPath
470+ The BIDSPath object for the current data file.
471+ raw : mne.io.Raw
472+ The MNE Raw object to be updated.
473+ description : dict or pandas.Series
474+ The description object to be updated.
475+
476+ Returns
477+ -------
478+ dict
479+ The dictionary of extras that were attached.
379480
380- Returns the extras dictionary for further use if needed.
381481 """
382482 subject = getattr (bidspath , "subject" , None )
383483 if not subject :
0 commit comments