Commit 54d4f41

Merge pull request #144 from sccn/develop

Improving the docstring

2 parents: 62f0f1a + 5389765

29 files changed: +705 -68 lines

.gitignore

Lines changed: 4 additions & 2 deletions
@@ -26,14 +26,16 @@ docs/source/generated/
 docs/auto_examples/
 docs/auto_tutorials/
 docs/modules/generated/
+docs/source/api/datasets/*.rst
+docs/source/api/dataset/*.rst
+docs/source/api/generated/
 docs/sphinxext/cachedir
+docs/source/api/api_dataset.rst
 pip-log.txt
 .coverage*
 tags
 cover
 
-docs/source/api/
-
 examples/data
 #Mac files
 .DS_Store

docs/Makefile

Lines changed: 41 additions & 5 deletions
@@ -11,23 +11,59 @@ help:
 
 .PHONY: apidoc
 apidoc:
-	# Generate API docs using the top-level package so modules are importable
-	# as eegdash.* instead of an unimportable bare 'dataset.*'
-	@python -m sphinx.ext.apidoc -o "$(APIDIR)/dataset" "../$(PKG)" -f -e -M
+	# Generate full API docs, then prune duplicates covered by autosummary
+	@python -m sphinx.ext.apidoc -f -e -M -o "$(APIDIR)/dataset" "../$(PKG)"
+	# Remove top-level package page and modules covered elsewhere
+	@rm -f "$(APIDIR)/dataset/eegdash.rst"
+	@rm -f "$(APIDIR)/dataset/eegdash.api.rst"
+	@rm -f "$(APIDIR)/dataset/eegdash.bids_eeg_metadata.rst"
+	@rm -f "$(APIDIR)/dataset/eegdash.const.rst"
+	@rm -f "$(APIDIR)/dataset/eegdash.data_utils.rst"
+	@rm -f "$(APIDIR)/dataset/eegdash.logging.rst"
+	@rm -f "$(APIDIR)/dataset/eegdash.mongodb.rst"
+	@rm -f "$(APIDIR)/dataset/eegdash.paths.rst"
+	@rm -f "$(APIDIR)/dataset/eegdash.utils.rst"
+	@rm -f "$(APIDIR)/dataset/eegdash.features.rst"
+	@rm -f $(APIDIR)/dataset/eegdash.features.*.rst
+	@rm -f "$(APIDIR)/dataset/eegdash.hbn.rst"
+	@rm -f $(APIDIR)/dataset/eegdash.hbn.*.rst
+	@rm -f "$(APIDIR)/dataset/modules.rst"
+
+.PHONY: dataset-pages
+dataset-pages:
+	# Generate individual dataset documentation pages
+	@python generate_dataset_pages.py
 
 # Standard build runs examples
-html: apidoc
+html: apidoc dataset-pages
 
 # Fast build: do NOT execute examples (sphinx-gallery)
 .PHONY: html-noplot
-html-noplot: apidoc
+html-noplot: apidoc dataset-pages
 	@python prepare_summary_tables.py ../eegdash/ $(BUILDDIR)
 	@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" \
 		$(SPHINXOPTS) -D sphinx_gallery_conf.plot_gallery=0 $(O)
 
+# Custom clean target to remove generated API docs and build files
+.PHONY: clean
+clean:
+	@echo "Removing generated API documentation..."
+	@rm -rf "$(APIDIR)/dataset"
+	@rm -rf "$(APIDIR)/generated"
+	@echo "Removing generated dataset pages..."
+	@rm -rf "$(APIDIR)/datasets"
+	@rm -f "$(APIDIR)/api_dataset.rst"
+	@echo "Removing other generated directories..."
+	@rm -rf "$(SOURCEDIR)/generated"
+	@rm -rf "$(SOURCEDIR)/gen_modules"
+	@echo "Removing build directory..."
+	@rm -rf "$(BUILDDIR)"
+	@echo "Clean completed."
+
 .PHONY: help apidoc
 Makefile: ;
 
 %: Makefile
 	@python prepare_summary_tables.py ../eegdash/ $(BUILDDIR)
+	@python generate_dataset_pages.py
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
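
Aside: the explicit rm -f list in the apidoc recipe maps one-to-one onto a little path logic. A rough Python sketch of that prune step (not part of this commit; it assumes $(APIDIR) resolves to docs/source/api, which the clean target and generate_dataset_pages.py both imply, and that it is run from the repository root):

# Rough, illustrative equivalent of the prune step in the apidoc target.
# Assumes $(APIDIR) is docs/source/api and the cwd is the repo root.
from pathlib import Path

api_dataset = Path("docs/source/api/dataset")

explicit = [
    "eegdash.rst",
    "eegdash.api.rst",
    "eegdash.bids_eeg_metadata.rst",
    "eegdash.const.rst",
    "eegdash.data_utils.rst",
    "eegdash.logging.rst",
    "eegdash.mongodb.rst",
    "eegdash.paths.rst",
    "eegdash.utils.rst",
    "eegdash.features.rst",
    "eegdash.hbn.rst",
    "modules.rst",
]

# rm -f never fails on a missing file; missing_ok mirrors that behavior.
for name in explicit:
    (api_dataset / name).unlink(missing_ok=True)

# The two glob removals (eegdash.features.*.rst, eegdash.hbn.*.rst).
for pattern in ("eegdash.features.*.rst", "eegdash.hbn.*.rst"):
    for rst in api_dataset.glob(pattern):
        rst.unlink(missing_ok=True)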

docs/generate_dataset_pages.py

Lines changed: 304 additions & 0 deletions
@@ -0,0 +1,304 @@
#!/usr/bin/env python3
"""Generate individual documentation pages for each EEGDash dataset.

This script creates individual RST files for each dataset with comprehensive
information including metadata, usage examples, and dataset statistics.
"""

import sys
from pathlib import Path

import pandas as pd

# Add the parent directory to the path to import eegdash modules
sys.path.insert(0, str(Path(__file__).parent.parent))

from eegdash.dataset.registry import _markdown_table


def create_dataset_page_template(dataset_id: str, row_series: pd.Series) -> str:
    """Create an RST page template for a specific dataset."""
    # Extract key metadata
    n_subjects = row_series.get("n_subjects", "Unknown")
    n_records = row_series.get("n_records", "Unknown")
    n_tasks = row_series.get("n_tasks", "Unknown")
    modality = row_series.get("modality of exp", "")
    exp_type = row_series.get("type of exp", "")
    subject_type = row_series.get("Type Subject", "")
    duration = row_series.get("duration_hours_total", "Unknown")
    size = row_series.get("size", "Unknown")

    # Create description
    description_parts = []
    if modality and str(modality).strip():
        description_parts.append(f"**{modality}**")
    if exp_type and str(exp_type).strip():
        description_parts.append(f"{exp_type}")
    if subject_type and str(subject_type).strip():
        description_parts.append(f"{subject_type} subjects")

    description = (
        " | ".join(description_parts)
        if description_parts
        else "EEG dataset from OpenNeuro"
    )

    # Generate the metadata table
    table_content = _markdown_table(row_series)

    # Create the RST content
    rst_content = f'''.. _{dataset_id.lower()}:

{dataset_id.upper()}
{"=" * len(dataset_id)}

OpenNeuro Dataset {dataset_id}
------------------------------

{description}

This dataset contains **{n_subjects} subjects** with **{n_records} recordings** across **{n_tasks} tasks**.
Total duration: **{duration} hours**. Dataset size: **{size}**.

Dataset Overview
----------------

{table_content}

Usage Examples
--------------

Basic usage:

.. code-block:: python

    from eegdash.dataset import {dataset_id.upper()}

    # Initialize the dataset
    dataset = {dataset_id.upper()}(cache_dir="./data")

    # Check dataset size
    print(f"Number of recordings: {{len(dataset)}}")

    # Access first recording
    if len(dataset) > 0:
        recording = dataset[0]
        print(f"Recording description: {{recording.description}}")

Loading EEG Data:

.. code-block:: python

    # Load raw EEG data
    if len(dataset) > 0:
        recording = dataset[0]
        raw = recording.load()

        # Inspect the data
        print(f"Sampling rate: {{raw.info['sfreq']}} Hz")
        print(f"Number of channels: {{len(raw.ch_names)}}")
        print(f"Duration: {{raw.times[-1]:.1f}} seconds")
        print(f"Channel names: {{raw.ch_names[:5]}}...")  # First 5 channels

Advanced Filtering:

.. code-block:: python

    # Filter by specific criteria (if applicable)
    filtered_dataset = {dataset_id.upper()}(
        cache_dir="./data",
        query={{"task": "RestingState"}}  # Example filter
    )

    # Combine with other datasets
    from eegdash import EEGDashDataset

    # Load multiple datasets
    combined = EEGDashDataset(
        cache_dir="./data",
        dataset=["{dataset_id}", "ds002718"],  # Multiple datasets
        subject=["001", "002"]  # Specific subjects
    )

Dataset Information
-------------------

**Dataset ID**: {dataset_id}

**OpenNeuro URL**: https://openneuro.org/datasets/{dataset_id}

**NeMAR URL**: https://nemar.org/dataexplorer/detail?dataset_id={dataset_id}

**Key Statistics**:

- **Subjects**: {n_subjects}
- **Recordings**: {n_records}
- **Tasks**: {n_tasks}
- **Duration**: {duration} hours
- **Size**: {size}
- **Modality**: {modality or "EEG"}
- **Experiment Type**: {exp_type or "Not specified"}
- **Subject Type**: {subject_type or "Not specified"}

Related Documentation
---------------------

- :class:`eegdash.api.EEGDashDataset` - Main dataset class
- :doc:`../api_core` - Core API reference
- :ref:`overview` - EEGDash overview

See Also
--------

- `OpenNeuro dataset page <https://openneuro.org/datasets/{dataset_id}>`_
- `NeMAR data explorer <https://nemar.org/dataexplorer/detail?dataset_id={dataset_id}>`_
- :ref:`dataset_index` - Browse all available datasets
'''

    return rst_content


def generate_dataset_index_page(df: pd.DataFrame) -> str:
    """Generate an index page listing all datasets."""
    # Group datasets by modality for better organization
    modalities = df.groupby("modality of exp").size().sort_values(ascending=False)
    total_datasets = len(df)
    rst_content = """.. _dataset_index:

Dataset Index
=============

EEGDash provides access to **{total_datasets} EEG datasets** from OpenNeuro. Each dataset has its own dedicated documentation page with detailed metadata, usage examples, and statistics.

Quick Statistics
----------------

- **Total Datasets**: {total_datasets}
- **Total Subjects**: {total_subjects:,}
- **Total Recordings**: {total_records:,}
- **Total Duration**: {total_duration:.1f} hours
- **Total Size**: {total_size:.1f} GB

Browse by Modality
------------------

""".format(
        total_datasets=total_datasets,
        total_subjects=df["n_subjects"].sum(),
        total_records=df["n_records"].sum(),
        total_duration=df["duration_hours_total"].sum(),
        total_size=df["size_bytes"].sum() / (1024**3),  # Convert to GB
    )

    # Add modality sections
    for modality, count in modalities.head(10).items():
        if pd.isna(modality) or modality == "":
            modality = "Other"

        rst_content += f"""
{modality} ({count} datasets)
{"^" * (len(modality) + len(f" ({count} datasets)"))}

"""

        # List datasets for this modality
        modality_datasets = (
            df[df["modality of exp"] == modality]
            if modality != "Other"
            else df[df["modality of exp"].isna() | (df["modality of exp"] == "")]
        )

        # Show ALL datasets for this modality (no truncation)
        for _, row in modality_datasets.iterrows():
            dataset_id = row["dataset"]
            n_subjects = row["n_subjects"]
            n_records = row["n_records"]
            exp_type = row.get("type of exp", "")

            rst_content += f"- :doc:`{dataset_id} <datasets/{dataset_id}>` - {n_subjects} subjects, {n_records} recordings"
            if exp_type and pd.notna(exp_type):
                rst_content += f" ({exp_type})"
            rst_content += "\n"

        rst_content += "\n"

    # Add alphabetical index
    rst_content += """
Complete Alphabetical Index
---------------------------

.. toctree::
   :maxdepth: 1
   :glob:

   datasets/*

All Datasets (Alphabetical)
---------------------------

"""

    # Add alphabetical list
    for _, row in df.sort_values("dataset").iterrows():
        dataset_id = row["dataset"]
        n_subjects = row["n_subjects"]
        n_records = row["n_records"]
        size = row["size"]

        rst_content += f"- :doc:`{dataset_id} <datasets/{dataset_id}>` - {n_subjects} subjects, {n_records} recordings, {size}\n"

    # Include key API module pages to satisfy toctree inclusion and avoid warnings
    rst_content += """

.. toctree::
   :hidden:
   :maxdepth: 1

   dataset/eegdash.dataset
   dataset/eegdash.downloader
"""

    return rst_content


def main():
    """Generate all dataset documentation pages."""
    # Load dataset metadata
    csv_path = (
        Path(__file__).parent.parent / "eegdash" / "dataset" / "dataset_summary.csv"
    )
    df = pd.read_csv(csv_path, comment="#", skip_blank_lines=True)

    print(f"Generating documentation for {len(df)} datasets...")

    # Create output directories
    output_dir = Path(__file__).parent / "source" / "api" / "datasets"
    output_dir.mkdir(exist_ok=True, parents=True)

    # Generate individual dataset pages
    for _, row in df.iterrows():
        dataset_id = row["dataset"]
        print(f"  Generating {dataset_id}...")

        # Create RST content
        rst_content = create_dataset_page_template(dataset_id, row)

        # Write to file
        output_file = output_dir / f"{dataset_id}.rst"
        with open(output_file, "w") as f:
            f.write(rst_content)

    # Generate index page
    print("Generating dataset index page...")
    index_content = generate_dataset_index_page(df)
    index_file = Path(__file__).parent / "source" / "api" / "api_dataset.rst"
    with open(index_file, "w") as f:
        f.write(index_content)

    print(f"✅ Generated {len(df)} dataset pages + index page")
    print(f"📁 Output directory: {output_dir}")
    print(f"📄 Index page: {index_file}")


if __name__ == "__main__":
    main()
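
For orientation, a minimal sketch of exercising create_dataset_page_template on its own. The metadata values below are invented for illustration; real rows come from eegdash/dataset/dataset_summary.csv as loaded in main(). It assumes the script is run from the docs/ directory with eegdash importable, so the module-level _markdown_table import succeeds:

# Minimal sketch: render one dataset page from a synthetic metadata row.
# The row values are invented; column names mirror dataset_summary.csv.
import pandas as pd

from generate_dataset_pages import create_dataset_page_template

row = pd.Series(
    {
        "n_subjects": 18,
        "n_records": 18,
        "n_tasks": 1,
        "modality of exp": "Visual",
        "type of exp": "Face processing",
        "Type Subject": "Healthy",
        "duration_hours_total": 6.0,
        "size": "5 GB",
    }
)

rst = create_dataset_page_template("ds002718", row)
print(rst.splitlines()[0])  # -> ".. _ds002718:"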
