Skip to content

Commit 62f0f1a

Browse files
Merge pull request #132 from sccn/develop
Version release 0.4
2 parents 75af377 + 0604e32 commit 62f0f1a

File tree

15 files changed

+320
-341
lines changed

15 files changed

+320
-341
lines changed

.github/workflows/doc.yaml

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@ permissions:
1616
jobs:
1717
docs:
1818
runs-on: ${{ matrix.os }}
19-
env:
20-
EEGDASH_CACHE_DIR: ${{ github.workspace }}/.eegdash_cache
21-
MNE_DATA: ${{ github.workspace }}/.eegdash_cache
2219
strategy:
2320
fail-fast: false
2421
matrix:
@@ -31,6 +28,33 @@ jobs:
3128
with:
3229
python-version: ${{ matrix.python-version }}
3330

31+
- name: Configure dataset cache paths
32+
id: cache-paths
33+
shell: python
34+
run: |
35+
import os
36+
from pathlib import Path
37+
38+
home = Path.home()
39+
workspace = Path(os.environ["GITHUB_WORKSPACE"]).resolve()
40+
candidates = {
41+
"primary": home / "eegdash_cache",
42+
"home_dot": home / ".eegdash_cache",
43+
"workspace": workspace / ".eegdash_cache",
44+
"mne_data": home / "mne_data",
45+
}
46+
47+
for path in candidates.values():
48+
path.mkdir(parents=True, exist_ok=True)
49+
50+
with open(os.environ["GITHUB_ENV"], "a", encoding="utf-8") as env_file:
51+
env_file.write(f"EEGDASH_CACHE_DIR={candidates['primary']}\n")
52+
env_file.write(f"MNE_DATA={candidates['primary']}\n")
53+
54+
with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as output:
55+
for key, path in candidates.items():
56+
output.write(f"{key}={path}\n")
57+
3458
- name: Install dependencies
3559
run: |
3660
python -m pip install uv
@@ -42,16 +66,47 @@ jobs:
4266
. .venv/bin/activate
4367
echo PATH=$PATH >> $GITHUB_ENV
4468
45-
- name: Create/Restore Data Caches (workspace)
69+
- name: Restore Data Caches (pull_request)
70+
if: github.event_name == 'pull_request'
71+
id: cache-data-restore
72+
uses: actions/cache@v4
73+
with:
74+
path: |
75+
${{ steps.cache-paths.outputs.primary }}
76+
${{ steps.cache-paths.outputs.home_dot }}
77+
${{ steps.cache-paths.outputs.workspace }}
78+
${{ steps.cache-paths.outputs.mne_data }}
79+
# The cache key includes the dataset manifest hash, so adding datasets automatically invalidates the cache once.
80+
key: ${{ runner.os }}-data-${{ github.head_ref || github.ref_name }}-${{ hashFiles('consolidated/datasets_consolidated.json') }}-v2
81+
restore-keys: |
82+
${{ runner.os }}-data-${{ github.base_ref || github.ref_name }}-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
83+
${{ runner.os }}-data-develop-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
84+
${{ runner.os }}-data-main-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
85+
${{ runner.os }}-data-${{ github.base_ref || github.ref_name }}-
86+
${{ runner.os }}-data-develop-
87+
${{ runner.os }}-data-main-
88+
${{ runner.os }}-data-
89+
lookup-only: true
90+
91+
- name: Create/Restore Data Caches (push)
92+
if: github.event_name != 'pull_request'
4693
id: cache-data
4794
uses: actions/cache@v4
4895
with:
4996
path: |
50-
${{ env.EEGDASH_CACHE_DIR }}
51-
# Use a stable key so caches can be reused across runs.
52-
# Bump the suffix (v1 -> v2) to invalidate when needed.
53-
key: ${{ runner.os }}-data-v1
97+
${{ steps.cache-paths.outputs.primary }}
98+
${{ steps.cache-paths.outputs.home_dot }}
99+
${{ steps.cache-paths.outputs.workspace }}
100+
${{ steps.cache-paths.outputs.mne_data }}
101+
# The cache key includes the dataset manifest hash, so adding datasets automatically invalidates the cache once.
102+
key: ${{ runner.os }}-data-${{ github.head_ref || github.ref_name }}-${{ hashFiles('consolidated/datasets_consolidated.json') }}-v2
54103
restore-keys: |
104+
${{ runner.os }}-data-${{ github.base_ref || github.ref_name }}-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
105+
${{ runner.os }}-data-develop-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
106+
${{ runner.os }}-data-main-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
107+
${{ runner.os }}-data-${{ github.base_ref || github.ref_name }}-
108+
${{ runner.os }}-data-develop-
109+
${{ runner.os }}-data-main-
55110
${{ runner.os }}-data-
56111
57112
- name: Create Docs
@@ -66,4 +121,4 @@ jobs:
66121
with:
67122
github_token: ${{ secrets.GITHUB_TOKEN }}
68123
publish_dir: ./docs/build/html
69-
cname: eegdash.org
124+
cname: eegdash.org

.github/workflows/tests.yml

Lines changed: 64 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@ on:
1414
jobs:
1515
test:
1616
runs-on: ${{ matrix.os }}
17-
env:
18-
EEGDASH_CACHE_DIR: ${{ github.workspace }}/.eegdash_cache
19-
MNE_DATA: ${{ github.workspace }}/.eegdash_cache
2017
strategy:
2118
fail-fast: false
2219
matrix:
@@ -26,18 +23,75 @@ jobs:
2623
## Install Braindecode
2724
- name: Checking Out Repository
2825
uses: actions/checkout@v4
26+
- name: Configure dataset cache paths
27+
id: cache-paths
28+
shell: python
29+
run: |
30+
import os
31+
from pathlib import Path
32+
33+
home = Path.home()
34+
workspace = Path(os.environ["GITHUB_WORKSPACE"]).resolve()
35+
candidates = {
36+
"primary": home / "eegdash_cache",
37+
"home_dot": home / ".eegdash_cache",
38+
"workspace": workspace / ".eegdash_cache",
39+
"mne_data": home / "mne_data",
40+
}
41+
42+
for path in candidates.values():
43+
path.mkdir(parents=True, exist_ok=True)
44+
45+
with open(os.environ["GITHUB_ENV"], "a", encoding="utf-8") as env_file:
46+
env_file.write(f"EEGDASH_CACHE_DIR={candidates['primary']}\n")
47+
env_file.write(f"MNE_DATA={candidates['primary']}\n")
48+
49+
with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as output:
50+
for key, path in candidates.items():
51+
output.write(f"{key}={path}\n")
2952
# Cache MNE Data
30-
# The cache key here is fixed except for os
31-
# so if you download a new mne dataset in the code, best to manually increment the key below
32-
- name: Create/Restore EEGDash Cache (workspace)
53+
# The cache key incorporates the consolidated dataset manifest, so the cache refreshes automatically when datasets are added.
54+
- name: Restore EEGDash Cache (pull_request)
55+
if: github.event_name == 'pull_request'
56+
id: cache-mne_data-restore
57+
uses: actions/cache@v4
58+
with:
59+
path: |
60+
${{ steps.cache-paths.outputs.primary }}
61+
${{ steps.cache-paths.outputs.home_dot }}
62+
${{ steps.cache-paths.outputs.workspace }}
63+
${{ steps.cache-paths.outputs.mne_data }}
64+
# The cache key includes the dataset manifest hash, so adding datasets automatically invalidates the cache once.
65+
key: ${{ runner.os }}-data-${{ github.head_ref || github.ref_name }}-${{ hashFiles('consolidated/datasets_consolidated.json') }}-v2
66+
restore-keys: |
67+
${{ runner.os }}-data-${{ github.base_ref || github.ref_name }}-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
68+
${{ runner.os }}-data-develop-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
69+
${{ runner.os }}-data-main-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
70+
${{ runner.os }}-data-${{ github.base_ref || github.ref_name }}-
71+
${{ runner.os }}-data-develop-
72+
${{ runner.os }}-data-main-
73+
${{ runner.os }}-data-
74+
lookup-only: true
75+
76+
- name: Create/Restore EEGDash Cache (push)
77+
if: github.event_name != 'pull_request'
3378
id: cache-mne_data
3479
uses: actions/cache@v4
3580
with:
36-
path: ${{ env.EEGDASH_CACHE_DIR }}
37-
# Use a stable key so caches can be reused across runs.
38-
# Keep in sync with docs workflow; bump suffix to invalidate.
39-
key: ${{ runner.os }}-data-v1
81+
path: |
82+
${{ steps.cache-paths.outputs.primary }}
83+
${{ steps.cache-paths.outputs.home_dot }}
84+
${{ steps.cache-paths.outputs.workspace }}
85+
${{ steps.cache-paths.outputs.mne_data }}
86+
# The cache key includes the dataset manifest hash, so adding datasets automatically invalidates the cache once.
87+
key: ${{ runner.os }}-data-${{ github.head_ref || github.ref_name }}-${{ hashFiles('consolidated/datasets_consolidated.json') }}-v2
4088
restore-keys: |
89+
${{ runner.os }}-data-${{ github.base_ref || github.ref_name }}-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
90+
${{ runner.os }}-data-develop-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
91+
${{ runner.os }}-data-main-${{ hashFiles('consolidated/datasets_consolidated.json') }}-
92+
${{ runner.os }}-data-${{ github.base_ref || github.ref_name }}-
93+
${{ runner.os }}-data-develop-
94+
${{ runner.os }}-data-main-
4195
${{ runner.os }}-data-
4296
4397
- name: Install uv and set the python version

DevNotes.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
pip install -r requirements.txt
33

44
pip uninstall eegdash -y
5-
python -m pip install --editable /Users/arno/Python/EEG-Dash-Data
5+
python -m pip install --editable .
6+
67
# Warning: use the exact command above; running pip install by itself might not work
78

89
### check if working from different folders

README.md

Lines changed: 2 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,6 @@ To leverage recent and ongoing advancements in large-scale computational methods
1414

1515
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
1616

17-
## Featured data
18-
19-
The following HBN datasets are currently featured on EEGDash. Documentation about these datasets is available [here](https://neuromechanist.github.io/data/hbn/).
20-
21-
| DatasetID | Participants | Files | Sessions | Population | Channels | Is 10-20? | Modality | Size |
22-
|---|---|---|---|---|---|---|---|---|
23-
| [ds005505](https://nemar.org/dataexplorer/detail?dataset_id=ds005505) | 136 | 5393 | 1 | Healthy | 129 | other | Visual | 103 GB |
24-
| [ds005506](https://nemar.org/dataexplorer/detail?dataset_id=ds005506) | 150 | 5645 | 1 | Healthy | 129 | other | Visual | 112 GB |
25-
| [ds005507](https://nemar.org/dataexplorer/detail?dataset_id=ds005507) | 184 | 7273 | 1 | Healthy | 129 | other | Visual | 140 GB |
26-
| [ds005508](https://nemar.org/dataexplorer/detail?dataset_id=ds005508) | 324 | 13393 | 1 | Healthy | 129 | other | Visual | 230 GB |
27-
| [ds005510](https://nemar.org/dataexplorer/detail?dataset_id=ds005510) | 135 | 4933 | 1 | Healthy | 129 | other | Visual | 91 GB |
28-
| [ds005512](https://nemar.org/dataexplorer/detail?dataset_id=ds005512) | 257 | 9305 | 1 | Healthy | 129 | other | Visual | 157 GB |
29-
| [ds005514](https://nemar.org/dataexplorer/detail?dataset_id=ds005514) | 295 | 11565 | 1 | Healthy | 129 | other | Visual | 185 GB |
30-
31-
A total of [246 other datasets](datasets.md) are also available through EEGDash.
32-
3317
## Data format
3418

3519
EEGDash queries return a **PyTorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are the best format for EEGDash queries because they provide an efficient, scalable, and flexible structure for ML and DL applications. They allow seamless integration with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.
@@ -41,47 +25,11 @@ EEGDash datasets are processed using the popular [braindecode](https://braindeco
4125
## EEG-Dash usage
4226

4327
### Install
44-
Use your preferred Python environment manager with Python > 3.9 to install the package.
28+
Use your preferred Python environment manager with Python > 3.10 to install the package.
4529
* To install the eegdash package, use the following command: `pip install eegdash`
4630
* To verify the installation, start a Python session and type: `from eegdash import EEGDash`
4731

48-
### Data access
49-
50-
To use the data from a single subject, enter:
51-
52-
```python
53-
from eegdash import EEGDashDataset
54-
55-
ds_NDARDB033FW5 = EEGDashDataset(
56-
{"dataset": "ds005514", "task":
57-
"RestingState", "subject": "NDARDB033FW5"},
58-
cache_dir="."
59-
)
60-
```
61-
62-
This will search and download the metadata for the task **RestingState** for subject **NDARDB033FW5** in BIDS dataset **ds005514**. The actual data will not be downloaded at this stage. Following standard practice, data is only downloaded once it is processed. The **ds_NDARDB033FW5** object is a fully functional braindecode dataset, which is itself a PyTorch dataset. This [tutorial](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_eoec.ipynb) shows how to preprocess the EEG data, extracting portions of the data containing eyes-open and eyes-closed segments, then perform eyes-open vs. eyes-closed classification using a (shallow) deep-learning model.
63-
64-
To use the data from multiple subjects, enter:
65-
66-
```python
67-
from eegdash import EEGDashDataset
68-
69-
ds_ds005505rest = EEGDashDataset(
70-
{"dataset": "ds005505", "task": "RestingState"}, target_name="sex", cache_dir=".
71-
)
72-
```
73-
74-
This will search and download the metadata for the task 'RestingState' for all subjects in BIDS dataset 'ds005505' (a total of 136). As above, the actual data will not be downloaded at this stage so this command is quick to execute. Also, the target class for each subject is assigned using the target_name parameter. This means that this object is ready to be directly fed to a deep learning model, although the [tutorial script](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_sex_classification.ipynb) performs minimal processing on it, prior to training a deep-learning model. Because 14 gigabytes of data are downloaded, this tutorial takes about 10 minutes to execute.
75-
76-
### Automatic caching
77-
78-
By default, EEGDash caches downloaded data under a single, consistent folder:
79-
80-
- If ``EEGDASH_CACHE_DIR`` is set in your environment, that path is used.
81-
- Else, if MNE’s ``MNE_DATA`` config is set, that path is used to align with other EEG tooling.
82-
- Otherwise, ``.eegdash_cache`` in the current working directory is used.
83-
84-
This means that if you run the tutorial [scripts](https://github.com/sccn/EEGDash/tree/develop/notebooks), the data will only be downloaded the first time the script is executed and reused thereafter.
32+
Please check our tutorial webpages to explore what you can do with [eegdash](https://eegdash.org/)!
8533

8634
## Education -- Coming soon...
8735

0 commit comments

Comments
 (0)