
Commit 9098b5f

updating the __all__
1 parent 84f6207 commit 9098b5f

File tree: 9 files changed, +251 −10 lines


eegdash/features/datasets.py

Lines changed: 6 additions & 0 deletions
@@ -19,6 +19,12 @@
 from ..logging import logger


+__all__ = [
+    "FeaturesDataset",
+    "FeaturesConcatDataset",
+]
+
+
 class FeaturesDataset(EEGWindowsDataset):
     """A dataset of features extracted from EEG windows.
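For context: `__all__` controls what a wildcard import exposes, so after this hunk a star-import of the module yields exactly the two public classes rather than every name the module itself imports. A minimal sketch of the effect, assuming the package is installed:

import eegdash.features.datasets as m

print(m.__all__)  # ['FeaturesDataset', 'FeaturesConcatDataset']

# A wildcard import now binds only the names listed in __all__.
ns = {}
exec("from eegdash.features.datasets import *", ns)
print(sorted(k for k in ns if not k.startswith("__")))
# ['FeaturesConcatDataset', 'FeaturesDataset']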

eegdash/features/decorators.py

Lines changed: 9 additions & 0 deletions
@@ -11,6 +11,15 @@
 )


+__all__ = [
+    "bivariate_feature",
+    "FeatureKind",
+    "FeaturePredecessor",
+    "multivariate_feature",
+    "univariate_feature",
+]
+
+
 class FeaturePredecessor:
     """A decorator to specify parent extractors for a feature function.

eegdash/features/extractors.py

Lines changed: 10 additions & 0 deletions
@@ -9,6 +9,16 @@
 from numba.core.dispatcher import Dispatcher


+__all__ = [
+    "BivariateFeature",
+    "DirectedBivariateFeature",
+    "FeatureExtractor",
+    "MultivariateFeature",
+    "TrainableFeature",
+    "UnivariateFeature",
+]
+
+
 def _get_underlying_func(func: Callable) -> Callable:
     """Get the underlying function from a potential wrapper.

eegdash/features/feature_bank/signal.py

Lines changed: 11 additions & 10 deletions
@@ -8,20 +8,21 @@

 __all__ = [
     "HilbertFeatureExtractor",
-    "signal_mean",
-    "signal_variance",
-    "signal_skewness",
+    "SIGNAL_PREDECESSORS",
+    "signal_decorrelation_time",
+    "signal_hjorth_activity",
+    "signal_hjorth_complexity",
+    "signal_hjorth_mobility",
     "signal_kurtosis",
-    "signal_std",
-    "signal_root_mean_square",
+    "signal_line_length",
+    "signal_mean",
     "signal_peak_to_peak",
     "signal_quantile",
+    "signal_root_mean_square",
+    "signal_skewness",
+    "signal_std",
+    "signal_variance",
     "signal_zero_crossings",
-    "signal_line_length",
-    "signal_hjorth_activity",
-    "signal_hjorth_mobility",
-    "signal_hjorth_complexity",
-    "signal_decorrelation_time",
 ]

eegdash/features/feature_bank/utils.py

Lines changed: 9 additions & 0 deletions
@@ -1,5 +1,14 @@
 import numpy as np

+
+__all__ = [
+    "DEFAULT_FREQ_BANDS",
+    "get_valid_freq_band",
+    "reduce_freq_bands",
+    "slice_freq_band",
+]
+
+
 DEFAULT_FREQ_BANDS = {
     "delta": (1, 4.5),
     "theta": (4.5, 8),

eegdash/features/inspect.py

Lines changed: 9 additions & 0 deletions
@@ -7,6 +7,15 @@
 from .extractors import FeatureExtractor, MultivariateFeature, _get_underlying_func


+__all__ = [
+    "get_all_feature_extractors",
+    "get_all_feature_kinds",
+    "get_all_features",
+    "get_feature_kind",
+    "get_feature_predecessors",
+]
+
+
 def get_feature_predecessors(feature_or_extractor: Callable) -> list:
     """Get the dependency hierarchy for a feature or feature extractor.

eegdash/features/serialization.py

Lines changed: 5 additions & 0 deletions
@@ -19,6 +19,11 @@
 from .datasets import FeaturesConcatDataset, FeaturesDataset


+__all__ = [
+    "load_features_concat_dataset",
+]
+
+
 def load_features_concat_dataset(
     path: str | Path, ids_to_load: list[int] | None = None, n_jobs: int = 1
 ) -> FeaturesConcatDataset:
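Based on the signature visible in this hunk, a hedged usage sketch (the path and ids are illustrative, not taken from the commit):

from eegdash.features.serialization import load_features_concat_dataset

# Load a saved features dataset from disk, restricting to two stored
# sub-datasets and reading them with four parallel jobs.
concat_ds = load_features_concat_dataset(
    "path/to/saved_features", ids_to_load=[0, 1], n_jobs=4
)
print(type(concat_ds).__name__)  # FeaturesConcatDataset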

eegdash/features/utils.py

Lines changed: 6 additions & 0 deletions
@@ -18,6 +18,12 @@
 from .extractors import FeatureExtractor


+__all__ = [
+    "extract_features",
+    "fit_feature_extractors",
+]
+
+
 def _extract_features_from_windowsdataset(
     win_ds: EEGWindowsDataset | WindowsDataset,
     feature_extractor: FeatureExtractor,
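Since the point of the commit is that each module now declares its exports explicitly, a quick check can confirm that every listed name actually exists. This is a suggested sanity test, not part of the commit:

import importlib

# Hypothetical sanity check: each name in a module's __all__ must
# resolve to a real attribute of that module.
for mod_name in [
    "eegdash.features.datasets",
    "eegdash.features.decorators",
    "eegdash.features.extractors",
    "eegdash.features.inspect",
    "eegdash.features.serialization",
    "eegdash.features.utils",
]:
    mod = importlib.import_module(mod_name)
    for name in mod.__all__:
        assert hasattr(mod, name), f"{mod_name} is missing {name!r}"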

scripts/1_nemar_dataset_list.py

Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+"""Script to retrieve and process NEMAR datasets."""
+
+import logging
+import os
+import sys
+from typing import Dict, List, Optional
+
+import requests
+import urllib3
+
+# Add the project root to the Python path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import eegdash.dataset
+
+# Disable SSL warnings since we're using verify=False
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format="%(message)s")
+logger = logging.getLogger(__name__)
+
+
+class NemarAPI:
+    """Client for interacting with the NEMAR API."""
+
+    def __init__(self, token: Optional[str] = None):
+        """Initialize NEMAR API client.
+
+        Args:
+            token: NEMAR access token. If not provided, will look for NEMAR_TOKEN env variable.
+
+        Raises:
+            ValueError: If no token is provided or found in environment.
+
+        """
+        self.base_url = "https://nemar.org/api/dataexplorer/datapipeline"
+        self.token = token or os.environ.get("NEMAR_TOKEN")
+        if not self.token:
+            raise ValueError(
+                "NEMAR token must be provided either as argument or NEMAR_TOKEN environment variable"
+            )
+
+    def get_datasets(self, start: int = 0, limit: int = 500) -> Optional[Dict]:
+        """Get list of datasets from NEMAR.
+
+        Args:
+            start: Starting index for pagination.
+            limit: Maximum number of datasets to return.
+
+        Returns:
+            JSON response containing dataset information or None if request fails.
+
+        """
+        payload = {
+            "nemar_access_token": self.token,
+            "table_name": "dataexplorer_dataset",
+            "start": start,
+            "limit": limit,
+        }
+
+        try:
+            response = requests.post(
+                f"{self.base_url}/list",
+                headers={"Content-Type": "application/json"},
+                json=payload,
+                verify=False,
+            )
+            response.raise_for_status()
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            logger.error("Error fetching datasets: %s", e)
+            return None
+
+    @staticmethod
+    def extract_dataset_info(datasets_response: Dict) -> List[Dict]:
+        """Extract relevant information from datasets response.
+
+        Args:
+            datasets_response: Response from get_datasets().
+
+        Returns:
+            List of dictionaries containing dataset information.
+
+        """
+        if not datasets_response or "entries" not in datasets_response:
+            return []
+
+        return [
+            {
+                "id": data["id"],
+                "name": data["name"],
+                "modalities": data["modalities"],
+                "participants": data["participants"],
+                "file_size": data["file_size"],
+                "file_size_gb": float(data["file_size"]) / (1024 * 1024 * 1024),
+                "tasks": data.get("tasks", ""),
+                "authors": data.get("Authors", ""),
+                "doi": data.get("DatasetDOI", ""),
+            }
+            for _, data in datasets_response["entries"].items()
+        ]
+
+
+def fetch_all_datasets() -> List[Dict]:
+    """Fetch all available datasets from NEMAR.
+
+    Returns:
+        List of dataset information dictionaries.
+
+    """
+    try:
+        nemar = NemarAPI()
+    except ValueError as e:
+        logger.error("Error: %s", e)
+        logger.error(
+            "Please set your NEMAR token using: export NEMAR_TOKEN='your_token_here'"
+        )
+        return []
+
+    all_datasets = []
+    start = 0
+    batch_size = 500
+
+    logger.info("Fetching datasets...")
+    while True:
+        datasets = nemar.get_datasets(start=start, limit=batch_size)
+        if not datasets or not datasets.get("entries"):
+            break
+
+        batch_info = nemar.extract_dataset_info(datasets)
+        if not batch_info:
+            break
+
+        all_datasets.extend(batch_info)
+        logger.info("Retrieved %d datasets so far...", len(all_datasets))
+
+        if len(batch_info) < batch_size:
+            break
+
+        start += batch_size
+
+    return all_datasets
+
+
+def find_undigested_datasets() -> List[Dict]:
+    """Find datasets that haven't been digested into eegdash yet.
+
+    Returns:
+        List of dataset information dictionaries for undigested datasets.
+
+    """
+    # Get all available datasets from NEMAR
+    all_datasets = fetch_all_datasets()
+
+    # Get all classes from eegdash.dataset
+    eegdash_classes = dir(eegdash.dataset)
+
+    # Filter for undigested datasets
+    undigested = []
+    for dataset in all_datasets:
+        # Convert dataset ID to expected class name format (e.g., ds001785 -> DS001785)
+        class_name = dataset["id"].upper()
+
+        # Check if this dataset exists as a class in eegdash.dataset
+        if class_name not in eegdash_classes:
+            undigested.append(dataset)
+
+    return undigested
+
+
+def main():
+    """Main function to find and output undigested datasets."""
+    undigested = find_undigested_datasets()
+
+    # Print just the dataset IDs and names
+    print("\nUndigested Datasets:")
+    print("-" * 80)
+    for dataset in undigested:
+        print(f"{dataset['id']}: {dataset['name']}")
+    print(f"\nTotal undigested datasets: {len(undigested)}")
+
+
+if __name__ == "__main__":
+    main()
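The script is meant to be run directly (python scripts/1_nemar_dataset_list.py) with NEMAR_TOKEN exported. The client can also be exercised on its own; a small sketch using only the methods defined in the file above, with an illustrative page size:

import os

# Assumes NEMAR_TOKEN is set and NemarAPI (from the script above) is in scope.
nemar = NemarAPI(token=os.environ["NEMAR_TOKEN"])
page = nemar.get_datasets(start=0, limit=5)
if page is not None:
    for info in NemarAPI.extract_dataset_info(page):
        print(f"{info['id']}: {info['name']} ({info['file_size_gb']:.2f} GB)")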
