Skip to content

Commit 94fa87a

Browse files
authored
Add OS independent paths for Allen dataset (#88)
* fix path in ca_movie_decoding.py
* fix paths to support windows
* remove print statement
* Add example implementation using pathlib
* add pathlib paths to allen dataset
* run allen tests
* remove os import from allen dataset
1 parent 3f06b71 commit 94fa87a

File tree

9 files changed

+122
-117
lines changed

9 files changed

+122
-117
lines changed

cebra/datasets/allen/ca_movie.py

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
import glob
2323
import hashlib
24-
import os
24+
import pathlib
2525

2626
import h5py
2727
import joblib
@@ -66,9 +66,9 @@ def __init__(
6666
num_neurons=10,
6767
seed=111,
6868
area="VISp",
69-
frame_feature_path=get_datapath(
70-
"allen/features/allen_movies/vit_base/8/movie_one_image_stack.npz/testfeat.pth"
71-
),
69+
frame_feature_path=pathlib.Path(_DEFAULT_DATADIR) / "allen" /
70+
"features" / "allen_movies" / "vit_base" / "8" /
71+
"movie_one_image_stack.npz" / "testfeat.pth",
7272
pca=False,
7373
load=None,
7474
):
@@ -116,16 +116,17 @@ def _get_pseudo_mice(self, area: str):
116116
area: The visual cortical area to sample the neurons. Possible options: VISp, VISpm, VISam, VISal, VISl, VISrl.
117117
118118
"""
119+
119120
self.area = area
120-
list_mice = glob.glob(
121-
get_datapath(
122-
f"allen/visual_drift/data/calcium_excitatory/{area}/*"))
123-
exp_containers = [
124-
int(mice.split(f"{area}/")[1].replace(".mat", ""))
125-
for mice in list_mice
126-
]
121+
path = pathlib.Path(
122+
_DEFAULT_DATADIR
123+
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(area)
124+
list_mice = path.glob("*.mat")
125+
exp_containers = [int(file.stem) for file in list_mice]
126+
127127
## Load summary file
128-
summary = pd.read_csv(get_datapath("allen/data_summary.csv"))
128+
summary = pd.read_csv(
129+
pathlib.Path(_DEFAULT_DATADIR) / "allen" / "data_summary.csv")
129130
## Filter excitatory neurons in V1
130131
area_filtered = summary[(summary["exp"].isin(exp_containers)) &
131132
(summary["target"] == area) &
@@ -169,9 +170,10 @@ def _convert_to_nums(string):
169170
indices2.sort()
170171
indices3.sort()
171172
indices = [indices1, indices2, indices3]
172-
matfile = get_datapath(
173-
f"allen/visual_drift/data/calcium_excitatory/{area}/{exp_container}.mat"
174-
)
173+
matfile = pathlib.Path(
174+
_DEFAULT_DATADIR
175+
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(
176+
area) / f"{exp_container}.mat"
175177
traces = scipy.io.loadmat(matfile)
176178
for n, i in enumerate(seq_sessions):
177179
session = traces["filtered_traces_days_events"][n, 0][
@@ -214,9 +216,10 @@ class AllenCaMoviePreLoadDataset(AllenCaMovieDataset):
214216
"""
215217

216218
def __init__(self, num_neurons, seed):
217-
preload = get_datapath(
218-
f"allen_preload/allen-movie1-ca-{num_neurons}-{seed}.jl")
219-
if not os.path.isfile(preload):
219+
preload = pathlib.Path(
220+
_DEFAULT_DATADIR
221+
) / "allen_preload" / f"allen-movie1-ca-{num_neurons}-{seed}.jl"
222+
if not preload.is_file():
220223
print("The dataset is not yet preloaded.")
221224
preload = None
222225
super().__init__(num_neurons=num_neurons, seed=seed, load=preload)

cebra/datasets/allen/ca_movie_decoding.py

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import glob
2323
import hashlib
2424
import os
25+
import pathlib
2526

2627
import h5py
2728
import joblib
@@ -41,6 +42,8 @@
4142
from cebra.datasets.allen import SEEDS
4243
from cebra.datasets.allen import SEEDS_DISJOINT
4344

45+
_DEFAULT_DATADIR = get_datapath()
46+
4447

4548
@parametrize(
4649
"allen-movie-{num_movie}-ca-{cortex}-{num_neurons}-{split_flag}-{test_repeat}-{seed}",
@@ -107,9 +110,9 @@ def _get_video_features(self, num_movie="one"):
107110
108111
"""
109112

110-
frame_feature_path = get_datapath(
111-
f"allen/features/allen_movies/vit_base/8/movie_{num_movie}_image_stack.npz/testfeat.pth"
112-
)
113+
frame_feature_path = pathlib.Path(
114+
_DEFAULT_DATADIR
115+
) / "allen" / "features" / "allen_movies" / "vit_base" / "8" / f"movie_{num_movie}_image_stack.npz" / "testfeat.pth"
113116
frame_feature = torch.load(frame_feature_path)
114117
return frame_feature
115118

@@ -171,15 +174,14 @@ def _get_pseudo_mice(self, area, num_movie):
171174
172175
"""
173176

174-
list_mice = glob.glob(
175-
get_datapath(
176-
f"allen/visual_drift/data/calcium_excitatory/{area}/*"))
177-
exp_containers = [
178-
int(mice.split(f"{area}/")[1].replace(".mat", ""))
179-
for mice in list_mice
180-
]
177+
path = pathlib.Path(
178+
_DEFAULT_DATADIR
179+
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(area)
180+
list_mice = path.glob("*.mat")
181+
exp_containers = [int(file.stem) for file in list_mice]
181182
## Load summary file
182-
summary = pd.read_csv(get_datapath("allen/data_summary.csv"))
183+
summary = pd.read_csv(
184+
pathlib.Path(_DEFAULT_DATADIR) / "allen" / "data_summary.csv")
183185
## Filter excitatory neurons in V1
184186
area_filtered = summary[(summary["exp"].isin(exp_containers)) &
185187
(summary["target"] == area) &
@@ -223,9 +225,10 @@ def _convert_to_nums(string):
223225
indices2.sort()
224226
indices3.sort()
225227
indices = [indices1, indices2, indices3]
226-
matfile = get_datapath(
227-
f"allen/visual_drift/data/calcium_excitatory/{area}/{exp_container}.mat"
228-
)
228+
matfile = pathlib.Path(
229+
_DEFAULT_DATADIR
230+
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(
231+
area) / f"{exp_container}.mat"
229232
traces = scipy.io.loadmat(matfile)
230233
for n, i in enumerate(seq_sessions):
231234
session = traces["filtered_traces_days_events"][n, 0][
@@ -325,10 +328,10 @@ def _get_pseudo_mice(self, area, num_movie):
325328
area: The visual cortical area to sample the neurons. Possible options: VISp, VISpm, VISam, VISal, VISl, VISrl.
326329
327330
"""
328-
329-
list_mice = glob.glob(
330-
get_datapath(
331-
f"allen/visual_drift/data/calcium_excitatory/{area}/*"))
331+
path = pathlib.Path(
332+
_DEFAULT_DATADIR
333+
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(area)
334+
list_mice = path.glob("*")
332335

333336
def _get_neural_data(num_movie, mat_file):
334337
mat = scipy.io.loadmat(mat_file)

cebra/datasets/allen/combined.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
import glob
2525
import hashlib
26-
import os
2726

2827
import h5py
2928
import joblib

cebra/datasets/allen/make_neuropixel.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
import argparse
2424
import glob
25-
import os
25+
import pathlib
2626

2727
import h5py
2828
import joblib as jl
@@ -32,6 +32,8 @@
3232

3333
from cebra.datasets import get_datapath
3434

35+
_DEFAULT_DATADIR = get_datapath()
36+
3537

3638
def _filter_units(
3739
unit_ids: npt.NDArray[np.int64],
@@ -153,13 +155,13 @@ def _spike_counts(bin_edges: npt.NDArray[np.float64], units: list):
153155

154156

155157
def read_neuropixel(
156-
path: str = "/shared/neuropixel/*/*.nwb",
158+
path: str = pathlib.Path("/shared/neuropixel/"),
157159
cortex: str = "VISp",
158160
sampling_rate: float = 120.0,
159161
):
160162
"""Load 120Hz Neuropixels data recorded in the specified cortex during the movie1 stimulus.
161163
162-
The Neuropixels recordin is filtered and transformed to spike counts in a bin size specified by the sampling rat.
164+
The Neuropixels recording is filtered and transformed to spike counts in a bin size specified by the sampling rat.
163165
164166
Args:
165167
path: The wildcard file path where the neuropixels .nwb files are located.
@@ -168,7 +170,7 @@ def read_neuropixel(
168170
169171
"""
170172

171-
files = glob.glob(path)
173+
files = path.glob("*/*.nwb")
172174
sessions = {}
173175
len_recording = []
174176
session_frames = []
@@ -238,7 +240,8 @@ def read_neuropixel(
238240
parser = argparse.ArgumentParser()
239241
parser.add_argument("--data-path", default="/shared/neuropixel", type=str)
240242
parser.add_argument("--save-path",
241-
default=get_datapath("allen_movie1_neuropixel/VISp/"),
243+
default=pathlib.Path(_DEFAULT_DATADIR) /
244+
"allen_movie1_neuropixel" / "VISp",
242245
type=str)
243246
parser.add_argument("--sampling-rate", default=120, type=float)
244247
parser.add_argument("--cortex", default="VISp", type=str)
@@ -255,17 +258,14 @@ def read_neuropixel(
255258
"neural": sessions_dic,
256259
"frames": session_frames
257260
},
258-
os.path.join(
259-
args.save_path,
260-
f"neuropixel_sessions_{int(args.sampling_rate)}_filtered.jl"),
261+
Path(args.save_path) /
262+
f"neuropixel_sessions_{int(args.sampling_rate)}_filtered.jl",
261263
)
262264
jl.dump(
263265
{
264266
"neural": pseudo_mice,
265267
"frames": pseudo_mice_frames
266268
},
267-
os.path.join(
268-
args.save_path,
269-
f"neuropixel_pseudomouse_{int(args.sampling_rate)}_filtered.jl",
270-
),
269+
Path(args.save_path) /
270+
f"neuropixel_pseudomouse_{int(args.sampling_rate)}_filtered.jl",
271271
)

cebra/datasets/allen/neuropixel_movie.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import glob
2020
import hashlib
2121
import os
22+
import pathlib
2223

2324
import h5py
2425
import joblib
@@ -38,6 +39,8 @@
3839
from cebra.datasets.allen import NUM_NEURONS
3940
from cebra.datasets.allen import SEEDS
4041

42+
_DEFAULT_DATADIR = get_datapath()
43+
4144

4245
@parametrize(
4346
"allen-movie1-neuropixel-{num_neurons}-{seed}-10ms",
@@ -70,10 +73,10 @@ def _get_pseudo_mice(self, area="VISp"):
7073
7174
"""
7275
self.area = area
73-
list_recording = joblib.load(
74-
get_datapath(
75-
f"allen/allen_movie1_neuropixel/{area}/neuropixel_pseudomouse_120_filtered.jl"
76-
))
76+
path = pathlib.Path(
77+
_DEFAULT_DATADIR
78+
) / "allen" / "allen_movie1_neuropixel" / area / "neuropixel_pseudomouse_120_filtered.jl"
79+
list_recording = joblib.load(path)
7780
pseudo_mice = list_recording["neural"]
7881

7982
return pseudo_mice.transpose(1, 0)
@@ -87,10 +90,9 @@ def _get_index(self, frame_feature):
8790
frame feature: The video frame feature.
8891
8992
"""
90-
91-
list_recording = joblib.load(
92-
get_datapath(
93-
f"allen/allen_movie1_neuropixel/{self.area}/neuropixel_pseudomouse_120_filtered.jl"
94-
))
93+
path = pathlib.Path(
94+
_DEFAULT_DATADIR
95+
) / "allen" / "allen_movie1_neuropixel" / self.area / "neuropixel_pseudomouse_120_filtered.jl"
96+
list_recording = joblib.load(path)
9597
frames_index = list_recording["frames"]
9698
return frame_feature[frames_index]

cebra/datasets/allen/neuropixel_movie_decoding.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import glob
2020
import hashlib
2121
import os
22+
import pathlib
2223

2324
import h5py
2425
import joblib
@@ -40,6 +41,8 @@
4041
from cebra.datasets.allen import SEEDS
4142
from cebra.datasets.allen import SEEDS_DISJOINT
4243

44+
_DEFAULT_DATADIR = get_datapath()
45+
4346

4447
@parametrize(
4548
"allen-movie-{num_movie}-neuropixel-{cortex}-{num_neurons}-{split_flag}-10-{seed}",
@@ -87,11 +90,10 @@ def _get_pseudo_mice(self, cortex: str, num_movie: str = "one"):
8790
Args:
8891
cortex: The visual cortical area.
8992
"""
90-
91-
data = joblib.load(
92-
get_datapath(
93-
f"allen/allen_movie1_neuropixel/{cortex}/neuropixel_pseudomouse_120_filtered.jl"
94-
))
93+
path = pathlib.Path(
94+
_DEFAULT_DATADIR
95+
) / "allen" / "allen_movie1_neuropixel" / cortex / "neuropixel_pseudomouse_120_filtered.jl"
96+
data = joblib.load(path)
9597
return data
9698

9799
def _split(self, pseudo_mice, frame_feature):
@@ -148,25 +150,23 @@ class AllenNeuropixelMovie120HzCorticesDisjointDataset(
148150
149151
"""
150152

151-
def __init__(
152-
self,
153-
group,
154-
num_neurons,
155-
seed=111,
156-
cortex="VISp",
157-
split_flag="train",
158-
frame_feature_path=get_datapath(
159-
"allen/features/allen_movies/vit_base/8/movie_one_image_stack.npz/testfeat.pth"
160-
),
161-
):
153+
def __init__(self,
154+
group,
155+
num_neurons,
156+
seed=111,
157+
cortex="VISp",
158+
split_flag="train",
159+
frame_feature_path=pathlib.Path(_DEFAULT_DATADIR) / "allen" /
160+
"features" / "allen_movies" / "vit_base" / "8" /
161+
"movie_one_image_stack.npz" / "testfeat.pth"):
162162
self.split_flag = split_flag
163163
self.seed = seed
164164
self.group = group
165165
self.num_neurons = num_neurons
166166
data = joblib.load(
167-
get_datapath(
168-
f"allen/allen_movie1_neuropixel/{cortex}/neuropixel_pseudomouse_120_filtered.jl"
169-
))
167+
pathlib.Path(_DEFAULT_DATADIR) / "allen" /
168+
"allen_movie1_neuropixel" / cortex /
169+
"neuropixel_pseudomouse_120_filtered.jl")
170170
pseudo_mice = data["neural"].T
171171
self.neurons_indices = self._sample_neurons(pseudo_mice)
172172
self.movie_len = pseudo_mice.shape[1]

0 commit comments

Comments
 (0)