Skip to content

Commit b6f2e22

Browse files
authored
Merge pull request #473 from int-brain-lab/release/2.12.0
Release/2.12.0
2 parents 4245348 + 6fee263 commit b6f2e22

File tree

7 files changed

+115
-32
lines changed

7 files changed

+115
-32
lines changed

brainbox/io/one.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -700,8 +700,9 @@ def load_wheel_reaction_times(eid, one=None):
700700
return firstMove_times - trials['goCue_times']
701701

702702

703-
def load_trials_df(eid, one=None, maxlen=None, t_before=0., t_after=0., ret_wheel=False,
704-
ret_abswheel=False, wheel_binsize=0.02, addtl_types=[]):
703+
def load_trials_df(eid, one=None, maxlen=None, t_before=0., t_after=0.2, ret_wheel=False,
704+
ret_abswheel=False, wheel_binsize=0.02, addtl_types=[],
705+
align_event='stimOn_times', keeptrials=None):
705706
"""
706707
Generate a pandas dataframe of per-trial timing information about a given session.
707708
Each row in the frame will correspond to a single trial, with timing values indicating timing
@@ -776,18 +777,19 @@ def remap_trialp(probs):
776777
endtimes = trials.feedback_times
777778
tmp = {key: value for key, value in trials.items() if key in trialstypes}
778779

779-
if maxlen is not None:
780-
with np.errstate(invalid='ignore'):
781-
keeptrials = (endtimes - starttimes) <= maxlen
782-
else:
783-
keeptrials = range(len(starttimes))
780+
if keeptrials is None:
781+
if maxlen is not None:
782+
with np.errstate(invalid='ignore'):
783+
keeptrials = (endtimes - starttimes) <= maxlen
784+
else:
785+
keeptrials = range(len(starttimes))
784786
trialdata = {x: tmp[x][keeptrials] for x in trialstypes}
785787
trialdata['probabilityLeft'] = remap_trialp(trialdata['probabilityLeft'])
786788
trialsdf = pd.DataFrame(trialdata)
787789
if maxlen is not None:
788790
trialsdf.set_index(np.nonzero(keeptrials)[0], inplace=True)
789-
trialsdf['trial_start'] = trialsdf['stimOn_times'] - t_before
790-
trialsdf['trial_end'] = trialsdf['feedback_times'] + t_after
791+
trialsdf['trial_start'] = trialsdf[align_event] - t_before
792+
trialsdf['trial_end'] = trialsdf[align_event] + t_after
791793
tdiffs = trialsdf['trial_end'] - np.roll(trialsdf['trial_start'], -1)
792794
if np.any(tdiffs[:-1] > 0):
793795
logging.warning(f'{sum(tdiffs[:-1] > 0)} trials overlapping due to t_before and t_after '

brainbox/modeling/linear.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def _fit(self, dm, binned, cells=None):
8787
raise ValueError('Length of cells does not match shape of binned')
8888

8989
coefs = pd.Series(index=cells, name='coefficients', dtype=object)
90-
intercepts = pd.Series(index=cells, name='intercepts')
90+
intercepts = pd.Series(index=cells, name='intercepts', dtype=object)
9191

9292
lm = self.estimator.fit(dm, binned)
9393
weight, intercept = lm.coef_, lm.intercept_

brainbox/modeling/neural_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def fit(self, train_idx=None, printcond=True):
195195
# operate on. If all data indices are in train indices, train and test are the same set.
196196
self.traininds = train_idx
197197
if not np.all(np.isin(self.design.trialsdf.index, train_idx)):
198-
self.testinds = self.design.trialsdf.index[~self.trialsdf.index.isin(train_idx)]
198+
self.testinds = self.design.trialsdf.index[~self.design.trialsdf.index.isin(train_idx)]
199199
else:
200200
self.testinds = train_idx
201201

@@ -226,7 +226,7 @@ def score(self, testinds=None):
226226
testmask = np.isin(self.design.trlabels, testinds).flatten()
227227
dm, binned = self.design[testmask, :], self.binnedspikes[testmask]
228228

229-
scores = pd.Series(index=self.coefs.index, name='scores')
229+
scores = pd.Series(index=self.coefs.index, name='scores', dtype=object)
230230
for cell in self.coefs.index:
231231
cell_idx = np.argwhere(self.clu_ids == cell)[0, 0]
232232
wt = self.coefs.loc[cell].reshape(-1, 1)

brainbox/modeling/utils.py

Lines changed: 83 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ def neglog(weights, x, y):
3838

3939

4040
class SequentialSelector:
41-
def __init__(self, model, n_features_to_select=None, direction='forward', scoring=None):
41+
def __init__(self, model, n_features_to_select=None,
42+
direction='forward', scoring=None,
43+
train=None, test=None):
4244
"""
4345
Sequential feature selection for neural models
4446
@@ -67,52 +69,96 @@ def __init__(self, model, n_features_to_select=None, direction='forward', scorin
6769
self.scoring = scoring
6870
self.delta_scores = pd.DataFrame(index=self.model.clu_ids)
6971
self.trlabels = self.design.trlabels
70-
self.train = np.isin(self.trlabels, self.model.traininds).flatten()
71-
self.test = ~self.train
72+
if train is None:
73+
self.train = np.isin(self.trlabels, self.model.traininds).flatten()
74+
else:
75+
self.train = np.isin(self.trlabels, train).flatten()
76+
if test is None:
77+
self.test = ~self.train
78+
else:
79+
self.test = np.isin(self.trlabels, test).flatten()
7280
self.features = np.array(list(self.design.covar.keys()))
7381

74-
def fit(self, progress=False):
82+
def fit(self, train_idx=None, full_scores=False, progress=False):
7583
"""
7684
Fit the sequential feature selection
77-
7885
Parameters
7986
----------
87+
train_idx : array-like
88+
indices of trials to use in the training set. If the model passed to the SFS instance
89+
did not already have training indices, this must be specified. If it did have indices,
90+
then this will override those.
91+
full_scores : bool, optional
92+
Whether to store the full set of submodel scores at each step. Produces additional
93+
attributes .full_scores_train_ and .full_scores_test_
8094
progress : bool, optional
8195
Whether to show a progress bar, by default False
8296
"""
97+
if train_idx is None and self.train is None:
98+
raise ValueError('train_idx cannot be None if model used to create SFS did not have '
99+
'any training indices')
100+
if train_idx is not None:
101+
self.train = np.isin(self.trlabels, train_idx).flatten()
102+
self.test = ~self.train
83103
n_features = len(self.features)
84104
maskdf = pd.DataFrame(index=self.model.clu_ids, columns=self.features, dtype=bool)
85105
maskdf.loc[:, :] = False
86106
seqdf = pd.DataFrame(index=self.model.clu_ids, columns=range(self.n_features_to_select))
87-
scoredf = pd.DataFrame(index=self.model.clu_ids, columns=range(self.n_features_to_select))
107+
trainscoredf = pd.DataFrame(index=self.model.clu_ids,
108+
columns=range(self.n_features_to_select))
109+
testscoredf = pd.DataFrame(index=self.model.clu_ids,
110+
columns=range(self.n_features_to_select))
88111

89112
if not 0 < self.n_features_to_select <= n_features:
90113
raise ValueError('n_features_to_select is not a valid number in the context'
91114
' of the model.')
92115

93-
n_iterations = (
94-
self.n_features_to_select if self.direction == 'forward'
95-
else n_features - self.n_features_to_select
96-
)
116+
n_iterations = (self.n_features_to_select if self.direction == 'forward' else n_features -
117+
self.n_features_to_select)
118+
if full_scores:
119+
fullindex = pd.MultiIndex.from_product([self.model.clu_ids, np.arange(n_iterations)],
120+
names=['clu_id', 'feature_iter'])
121+
fulltrain = pd.DataFrame(index=fullindex, columns=range(len(self.design.covar)))
122+
fulltest = pd.DataFrame(index=fullindex, columns=range(len(self.design.covar)))
123+
97124
for i in tqdm(range(n_iterations), desc='step', leave=False, disable=not progress):
98125
masks_set = maskdf.groupby(self.features.tolist()).groups
99126
for current_mask in tqdm(masks_set, desc='feature subset', leave=False):
100127
cells = masks_set[current_mask]
101-
new_feature_idx, nf_score = self._get_best_new_feature(current_mask, cells)
128+
outputs = self._get_best_new_feature(current_mask, cells, full_scores)
129+
if full_scores:
130+
new_feature_idx, nf_train, nf_test, nf_fulltrain, nf_fulltest = outputs
131+
else:
132+
new_feature_idx, nf_train, nf_test = outputs
102133
for cell in cells:
103134
maskdf.at[cell, self.features[new_feature_idx.loc[cell]]] = True
104135
seqdf.loc[cell, i] = self.features[new_feature_idx.loc[cell]]
105-
scoredf.loc[cell, i] = nf_score.loc[cell]
136+
trainscoredf.loc[cell, i] = nf_train.loc[cell]
137+
testscoredf.loc[cell, i] = nf_test.loc[cell]
138+
if full_scores:
139+
fulltest.loc[cell, i] = nf_fulltest.loc[cell]
140+
fulltrain.loc[cell, i] = nf_fulltrain.loc[cell]
106141
self.support_ = maskdf
107142
self.sequences_ = seqdf
108-
self.scores_ = scoredf
143+
self.scores_test_ = testscoredf
144+
self.scores_train_ = trainscoredf
145+
if full_scores:
146+
self.full_scores_train_ = fulltrain
147+
self.full_scores_test_ = fulltest
109148

110-
def _get_best_new_feature(self, mask, cells):
149+
def _get_best_new_feature(self, mask, cells, full_scores=False):
150+
"""
151+
Returns
152+
-------
153+
maxind, trainmax, testmax, trainscores, testscores
154+
"""
111155
mask = np.array(mask)
112156
candidate_features = np.flatnonzero(~mask)
113157
cell_idxs = np.argwhere(np.isin(self.model.clu_ids, cells)).flatten()
114158
my = self.model.binnedspikes[np.ix_(self.train, cell_idxs)]
115-
scores = pd.DataFrame(index=cells, columns=candidate_features, dtype=float)
159+
my_test = self.model.binnedspikes[np.ix_(self.test, cell_idxs)]
160+
trainscores = pd.DataFrame(index=cells, columns=candidate_features, dtype=float)
161+
testscores = pd.DataFrame(index=cells, columns=candidate_features, dtype=float)
116162
for feature_idx in candidate_features:
117163
candidate_mask = mask.copy()
118164
candidate_mask[feature_idx] = True
@@ -121,9 +167,27 @@ def _get_best_new_feature(self, mask, cells):
121167
fitfeatures = self.features[candidate_mask]
122168
feat_idx = np.hstack([self.design.covar[feat]['dmcol_idx'] for feat in fitfeatures])
123169
mdm = self.design[np.ix_(self.train, feat_idx)]
170+
mdm_test = self.design[np.ix_(self.test, feat_idx)]
171+
124172
coefs, intercepts = self.model._fit(mdm, my, cells=cells)
125173
for i, cell in enumerate(cells):
126-
scores.at[cell, feature_idx] = self.model._scorer(coefs.loc[cell],
127-
intercepts.loc[cell],
128-
mdm, my[:, i])
129-
return scores.idxmax(axis=1), scores.max(axis=1)
174+
trainscores.at[cell,
175+
feature_idx] = self.model._scorer(coefs.loc[cell],
176+
intercepts.loc[cell], mdm, my[:,
177+
i])
178+
testscores.at[cell,
179+
feature_idx] = self.model._scorer(coefs.loc[cell],
180+
intercepts.loc[cell], mdm_test,
181+
my_test[:, i])
182+
183+
maxind = trainscores.idxmax(axis=1)
184+
trainmax = trainscores.max(axis=1)
185+
# Ugly kludge to compensate for DataFrame.lookup being deprecated
186+
midx, cols = pd.factorize(maxind)
187+
testmax = pd.Series(testscores.reindex(cols, axis=1).to_numpy()[np.arange(len(testscores)),
188+
midx],
189+
index=testscores.index)
190+
if full_scores:
191+
return maxind, trainmax, testmax, trainscores, testscores
192+
else:
193+
return maxind, trainmax, testmax

ibllib/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "2.11.1"
1+
__version__ = "2.12.0"
22
import warnings
33

44
from ibllib.misc import logger_config

ibllib/atlas/flatmaps.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,4 +181,11 @@ def plot_swanson(acronyms=None, values=None, ax=None, hemisphere=None, br=None,
181181
imb[s2a == 0] = 255
182182
imb[s2a == 1] = np.array([167, 169, 172, 255])
183183
ax.imshow(imb)
184+
185+
# provides the means to see the region acronym under the cursor on the axis
186+
def format_coord(x, y):
    """
    Build the coordinate read-out string for a position on the flatmap image.

    The position is truncated to integer pixel indices, looked up in `s2a`
    (indexed as [row=y, column=x]) and the resulting value is used to index
    `br.acronym`. Both `s2a` and `br` are taken from the enclosing scope.

    Parameters
    ----------
    x : float
        Horizontal data coordinate.
    y : float
        Vertical data coordinate.

    Returns
    -------
    str
        Text of the form 'x=<x>, y=<y>, <acronym>'.
    """
    acronym = br.acronym[s2a[int(y), int(x)]]
    # the y field must be formatted from y; it was previously formatted from x,
    # so both fields always displayed the same number
    return f'x={x:1.4f}, y={y:1.4f}, {acronym}'
189+
190+
ax.format_coord = format_coord
184191
return ax

release_notes.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
## Release Notes 2.12
2+
3+
### Release Notes 2.12.0 2022-05-10
4+
- ibllib.atlas: add the Swanson flatmap backend (Olivier)
5+
- ibllib.io.extractors: output of task extractions are trial tables, not individual datasets (Miles)
6+
- Documentation: data release examples (Mayo)
7+
- ibl-neuropixel new repository contains `ibllib.dsp`, `ibllib.ephys.neuropixel` and `ibllib.io.spikeglx` modules (Olivier)
8+
- brainbox.task.closed_loop: get impostor targets to evaluate null distribution (Brandon)
9+
- minimum supported version of Python is 3.8 (Michele)
10+
111
## Release Notes 2.11
212

313
### Release Notes 2.11.1 2022-04-12

0 commit comments

Comments
 (0)