Skip to content
This repository was archived by the owner on Jul 28, 2025. It is now read-only.

Commit 5f5a8bb

Browse files
Feat (Fetch workflow): Add new criteria to Allen Brain and improve docs (#127)
* Feat (Fetch workflow): Add new criteria to Allen Brain and improve docs
1 parent aaea977 commit 5f5a8bb

File tree

6 files changed

+76
-24
lines changed

6 files changed

+76
-24
lines changed

docs/source/fetch_config.rst

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
Fetch configuration file
2+
========================
3+
4+
The ``Fetch`` workflow aims at downloading morphology files from different source databases:
5+
6+
* `NeuroMorpho <https://neuromorpho.org>`_
7+
* `MouseLight <https://ml-neuronbrowser.janelia.org>`_
8+
* `Allen Brain <https://celltypes.brain-map.org>`_
9+
10+
At the end of the ``Fetch`` workflow, the result folder will contain all the morphology files and
11+
a ``metadata.csv`` file which contains all the metadata of each morphology file.
12+
13+
Each database needs a different configuration file. In each case, the configuration file is a JSON
14+
file containing a list of objects, each object being a configuration element that will be executed
15+
independently. Because of this, two identical configuration elements will fetch the same
16+
morphologies. The result folder will contain only one copy of each morphology file but the
17+
``metadata.csv`` file will contain duplicated entries, so it is possible to understand why the
18+
result folder does not contain the expected number of files and which filters should be updated.
19+
20+
Each configuration element can contain the following entries:
21+
22+
* ``nb_morphologies``: the number of morphologies to fetch using the filters of the current object.
23+
* ``seed``: the random seed used to choose which elements are fetched among the ones available using
24+
the filters of the current object.
25+
* all other entries should be valid for the requested source API.
26+
27+
This page shows examples of each type of configuration file.
28+
29+
NeuroMorpho
30+
-----------
31+
32+
The filter entries should be any valid filter entry for the
33+
`NeuroMorpho API <https://neuromorpho.org/apiReference.html>`_.
34+
35+
.. literalinclude:: ../../src/morphology_workflows/_templates/neuromorpho_config.json
36+
:language: json
37+
38+
MouseLight
39+
----------
40+
41+
The filter entries support only the following entries:
42+
43+
* ``brain_region``: the name of the brain region to filter.
44+
45+
.. literalinclude:: ../../src/morphology_workflows/_templates/mouselight_config.json
46+
:language: json
47+
48+
Allen Brain
49+
-----------
50+
51+
The following filter entries are supported:
52+
53+
* ``species``: the name of the species to filter.
54+
* ``brain_region``: the name of the brain region to filter.
55+
* any other valid filter entry for the
56+
`AllenSDK API <http://alleninstitute.github.io/AllenSDK/cell_types.html>`_ (the filter keys
57+
should be chosen from the ones used to create
58+
`these entries <https://github.com/AllenInstitute/AllenSDK/blob/master/allensdk/api/queries/cell_types_api.py#L260>`_).
59+
60+
.. literalinclude:: ../../src/morphology_workflows/_templates/allen_config.json
61+
:language: json

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
Home <self>
1111
cli
1212
luigi_cfg
13+
fetch_config
1314
placeholders_config
1415
api_ref
1516
changelog

src/morphology_workflows/_templates/allen_config.json

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,17 @@
22
{
33
"species": "Mus musculus",
44
"brain_region": "VISli",
5-
"cell_type": "interneuron",
65
"nb_morphologies": 2,
76
"seed": 0
87
},
98
{
109
"species": "Mus musculus",
1110
"brain_region": "VISli",
12-
"cell_type": "pyramidal",
1311
"seed": 0
1412
},
1513
{
1614
"species": "Mus musculus",
1715
"brain_region": "VISli",
18-
"cell_type": "pyramidal",
1916
"nb_morphologies": 9999,
2017
"seed": 0
2118
}

src/morphology_workflows/tasks/fetch.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -166,18 +166,17 @@ def allen_download(self, config):
166166
api.allen_morphology_cache.mkdir(parents=True, exist_ok=True)
167167

168168
for conf_element in config:
169-
size = conf_element.get("nb_morphologies", float("inf"))
170-
species = conf_element.get("species", None)
171-
brain_region = conf_element.get("brain_region", None)
172-
# cell_type = conf_element.get("cell_type", None)
169+
criteria = copy.deepcopy(conf_element)
170+
size = criteria.pop("nb_morphologies", float("inf"))
171+
brain_region = criteria.pop("brain_region", None)
172+
seed = criteria.pop("seed", None)
173173

174174
mask = np.full(len(api.neurons), True, dtype=bool)
175-
if species is not None:
176-
mask = mask & (api.neurons.species == species)
177175
if brain_region is not None:
178176
mask = mask & (api.neurons.structure_area_abbrev == brain_region)
179-
# if cell_type is not None:
180-
# mask = mask & (api.neurons.structure_area_abbrev == region)
177+
178+
for key, value in criteria.items():
179+
mask = mask & (api.neurons[key] == value)
181180

182181
neurons = api.neurons.loc[mask]
183182

@@ -187,9 +186,7 @@ def allen_download(self, config):
187186
# Download some neurons
188187
downloaded_neurons = self._neuron_paths(
189188
api.download_neurons(
190-
neurons.sample(
191-
min(len(neurons), size), random_state=conf_element.get("seed", None)
192-
).id.values,
189+
neurons.sample(min(len(neurons), size), random_state=seed).id.values,
193190
load_neurons=False,
194191
),
195192
api.allen_morphology_cache,

tests/data/allen_config_download.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,18 @@
22
{
33
"species": "Mus musculus",
44
"brain_region": "VISli",
5-
"cell_type": "interneuron",
65
"nb_morphologies": 2,
76
"seed": 0
87
},
98
{
109
"species": "Mus musculus",
1110
"brain_region": "VISli",
12-
"cell_type": "pyramidal",
11+
"dendrite_type": "spiny",
1312
"seed": 0
1413
},
1514
{
1615
"species": "Mus musculus",
1716
"brain_region": "VISli",
18-
"cell_type": "pyramidal",
1917
"nb_morphologies": 9999,
2018
"seed": 0
2119
}

tests/test_download_morphs.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,23 +132,21 @@ def test_allen(prepare_dir, data_dir):
132132
# Check metadata
133133
expected = pd.DataFrame(
134134
{
135-
"species": ["Mus musculus"] * 10,
136-
"brain_region": ["VISli"] * 10,
137-
"cell_type": ["interneuron"] * 2 + ["pyramidal"] * 8,
138-
"nb_morphologies": [2] * 2 + [np.nan] * 4 + [9999] * 4,
139-
"seed": [0] * 10,
135+
"species": ["Mus musculus"] * 8,
136+
"brain_region": ["VISli"] * 8,
137+
"nb_morphologies": [2] * 2 + [np.nan] * 2 + [9999] * 4,
138+
"seed": [0] * 8,
140139
"morphology": [
141140
"555019563.swc",
142141
"603402458.swc",
143-
"555019563.swc",
144142
"603402458.swc",
145143
"526573598.swc",
146-
"555241040.swc",
147144
"555019563.swc",
148145
"603402458.swc",
149146
"526573598.swc",
150147
"555241040.swc",
151148
],
149+
"dendrite_type": [np.nan] * 2 + ["spiny"] * 2 + [np.nan] * 4,
152150
}
153151
)
154152

0 commit comments

Comments
 (0)