Skip to content

Commit 06655df

Browse files
authored
Merge pull request #1 from KamitaniLab/data
Data
2 parents d00b3db + 2422217 commit 06655df

File tree

11 files changed

+310
-11
lines changed

11 files changed

+310
-11
lines changed

README.md

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,20 +59,32 @@ Step2: Activate the environment.
5959

6060
### Download Dataset
6161

62-
To utilize this project, you'll need to download the required dataset [Figshare](https://figshare.com/) and organize the dataset appropriately. After downloading data (preprocessed fMRI data, stimulus DNN features, pre-trained generator), make sure to move them to the correct locations.
63-
64-
```sh
65-
# move fMRI data files (*.h5) and DNN features (*.mat) to data folder
66-
mv path_to_downloaded_fmri ./data/fmri/train_or_test
67-
mv path_to_downloaded_DNNfeature ./data/stimulus_feature/train_or_test/dataset_name/caffe/bvlc_reference_caffenet/
68-
69-
70-
# move pre-trained generator (*.pt) to generator folder
71-
mv path_to_downloaded_generator ./generator/generator_name
72-
```
62+
To utilize this project, you'll need to download the required dataset from [Figshare](https://figshare.com/articles/dataset/23590302) and organize it appropriately.
63+
You can download the required data with the following commands.
64+
65+
fMRI data and image feature:
66+
67+
``` shellsession
68+
# In "data" directory:
69+
70+
# Training and test fMRI data
71+
$ python download.py fmri_training
72+
$ python download.py fmri_test
73+
74+
# Stimulus image features
75+
$ python download.py stimulus_feature
76+
```
77+
78+
Pre-trained generator:
7379

80+
``` shellsession
81+
# In "generator" directory:
7482

83+
# GAN
84+
$ python download.py GAN
85+
```
7586

87+
After downloading the data (preprocessed fMRI data, stimulus DNN features, and the pre-trained generator), make sure the files are placed in the correct locations.
7688

7789
## Usage
7890

data/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.zip

data/download.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import os
2+
import shutil
3+
import argparse
4+
import json
5+
import urllib.request
6+
import hashlib
7+
from typing import Union
8+
9+
from tqdm import tqdm
10+
11+
12+
def main(cfg):
    '''Download every file listed for ``cfg.target`` in the JSON file list.

    Parameters
    ----------
    cfg : argparse.Namespace
        Must provide ``filelist`` (path to the JSON file list) and
        ``target`` (key selecting which group of files to download).
    '''
    with open(cfg.filelist, 'r') as f:
        filelist = json.load(f)

    target = filelist[cfg.target]

    for fl in target['files']:
        output = os.path.join(target['save_in'], fl['name'])

        # Download only when the file is not already present locally.
        if not os.path.exists(output):
            print(f'Downloading {output} from {fl["url"]}')
            # .get() keeps entries without an "md5sum" key working
            # (checksum verification is then simply skipped).
            download_file(fl['url'], output, progress_bar=True, md5sum=fl.get('md5sum'))

        # Postprocessing (e.g., unpacking a downloaded archive).
        # NOTE(review): this runs on every invocation, even when the
        # file already existed — confirm re-unzipping is intended.
        if 'postproc' in fl:
            for pp in fl['postproc']:
                if pp['name'] == 'unzip':
                    print(f'Unzipping {output}')
                    dest = pp.get('destination', './')
                    shutil.unpack_archive(output, extract_dir=dest)
36+
37+
38+
def download_file(url: str, destination: str, progress_bar: bool = True, md5sum: Union[str, None] = None) -> None:
    '''Download a file from ``url`` to ``destination``.

    Parameters
    ----------
    url : str
        Source URL.
    destination : str
        Local path where the downloaded file is saved.
    progress_bar : bool
        If True, show a tqdm progress bar while downloading.
    md5sum : str or None
        Expected MD5 checksum; when given, the downloaded file is verified.

    Raises
    ------
    ValueError
        If ``md5sum`` is given and does not match the downloaded file.
    '''

    # Probe the file size; use a context manager so the HTTP response
    # is closed (the original leaked this connection).
    with urllib.request.urlopen(url) as response:
        file_size = int(response.info()["Content-Length"])

    if progress_bar:
        # Honor the progress_bar flag (previously the parameter was
        # shadowed by the tqdm context variable and ignored).
        with tqdm(total=file_size, unit='B', unit_scale=True, desc=destination, ncols=100) as pbar:
            def _show_progress(block_num, block_size, total_size):
                # urlretrieve reports cumulative block counts; update tqdm
                # by the delta from its current position.
                downloaded = block_num * block_size
                if total_size > 0:
                    pbar.update(min(downloaded, total_size) - pbar.n)

            urllib.request.urlretrieve(url, destination, _show_progress)
    else:
        urllib.request.urlretrieve(url, destination)

    if md5sum is not None:
        md5_hash = hashlib.md5()
        with open(destination, 'rb') as f:
            # Hash in fixed-size chunks to keep memory bounded.
            for chunk in iter(lambda: f.read(4096), b''):
                md5_hash.update(chunk)
        md5sum_test = md5_hash.hexdigest()
        if md5sum != md5sum_test:
            # Remove the corrupted file so a re-run re-downloads it
            # instead of skipping the existing (bad) file.
            os.remove(destination)
            raise ValueError(f'md5sum mismatch. \nExpected: {md5sum}\nActual: {md5sum_test}')
60+
61+
62+
if __name__ == '__main__':
    # CLI: `python download.py <target> [--filelist files.json]`
    parser = argparse.ArgumentParser(description='Download dataset files listed in a JSON file list.')
    parser.add_argument('--filelist', default='files.json',
                        help='Path to the JSON file list (default: files.json)')
    parser.add_argument('target',
                        help='Key in the file list selecting which files to download')

    cfg = parser.parse_args()

    main(cfg)

data/files.json

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
{
2+
"fmri_training": {
3+
"zip": false,
4+
"save_in": "./fmri/train",
5+
"files": [
6+
{"md5sum": "289015c64ea489d6b012356829ed226a",
7+
"name": "S1_ImageNetTraining.h5",
8+
"url": "https://ndownloader.figshare.com/files/42438333"},
9+
{"md5sum": "0072f763932c03c861eec039cf8cba05",
10+
"name": "S1_MSCOCO.h5",
11+
"url": "https://ndownloader.figshare.com/files/42438315"},
12+
{"md5sum": "bad85b741aba7de095fc1d2b25a55946",
13+
"name": "S1_FMD.h5",
14+
"url": "https://ndownloader.figshare.com/files/42438312"},
15+
{"md5sum": "313d7bde33c6603086f4652e8f365fd2",
16+
"name": "S2_ImageNetTraining.h5",
17+
"url": "https://ndownloader.figshare.com/files/42438339"},
18+
{"md5sum": "cde0f085ca6bb4b76062018f181bcc34",
19+
"name": "S2_MSCOCO.h5",
20+
"url": "https://ndownloader.figshare.com/files/42438327"},
21+
{"md5sum": "410198be68ac12c18336f5bc4192412b",
22+
"name": "S2_FMD.h5",
23+
"url": "https://ndownloader.figshare.com/files/42438324"},
24+
{"md5sum": "b7a35ff1dbd1b950a9edf0ca29b56d6b",
25+
"name": "S3_ImageNetTraining.h5",
26+
"url": "https://ndownloader.figshare.com/files/42438330"},
27+
{"md5sum": "b4d38e71bbd36fe85f23e097af05b39c",
28+
"name": "S3_MSCOCO.h5",
29+
"url": "https://ndownloader.figshare.com/files/42438309"},
30+
{"md5sum": "90ac4403176f4aa82f4c336b90d2da1f",
31+
"name": "S3_FMD.h5",
32+
"url": "https://ndownloader.figshare.com/files/42438306"},
33+
{"md5sum": "61864aa83d1d3f2e90bb564c0103aa25",
34+
"name": "S4_ImageNetTraining.h5",
35+
"url": "https://ndownloader.figshare.com/files/42438336"},
36+
{"md5sum": "83b0d0a1ccedf0b14cb56d491bb54ad3",
37+
"name": "S4_FMD.h5",
38+
"url": "https://ndownloader.figshare.com/files/42438318"},
39+
{"md5sum": "ec185b0aa813e75241f5217a681ce981",
40+
"name": "S4_MSCOCO.h5",
41+
"url": "https://ndownloader.figshare.com/files/42438321"},
42+
{"md5sum": "910feeff73f63e75b24034ae0faf2092",
43+
"name": "S5_ImageNetTraining.h5",
44+
"url": "https://ndownloader.figshare.com/files/42438300"},
45+
{"md5sum": "0bb137c2bed1133b76bfca27b9980462",
46+
"name": "S5_MSCOCO.h5",
47+
"url": "https://ndownloader.figshare.com/files/42438288"},
48+
{"md5sum": "4daa99996a3caf452d603210dcc169ec",
49+
"name": "S5_FMD.h5",
50+
"url": "https://ndownloader.figshare.com/files/42438285"},
51+
{"md5sum": "19225556793dc25aa0620ccafede1185",
52+
"name": "S6_ImageNetTraining.h5",
53+
"url": "https://ndownloader.figshare.com/files/42438303"},
54+
{"md5sum": "937403580fcfb00874bc22cb7e6d4ec2",
55+
"name": "S6_MSCOCO.h5",
56+
"url": "https://ndownloader.figshare.com/files/42438294"},
57+
{"md5sum": "1aa81f0ba41a909591be22bbcf299adc",
58+
"name": "S6_FMD.h5",
59+
"url": "https://ndownloader.figshare.com/files/42438297"},
60+
{"md5sum": "e8fa743dbe1864b01eb685fb064d47ce",
61+
"name": "S7_ImageNetTraining.h5",
62+
"url": "https://ndownloader.figshare.com/files/42438291"},
63+
{"md5sum": "41b9318496135674f713b644a3553bec",
64+
"name": "S7_MSCOCO.h5",
65+
"url": "https://ndownloader.figshare.com/files/42438279"},
66+
{"md5sum": "b4ed001005c086302177cf80387df4ee",
67+
"name": "S7_FMD.h5",
68+
"url": "https://ndownloader.figshare.com/files/42438282"}
69+
]
70+
},
71+
"fmri_test": {
72+
"zip": false,
73+
"save_in": "./fmri/test",
74+
"files": [
75+
{
76+
"name": "S1_Illusion.h5",
77+
"url": "https://figshare.com/ndownloader/files/42439035",
78+
"md5sum": "1593b4bcdc81e042527238daaf10b112"
79+
},
80+
{
81+
"name": "S2_Illusion.h5",
82+
"url": "https://figshare.com/ndownloader/files/42439041",
83+
"md5sum": "953aeaf29a4504b8fedc2959467854cd"
84+
},
85+
{
86+
"name": "S3_Illusion.h5",
87+
"url": "https://figshare.com/ndownloader/files/42439032",
88+
"md5sum": "0979f2262800f8a0804f6d2069943f84"
89+
},
90+
{
91+
"name": "S4_Illusion.h5",
92+
"url": "https://figshare.com/ndownloader/files/42439029",
93+
"md5sum": "f6da509cda6b11bf88c9c82d0c93bab6"
94+
},
95+
{
96+
"name": "S5_Illusion.h5",
97+
"url": "https://figshare.com/ndownloader/files/42439044",
98+
"md5sum": "41ff022feb2901241562e14ad6421d5e"
99+
},
100+
{
101+
"name": "S6_Illusion.h5",
102+
"url": "https://figshare.com/ndownloader/files/42439047",
103+
"md5sum": "7c3726f207134cea78f62769623f86f8"
104+
},
105+
{
106+
"name": "S7_Illusion.h5",
107+
"url": "https://figshare.com/ndownloader/files/42439038",
108+
"md5sum": "ebcd9fc1da5c2dfc5fc45f3500a0247d"
109+
}
110+
]
111+
},
112+
"stimulus_feature": {
113+
"save_in": "./",
114+
"files": [
115+
{
116+
"name": "stimulus_feature.zip",
117+
"url": "https://figshare.com/ndownloader/files/42439479",
118+
"md5sum": "c1938fc5a7744960d4b57cdddb6ba8e8",
119+
"postproc": [{"name": "unzip"}]
120+
}
121+
]
122+
}
123+
}

data/fmri/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
*
2+
!/.gitignore
3+
!/train
4+
!/test

data/fmri/test/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!/.gitignore

data/fmri/train/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!/.gitignore

data/stimulus_feature/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!/.gitignore

generator/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.zip

generator/download.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import os
2+
import shutil
3+
import argparse
4+
import json
5+
import urllib.request
6+
import hashlib
7+
from typing import Union
8+
9+
from tqdm import tqdm
10+
11+
12+
def main(cfg):
    '''Download every file listed for ``cfg.target`` in the JSON file list.

    Parameters
    ----------
    cfg : argparse.Namespace
        Must provide ``filelist`` (path to the JSON file list) and
        ``target`` (key selecting which group of files to download).
    '''
    with open(cfg.filelist, 'r') as f:
        filelist = json.load(f)

    target = filelist[cfg.target]

    for fl in target['files']:
        output = os.path.join(target['save_in'], fl['name'])

        # Download only when the file is not already present locally.
        if not os.path.exists(output):
            print(f'Downloading {output} from {fl["url"]}')
            # .get() keeps entries without an "md5sum" key working
            # (checksum verification is then simply skipped).
            download_file(fl['url'], output, progress_bar=True, md5sum=fl.get('md5sum'))

        # Postprocessing (e.g., unpacking a downloaded archive).
        # NOTE(review): this runs on every invocation, even when the
        # file already existed — confirm re-unzipping is intended.
        if 'postproc' in fl:
            for pp in fl['postproc']:
                if pp['name'] == 'unzip':
                    print(f'Unzipping {output}')
                    dest = pp.get('destination', './')
                    shutil.unpack_archive(output, extract_dir=dest)
36+
37+
38+
def download_file(url: str, destination: str, progress_bar: bool = True, md5sum: Union[str, None] = None) -> None:
    '''Download a file from ``url`` to ``destination``.

    Parameters
    ----------
    url : str
        Source URL.
    destination : str
        Local path where the downloaded file is saved.
    progress_bar : bool
        If True, show a tqdm progress bar while downloading.
    md5sum : str or None
        Expected MD5 checksum; when given, the downloaded file is verified.

    Raises
    ------
    ValueError
        If ``md5sum`` is given and does not match the downloaded file.
    '''

    # Probe the file size; use a context manager so the HTTP response
    # is closed (the original leaked this connection).
    with urllib.request.urlopen(url) as response:
        file_size = int(response.info()["Content-Length"])

    if progress_bar:
        # Honor the progress_bar flag (previously the parameter was
        # shadowed by the tqdm context variable and ignored).
        with tqdm(total=file_size, unit='B', unit_scale=True, desc=destination, ncols=100) as pbar:
            def _show_progress(block_num, block_size, total_size):
                # urlretrieve reports cumulative block counts; update tqdm
                # by the delta from its current position.
                downloaded = block_num * block_size
                if total_size > 0:
                    pbar.update(min(downloaded, total_size) - pbar.n)

            urllib.request.urlretrieve(url, destination, _show_progress)
    else:
        urllib.request.urlretrieve(url, destination)

    if md5sum is not None:
        md5_hash = hashlib.md5()
        with open(destination, 'rb') as f:
            # Hash in fixed-size chunks to keep memory bounded.
            for chunk in iter(lambda: f.read(4096), b''):
                md5_hash.update(chunk)
        md5sum_test = md5_hash.hexdigest()
        if md5sum != md5sum_test:
            # Remove the corrupted file so a re-run re-downloads it
            # instead of skipping the existing (bad) file.
            os.remove(destination)
            raise ValueError(f'md5sum mismatch. \nExpected: {md5sum}\nActual: {md5sum_test}')
60+
61+
62+
if __name__ == '__main__':
    # CLI: `python download.py <target> [--filelist files.json]`
    parser = argparse.ArgumentParser(description='Download generator files listed in a JSON file list.')
    parser.add_argument('--filelist', default='files.json',
                        help='Path to the JSON file list (default: files.json)')
    parser.add_argument('target',
                        help='Key in the file list selecting which files to download')

    cfg = parser.parse_args()

    main(cfg)

0 commit comments

Comments
 (0)