Skip to content

Commit 06655df

Browse files
authored
Merge pull request #1 from KamitaniLab/data
Data
2 parents d00b3db + 2422217 commit 06655df

File tree

11 files changed

+310
-11
lines changed

11 files changed

+310
-11
lines changed

README.md

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,20 +59,32 @@ Step2: Activate the environment.
5959

6060
### Download Dataset
6161

62-
To utilize this project, you'll need to download the required dataset [Figshare](https://figshare.com/) and organize the dataset appropriately. After downloading data (preprocessed fMRI data, stimulus DNN features, pre-trained generator), make sure to move them to the correct locations.
63-
64-
```sh
65-
# move fMRI data files (*.h5) and DNN features (*.mat) to data folder
66-
mv path_to_downloaded_fmri ./data/fmri/train_or_test
67-
mv path_to_downloaded_DNNfeature ./data/stimulus_feature/train_or_test/dataset_name/caffe/bvlc_reference_caffenet/
68-
69-
70-
# move pre-trained generator (*.pt) to generator folder
71-
mv path_to_downloaded_generator ./generator/generator_name
72-
```
62+
To utilize this project, you'll need to download the required dataset from [Figshare](https://figshare.com/articles/dataset/23590302) and organize it appropriately.
63+
You can download the required data with the following commands.
64+
65+
fMRI data and image feature:
66+
67+
``` shellsession
68+
# In "data" directory:
69+
70+
# Training and test fMRI data
71+
$ python download.py fmri_training
72+
$ python download.py fmri_test
73+
74+
# Stimulus image features
75+
$ python download.py stimulus_feature
76+
```
77+
78+
Pre-trained generator:
7379

80+
``` shellsession
81+
# In "generator" directory:
7482

83+
# GAN
84+
$ python download.py GAN
85+
```
7586

87+
After downloading the data (preprocessed fMRI data, stimulus DNN features, and the pre-trained generator), make sure the files are placed in the correct locations.
7688

7789
## Usage
7890

data/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.zip

data/download.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import os
2+
import shutil
3+
import argparse
4+
import json
5+
import urllib.request
6+
import hashlib
7+
from typing import Union
8+
9+
from tqdm import tqdm
10+
11+
12+
def main(cfg):
    '''Download every file listed for ``cfg.target`` in the JSON file list.

    Parameters
    ----------
    cfg : argparse.Namespace
        Must provide ``filelist`` (path to the JSON file list) and
        ``target`` (key selecting which group of files to download).
    '''
    with open(cfg.filelist, 'r') as f:
        filelist = json.load(f)

    target = filelist[cfg.target]

    for fl in target['files']:
        output = os.path.join(target['save_in'], fl['name'])

        # Download only when the file is not already present locally.
        if not os.path.exists(output):
            print(f'Downloading {output} from {fl["url"]}')
            # .get() keeps entries without an "md5sum" key working
            # (checksum verification is then simply skipped).
            download_file(fl['url'], output, progress_bar=True, md5sum=fl.get('md5sum'))

        # Postprocessing (e.g., unpacking a downloaded archive).
        # NOTE(review): this runs on every invocation, even when the
        # file already existed — confirm re-unzipping is intended.
        if 'postproc' in fl:
            for pp in fl['postproc']:
                if pp['name'] == 'unzip':
                    print(f'Unzipping {output}')
                    dest = pp.get('destination', './')
                    shutil.unpack_archive(output, extract_dir=dest)
36+
37+
38+
def download_file(url: str, destination: str, progress_bar: bool = True, md5sum: Union[str, None] = None) -> None:
    '''Download a file from ``url`` to ``destination``.

    Parameters
    ----------
    url : str
        Source URL.
    destination : str
        Local path where the downloaded file is saved.
    progress_bar : bool
        If True, show a tqdm progress bar while downloading.
    md5sum : str or None
        Expected MD5 checksum; when given, the downloaded file is verified.

    Raises
    ------
    ValueError
        If ``md5sum`` is given and does not match the downloaded file.
    '''

    # Probe the file size; use a context manager so the HTTP response
    # is closed (the original leaked this connection).
    with urllib.request.urlopen(url) as response:
        file_size = int(response.info()["Content-Length"])

    if progress_bar:
        # Honor the progress_bar flag (previously the parameter was
        # shadowed by the tqdm context variable and ignored).
        with tqdm(total=file_size, unit='B', unit_scale=True, desc=destination, ncols=100) as pbar:
            def _show_progress(block_num, block_size, total_size):
                # urlretrieve reports cumulative block counts; update tqdm
                # by the delta from its current position.
                downloaded = block_num * block_size
                if total_size > 0:
                    pbar.update(min(downloaded, total_size) - pbar.n)

            urllib.request.urlretrieve(url, destination, _show_progress)
    else:
        urllib.request.urlretrieve(url, destination)

    if md5sum is not None:
        md5_hash = hashlib.md5()
        with open(destination, 'rb') as f:
            # Hash in fixed-size chunks to keep memory bounded.
            for chunk in iter(lambda: f.read(4096), b''):
                md5_hash.update(chunk)
        md5sum_test = md5_hash.hexdigest()
        if md5sum != md5sum_test:
            # Remove the corrupted file so a re-run re-downloads it
            # instead of skipping the existing (bad) file.
            os.remove(destination)
            raise ValueError(f'md5sum mismatch. \nExpected: {md5sum}\nActual: {md5sum_test}')
60+
61+
62+
if __name__ == '__main__':
    # CLI: `python download.py <target> [--filelist files.json]`
    parser = argparse.ArgumentParser(description='Download dataset files listed in a JSON file list.')
    parser.add_argument('--filelist', default='files.json',
                        help='Path to the JSON file list (default: files.json)')
    parser.add_argument('target',
                        help='Key in the file list selecting which files to download')

    cfg = parser.parse_args()

    main(cfg)

data/files.json

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
{
2+
"fmri_training": {
3+
"zip": false,
4+
"save_in": "./fmri/train",
5+
"files": [
6+
{"md5sum": "289015c64ea489d6b012356829ed226a",
7+
"name": "S1_ImageNetTraining.h5",
8+
"url": "https://ndownloader.figshare.com/files/42438333"},
9+
{"md5sum": "0072f763932c03c861eec039cf8cba05",
10+
"name": "S1_MSCOCO.h5",
11+
"url": "https://ndownloader.figshare.com/files/42438315"},
12+
{"md5sum": "bad85b741aba7de095fc1d2b25a55946",
13+
"name": "S1_FMD.h5",
14+
"url": "https://ndownloader.figshare.com/files/42438312"},
15+
{"md5sum": "313d7bde33c6603086f4652e8f365fd2",
16+
"name": "S2_ImageNetTraining.h5",
17+
"url": "https://ndownloader.figshare.com/files/42438339"},
18+
{"md5sum": "cde0f085ca6bb4b76062018f181bcc34",
19+
"name": "S2_MSCOCO.h5",
20+
"url": "https://ndownloader.figshare.com/files/42438327"},
21+
{"md5sum": "410198be68ac12c18336f5bc4192412b",
22+
"name": "S2_FMD.h5",
23+
"url": "https://ndownloader.figshare.com/files/42438324"},
24+
{"md5sum": "b7a35ff1dbd1b950a9edf0ca29b56d6b",
25+
"name": "S3_ImageNetTraining.h5",
26+
"url": "https://ndownloader.figshare.com/files/42438330"},
27+
{"md5sum": "b4d38e71bbd36fe85f23e097af05b39c",
28+
"name": "S3_MSCOCO.h5",
29+
"url": "https://ndownloader.figshare.com/files/42438309"},
30+
{"md5sum": "90ac4403176f4aa82f4c336b90d2da1f",
31+
"name": "S3_FMD.h5",
32+
"url": "https://ndownloader.figshare.com/files/42438306"},
33+
{"md5sum": "61864aa83d1d3f2e90bb564c0103aa25",
34+
"name": "S4_ImageNetTraining.h5",
35+
"url": "https://ndownloader.figshare.com/files/42438336"},
36+
{"md5sum": "83b0d0a1ccedf0b14cb56d491bb54ad3",
37+
"name": "S4_FMD.h5",
38+
"url": "https://ndownloader.figshare.com/files/42438318"},
39+
{"md5sum": "ec185b0aa813e75241f5217a681ce981",
40+
"name": "S4_MSCOCO.h5",
41+
"url": "https://ndownloader.figshare.com/files/42438321"},
42+
{"md5sum": "910feeff73f63e75b24034ae0faf2092",
43+
"name": "S5_ImageNetTraining.h5",
44+
"url": "https://ndownloader.figshare.com/files/42438300"},
45+
{"md5sum": "0bb137c2bed1133b76bfca27b9980462",
46+
"name": "S5_MSCOCO.h5",
47+
"url": "https://ndownloader.figshare.com/files/42438288"},
48+
{"md5sum": "4daa99996a3caf452d603210dcc169ec",
49+
"name": "S5_FMD.h5",
50+
"url": "https://ndownloader.figshare.com/files/42438285"},
51+
{"md5sum": "19225556793dc25aa0620ccafede1185",
52+
"name": "S6_ImageNetTraining.h5",
53+
"url": "https://ndownloader.figshare.com/files/42438303"},
54+
{"md5sum": "937403580fcfb00874bc22cb7e6d4ec2",
55+
"name": "S6_MSCOCO.h5",
56+
"url": "https://ndownloader.figshare.com/files/42438294"},
57+
{"md5sum": "1aa81f0ba41a909591be22bbcf299adc",
58+
"name": "S6_FMD.h5",
59+
"url": "https://ndownloader.figshare.com/files/42438297"},
60+
{"md5sum": "e8fa743dbe1864b01eb685fb064d47ce",
61+
"name": "S7_ImageNetTraining.h5",
62+
"url": "https://ndownloader.figshare.com/files/42438291"},
63+
{"md5sum": "41b9318496135674f713b644a3553bec",
64+
"name": "S7_MSCOCO.h5",
65+
"url": "https://ndownloader.figshare.com/files/42438279"},
66+
{"md5sum": "b4ed001005c086302177cf80387df4ee",
67+
"name": "S7_FMD.h5",
68+
"url": "https://ndownloader.figshare.com/files/42438282"}
69+
]
70+
},
71+
"fmri_test": {
72+
"zip": false,
73+
"save_in": "./fmri/test",
74+
"files": [
75+
{
76+
"name": "S1_Illusion.h5",
77+
"url": "https://figshare.com/ndownloader/files/42439035",
78+
"md5sum": "1593b4bcdc81e042527238daaf10b112"
79+
},
80+
{
81+
"name": "S2_Illusion.h5",
82+
"url": "https://figshare.com/ndownloader/files/42439041",
83+
"md5sum": "953aeaf29a4504b8fedc2959467854cd"
84+
},
85+
{
86+
"name": "S3_Illusion.h5",
87+
"url": "https://figshare.com/ndownloader/files/42439032",
88+
"md5sum": "0979f2262800f8a0804f6d2069943f84"
89+
},
90+
{
91+
"name": "S4_Illusion.h5",
92+
"url": "https://figshare.com/ndownloader/files/42439029",
93+
"md5sum": "f6da509cda6b11bf88c9c82d0c93bab6"
94+
},
95+
{
96+
"name": "S5_Illusion.h5",
97+
"url": "https://figshare.com/ndownloader/files/42439044",
98+
"md5sum": "41ff022feb2901241562e14ad6421d5e"
99+
},
100+
{
101+
"name": "S6_Illusion.h5",
102+
"url": "https://figshare.com/ndownloader/files/42439047",
103+
"md5sum": "7c3726f207134cea78f62769623f86f8"
104+
},
105+
{
106+
"name": "S7_Illusion.h5",
107+
"url": "https://figshare.com/ndownloader/files/42439038",
108+
"md5sum": "ebcd9fc1da5c2dfc5fc45f3500a0247d"
109+
}
110+
]
111+
},
112+
"stimulus_feature": {
113+
"save_in": "./",
114+
"files": [
115+
{
116+
"name": "stimulus_feature.zip",
117+
"url": "https://figshare.com/ndownloader/files/42439479",
118+
"md5sum": "c1938fc5a7744960d4b57cdddb6ba8e8",
119+
"postproc": [{"name": "unzip"}]
120+
}
121+
]
122+
}
123+
}

data/fmri/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
*
2+
!/.gitignore
3+
!/train
4+
!/test

data/fmri/test/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!/.gitignore

data/fmri/train/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!/.gitignore

data/stimulus_feature/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!/.gitignore

generator/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.zip

generator/download.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import os
2+
import shutil
3+
import argparse
4+
import json
5+
import urllib.request
6+
import hashlib
7+
from typing import Union
8+
9+
from tqdm import tqdm
10+
11+
12+
def main(cfg):
    '''Download every file listed for ``cfg.target`` in the JSON file list.

    Parameters
    ----------
    cfg : argparse.Namespace
        Must provide ``filelist`` (path to the JSON file list) and
        ``target`` (key selecting which group of files to download).
    '''
    with open(cfg.filelist, 'r') as f:
        filelist = json.load(f)

    target = filelist[cfg.target]

    for fl in target['files']:
        output = os.path.join(target['save_in'], fl['name'])

        # Download only when the file is not already present locally.
        if not os.path.exists(output):
            print(f'Downloading {output} from {fl["url"]}')
            # .get() keeps entries without an "md5sum" key working
            # (checksum verification is then simply skipped).
            download_file(fl['url'], output, progress_bar=True, md5sum=fl.get('md5sum'))

        # Postprocessing (e.g., unpacking a downloaded archive).
        # NOTE(review): this runs on every invocation, even when the
        # file already existed — confirm re-unzipping is intended.
        if 'postproc' in fl:
            for pp in fl['postproc']:
                if pp['name'] == 'unzip':
                    print(f'Unzipping {output}')
                    dest = pp.get('destination', './')
                    shutil.unpack_archive(output, extract_dir=dest)
36+
37+
38+
def download_file(url: str, destination: str, progress_bar: bool = True, md5sum: Union[str, None] = None) -> None:
    '''Download a file from ``url`` to ``destination``.

    Parameters
    ----------
    url : str
        Source URL.
    destination : str
        Local path where the downloaded file is saved.
    progress_bar : bool
        If True, show a tqdm progress bar while downloading.
    md5sum : str or None
        Expected MD5 checksum; when given, the downloaded file is verified.

    Raises
    ------
    ValueError
        If ``md5sum`` is given and does not match the downloaded file.
    '''

    # Probe the file size; use a context manager so the HTTP response
    # is closed (the original leaked this connection).
    with urllib.request.urlopen(url) as response:
        file_size = int(response.info()["Content-Length"])

    if progress_bar:
        # Honor the progress_bar flag (previously the parameter was
        # shadowed by the tqdm context variable and ignored).
        with tqdm(total=file_size, unit='B', unit_scale=True, desc=destination, ncols=100) as pbar:
            def _show_progress(block_num, block_size, total_size):
                # urlretrieve reports cumulative block counts; update tqdm
                # by the delta from its current position.
                downloaded = block_num * block_size
                if total_size > 0:
                    pbar.update(min(downloaded, total_size) - pbar.n)

            urllib.request.urlretrieve(url, destination, _show_progress)
    else:
        urllib.request.urlretrieve(url, destination)

    if md5sum is not None:
        md5_hash = hashlib.md5()
        with open(destination, 'rb') as f:
            # Hash in fixed-size chunks to keep memory bounded.
            for chunk in iter(lambda: f.read(4096), b''):
                md5_hash.update(chunk)
        md5sum_test = md5_hash.hexdigest()
        if md5sum != md5sum_test:
            # Remove the corrupted file so a re-run re-downloads it
            # instead of skipping the existing (bad) file.
            os.remove(destination)
            raise ValueError(f'md5sum mismatch. \nExpected: {md5sum}\nActual: {md5sum_test}')
60+
61+
62+
if __name__ == '__main__':
    # CLI: `python download.py <target> [--filelist files.json]`
    parser = argparse.ArgumentParser(description='Download generator files listed in a JSON file list.')
    parser.add_argument('--filelist', default='files.json',
                        help='Path to the JSON file list (default: files.json)')
    parser.add_argument('target',
                        help='Key in the file list selecting which files to download')

    cfg = parser.parse_args()

    main(cfg)

0 commit comments

Comments
 (0)