|
2 | 2 | Functions to download example data from public repositories.
|
3 | 3 |
|
4 | 4 | """
|
5 |
| -from .base import InputFiles, InputFilesWithSession |
6 | 5 | import os
|
7 | 6 | import os.path as op
|
| 7 | +from dataclasses import dataclass, field |
8 | 8 | from pathlib import Path
|
| 9 | +from typing import List |
| 10 | + |
| 11 | +from .base import InputFiles, InputFilesWithSession |
9 | 12 |
|
10 | 13 |
|
11 | 14 | def get_dataset(output_dir, source='HBN'):
|
@@ -43,6 +46,77 @@ def get_s3_client():
|
43 | 46 | return s3_client
|
44 | 47 |
|
45 | 48 |
|
@dataclass
class Study:
    """A dMRI based study with a BIDS compliant directory structure

    Attributes
    ----------
    study_id : str
        Identifier of the study. Default: "HBN".

    bucket : str
        Name of the S3 bucket that is searched. Default: "fcp-indi".

    s3_prefix : str
        S3 prefix template for the study. The ``{study_id}`` placeholder
        is stored verbatim; nothing in this class expands it.

    site_ids : list of str
        Site identifiers. Default: a new empty list per instance.

    s3_client : object, optional
        Client object exposing ``list_objects_v2``. When omitted, one is
        obtained from ``get_s3_client()`` when the instance is created,
        rather than when the module is imported. Excluded from ``repr``
        and from equality comparisons.
    """
    study_id: str = "HBN"
    bucket: str = "fcp-indi"
    s3_prefix: str = "data/Projects/{study_id}/MRI"
    site_ids: List[str] = field(default_factory=list)
    s3_client: object = field(default=None, repr=False, compare=False)

    def __post_init__(self):
        """Fall back to the module's default S3 client if none was given"""
        if self.s3_client is None:
            self.s3_client = get_s3_client()

    def _get_s3_keys(self, prefix):
        """Retrieve all keys in the study's S3 bucket that match the prefix

        Parameters
        ----------
        prefix : string
            S3 prefix designating the S3 "directory" in which to search.
            Trailing slashes are stripped before the query is made.

        Returns
        -------
        list
            All the keys matching the prefix in the S3 bucket

        Raises
        ------
        ValueError
            If no key in the bucket matches the prefix.
        """
        # Avoid duplicate trailing slash in prefix
        prefix = prefix.rstrip('/')

        request = {'Bucket': self.bucket, 'Prefix': prefix}
        keys = []

        while True:
            response = self.s3_client.list_objects_v2(**request)

            # A page may carry no 'Contents' entry at all; treat that as
            # an empty page instead of failing while iterating None.
            keys.extend(d['Key'] for d in response.get('Contents') or [])

            if not response.get('IsTruncated'):
                break

            # Ask for the next page on the following iteration
            request['ContinuationToken'] = response['NextContinuationToken']

        if not keys:
            raise ValueError(f'There are no subject files in the S3 bucket '
                             f'with prefix {prefix}')

        return keys
| 96 | + |
| 97 | + |
@dataclass
class Site:
    """One site of a dMRI study that may span several sites

    Attributes
    ----------
    study_id : str
        Identifier of the study. Default: "HBN".

    site_id : str
        Identifier of the site. Default: "Site-SI".
    """

    study_id: str = 'HBN'
    site_id: str = 'Site-SI'
| 103 | + |
| 104 | + |
@dataclass
class Subject:
    """One subject of a dMRI study

    Attributes
    ----------
    subject_id : str
        Identifier of the subject.

    site : str
        Site of the subject (presumably a site ID such as "Site-SI";
        confirm against callers).
    """

    subject_id: str
    site: str

    def list_s3_keys(self):
        """Placeholder without an implementation yet; returns None"""
        return None

    def download(self):
        """Placeholder without an implementation yet; returns None"""
        return None
| 116 | + |
| 117 | + |
| 118 | + |
| 119 | + |
46 | 120 | def get_s3_register(subject_id, site, raw_keys, deriv_keys):
|
47 | 121 | """Get the S3 keys for a single subject's input files
|
48 | 122 |
|
@@ -114,54 +188,6 @@ def get_s3_register(subject_id, site, raw_keys, deriv_keys):
|
114 | 188 | )
|
115 | 189 |
|
116 | 190 |
|
def get_s3_keys(prefix, s3_client, bucket='fcp-indi'):
    """Retrieve all keys in an S3 bucket that match the prefix

    Parameters
    ----------
    prefix : string
        S3 prefix designating the S3 "directory" in which to search.
        Do not include the site ID in the prefix.
        Trailing slashes are stripped before the query is made.

    s3_client : boto3 client object
        from the get_s3_client() function

    bucket : string
        AWS S3 bucket in which to search

    Returns
    -------
    list
        All the keys matching the prefix in the S3 bucket

    Raises
    ------
    ValueError
        If no key in the bucket matches the prefix.
    """
    # Avoid duplicate trailing slash in prefix
    prefix = prefix.rstrip('/')

    request = {'Bucket': bucket, 'Prefix': prefix}
    keys = []

    while True:
        response = s3_client.list_objects_v2(**request)

        # A page may carry no 'Contents' entry at all; treat that as an
        # empty page instead of failing while iterating None.
        keys.extend(d['Key'] for d in response.get('Contents') or [])

        if not response.get('IsTruncated'):
            break

        # Ask for the next page on the following iteration
        request['ContinuationToken'] = response['NextContinuationToken']

    if not keys:
        raise ValueError(
            f'There are no subject files in the S3 bucket with prefix '
            f'{prefix}'
        )

    return keys
163 |
| - |
164 |
| - |
165 | 191 | def keys_to_subject_register(keys, prefix, site):
|
166 | 192 | """Filter S3 keys based on data availability and return
|
167 | 193 |
|
|
0 commit comments