Skip to content
This repository was archived by the owner on Dec 27, 2022. It is now read-only.

Commit 1b4a59e

Browse files
author
Adam Richie-Halford
committed
Start adding subject classes
1 parent 14edd49 commit 1b4a59e

File tree

1 file changed

+75
-49
lines changed

1 file changed

+75
-49
lines changed

dmriprep/data.py

Lines changed: 75 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
Functions to download example data from public repositories.
33
44
"""
5-
from .base import InputFiles, InputFilesWithSession
65
import os
76
import os.path as op
7+
from dataclasses import dataclass, field
88
from pathlib import Path
9+
from typing import List
10+
11+
from .base import InputFiles, InputFilesWithSession
912

1013

1114
def get_dataset(output_dir, source='HBN'):
@@ -43,6 +46,77 @@ def get_s3_client():
4346
return s3_client
4447

4548

49+
@dataclass
class Study:
    """A dMRI based study with a BIDS compliant directory structure.

    Attributes
    ----------
    study_id : str
        Identifier of the study. Defaults to ``"HBN"``.
    bucket : str
        Name of the S3 bucket passed to every listing request.
        Defaults to ``"fcp-indi"``.
    s3_prefix : str
        S3 prefix template for the study's MRI data.
    site_ids : list of str
        Site identifiers belonging to the study. Defaults to a new
        empty list for each instance.
    """
    study_id: str = "HBN"
    bucket: str = "fcp-indi"
    # NOTE(review): the "{study_id}" placeholder is stored verbatim; nothing
    # in this class interpolates it. Presumably callers are expected to
    # apply ``s3_prefix.format(study_id=...)`` themselves -- confirm.
    s3_prefix: str = "data/Projects/{study_id}/MRI"
    site_ids: List[str] = field(default_factory=list)

    # Un-annotated, so this is a plain class attribute rather than a
    # dataclass field: it is not an ``__init__`` parameter, it is created
    # once when the class body executes, and it is shared by all instances.
    s3_client = get_s3_client()

    def _get_s3_keys(self, prefix):
        """Retrieve all keys in the study's S3 bucket that match the prefix

        Parameters
        ----------
        prefix : string
            S3 prefix designating the S3 "directory" in which to search.
            Trailing slashes are stripped before the request is made.

        Returns
        -------
        list
            All the keys matching the prefix in the S3 bucket, in the
            order in which the listing pages returned them.

        Raises
        ------
        ValueError
            If the first listing page contains no objects for ``prefix``.
        """
        # Avoid duplicate trailing slash in prefix
        prefix = prefix.rstrip('/')
        request = {'Bucket': self.bucket, 'Prefix': prefix}

        response = self.s3_client.list_objects_v2(**request)

        # Test for the missing 'Contents' entry explicitly instead of
        # catching TypeError, so that unrelated TypeErrors are not
        # misreported as "no subject files".
        contents = response.get('Contents')
        if contents is None:
            raise ValueError(f'There are no subject files in the S3 bucket '
                             f'with prefix {prefix}')

        keys = [d['Key'] for d in contents]

        # Follow continuation tokens until the listing is exhausted.
        while response.get('IsTruncated'):
            response = self.s3_client.list_objects_v2(
                ContinuationToken=response['NextContinuationToken'],
                **request,
            )

            # A page without 'Contents' contributes no keys instead of
            # crashing the whole listing.
            keys.extend(d['Key'] for d in response.get('Contents') or [])

        return keys
96+
97+
98+
@dataclass
class Site:
    """A single site in a (potentially multisite) dMRI study.

    Attributes
    ----------
    study_id : str
        Identifier of the study the site belongs to. Defaults to ``"HBN"``.
    site_id : str
        Identifier of the site. Defaults to ``"Site-SI"``.
    """

    study_id: str = "HBN"
    site_id: str = "Site-SI"
103+
104+
105+
@dataclass
class Subject:
    """A single dMRI study subject.

    Attributes
    ----------
    subject_id : str
        Identifier of the subject. Required.
    site : str
        Site associated with the subject. Required.
    """

    subject_id: str
    site: str

    def list_s3_keys(self):
        """Placeholder: not implemented yet, does nothing and returns None."""
        return None

    def download(self):
        """Placeholder: not implemented yet, does nothing and returns None."""
        return None
116+
117+
118+
119+
46120
def get_s3_register(subject_id, site, raw_keys, deriv_keys):
47121
"""Get the S3 keys for a single subject's input files
48122
@@ -114,54 +188,6 @@ def get_s3_register(subject_id, site, raw_keys, deriv_keys):
114188
)
115189

116190

117-
def get_s3_keys(prefix, s3_client, bucket='fcp-indi'):
    """Retrieve all keys in an S3 bucket that match the prefix

    Parameters
    ----------
    prefix : string
        S3 prefix designating the S3 "directory" in which to search.
        Do not include the site ID in the prefix.
        Trailing slashes are stripped before the request is made.

    s3_client : boto3 client object
        from the get_s3_client() function

    bucket : string
        AWS S3 bucket in which to search

    Returns
    -------
    list
        All the keys matching the prefix in the S3 bucket, in the order
        in which the listing pages returned them.

    Raises
    ------
    ValueError
        If the first listing page contains no objects for ``prefix``.
    """
    # Avoid duplicate trailing slash in prefix
    prefix = prefix.rstrip('/')
    request = {'Bucket': bucket, 'Prefix': prefix}

    response = s3_client.list_objects_v2(**request)

    # Test for the missing 'Contents' entry explicitly instead of catching
    # TypeError, so that unrelated TypeErrors are not misreported as
    # "no subject files".
    contents = response.get('Contents')
    if contents is None:
        raise ValueError(
            f'There are no subject files in the S3 bucket with prefix '
            f'{prefix:s}'
        )

    keys = [d['Key'] for d in contents]

    # Follow continuation tokens until the listing is exhausted.
    while response.get('IsTruncated'):
        response = s3_client.list_objects_v2(
            ContinuationToken=response['NextContinuationToken'],
            **request,
        )

        # A page without 'Contents' contributes no keys instead of
        # crashing the whole listing.
        keys.extend(d['Key'] for d in response.get('Contents') or [])

    return keys
163-
164-
165191
def keys_to_subject_register(keys, prefix, site):
166192
"""Filter S3 keys based on data availability and return
167193

0 commit comments

Comments
 (0)