Skip to content

Commit 3baa535

Browse files
committed
Merge branch 'master' of https://github.com/ImagingDataCommons/IDC-Common into idc-test
2 parents (5771cd5 + b1ec795), commit 3baa535

File tree

2 files changed: 29 additions and 27 deletions

2 files changed: 29 additions and 27 deletions

cohorts/urls.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@
2121

2222
urlpatterns = [
2323
url(r'^$', views.cohorts_list, name='cohort_list'),
24-
url(r'^manifests/fetch/(?P<file_name>[A-Za-z\-0-9]+\/manifest_[0-9_]+\.s5cmd)', views.fetch_user_manifest, name='fetch_user_manifest'),
25-
url(r'^manifests/check/(?P<file_name>[A-Za-z\-0-9]+\/manifest_[0-9_]+\.s5cmd)', views.check_manifest_ready, name='check_user_manifest'),
24+
url(r'^manifests/fetch/(?P<file_name>[A-Za-z\-0-9]+\/manifest_[0-9_]+(aws|gcs|gcp)\.s5cmd)', views.fetch_user_manifest, name='fetch_user_manifest'),
25+
url(r'^manifests/check/(?P<file_name>[A-Za-z\-0-9]+\/manifest_[0-9_]+(aws|gcs|gcp)\.s5cmd)', views.check_manifest_ready, name='check_user_manifest'),
2626
url(r'^manifests/fetch/$', views.fetch_user_manifest, name='fetch_user_manifest_base'),
2727
url(r'^manifests/check/$', views.check_manifest_ready, name='check_user_manifest_base'),
2828
url(r'^api/$', views.views_api_v1.cohort_list_api, name='cohort_list_api'),

idc_collections/collex_metadata_utils.py

Lines changed: 27 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -647,7 +647,7 @@ def parse_partition_to_filter(cart_partition):
647647

648648

649649
# Manifest types supported: s5cmd, idc_index, json.
650-
def submit_manifest_job(data_version, filters, storage_loc, manifest_type, instructions, fields, cart_partition=None):
650+
def submit_manifest_job(data_version, filters, storage_loc, manifest_type, instructions, fields, cart_partition=None, filename=None):
651651
cart_filters = parse_partition_to_filter(cart_partition) if cart_partition else None
652652
child_records = None if cart_filters else "StudyInstanceUID"
653653
service_account_info = json.load(open(settings.GOOGLE_APPLICATION_CREDENTIALS))
@@ -664,7 +664,7 @@ def submit_manifest_job(data_version, filters, storage_loc, manifest_type, instr
664664
datetime.datetime.fromtimestamp(timestamp).strftime('%H:%M:%S %Y/%m/%d')
665665
) + "# {} \n".format(data_version_display) + "{instructions}"
666666

667-
file_name = "manifest_{}.s5cmd".format(datetime.datetime.fromtimestamp(timestamp).strftime('%Y%m%d_%H%M%S'))
667+
file_name = filename or "manifest_{}.s5cmd".format(datetime.datetime.fromtimestamp(timestamp).strftime('%Y%m%d_%H%M%S'))
668668

669669
reformatted_fields = [
670670
"CONCAT('cp s3://',{storage_loc},'/',crdc_series_uuid,'/* ./') AS series".format(storage_loc=storage_loc)]
@@ -745,36 +745,37 @@ def create_file_manifest(request, cohort=None):
745745
versions = cohort.get_data_versions()
746746
group_filters = cohort.get_filters_as_dict()
747747
filters = {x['name']: x['values'] for x in group_filters[0]['filters']}
748-
elif from_cart:
749-
partitions = json.loads(req.get('partitions', '[]'))
750-
filtergrp_list = json.loads(req.get('filtergrp_list', '[{}]'))
751-
versions = json.loads(req.get('versions', '[]'))
752-
mxseries = int(req.get('mxseries', '0'))
753-
mxstudies = int(req.get('mxstudies', '0'))
754748
else:
755-
filters = json.loads(req.get('filters', '{}'))
756-
if not (len(filters)):
757-
raise Exception("No filters supplied for file manifest!")
758-
759749
versions = json.loads(req.get('versions', '[]'))
760-
761-
data_types = [DataSetType.IMAGE_DATA, DataSetType.ANCILLARY_DATA, DataSetType.DERIVED_DATA]
762-
source_type = req.get('data_source_type', DataSource.SOLR)
763-
versions = ImagingDataCommonsVersion.objects.filter(active=True) if not versions else ImagingDataCommonsVersion.objects.filter(version_number__in=versions)
764-
765-
data_sets = DataSetType.objects.filter(data_type__in=data_types)
766-
sources = data_sets.get_data_sources().filter(
767-
source_type=source_type,
768-
aggregate_level__in=["SeriesInstanceUID"],
769-
id__in=versions.get_data_sources().filter(source_type=source_type).values_list("id", flat=True)
770-
).distinct()
750+
versions = ImagingDataCommonsVersion.objects.filter(
751+
active=True) if not len(versions) else ImagingDataCommonsVersion.objects.filter(version_number__in=versions)
752+
if from_cart:
753+
partitions = json.loads(req.get('partitions', '[]'))
754+
filtergrp_list = json.loads(req.get('filtergrp_list', '[{}]'))
755+
versions = json.loads(req.get('versions', '[]'))
756+
mxseries = int(req.get('mxseries', '0'))
757+
mxstudies = int(req.get('mxstudies', '0'))
758+
else:
759+
filters = json.loads(req.get('filters', '{}'))
760+
if not (len(filters)):
761+
raise Exception("No filters supplied for file manifest!")
762+
data_types = [DataSetType.IMAGE_DATA, DataSetType.ANCILLARY_DATA, DataSetType.DERIVED_DATA]
763+
source_type = req.get('data_source_type', DataSource.SOLR)
764+
data_sets = DataSetType.objects.filter(data_type__in=data_types)
765+
sources = data_sets.get_data_sources().filter(
766+
source_type=source_type,
767+
aggregate_level__in=["SeriesInstanceUID"],
768+
id__in=versions.get_data_sources().filter(source_type=source_type).values_list("id", flat=True)
769+
).distinct()
770+
771+
print("File type: {}".format(file_type))
771772

772773
if file_type in ['s5cmd', 'idc_index']:
773774
api_loc = "https://s3.amazonaws.com" if loc == 'aws' else "https://storage.googleapis.com"
774775
cmd = "# idc download <manifest file name>{}".format(os.linesep)
775776
install = "the idc-index (https://github.com/ImagingDataCommons/idc-index) python package:{}".format(
776777
os.linesep) + "# pip install --upgrade idc-index"
777-
if file_type in ['s5cmd', 'idc_index']:
778+
if file_type in ['s5cmd']:
778779
cmd = "# s5cmd --no-sign-request --endpoint-url {} run <manifest file name>{}".format(api_loc, os.linesep)
779780
install = "s5cmd (https://github.com/peak/s5cmd),"
780781
instructions = "# To download the files in this manifest, install {}{}".format(install, os.linesep) + \
@@ -785,7 +786,8 @@ def create_file_manifest(request, cohort=None):
785786
if async_download and (file_type not in ["bq"]):
786787
jobId, file_name = submit_manifest_job(
787788
ImagingDataCommonsVersion.objects.filter(active=True), filters, storage_bucket, file_type, instructions,
788-
selected_columns_sorted if file_type not in ["s5cmd", "idc_index"] else None, cart_partition=partitions
789+
selected_columns_sorted if file_type not in ["s5cmd", "idc_index"] else None, cart_partition=partitions,
790+
filename=file_name
789791
)
790792
return JsonResponse({
791793
"jobId": jobId,

0 commit comments

Comments
 (0)