Skip to content

Commit e241db5

Browse files
committed
Merge branch 'master' of https://github.com/ImagingDataCommons/IDC-Common into idc-test
2 parents a26b871 + c1be599 commit e241db5

File tree

1 file changed

+20
-26
lines changed

1 file changed

+20
-26
lines changed

idc_collections/collex_metadata_utils.py

Lines changed: 20 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -802,7 +802,7 @@ def create_file_manifest(request, cohort=None):
802802

803803
# All downloads from this segment onwards are sync
804804
if from_cart:
805-
items = get_cart_manifest(filtergrp_list, partitions, mxstudies, mxseries, field_list, MAX_FILE_LIST_ENTRIES)
805+
items = cart_manifest(filtergrp_list, partitions, mxstudies, field_list, MAX_FILE_LIST_ENTRIES)
806806
else:
807807
items = filter_manifest(filters, sources, versions, field_list, MAX_FILE_LIST_ENTRIES, with_size=True, series_only=single_series)
808808
if 'docs' in items:
@@ -881,7 +881,7 @@ def create_file_manifest(request, cohort=None):
881881

882882
for row in manifest:
883883
if file_type in ['s5cmd', 'idc_index']:
884-
this_row = S5CMD_BASE.format(row[storage_bucket], row['crdc_series_uuid'], os.linesep)
884+
this_row = S5CMD_BASE.format(row[storage_bucket][0], row['crdc_series_uuid'], os.linesep) if isinstance(row[storage_bucket],list) else S5CMD_BASE.format(row[storage_bucket], row['crdc_series_uuid'], os.linesep)
885885
content_type = "text/plain"
886886
else:
887887
content_type = "text/csv"
@@ -1872,10 +1872,6 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
18721872

18731873
field_list = ['collection_id', 'PatientID', 'StudyInstanceUID', 'SeriesInstanceUID', 'Modality', 'instance_size',
18741874
'crdc_series_uuid', 'aws_bucket', 'gcs_bucket'] if with_records else None
1875-
# Do not pull the bucket from the study, as this will be an aggregated value over the Series and might differ
1876-
# for each series
1877-
field_list_study = ['collection_id', 'PatientID', 'StudyInstanceUID', 'SeriesInstanceUID', 'Modality', 'instance_size',
1878-
'crdc_series_uuid'] if with_records else None
18791875
sortStr = "collection_id asc, PatientID asc, StudyInstanceUID asc" if with_records else None
18801876
totals = ['SeriesInstanceUID', 'StudyInstanceUID', 'PatientID', 'collection_id']
18811877
custom_facets = {
@@ -1900,6 +1896,7 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
19001896
limit=int(mxseries), facets=custom_facets, sort=sortStr, counts_only=False, collapse_on=None,
19011897
uniques=None, with_cursor=None, stats=None, totals=totals, op='AND'
19021898
)
1899+
19031900
if with_records and ('response' in solr_result_series_lvl) and ('docs' in solr_result_series_lvl['response']):
19041901
serieslvl_found = True
19051902
for row in solr_result_series_lvl['response']['docs']:
@@ -1921,10 +1918,11 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
19211918
query_str = create_cart_query_string(query_list, partitions_study_lvl, False)
19221919
if len(query_str) > 0:
19231920
solr_result = query_solr(
1924-
collection=image_source.name, fields=field_list_study, query_string=None, fqs=[query_str], facets=custom_facets,
1921+
collection=image_source.name, fields=field_list, query_string=None, fqs=[query_str], facets=custom_facets,
19251922
sort=sortStr, counts_only=False, collapse_on=None, uniques=None, with_cursor=None, stats=None,
19261923
totals=['SeriesInstanceUID'], op='AND', limit=int(limit), offset=int(offset)
19271924
)
1925+
19281926
solr_result['response']['total'] = solr_result['facets']['total_SeriesInstanceUID']
19291927
solr_result['response']['total_instance_size'] = solr_result['facets']['instance_size']
19301928
else:
@@ -2000,11 +1998,10 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
20001998
if debug:
20011999
solr_result['response']['query_string'] = query_str
20022000
solr_result['response']['query_string_series_lvl'] = query_str_series_lvl
2003-
20042001
return solr_result['response']
20052002

20062003

2007-
def get_cart_data(filtergrp_list, partitions, field_list, limit, offset):
2004+
def get_cart_data_serieslvl(filtergrp_list, partitions, field_list, limit, offset):
20082005
aggregate_level = "SeriesInstanceUID"
20092006

20102007
versions=ImagingDataCommonsVersion.objects.filter(
@@ -2031,6 +2028,10 @@ def get_cart_data(filtergrp_list, partitions, field_list, limit, offset):
20312028
aux_sources, {'for_ui': True, 'for_faceting': False, 'active_only': True},
20322029
cache_as="all_ui_attr" if not sources.contains_inactive_versions() else None)
20332030

2031+
custom_facets = {
2032+
'instance_size': 'sum(instance_size)'
2033+
}
2034+
20342035
query_list=[]
20352036
for filtergrp in filtergrp_list:
20362037
query_set_for_filt = []
@@ -2047,10 +2048,12 @@ def get_cart_data(filtergrp_list, partitions, field_list, limit, offset):
20472048

20482049
query_str = create_cart_query_string(query_list, partitions, False)
20492050

2050-
solr_result = query_solr(collection=image_source.name, fields=field_list, query_string=query_str, fqs=None,
2051-
facets=None,sort=None, counts_only=False,collapse_on='SeriesInstanceUID', offset=offset, limit=limit, uniques=None,
2052-
with_cursor=None, stats=None, totals=None, op='OR')
2051+
solr_result = query_solr(collection=image_source.name, fields=field_list, query_string=None, fqs=[query_str],
2052+
facets=custom_facets,sort=None, counts_only=False,collapse_on='SeriesInstanceUID', offset=offset, limit=limit, uniques=None,
2053+
with_cursor=None, stats=None, totals=['SeriesInstanceUID'], op='AND')
20532054

2055+
solr_result['response']['total'] = solr_result['facets']['total_SeriesInstanceUID']
2056+
solr_result['response']['total_instance_size'] = solr_result['facets']['instance_size']
20542057
return solr_result['response']
20552058

20562059

@@ -2207,10 +2210,12 @@ def create_cart_sql(partitions, filtergrp_lst, storage_loc, lvl="series"):
22072210
return {'sql_string': cart_sql, 'params':params}
22082211

22092212

2210-
def get_cart_manifest(filtergrp_list, partitions, mxstudies, mxseries, field_list, MAX_FILE_LIST_ENTRIES):
2213+
def cart_manifest(filtergrp_list, partitions, mxstudies, field_list, MAX_FILE_LIST_ENTRIES):
22112214
manifest ={}
22122215
manifest['docs'] =[]
2213-
solr_result = get_cart_data_studylvl(filtergrp_list, partitions, MAX_FILE_LIST_ENTRIES, 0, mxstudies, MAX_FILE_LIST_ENTRIES, results_lvl = 'SeriesInstanceUID')
2216+
2217+
solr_result = get_cart_data_serieslvl(filtergrp_list, partitions, field_list, MAX_FILE_LIST_ENTRIES,0)
2218+
manifest['docs'] = solr_result['docs']
22142219

22152220
if 'total_SeriesInstanceUID' in solr_result:
22162221
manifest['total'] = solr_result['total_SeriesInstanceUID']
@@ -2220,18 +2225,7 @@ def get_cart_manifest(filtergrp_list, partitions, mxstudies, mxseries, field_lis
22202225
if ('total_instance_size' in solr_result):
22212226
manifest['total_instance_size'] = solr_result['total_instance_size']
22222227

2223-
for row in solr_result['docs']:
2224-
crdc_series_arr = row['crdc_series_uuid']
2225-
for id in crdc_series_arr:
2226-
manifest_row = {
2227-
'crdc_series_uuid': id,
2228-
'aws_bucket': row['series_buckets'][id]['aws_bucket'],
2229-
'gcs_bucket': row['series_buckets'][id]['gcs_bucket']
2230-
}
2231-
for field in field_list:
2232-
if field not in ['crdc_series_uuid', 'aws_bucket', 'gcs_bucket']:
2233-
manifest_row[field] = row[field]
2234-
manifest['docs'].append(manifest_row)
2228+
22352229
return manifest
22362230

22372231

0 commit comments

Comments
 (0)