Skip to content

Commit c576764

Browse files
committed
Merge branch 'master' of https://github.com/ImagingDataCommons/IDC-Common into idc-test
2 parents 7de174c + 4786f84 commit c576764

File tree

1 file changed

+29
-17
lines changed

1 file changed

+29
-17
lines changed

idc_collections/collex_metadata_utils.py

Lines changed: 29 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -881,9 +881,7 @@ def create_file_manifest(request, cohort=None):
881881

882882
for row in manifest:
883883
if file_type in ['s5cmd', 'idc_index']:
884-
this_row = ""
885-
for bucket in row[storage_bucket]:
886-
this_row += S5CMD_BASE.format(bucket, row['crdc_series_uuid'], os.linesep)
884+
this_row = S5CMD_BASE.format(row[storage_bucket], row['crdc_series_uuid'], os.linesep)
887885
content_type = "text/plain"
888886
else:
889887
content_type = "text/csv"
@@ -1472,8 +1470,6 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
14721470
with_cart = True
14731471
if (tabletype == "collections"):
14741472
sorted_ids = current_filters["collection_id"]
1475-
1476-
14771473
elif ("facetfields" in table_data) and (sortarg in table_data["facetfields"]):
14781474
# when sorting by a 'facet' field (# of cases, # of studies etc.), we need to find the set of ids selected from
14791475
# this field by the limit, offset params in a preliminary solr call, then add that set as a filter to limit the
@@ -1564,7 +1560,6 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
15641560
row["unique_series_cart"] = 0
15651561
row["unique_series_filter_and_cart"] = 0
15661562

1567-
15681563
if (tabletype == "collections"):
15691564
row["unique_cases_cart"] = 0
15701565
row["unique_cases_filter_and_cart"] = 0
@@ -1677,7 +1672,6 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
16771672
custom_facets["upstream_study_filter"] = copy.deepcopy(upstream_cart_facets["upstream_study_filter"])
16781673
custom_facets["upstream_study_filter"]["domain"]["filter"] = studyrngfilt+no_tble_item_filt_str
16791674

1680-
16811675
if with_cart:
16821676
if tabletype in ["cases","series","studies"]:
16831677
collstr= list(attrRowNumMp["collections"].keys())
@@ -1878,6 +1872,10 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
18781872

18791873
field_list = ['collection_id', 'PatientID', 'StudyInstanceUID', 'SeriesInstanceUID', 'Modality', 'instance_size',
18801874
'crdc_series_uuid', 'aws_bucket', 'gcs_bucket'] if with_records else None
1875+
# Do not pull the bucket from the study, as this will be an aggregated value over the Series and might differ
1876+
# for each series
1877+
field_list_study = ['collection_id', 'PatientID', 'StudyInstanceUID', 'SeriesInstanceUID', 'Modality', 'instance_size',
1878+
'crdc_series_uuid'] if with_records else None
18811879
sortStr = "collection_id asc, PatientID asc, StudyInstanceUID asc" if with_records else None
18821880
totals = ['SeriesInstanceUID', 'StudyInstanceUID', 'PatientID', 'collection_id']
18831881
custom_facets = {
@@ -1925,7 +1923,7 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
19251923
query_str = create_cart_query_string(query_list, partitions_study_lvl, False)
19261924
if len(query_str) > 0:
19271925
solr_result = query_solr(
1928-
collection=image_source.name, fields=field_list, query_string=None, fqs=[query_str], facets=custom_facets,
1926+
collection=image_source.name, fields=field_list_study, query_string=None, fqs=[query_str], facets=custom_facets,
19291927
sort=sortStr, counts_only=False, collapse_on=None, uniques=None, with_cursor=None, stats=None,
19301928
totals=['SeriesInstanceUID'], op='AND', limit=int(limit), offset=int(offset)
19311929
)
@@ -1943,13 +1941,15 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
19431941
ind = 0
19441942
rowDic={}
19451943
rowsWithSeries=[]
1946-
for row in solr_result['response']['docs']:
1947-
rowDic[row['StudyInstanceUID']] = ind
1948-
ind = ind+1
1944+
# Enumerate all the Studies found in the Study-level query result
1945+
for i, row in enumerate(solr_result['response']['docs']):
1946+
rowDic[row['StudyInstanceUID']] = i
1947+
# Record the next free index in case we need to add studies that are found only in the series query result
1948+
ind = len(solr_result['response']['docs'])
19491949
for row in solr_result_series_lvl['response']['docs']:
19501950
studyid = row['StudyInstanceUID']
19511951
seriesid = row['SeriesInstanceUID']
1952-
if ('crdc_series_uuid' in row):
1952+
if 'crdc_series_uuid' in row:
19531953
crdcid = row['crdc_series_uuid']
19541954
# Studies which are not found in the main query but present in a series are from single-series additions
19551955
# following a study removal
@@ -1964,6 +1964,13 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
19641964
if not 'val' in studyrow:
19651965
studyrow['val'] = []
19661966
rowsWithSeries.append(studyind)
1967+
if 'series_buckets' not in studyrow:
1968+
studyrow['series_buckets'] = {}
1969+
if crdcid not in studyrow['series_buckets']:
1970+
studyrow['series_buckets'][crdcid] = {
1971+
'aws_bucket': row['aws_bucket'][0],
1972+
'gcs_bucket': row['gcs_bucket'][0],
1973+
}
19671974
if not('crdcval' in studyrow) and ('crdc_series_uuid' in row):
19681975
studyrow['crdcval'] = []
19691976
if not('seriestotsize' in studyrow):
@@ -1984,7 +1991,7 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
19841991
solr_result['response']['total']= solr_result['response']['total']-row['cnt']+row['selcnt']
19851992
else:
19861993
row['selcnt'] = row['cnt']
1987-
if ('seriestotsize' in row):
1994+
if 'seriestotsize' in row:
19881995
solr_result['response']['total_instance_size'] = solr_result['response']['total_instance_size'] -sum(row['instance_size'])+sum(row['seriestotsize'])
19891996
if results_lvl=='StudyInstanceUID':
19901997
del (row['SeriesInstanceUID'])
@@ -2209,6 +2216,8 @@ def get_cart_manifest(filtergrp_list, partitions, mxstudies, mxseries, field_lis
22092216
manifest['docs'] =[]
22102217
solr_result = get_cart_data_studylvl(filtergrp_list, partitions, MAX_FILE_LIST_ENTRIES, 0, mxstudies, MAX_FILE_LIST_ENTRIES, results_lvl = 'SeriesInstanceUID')
22112218

2219+
print(solr_result)
2220+
22122221
if 'total_SeriesInstanceUID' in solr_result:
22132222
manifest['total'] = solr_result['total_SeriesInstanceUID']
22142223
elif 'total' in solr_result:
@@ -2220,11 +2229,14 @@ def get_cart_manifest(filtergrp_list, partitions, mxstudies, mxseries, field_lis
22202229
for row in solr_result['docs']:
22212230
crdc_series_arr = row['crdc_series_uuid']
22222231
for id in crdc_series_arr:
2223-
manifest_row={}
2224-
manifest_row['crdc_series_uuid'] = id
2232+
manifest_row = {
2233+
'crdc_series_uuid': id,
2234+
'aws_bucket': row['series_buckets'][id]['aws_bucket'],
2235+
'gcs_bucket': row['series_buckets'][id]['gcs_bucket']
2236+
}
22252237
for field in field_list:
2226-
if not (field == 'crdc_series_uuid'):
2227-
manifest_row[field] = row[field]
2238+
if field not in ['crdc_series_uuid', 'aws_bucket', 'gcs_bucket']:
2239+
manifest_row[field] = row[field]
22282240
manifest['docs'].append(manifest_row)
22292241
return manifest
22302242

0 commit comments

Comments
 (0)