Skip to content

Commit 056cd14

Browse files
authored
Merge pull request #425 from ImagingDataCommons/idc-prod-sp
Release 49
2 parents 9cc819a + 720f60e commit 056cd14

File tree

7 files changed

+124
-29
lines changed

7 files changed

+124
-29
lines changed

cohorts/views/views.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,9 @@ def convert(data):
9999
return data
100100

101101

102-
def check_manifest_ready(request, file_name):
102+
def check_manifest_ready(request, file_name=None):
103+
if not file_name:
104+
return JsonResponse({"manifest_ready": False, "message": "invalid request"},status=400)
103105
# WJRL 4/25/25: use default app credentials:
104106
client = storage.Client()
105107
##client = storage.Client.from_service_account_json(settings.GOOGLE_APPLICATION_CREDENTIALS)
@@ -109,7 +111,9 @@ def check_manifest_ready(request, file_name):
109111
return JsonResponse({"manifest_ready": blob.exists()}, status=200)
110112

111113

112-
def fetch_user_manifest(request, file_name):
114+
def fetch_user_manifest(request, file_name=None):
115+
if not file_name:
116+
return JsonResponse({"message": "invalid request"},status=400)
113117
# WJRL 4/25/25: use default app credentials:
114118
client = storage.Client()
115119
##client = storage.Client.from_service_account_json(settings.GOOGLE_APPLICATION_CREDENTIALS)

idc_collections/collex_metadata_utils.py

Lines changed: 58 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,9 @@ def convert_disk_size(size):
9292
size_val = ['', 'K', 'M', 'G', 'T', 'P']
9393
init_size = size
9494
val_count = 0
95-
while init_size > 1024:
95+
while init_size > 1000:
9696
val_count += 1
97-
init_size = init_size / 1024
97+
init_size = init_size / 1000
9898

9999
init_size = round(init_size, 2)
100100
return "{} {}B".format(init_size, size_val[val_count])
@@ -533,14 +533,20 @@ def build_explorer_context(is_dicofdic, source, versions, filters, fields, order
533533
}
534534
if collection.collection_id in context['collections']:
535535
name = collection.program.short_name if collection.program else collection.name
536-
programSet[name]['projects'][collection.collection_id] = {
537-
'val': context['collections'][collection.collection_id]['count'],
536+
this_collex = context['collections'][collection.collection_id]
537+
prog_collex = {
538+
'val': this_collex['count'],
538539
'display': collexDisplayVals[collection.collection_id]
539540
}
540-
if 'access' in context['collections'][collection.collection_id]:
541-
programSet[name]['projects'][collection.collection_id]['access'] = \
542-
context['collections'][collection.collection_id]['access']
543-
programSet[name]['val'] += context['collections'][collection.collection_id]['count']
541+
if 'access' in this_collex:
542+
prog_collex['access'] = \
543+
this_collex['access']
544+
programSet[name]['val'] += this_collex['count']
545+
prog_collex['total_size'] = collection.total_size
546+
this_collex['total_size'] = collection.total_size
547+
prog_collex['total_size_with_ar'] = collection.total_size_with_ar
548+
this_collex['total_size_with_ar'] = collection.total_size_with_ar
549+
programSet[name]['projects'][collection.collection_id] = prog_collex
544550

545551
if with_related:
546552
context['tcga_collections'] = Program.objects.get(short_name="TCGA").collection_set.all()
@@ -572,6 +578,7 @@ def build_explorer_context(is_dicofdic, source, versions, filters, fields, order
572578
10)
573579
if disk_size and 'total_instance_size' in source_metadata:
574580
attr_by_source['totals']['disk_size'] = convert_disk_size(source_metadata['total_instance_size'])
581+
attr_by_source['totals']['disk_size_tb'] = source_metadata['total_instance_size']/math.pow(1000, 4)
575582

576583
context['file_parts_count'] = attr_by_source['totals']['file_parts_count']
577584
context['display_file_parts_count'] = attr_by_source['totals']['display_file_parts_count']
@@ -677,7 +684,7 @@ def parse_partition_to_filter(cart_partition):
677684
def submit_manifest_job(
678685
data_version, filters, storage_loc, manifest_type, instructions, fields, from_cart=False,
679686
cart_partition=None, filtergrp_list=None, filename=None
680-
):
687+
):
681688
cart_filters = parse_partition_to_filter(cart_partition) if cart_partition else None
682689
child_records = None if cart_filters else "StudyInstanceUID"
683690
publisher = pubsub_v1.PublisherClient()
@@ -1397,8 +1404,8 @@ def create_cart_query_string(query_list, partitions, join_with_child):
13971404
"filter": ""
13981405
}
13991406
}
1400-
14011407
}
1408+
14021409
cart_facets_serieslvl = {
14031410
"series_in_cart": {
14041411
"type": "terms",
@@ -2087,7 +2094,7 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,
20872094

20882095

20892096
def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mxseries, results_lvl='StudyInstanceUID',
2090-
with_records=True, debug=False):
2097+
with_records=True, debug=False, dois_only=False, size_only=False):
20912098
aggregate_level = "StudyInstanceUID"
20922099
versions = ImagingDataCommonsVersion.objects.filter(
20932100
active=True
@@ -2118,6 +2125,8 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
21182125
aux_sources, {'for_ui': True, 'for_faceting': False, 'active_only': True},
21192126
cache_as="all_ui_attr" if not sources.contains_inactive_versions() else None)
21202127

2128+
limit = limit if with_records else 0
2129+
21212130
query_list = []
21222131
for filtergrp in filtergrp_list:
21232132
query_set_for_filt = []
@@ -2137,8 +2146,12 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
21372146
sortStr = "collection_id asc, PatientID asc, StudyInstanceUID asc" if with_records else None
21382147
totals = ['SeriesInstanceUID', 'StudyInstanceUID', 'PatientID', 'collection_id']
21392148
custom_facets = {
2140-
'instance_size': 'sum(instance_size)'
2149+
'dois': {
2150+
'type': "terms", "field": "source_DOI", "limit": -1, "missing": False
2151+
}
21412152
}
2153+
if not dois_only:
2154+
custom_facets['instance_size'] = 'sum(instance_size)'
21422155

21432156
partitions_series_lvl = []
21442157
for part in partitions:
@@ -2150,12 +2163,12 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
21502163
serieslvl_found = False
21512164
studyidsinseries = {}
21522165
query_str_series_lvl = ''
2153-
if (len(partitions_series_lvl) > 0):
2166+
if len(partitions_series_lvl) > 0:
21542167
query_str_series_lvl = create_cart_query_string([''], partitions_series_lvl, False)
2155-
if (len(query_str_series_lvl) > 0):
2168+
if len(query_str_series_lvl) > 0:
21562169
solr_result_series_lvl = query_solr(
21572170
collection=image_source_series.name, fields=field_list, query_string=None, fqs=[query_str_series_lvl],
2158-
limit=int(mxseries), facets=custom_facets, sort=sortStr, counts_only=False, collapse_on=None,
2171+
limit=int(mxseries) if with_records else 0, facets=custom_facets, sort=sortStr, counts_only=False, collapse_on=None,
21592172
uniques=None, with_cursor=None, stats=None, totals=totals, op='AND'
21602173
)
21612174
if with_records and ('response' in solr_result_series_lvl) and (
@@ -2184,8 +2197,8 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
21842197
sort=sortStr, counts_only=False, collapse_on=None, uniques=None, with_cursor=None, stats=None,
21852198
totals=['SeriesInstanceUID'], op='AND', limit=int(limit), offset=int(offset)
21862199
)
2187-
solr_result['response']['total'] = solr_result['facets']['total_SeriesInstanceUID']
2188-
solr_result['response']['total_instance_size'] = solr_result['facets']['instance_size']
2200+
solr_result['response']['total'] = solr_result['facets'].get('total_SeriesInstanceUID', None)
2201+
solr_result['response']['total_instance_size'] = solr_result['facets'].get('instance_size', None)
21892202
else:
21902203
solr_result = {}
21912204
solr_result['response'] = {}
@@ -2256,15 +2269,22 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
22562269
row['SeriesInstanceUID'] = row['val']
22572270
if ('crdcval' in row):
22582271
row['crdc_series_uuid'] = row['crdcval']
2259-
2272+
for doi in solr_result['facets']['dois']['buckets']:
2273+
if not solr_result['response'].get('dois', None):
2274+
solr_result['response']['dois'] = []
2275+
solr_result['response']['dois'].append(doi['val'])
2276+
if size_only:
2277+
solr_result['response']['total_size'] = solr_result['facets']['instance_size']
22602278
if debug:
22612279
solr_result['response']['query_string'] = query_str
22622280
solr_result['response']['query_string_series_lvl'] = query_str_series_lvl
2281+
22632282
return solr_result['response']
22642283

22652284

2266-
def get_cart_data_serieslvl(filtergrp_list, partitions, field_list, limit, offset):
2285+
def get_cart_data_serieslvl(filtergrp_list, partitions, field_list, limit, offset, with_records=True, dois_only=False, size_only=False):
22672286
aggregate_level = "SeriesInstanceUID"
2287+
limit = limit if with_records else 0
22682288

22692289
versions = ImagingDataCommonsVersion.objects.filter(
22702290
active=True
@@ -2291,14 +2311,18 @@ def get_cart_data_serieslvl(filtergrp_list, partitions, field_list, limit, offse
22912311
cache_as="all_ui_attr" if not sources.contains_inactive_versions() else None)
22922312

22932313
custom_facets = {
2294-
'instance_size': 'sum(instance_size)'
2314+
'dois': {
2315+
'type': "terms", "field": "source_DOI", "limit": -1, "missing": False
2316+
}
22952317
}
2318+
if not dois_only:
2319+
custom_facets['instance_size'] = 'sum(instance_size)'
22962320

22972321
query_list = []
22982322

22992323
for filtergrp in filtergrp_list:
23002324
query_set_for_filt = []
2301-
if (len(filtergrp) > 0):
2325+
if len(filtergrp) > 0:
23022326
solr_query = build_solr_query(
23032327
copy.deepcopy(filtergrp),
23042328
with_tags_for_ex=False,
@@ -2314,10 +2338,19 @@ def get_cart_data_serieslvl(filtergrp_list, partitions, field_list, limit, offse
23142338
solr_result = query_solr(collection=image_source.name, fields=field_list, query_string=None, fqs=[query_str],
23152339
facets=custom_facets, sort=None, counts_only=False, collapse_on='SeriesInstanceUID',
23162340
offset=offset, limit=limit, uniques=None,
2317-
with_cursor=None, stats=None, totals=['SeriesInstanceUID'], op='AND')
2318-
2341+
with_cursor=None, stats=None, totals=['SeriesInstanceUID', 'collection_id', 'PatientID', 'StudyInstanceUID'], op='AND')
23192342
solr_result['response']['total'] = solr_result['facets']['total_SeriesInstanceUID']
2320-
solr_result['response']['total_instance_size'] = solr_result['facets']['instance_size']
2343+
solr_result['response']['facets'] = solr_result['facets']
2344+
2345+
if not dois_only:
2346+
solr_result['response']['total_instance_size'] = solr_result['facets']['instance_size']
2347+
if size_only:
2348+
solr_result['response']['total_size'] = solr_result['facets']['instance_size']
2349+
for doi in solr_result['facets']['dois']['buckets']:
2350+
if not solr_result['response'].get('dois', None):
2351+
solr_result['response']['dois'] = []
2352+
solr_result['response']['dois'].append(doi['val'])
2353+
23212354
return solr_result['response']
23222355

23232356

@@ -2481,10 +2514,9 @@ def create_cart_sql(partitions, filtergrp_lst, storage_loc, lvl="series"):
24812514

24822515
def cart_manifest(filtergrp_list, partitions, mxstudies, field_list, MAX_FILE_LIST_ENTRIES):
24832516
manifest = {}
2484-
manifest['docs'] = []
2485-
24862517
solr_result = get_cart_data_serieslvl(filtergrp_list, partitions, field_list, MAX_FILE_LIST_ENTRIES, 0)
24872518
manifest['docs'] = solr_result['docs']
2519+
manifest['facets'] = solr_result['facets']
24882520

24892521
if 'total_SeriesInstanceUID' in solr_result:
24902522
manifest['total'] = solr_result['total_SeriesInstanceUID']
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Generated by Django 4.2.20 on 2025-09-11 00:17
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('idc_collections', '0012_auto_20240820_1650'),
10+
]
11+
12+
operations = [
13+
migrations.CreateModel(
14+
name='Citation',
15+
fields=[
16+
('id', models.AutoField(primary_key=True, serialize=False)),
17+
('doi', models.CharField(max_length=1024)),
18+
('cite', models.TextField()),
19+
],
20+
),
21+
]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 4.2.20 on 2025-10-20 22:50
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('idc_collections', '0013_citation'),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name='collection',
15+
name='total_size',
16+
field=models.FloatField(default=0.0),
17+
),
18+
]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 4.2.20 on 2025-10-30 17:57
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('idc_collections', '0014_collection_total_size'),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name='collection',
15+
name='total_size_with_ar',
16+
field=models.FloatField(default=0.0),
17+
),
18+
]

idc_collections/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,8 @@ class Collection(models.Model):
337337
image_types = models.CharField(max_length=255, null=True, blank=False)
338338
cancer_type = models.CharField(max_length=512, null=True, blank=False)
339339
doi = models.CharField(max_length=255, null=True, blank=False)
340+
total_size = models.FloatField(null=False, blank=False, default=0.0)
341+
total_size_with_ar = models.FloatField(null=False, blank=False, default=0.0)
340342
source_url = models.CharField(max_length=512, null=True, blank=False)
341343
supporting_data = models.CharField(max_length=255, null=True, blank=False)
342344
analysis_artifacts = models.CharField(max_length=255, null=True, blank=False)

solr_helpers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ def build_solr_facets(attrs, filter_tags=None, include_nulls=True, unique=None,
389389
# subq_join_field: If inverted filters are present, subq_join_field determines the field used to {!join} the inverted
390390
# subquery to the main query
391391
#
392-
# search_child_records_by: a dict indicating what field, if any, should be used in subquerying 'child' or related records.
392+
# search_child_records_by: a dict indicating what field, if any, should be used in subquerying 'child' or related records for each filter attribute
393393
# This allows for searching on 'related records' which are being filtered out based on lack of a filter value, but which
394394
# satisfy another criteria - eg., records from the same study may not all have the same fields pulled out, but you may
395395
# still want those records when filtering on this attribute.

0 commit comments

Comments
 (0)