1- #
21# Copyright 2015-2020, Institute for Systems Biology
32#
43# Licensed under the Apache License, Version 2.0 (the "License");
@@ -647,7 +646,10 @@ def parse_partition_to_filter(cart_partition):
647646
648647
649648# Manifest types supported: s5cmd, idc_index, json.
650- def submit_manifest_job (data_version , filters , storage_loc , manifest_type , instructions , fields , cart_partition = None , filename = None ):
649+ def submit_manifest_job (
650+ data_version , filters , storage_loc , manifest_type , instructions , fields , from_cart = False ,
651+ cart_partition = None , filtergrp_list = None , filename = None
652+ ):
651653 cart_filters = parse_partition_to_filter (cart_partition ) if cart_partition else None
652654 child_records = None if cart_filters else "StudyInstanceUID"
653655 service_account_info = json .load (open (settings .GOOGLE_APPLICATION_CREDENTIALS ))
@@ -673,11 +675,14 @@ def submit_manifest_job(data_version, filters, storage_loc, manifest_type, instr
673675
674676 filters = filters or {}
675677
676- bq_query_and_params = get_bq_metadata (
677- filters , ["crdc_series_uuid" , storage_loc ], data_version , fields , ["crdc_series_uuid" , storage_loc ],
678- no_submit = True , search_child_records_by = child_records ,
679- reformatted_fields = reformatted_fields , cart_filters = cart_filters
680- )
678+ if from_cart :
679+ bq_query_and_params = create_cart_sql (cart_partition , filtergrp_list , storage_loc , lvl = "series" )
680+ else :
681+ bq_query_and_params = get_bq_metadata (
682+ filters , ["crdc_series_uuid" , storage_loc ], data_version , fields , ["crdc_series_uuid" , storage_loc ],
683+ no_submit = True , search_child_records_by = child_records ,
684+ reformatted_fields = reformatted_fields , cart_filters = cart_filters
685+ )
681686
682687 manifest_job = {
683688 "query" : bq_query_and_params ['sql_string' ],
@@ -705,6 +710,7 @@ def create_file_manifest(request, cohort=None):
705710 req = request .GET or request .POST
706711 manifest = None
707712 partitions = None
713+ filtergrp_list = None
708714 S5CMD_BASE = "cp s3://{}/{}/* .{}"
709715 file_type = req .get ('file_type' , 's5cmd' ).lower ()
710716 loc = req .get ('loc_type_{}' .format (file_type ), 'aws' )
@@ -768,8 +774,6 @@ def create_file_manifest(request, cohort=None):
768774 id__in = versions .get_data_sources ().filter (source_type = source_type ).values_list ("id" , flat = True )
769775 ).distinct ()
770776
771- print ("File type: {}" .format (file_type ))
772-
773777 if file_type in ['s5cmd' , 'idc_index' ]:
774778 api_loc = "https://s3.amazonaws.com" if loc == 'aws' else "https://storage.googleapis.com"
775779 cmd = "# idc download <manifest file name>{}" .format (os .linesep )
@@ -786,7 +790,8 @@ def create_file_manifest(request, cohort=None):
786790 if async_download and (file_type not in ["bq" ]):
787791 jobId , file_name = submit_manifest_job (
788792 ImagingDataCommonsVersion .objects .filter (active = True ), filters , storage_bucket , file_type , instructions ,
789- selected_columns_sorted if file_type not in ["s5cmd" , "idc_index" ] else None , cart_partition = partitions ,
793+ selected_columns_sorted if file_type not in ["s5cmd" , "idc_index" ] else None , from_cart = from_cart ,
794+ cart_partition = partitions , filtergrp_list = filtergrp_list ,
790795 filename = file_name
791796 )
792797 return JsonResponse ({
@@ -852,17 +857,6 @@ def create_file_manifest(request, cohort=None):
852857 hdr = [hdr ]
853858 rows += (hdr ,)
854859
855- if items ['total' ] > MAX_FILE_LIST_ENTRIES :
856- hdr = "{}NOTE: Due to the limits of our system, we can only return {} manifest entries." .format (
857- cmt_delim , str (MAX_FILE_LIST_ENTRIES )
858- ) + " Your cohort's total entries exceeded this number. This part of {} entries has been " .format (
859- str (MAX_FILE_LIST_ENTRIES )
860- ) + " downloaded, sorted by PatientID, StudyID, SeriesID, and SOPInstanceUID.{}" .format (linesep )
861-
862- if file_type not in ['s5cmd' , 'idc_index' ]:
863- hdr = [hdr ]
864- rows += (hdr ,)
865-
866860 hdr = "{}IDC Data Version(s): {}{}" .format (
867861 cmt_delim ,
868862 "; " .join ([str (x ) for x in versions ]),
@@ -1114,7 +1108,10 @@ def parse_partition_string(partition):
11141108 id = partition ['id' ]
11151109 part_str = ''
11161110 for i in range (0 ,len (id )):
1117- part_str = part_str + '(+' + filts [i ]+ ':("' + id [i ]+ '"))'
1111+ if (i == 0 ):
1112+ part_str = part_str + '(+' + filts [i ]+ ':("' + id [i ]+ '"))'
1113+ else :
1114+ part_str = part_str + ' AND (+' + filts [i ]+ ':("' + id [i ]+ '"))'
11181115 cur_not = partition ['not' ]
11191116 if (len (cur_not )> 0 ):
11201117 cur_not = ['"' + x + '"' for x in cur_not ]
@@ -1162,7 +1159,7 @@ def create_cart_query_string(query_list, partitions, join):
11621159 cur_part_str = parse_partition_string (cur_part )
11631160 for j in range (len (cur_part_attr_strA )):
11641161 if (len (cur_part_attr_strA [j ])> 0 ):
1165- solrA .append ('(' + cur_part_str + ')(' + cur_part_attr_strA [j ] + ')' )
1162+ solrA .append ('(' + cur_part_str + ') AND (' + cur_part_attr_strA [j ] + ')' )
11661163 else :
11671164 solrA .append (cur_part_str )
11681165 solrA = ['(' + x + ')' for x in solrA ]
@@ -1341,11 +1338,12 @@ def generate_solr_cart_and_filter_strings(current_filters,filtergrp_list, partit
13411338 current_solr_query = build_solr_query (
13421339 copy .deepcopy (current_filters ),
13431340 with_tags_for_ex = False ,
1344- search_child_records_by = None
1341+ search_child_records_by = None , solr_default_op = 'AND'
13451342 )
13461343 try :
13471344 current_filt_query_set = create_query_set (current_solr_query , aux_sources , image_source , all_ui_attrs ,
13481345 image_source , DataSetType )
1346+ current_filt_query_set = ['(' + filt + ')' if not filt [0 ] == '(' else filt for filt in current_filt_query_set ]
13491347 current_filt_str = "" .join (current_filt_query_set )
13501348 except :
13511349 current_filt_str = ""
@@ -1360,7 +1358,7 @@ def generate_solr_cart_and_filter_strings(current_filters,filtergrp_list, partit
13601358 solr_query = build_solr_query (
13611359 copy .deepcopy (filtergrp ),
13621360 with_tags_for_ex = False ,
1363- search_child_records_by = None
1361+ search_child_records_by = None , solr_default_op = 'AND'
13641362 )
13651363 query_set_for_filt = create_query_set (solr_query , aux_sources , image_source , all_ui_attrs , image_source , DataSetType )
13661364 query_set_for_filt = ['(' + filt + ')' if not filt [0 ] == '(' else filt for filt in query_set_for_filt ]
@@ -1462,6 +1460,7 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
14621460 del (current_filters [tblitem ])
14631461 [current_filt_str , cart_query_str_all , cart_query_str_studylvl , cart_query_str_serieslvl ] = generate_solr_cart_and_filter_strings (current_filters ,filtergrp_list ,partitions )
14641462 no_tble_item_filt_str = current_filt_str
1463+
14651464 if len (tblfiltstr )> 0 :
14661465 current_filt_str = tblfiltstr + current_filt_str
14671466 if len (current_filt_str ) > 0 :
@@ -1473,6 +1472,7 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
14731472 if (tabletype == "collections" ):
14741473 sorted_ids = current_filters ["collection_id" ]
14751474
1475+
14761476 elif ("facetfields" in table_data ) and (sortarg in table_data ["facetfields" ]):
14771477 # when sorting by a 'facet' field (# of cases, # of studies etc.), we need to find the set of ids selected from
14781478 # this field by the limit, offset params in a preliminary solr call, then add that set as a filter to limit the
@@ -1507,8 +1507,14 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
15071507 sortStr = sortarg + " " + sortdir
15081508 imgNm = image_source_series .name if (tabletype == "series" ) else image_source .name
15091509
1510+ if (len (current_filt_str )> 0 ):
1511+ fqs = [current_filt_str ]
1512+ else :
1513+ fqs = None
1514+
1515+
15101516 rng_query = query_solr (
1511- collection = imgNm , fields = [id ], query_string = current_filt_str , fqs = None ,
1517+ collection = imgNm , fields = [id ], query_string = None , fqs = fqs ,
15121518 facets = None , sort = sortStr , counts_only = False , collapse_on = collapse_id , offset = offset , limit = limit ,
15131519 uniques = None , with_cursor = None , stats = None , totals = None , op = 'AND'
15141520 )
@@ -1588,15 +1594,18 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
15881594
15891595 # Table attributes need a filter query; cart queries come in via the stats queries.
15901596 fqset = [rngfilt ]
1597+ #fqset = rngfilt
15911598 if len (current_filt_str ) > 0 :
1592- fqset .append ("{!tag=f1}(" + current_filt_str + ")" )
1599+ fqset .append ("{!tag=f1}(+" + current_filt_str + ")" )
1600+ #fqset = fqset + " AND {!tag=f1}(" + current_filt_str + ")"
1601+ #fqset.append('{!tag=f1}(+Modality:("RTSTRUCT"))(+collection_id:("4d_lung"))')
15931602
15941603 attr_results = []
15951604 # If the table is collections, attributes are not needed, only cart stats. If the table is series, the series store is used.
15961605
15971606 if not (tabletype == "series" ) and not (tabletype == "collections" ):
15981607 solr_result = query_solr (
1599- collection = image_source .name , fields = field_list , query_string = None , fqs = fqset ,
1608+ collection = image_source .name , fields = field_list , query_string = None , fqs = fqset [:] ,
16001609 facets = None ,sort = sortStr , counts_only = False ,collapse_on = collapse_id , offset = 0 , limit = limit ,
16011610 uniques = None , with_cursor = None , stats = None , totals = None , op = 'AND'
16021611 )
@@ -1636,11 +1645,14 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
16361645
16371646 custom_facets = table_data ["facets" ]
16381647 fqset = ["{!tag=f0}" + rngfilt ]
1648+ #fqset = "{!tag=f0}" + rngfilt
16391649 colrngfilt = ""
16401650 caserngfilt = ""
16411651 seriesrngfilt = ""
1652+ #fqset=""
16421653 if len (current_filt_str ) > 0 :
1643- fqset .append ("{!tag=f1}(" + current_filt_str + ")" )
1654+ #fqset=fqset + "AND {!tag=f1}(" + current_filt_str + ")"
1655+ fqset .append ("{!tag=f1}(+" + current_filt_str + ")" )
16441656 custom_facets ["per_id_nf" ] = copy .deepcopy (table_data ["facets_not_filt" ]["per_id_nf" ])
16451657 with_filter = True
16461658
@@ -1695,8 +1707,8 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
16951707 custom_facets ["upstream_study_filter_cart" ] = copy .deepcopy (upstream_cart_facets ["upstream_study_filter_cart" ])
16961708 custom_facets ["upstream_study_filter_cart" ]["domain" ]["filter" ] = studyrngQ + no_tble_item_filt_str
16971709
1698- in_cart_domain_all = {"filter" : cart_query_str_all , "excludeTags" :"f1" } if with_filter else {"filter" : cart_query_str_all }
1699- in_filter_and_cart_domain_all = {"filter" : cart_query_str_all }
1710+ in_cart_domain_all = {"filter" : '(+' + cart_query_str_all + ')' , "excludeTags" :"f1" } if with_filter else {"filter" : '(+' + cart_query_str_all + ')' }
1711+ in_filter_and_cart_domain_all = {"filter" : '(+' + cart_query_str_all + ')' }
17001712
17011713
17021714
@@ -1710,9 +1722,9 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
17101722
17111723 if not (cart_query_str_studylvl == None ) and (len (cart_query_str_studylvl )> 0 ):
17121724
1713- in_cart_domain_studylvl = {"filter" : cart_query_str_studylvl , "excludeTags" : "f1" } if with_filter else {
1725+ in_cart_domain_studylvl = {"filter" : '(+' + cart_query_str_studylvl + ')' , "excludeTags" : "f1" } if with_filter else {
17141726 "filter" : cart_query_str_studylvl }
1715- in_filter_and_cart_domain_studylvl = {"filter" : cart_query_str_studylvl }
1727+ in_filter_and_cart_domain_studylvl = {"filter" : '(+' + cart_query_str_studylvl + ')' }
17161728
17171729 custom_facets ["series_in_filter_and_cart" ] = copy .deepcopy (cart_facets ["series_in_filter_and_cart" ])
17181730 custom_facets ["series_in_filter_and_cart" ]["field" ] = id
@@ -1819,7 +1831,7 @@ def get_table_data_with_cart_data(tabletype, sortarg, sortdir, current_filters,f
18191831 return [num_found , table_arr ]
18201832
18211833
1822- def get_cart_data_studylvl (filtergrp_list , partitions , limit , offset , length , mxseries ,results_lvl = 'StudyInstanceUID' , with_records = True ):
1834+ def get_cart_data_studylvl (filtergrp_list , partitions , limit , offset , length , mxseries ,results_lvl = 'StudyInstanceUID' , with_records = True , debug = False ):
18231835 aggregate_level = "StudyInstanceUID"
18241836 versions = ImagingDataCommonsVersion .objects .filter (
18251837 active = True
@@ -1857,7 +1869,7 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
18571869 solr_query = build_solr_query (
18581870 copy .deepcopy (filtergrp ),
18591871 with_tags_for_ex = False ,
1860- search_child_records_by = None
1872+ search_child_records_by = None , solr_default_op = 'AND'
18611873 )
18621874 query_set_for_filt = create_query_set (solr_query , aux_sources , image_source , all_ui_attrs , image_source , DataSetType )
18631875 query_set_for_filt = ['(' + filt + ')' if not filt [0 ] == '(' else filt for filt in query_set_for_filt ]
@@ -1880,11 +1892,12 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
18801892
18811893 serieslvl_found = False
18821894 studyidsinseries = {}
1895+ query_str_series_lvl = ''
18831896 if (len (partitions_series_lvl ) > 0 ):
18841897 query_str_series_lvl = create_cart_query_string (['' ], partitions_series_lvl , False )
18851898 if (len (query_str_series_lvl ) > 0 ):
18861899 solr_result_series_lvl = query_solr (
1887- collection = image_source_series .name , fields = field_list , query_string = query_str_series_lvl , fqs = None ,
1900+ collection = image_source_series .name , fields = field_list , query_string = None , fqs = [ query_str_series_lvl ] ,
18881901 limit = int (mxseries ), facets = custom_facets , sort = sortStr , counts_only = False , collapse_on = None ,
18891902 uniques = None , with_cursor = None , stats = None , totals = totals , op = 'AND'
18901903 )
@@ -1909,7 +1922,7 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
19091922 query_str = create_cart_query_string (query_list , partitions_study_lvl , False )
19101923 if len (query_str ) > 0 :
19111924 solr_result = query_solr (
1912- collection = image_source .name , fields = field_list , query_string = query_str , fqs = None , facets = custom_facets ,
1925+ collection = image_source .name , fields = field_list , query_string = None , fqs = [ query_str ] , facets = custom_facets ,
19131926 sort = sortStr , counts_only = False , collapse_on = None , uniques = None , with_cursor = None , stats = None ,
19141927 totals = ['SeriesInstanceUID' ], op = 'AND' , limit = int (limit ), offset = int (offset )
19151928 )
@@ -1976,7 +1989,9 @@ def get_cart_data_studylvl(filtergrp_list, partitions, limit, offset, length, mx
19761989 if ('crdcval' in row ):
19771990 row ['crdc_series_uuid' ] = row ['crdcval' ]
19781991
1979-
1992+ if debug :
1993+ solr_result ['response' ]['query_string' ] = query_str
1994+ solr_result ['response' ]['query_string_series_lvl' ] = query_str_series_lvl
19801995 return solr_result ['response' ]
19811996
19821997
@@ -2025,7 +2040,7 @@ def get_cart_data(filtergrp_list, partitions, field_list, limit, offset):
20252040
20262041 solr_result = query_solr (collection = image_source .name , fields = field_list , query_string = query_str , fqs = None ,
20272042 facets = None ,sort = None , counts_only = False ,collapse_on = 'SeriesInstanceUID' , offset = offset , limit = limit , uniques = None ,
2028- with_cursor = None , stats = None , totals = None , op = 'AND ' )
2043+ with_cursor = None , stats = None , totals = None , op = 'OR ' )
20292044
20302045 return solr_result ['response' ]
20312046
@@ -2044,31 +2059,31 @@ def filtergrp_to_sql(filtergrp_lst):
20442059 reformatted_fields = reformatted_fields
20452060 )
20462061 # The final cart SQL may involve several filters, so parameter names must not collide across their parameter sets.
2047- for param_list in filtersql ['params' ]:
2048- for param in param_list :
2049- param_name = param ['name' ]
2050- if param_name in used_params :
2051- param_try = param_name
2052- safe_name_found = False
2053- mtch = re .search (r'_\d+$' , param_name )
2054- if mtch == None :
2062+ for param in filtersql ['params' ]:
2063+ #for param in param_list:
2064+ param_name = param ['name' ]
2065+ if param_name in used_params :
2066+ param_try = param_name
2067+ safe_name_found = False
2068+ mtch = re .search (r'_\d+$' , param_name )
2069+ if mtch == None :
2070+ break
2071+ numtry = int (param_name [mtch .regs [0 ][0 ]+ 1 :])
2072+ while not safe_name_found :
2073+ param_try = param_name [:mtch .regs [0 ][0 ]+ 1 ] + str (numtry )
2074+ if not param_try in used_params :
2075+ param ['name' ]= param_try
2076+ used_params [param_try ]= 1
2077+ safe_name_found = True
20552078 break
2056- numtry = int (param_name [mtch .regs [0 ][0 ]+ 1 :])
2057- while not safe_name_found :
2058- param_try = param_name [:mtch .regs [0 ][0 ]+ 1 ] + str (numtry )
2059- if not param_try in used_params :
2060- param ['name' ]= param_try
2061- used_params [param_try ]= 1
2062- safe_name_found = True
2063- break
2064- numtry = numtry + 1
2065- if ('intersect_clause' in filtersql ):
2066- filtersql ['intersect_clause' ] = filtersql ['intersect_clause' ].replace (param_name , param_try )
2067- if ('query_filters' in filtersql ):
2068- for filtindex in range (len (filtersql ['query_filters' ])):
2069- filtersql ['query_filters' ][filtindex ] = filtersql ['query_filters' ][filtindex ].replace (param_name , param_try )
2070- else :
2071- used_params [param_name ]= 1
2079+ numtry = numtry + 1
2080+ if ('intersect_clause' in filtersql ):
2081+ filtersql ['intersect_clause' ] = filtersql ['intersect_clause' ].replace (param_name , param_try )
2082+ if ('query_filters' in filtersql ):
2083+ for filtindex in range (len (filtersql ['query_filters' ])):
2084+ filtersql ['query_filters' ][filtindex ] = filtersql ['query_filters' ][filtindex ].replace (param_name , param_try )
2085+ else :
2086+ used_params [param_name ]= 1
20722087 filtersA .append (filtersql )
20732088 return filtersA
20742089
@@ -2924,12 +2939,13 @@ def get_bq_metadata(filters, fields, data_version, sources_and_attrs=None, group
29242939 fields .extend (['"{}" AS {}' .format (static_fields [x ],x ) for x in static_fields ])
29252940 if reformatted_fields :
29262941 fields = reformatted_fields
2942+
29272943 for_union .append (query_base .format (
29282944 field_clause = "," .join (fields ),
29292945 table_clause = "`{}` {}" .format (table_info [image_table ]['name' ], table_info [image_table ]['alias' ]),
29302946 join_clause = """ """ .join (joins ),
2931- where_clause = (" AND ({})" .format (( " AND " .join (query_filters ) if len ( query_filters ) else "" ) if len (filters ) else "" )) if len ( filters ) else "" ,
2932- intersect_clause = "{}" .format ("" if not len (intersect_statements ) else "{}{}" .format (
2947+ where_clause = (" AND ({})" .format (" AND " .join (query_filters )) ) if len (query_filters ) else "" ,
2948+ intersect_clause = "AND {}" .format ("" if not len (intersect_statements ) else "{}{}" .format (
29332949 " AND " if len (non_related_filters ) and len (query_filters ) else "" , "{} IN ({})" .format (
29342950 child_record_search_field , intersect_clause
29352951 ))),
0 commit comments