Skip to content

Commit 2fbce5e

Browse files
authored
Merge pull request #382 from ImagingDataCommons/idc-prod-wc
Idc prod wc
2 parents afd9771 + cbc5696 commit 2fbce5e

File tree

7 files changed

+47
-77
lines changed

7 files changed

+47
-77
lines changed

cohorts/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ def get_operator(self):
446446
return self.OP_TO_STR[self.operator]
447447

448448
def get_filter(self):
449-
if self.operator not in [self.OR, self.BTW]:
449+
if self.operator not in self.OP_TO_SUFFIX and self.operator != self.OR:
450450
return {
451451
self.get_attr_name(): { 'op': self.get_operator(), 'values': self.value.split(self.value_delimiter) }
452452
}
@@ -463,7 +463,7 @@ def get_filter_flat(self):
463463
}
464464

465465
def __repr__(self):
466-
if self.operator not in [self.OR, self.BTW]:
466+
if self.operator not in self.OP_TO_SUFFIX and self.operator != self.OR:
467467
return "{ %s: {'op': %s, 'values': %s }" % (self.get_attr_name(), self.get_operator(), "[{}]".format(self.value))
468468
return "{ %s }" % ("\"{}\": [{}]".format(self.get_attr_name(), self.value))
469469

cohorts/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def _get_cohort_stats(cohort_id=0, filters=None, sources=None):
4545
'PatientID': 0,
4646
'StudyInstanceUID': 0,
4747
'SeriesInstanceUID': 0,
48+
'total_instance_size': 0,
4849
'collections': []
4950
}
5051

cohorts/utils_api_v2.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,16 @@
1717

1818
import logging
1919
import copy
20-
import json
2120

2221
from django.conf import settings
2322
from idc_collections.models import ImagingDataCommonsVersion
2423
from idc_collections.collex_metadata_utils import get_bq_metadata, get_bq_string
25-
from google_helpers.bigquery.bq_support import BigQuerySupport
2624

2725
logger = logging.getLogger('main_logger')
2826
DENYLIST_RE = settings.DENYLIST_RE
2927

28+
NUMERIC_OPS = ('_btw', '_ebtw', '_btwe', '_ebtwe', '_gte', '_lte', '_gt', '_lt', '_eq')
29+
3030
# All the filter values in the filterSet of a cohort are saved as strings. Particularly, a
3131
# filter value like [[35,45], [65,75]] is returned as ["[35,45]","[65,75]"] when you get
3232
# the filterSet. This script converts it back to numeric.
@@ -38,7 +38,10 @@ def to_numeric_list(item):
3838
# If the item is not a list, then we assume it is single string value or already a numeric
3939
elif isinstance(item, str):
4040
# if it is a string, then convert it to a float
41-
item = float(item)
41+
try:
42+
item = int(item)
43+
except ValueError:
44+
item = float(item)
4245
return item
4346

4447

@@ -71,7 +74,7 @@ def _cohort_query_api(request, cohort, data, info):
7174
for collection in filters['collection_id']:
7275
collections.append(collection.lower().replace('-', '_'))
7376
filters['collection_id'] = collections
74-
if filter.endswith(('_lt', '_lte', '_ebtw', '_ebtwe', '_btw', '_btwe', '_gte' '_gt')):
77+
if filter.endswith(NUMERIC_OPS):
7578
filters[filter] = to_numeric_list(value)
7679

7780
data_version = cohort.get_data_versions()

cohorts/views/views_api_v2.py

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,11 @@
1919

2020
import json
2121
import logging
22-
import copy
2322

24-
from django.contrib import messages
2523
from django.contrib.auth.models import User
2624
from django.conf import settings
2725
from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
2826
from django.http import HttpResponse, JsonResponse
29-
from django.shortcuts import render, redirect
3027
from django.views.decorators.csrf import csrf_protect, csrf_exempt
3128
from django.views.decorators.http import require_http_methods
3229
from ..decorators import api_auth
@@ -37,6 +34,7 @@
3734
_cohort_preview_query_api, _cohort_query_api
3835
from ..views.views import _save_cohort,_delete_cohort
3936

37+
NUMERIC_OPS = ('_btw', '_ebtw', '_btwe', '_ebtwe', '_gte', '_lte', '_gt', '_lt', '_eq')
4038
BQ_ATTEMPT_MAX = 10
4139

4240
debug = settings.DEBUG # RO global for this file
@@ -67,7 +65,7 @@ def save_cohort_api(request):
6765
return JsonResponse(response)
6866

6967
data = body["request_data"]
70-
name = data['name']
68+
cohort_name = data['name']
7169
description = data['description']
7270
filters = data['filters']
7371
# Create a cohort only against the current version
@@ -76,31 +74,28 @@ def save_cohort_api(request):
7674
# We first need to convert the filters to a form accepted by _save_cohorts
7775
filters_by_name = {}
7876
for filter, value in filters.items():
79-
if filter.endswith(('_lt', '_lte', '_ebtw', '_ebtwe', '_btw', '_btwe', '_gte' '_gt')):
80-
name = filter.rsplit('_', 1)[0]
81-
op = filter.rsplit('_', 1)[-1]
82-
filters_by_name[name] = dict(
77+
if filter.endswith(NUMERIC_OPS):
78+
attribute_name = filter.rsplit('_', 1)[0]
79+
op = filter.rsplit('_', 1)[-1].upper()
80+
filters_by_name[attribute_name] = dict(
8381
op= op,
8482
values = value
8583
)
8684
else:
8785
filters_by_name[filter] = value
8886
filters_by_id = {}
89-
for attr in Attribute.objects.filter(name__in=list(filters.keys())).values('id', 'name'):
90-
filters_by_id[str(attr['id'])] = filters[attr['name']]
91-
response = _save_cohort(user, filters=filters_by_id, name=name, desc=description, version=version,
87+
for attr in Attribute.objects.filter(name__in=list(filters_by_name.keys())).values('id', 'name'):
88+
filters_by_id[str(attr['id'])] = filters_by_name[attr['name']]
89+
response = _save_cohort(user, filters=filters_by_id, name=cohort_name, desc=description, version=version,
9290
no_stats=version.active==False)
9391
cohort_id = response['cohort_id']
9492
idc_data_version = Cohort.objects.get(id=cohort_id).get_data_versions()[0].version_number
95-
# if request.GET['return_filter'] == 'True':
96-
# response["filterSet"] = get_filterSet_api(cohort)
97-
# response["filterSet"] = get_filterSet_api(cohort)
9893

9994
response['filterSet'] = {'idc_data_version': idc_data_version, 'filters': response.pop('filters')}
10095

10196

10297
for filter, value in response['filterSet']['filters'].items():
103-
if filter.endswith(('_lt', '_lte', '_ebtw', '_ebtwe', '_btw', '_btwe', '_gte' '_gt')):
98+
if filter.endswith(NUMERIC_OPS):
10499
response['filterSet']['filters'][filter] = to_numeric_list(value)
105100

106101
cohort_properties = dict(
@@ -130,11 +125,6 @@ def cohort_query_api(request, cohort_id=0):
130125
}
131126
return JsonResponse(info)
132127

133-
# if cohort_id == 0:
134-
# messages.error(request, 'Cohort requested does not exist.')
135-
# return redirect('/user_landing')
136-
137-
# print(request.GET.get('email', ''))
138128
try:
139129
cohort = Cohort.objects.get(id=cohort_id)
140130
except ObjectDoesNotExist as e:
@@ -150,9 +140,9 @@ def cohort_query_api(request, cohort_id=0):
150140
body = json.loads(request.body.decode('utf-8'))
151141
try:
152142
user = User.objects.get(email=body['email'])
153-
except:
143+
except Exception as exc:
154144
logger.error("[ERROR] While trying to save cohort: ")
155-
logger.exception(e)
145+
logger.exception(exc)
156146
info = {
157147
"message": f"{body['email']} is not a known user",
158148
"code": 401,
@@ -248,7 +238,9 @@ def cohort_list_api(request):
248238
"filterSet": get_filterSet_api(cohort)
249239
}
250240
for filter, value in cohortMetadata['filterSet']['filters'].items():
251-
if filter.endswith(('_lt', '_lte', '_ebtw', '_ebtwe', '_btw', '_btwe', '_gte' '_gt')):
241+
# if filter.endswith(('_lt', '_lte', '_ebtw', '_ebtwe', '_btw', '_btwe', '_gte' '_gt')):
242+
# if filter.endswith(('_lt', '_lte', '_ebtw', '_ebtwe', '_btw', '_btwe', '_gte' '_gt')):
243+
if filter.endswith(NUMERIC_OPS):
252244
cohortMetadata['filterSet']['filters'][filter] = to_numeric_list(value)
253245
cohortList.append(cohortMetadata)
254246

@@ -274,9 +266,9 @@ def delete_cohort_api(request):
274266
body = json.loads(request.body.decode('utf-8'))
275267
try:
276268
user = User.objects.get(email=body['email'])
277-
except:
269+
except Exception as exc:
278270
logger.error("[ERROR] While trying to save cohort: ")
279-
logger.exception(e)
271+
logger.exception(exc)
280272
response = {
281273
"message": f"{body['email']} is not a known user",
282274
"code": 401,

google_helpers/bigquery/bq_support.py

Lines changed: 11 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ def build_bq_filter_and_params(filters, comb_with='AND', param_suffix=None, with
698698
# Standard query filters
699699
for attr, values in list(other_filters.items()):
700700
is_btw = re.search('_e?btwe?', attr.lower()) is not None
701-
attr_name = attr[:attr.rfind('_')] if re.search('_[gl]te?|_e?btwe?', attr) else attr
701+
attr_name = attr[:attr.rfind('_')] if re.search('_[gl]te?|_e?btwe?|_eq', attr) else attr
702702
if attr_name not in attr_filters:
703703
attr_filters[attr_name] = {
704704
'OP': 'OR',
@@ -745,15 +745,12 @@ def build_bq_filter_and_params(filters, comb_with='AND', param_suffix=None, with
745745
# Single scalar param
746746
query_param['parameterValue']['value'] = values[0]
747747
if query_param['parameterType']['type'] == 'STRING':
748-
if '%' in values[0] or case_insens:
749-
filter_string += "LOWER({}{}) LIKE LOWER(@{})".format('' if not field_prefix else field_prefix, attr_name, param_name)
750-
else:
751-
filter_string += "{}{} = @{}".format('' if not field_prefix else field_prefix, attr,
752-
param_name)
748+
filter_string += "{}{} = @{}".format('' if not field_prefix else field_prefix, attr,
749+
param_name)
753750
elif query_param['parameterType']['type'] == 'NUMERIC':
754751
operator = "{}{}".format(
755752
">" if re.search(r'_gte?',attr) else "<" if re.search(r'_lte?',attr) else "",
756-
'=' if re.search(r'_[lg]te',attr) or not re.search(r'_[lg]',attr) else ''
753+
'=' if re.search(r'_[lg]te',attr) or not re.search(r'_[lg]',attr) or attr.endswith('_eq') else ''
757754
)
758755
filter_string += "{}{} {} @{}".format(
759756
'' if not field_prefix else field_prefix, attr_name,
@@ -844,41 +841,14 @@ def build_bq_filter_and_params(filters, comb_with='AND', param_suffix=None, with
844841
filter_string += " OR ".join(btw_filter_strings)
845842
query_param = query_params
846843
else:
847-
# String param values that include the % or _char must be LIKE'd. We ignore that \% or \_
848-
# don't need to be LIKE'd.
849-
query_params = []
850-
strings_filter_string = []
851-
values_copy = copy.deepcopy(values)
852-
if parameter_type == 'STRING':
853-
for index, value in enumerate(values_copy):
854-
if '%' in value or '_' in value:
855-
strings_filter_string.append(
856-
"LOWER({}{}) LIKE LOWER(@{}_{})".format('' if not field_prefix else field_prefix, attr, param_name, index)
857-
)
858-
query_param_like = copy.deepcopy(query_param)
859-
query_param_like['name'] = f'{param_name}_{index}'
860-
query_param_like['parameterType']['type'] = 'STRING'
861-
query_param_like['parameterValue']['value'] = value.lower()
862-
query_params.append(query_param_like)
863-
values_copy[index] = ''
864-
865-
# Squeeze out empty strings
866-
values_copy = [value for value in values_copy if value]
867-
if values_copy:
868-
# Simple array param
869-
query_param['parameterType']['type'] = "ARRAY"
870-
query_param['parameterType']['arrayType'] = {
871-
'type': parameter_type
872-
}
873-
874-
875-
query_param['parameterValue'] = {'arrayValues': [{'value': x.lower() if parameter_type == 'STRING' else x} for x in values_copy]}
876-
877-
strings_filter_string.append("LOWER({}{}) IN UNNEST(@{})".format('' if not field_prefix else field_prefix, attr, param_name))
878-
query_params.append(query_param)
844+
# Simple array param
845+
query_param['parameterType']['type'] = "ARRAY"
846+
query_param['parameterType']['arrayType'] = {
847+
'type': parameter_type
848+
}
849+
query_param['parameterValue'] = {'arrayValues': [{'value': x.lower() if parameter_type == 'STRING' else x} for x in values]}
879850

880-
query_param = query_params
881-
filter_string += ' OR '.join(strings_filter_string)
851+
filter_string += "LOWER({}{}) IN UNNEST(@{})".format('' if not field_prefix else field_prefix, attr, param_name)
882852

883853
if with_count_toggle:
884854
filter_string = "({}) OR @{}_filtering = 'not_filtering'".format(filter_string,param_name)

idc_collections/collex_metadata_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,8 @@ def _build_attr_by_source(attrs, data_version, source_type=DataSource.BIGQUERY,
188188

189189
for attr in attrs:
190190
stripped_attr = attr if (not '_' in attr) else \
191-
attr if not attr.rsplit('_', 1)[1] in ['gt', 'gte','ebtwe','ebtw','btwe', 'btw', 'lte', 'lt'] else \
192-
attr.rsplit('_', 1)[0]
191+
attr if not attr.rsplit('_', 1)[1] in ['gt', 'gte','ebtwe','ebtw','btwe', 'btw', 'lte', 'lt', 'eq'] else \
192+
attr.rsplit('_', 1)[0]
193193

194194
for id, source in attr_data['sources'].items():
195195
if stripped_attr in source['list']:

idc_collections/views/views_api_v2.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
DENYLIST_RE = settings.DENYLIST_RE
3232

33+
NUMERIC_OPS = ('_btw', '_ebtw', '_btwe', '_ebtwe', '_gte', '_lte', '_gt', '_lt', '_eq')
3334
# Return a list of defined IDC versions
3435
@api_auth
3536
@require_http_methods(["GET"])
@@ -152,12 +153,15 @@ def attributes_list_api(request):
152153
# "active": attribute.active,
153154
"units": attribute.units,
154155
}
155-
attributes_info.append(attribute_info)
156156
if attribute_info['data_type'] == 'Continuous Numeric':
157-
for suffix in ['lt', 'lte', 'btw', 'ebtw', 'ebtwe', 'btwe', 'gte', 'gt']:
157+
# for suffix in ['lt', 'lte', 'btw', 'ebtw', 'ebtwe', 'btwe', 'gte', 'gt', 'eq']:
158+
for suffix in NUMERIC_OPS:
158159
attribute_info_copy = dict(attribute_info)
159-
attribute_info_copy['name'] = '{}_{}'.format(attribute.name, suffix)
160+
attribute_info_copy['name'] = '{}{}'.format(attribute.name, suffix)
160161
attributes_info.append(attribute_info_copy)
162+
else:
163+
attributes_info.append(attribute_info)
164+
161165
data_source = {
162166
"data_source": source.name,
163167
'filters': attributes_info

0 commit comments

Comments
 (0)