Skip to content

Commit 3694110

Browse files
authored
Merge pull request #1457 from ImagingDataCommons/idc-prod-sp
Release 45 / v21. Included tickets: - #1438 - #1453 - #1370 - #1446 - ImagingDataCommons/IDC-WebApp1416 - #828 - #990 - #1406 - #1167 - #1441 - #1454 - #1352 - #1128
2 parents 53b0b25 + b5fad8a commit 3694110

File tree

10 files changed

+326
-282
lines changed

10 files changed

+326
-282
lines changed

etl/etl.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@
6464
"program", "access", "date_updated", "tcia_wiki_collection_id", "license_short_name", "active"
6565
])}
6666

67+
TOKENIZED_FIELDS = ["PatientID", "SeriesInstanceUID", "StudyInstanceUID"]
68+
6769
ranges_needed = {
6870
'wbc_at_diagnosis': 'by_200',
6971
'event_free_survival_time_in_days': 'by_500',
@@ -424,7 +426,13 @@ def create_solr_params(schema_src, solr_src):
424426
schema = BigQuerySupport.get_table_schema(schema_src[0],schema_src[1],schema_src[2])
425427
solr_schema = []
426428
solr_index_strings = []
427-
SCHEMA_BASE = '{"add-field": %s}'
429+
field_types = ''
430+
add_copy_field = ''
431+
SCHEMA_BASE = '{{field_types}add-field": {fields}{add_copy_field}'
432+
if len(TOKENIZED_FIELDS):
433+
field_types = '"add-field-type": { "name":"tokenizedText", "class":"solr.TextField", "analyzer" : { "tokenizer": { "name":"nGram" }}}, '
434+
copy_fields = ",".join(['{"source":"{field}","dest":"{field{}_tokenized"}'.format(field) for field in TOKENIZED_FIELDS])
435+
add_copy_field = ', "add-copy-field": [{copy_fields}]'.format(copy_fields)
428436
CORE_CREATE_STRING = "sudo -u solr /opt/bitnami/solr/bin/solr create -c {solr_src} -s 2 -rf 2"
429437
SCHEMA_STRING = "curl -u {solr_user}:{solr_pwd} -X POST -H 'Content-type:application/json' --data-binary '{schema}' https://localhost:8983/solr/{solr_src}/schema --cacert solr-ssl.pem"
430438
INDEX_STRING = "curl -u {solr_user}:{solr_pwd} -X POST 'https://localhost:8983/solr/{solr_src}/update?commit=yes{params}' --data-binary @{file_name}.csv -H 'Content-type:application/csv' --cacert solr-ssl.pem"
@@ -441,11 +449,22 @@ def create_solr_params(schema_src, solr_src):
441449
"stored": True
442450
}
443451
solr_schema.append(field_schema)
452+
if TOKENIZED_FIELDS and field['name'] in TOKENIZED_FIELDS:
453+
solr_schema.append({
454+
"name": "{}_tokenized".format(field["name"]),
455+
"type": "tokenizedText",
456+
"multiValued": False if field['name'] in SOLR_SINGLE_VAL.get(solr_src.aggregate_level,
457+
{}) else True,
458+
"stored": True
459+
})
444460
if field_schema['multiValued']:
445461
solr_index_strings.append("f.{}.split=true&f.{}.separator=|".format(field['name'],field['name']))
446462

447463
with open("{}_solr_cmds.txt".format(solr_src.name), "w") as cmd_outfile:
448-
schema_array = SCHEMA_BASE % solr_schema
464+
schema_array = SCHEMA_BASE.format(
465+
field_types=field_types,
466+
add_copy_field=add_copy_field
467+
)
449468
params = "&{}".format("&".join(solr_index_strings))
450469
cmd_outfile.write(CORE_CREATE_STRING.format(solr_src=solr_src.name))
451470
cmd_outfile.write("\n\n")
@@ -662,11 +681,16 @@ def update_display_values(attr, updates):
662681
logger.info("[STATUS] Added {} display values.".format(str(len(new_vals))))
663682

664683

665-
def load_tooltips(source_objs, attr_name, source_tooltip, obj_attr=None):
684+
def load_tooltips(source_objs, attr_name, source_tooltip, obj_id_col=None):
666685
try:
667686
attr = Attribute.objects.get(name=attr_name, active=True)
668-
if not obj_attr:
669-
obj_attr = attr_name
687+
# In some cases, the data sourcing the tooltip does not have an ID column with a name which matches
688+
# the attribute name (e.g. in Collections, analysis results and collections both have a collection_id,
689+
# but in Attributes, analysis_result_id and collection_id are distinct attributes).
690+
# Use obj_id_col to specify the column containing the ID of the value to associate with the tooltip source in
691+
# the case the attribute name is different from the source object's column ID
692+
if not obj_id_col:
693+
obj_id_col = attr_name
670694

671695
tips = Attribute_Tooltips.objects.select_related('attribute').filter(attribute=attr)
672696

@@ -677,7 +701,7 @@ def load_tooltips(source_objs, attr_name, source_tooltip, obj_attr=None):
677701
extent_tooltips[tip.attribute.id] = []
678702
extent_tooltips[tip.attribute.id].append(tip.tooltip_id)
679703

680-
tooltips_by_val = {x[obj_attr]: {'tip': x[source_tooltip]} for x in source_objs.values() if x[obj_attr] != '' and x[obj_attr] is not None}
704+
tooltips_by_val = {x[obj_id_col]: {'tip': x[source_tooltip]} for x in source_objs.values() if x[obj_id_col] != '' and x[obj_id_col] is not None}
681705

682706
new_tooltips = []
683707
updated_tooltips = []
@@ -715,7 +739,7 @@ def load_display_vals(filename):
715739
attr_vals_file = open(filename, "r")
716740

717741
for line in csv_reader(attr_vals_file):
718-
if 'display_value' in line:
742+
if 'Raw' in line:
719743
continue
720744
if line[0] not in display_vals:
721745
display_vals[line[0]] = {
@@ -800,7 +824,11 @@ def main():
800824
if len(args.display_vals):
801825
dvals = load_display_vals(args.display_vals)
802826
for attr in dvals:
803-
update_display_values(Attribute.objects.get(name=attr), dvals[attr]['vals'])
827+
try:
828+
attr_obj = Attribute.objects.get(name=attr)
829+
update_display_values(attr_obj, dvals[attr]['vals'])
830+
except ObjectDoesNotExist as e:
831+
print("[WARNING] Attr {} not found - display values will not be updated! Rerun ETL if this is not expected.".format(attr))
804832

805833
# Solr commands are automatically output for full ETL; the step below is for outside-of-ETL executions
806834
if len(ETL_CONFIG):

idc/settings.py

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import sys
2626
import dotenv
2727
from socket import gethostname, gethostbyname
28-
28+
import google.cloud.logging
2929

3030
SECURE_LOCAL_PATH = os.environ.get('SECURE_LOCAL_PATH', '')
3131

@@ -337,11 +337,40 @@
337337

338338
TEST_RUNNER = 'django.test.runner.DiscoverRunner'
339339

340-
# A sample logging configuration. The only tangible logging
341-
# performed by this configuration is to send an email to
342-
# the site admins on every HTTP 500 error when DEBUG=False.
343-
# See http://docs.djangoproject.com/en/dev/topics/logging for
344-
# more details on how to customize your logging configuration.
340+
handler_set = ['console_dev', 'console_prod']
341+
handlers = {
342+
'mail_admins': {
343+
'level': 'ERROR',
344+
'filters': ['require_debug_false'],
345+
'class': 'django.utils.log.AdminEmailHandler'
346+
},
347+
'console_dev': {
348+
'level': 'DEBUG',
349+
'filters': ['require_debug_true'],
350+
'class': 'logging.StreamHandler',
351+
'formatter': 'verbose',
352+
},
353+
'console_prod': {
354+
'level': 'DEBUG',
355+
'filters': ['require_debug_false'],
356+
'class': 'logging.StreamHandler',
357+
'formatter': 'simple',
358+
},
359+
}
360+
361+
if IS_APP_ENGINE:
362+
# We need to hook up Python logging to Google Cloud Logging for AppEngine (or nothing will be logged)
363+
client = google.cloud.logging_v2.Client()
364+
client.setup_logging()
365+
handler_set.append('stackdriver')
366+
handlers['stackdriver'] = {
367+
'level': 'DEBUG',
368+
'filters': ['require_debug_false'],
369+
'class': 'google.cloud.logging_v2.handlers.CloudLoggingHandler',
370+
'client': client,
371+
'formatter': 'verbose'
372+
}
373+
345374
LOGGING = {
346375
'version': 1,
347376
'disable_existing_loggers': False,
@@ -355,57 +384,33 @@
355384
},
356385
'formatters': {
357386
'verbose': {
358-
'format': '[%(levelname)s] @%(asctime)s in %(module)s/%(process)d/%(thread)d - %(message)s'
387+
'format': '[%(name)s] [%(levelname)s] @%(asctime)s in %(module)s/%(process)d/%(thread)d - %(message)s'
359388
},
360389
'simple': {
361-
'format': '[%(levelname)s] @%(asctime)s in %(module)s: %(message)s'
390+
'format': '[%(name)s] [%(levelname)s] @%(asctime)s in %(module)s: %(message)s'
362391
},
363392
},
364-
'handlers': {
365-
'mail_admins': {
366-
'level': 'ERROR',
367-
'filters': ['require_debug_false'],
368-
'class': 'django.utils.log.AdminEmailHandler'
369-
},
370-
'console_dev': {
371-
'level': 'DEBUG',
372-
'filters': ['require_debug_true'],
373-
'class': 'logging.StreamHandler',
374-
'formatter': 'verbose',
375-
},
376-
'console_prod': {
377-
'level': 'DEBUG',
378-
'filters': ['require_debug_false'],
379-
'class': 'logging.StreamHandler',
380-
'formatter': 'simple',
381-
},
393+
'handlers': handlers,
394+
'root': {
395+
'level': 'INFO',
396+
'handlers': handler_set
382397
},
383398
'loggers': {
384-
'django.request': {
385-
'handlers': ['console_dev', 'console_prod'],
386-
'level': 'DEBUG',
387-
'propagate': False,
388-
},
389-
'main_logger': {
390-
'handlers': ['console_dev', 'console_prod'],
391-
'level': 'DEBUG',
392-
'propagate': True,
399+
'': {
400+
'level': 'INFO',
401+
'handlers': handler_set,
402+
'propagate': True
393403
},
394-
'axes': {
395-
'handlers': ['console_dev', 'console_prod'],
396-
'level': 'DEBUG',
397-
'propagate': True,
404+
'django': {
405+
'level': 'INFO',
406+
'handlers': handler_set,
407+
'propagate': False
398408
},
399-
'allauth': {
400-
'handlers': ['console_dev', 'console_prod'],
401-
'level': 'DEBUG',
402-
'propagate': True,
403-
},
404-
'google_helpers': {
405-
'handlers': ['console_dev', 'console_prod'],
406-
'level': 'DEBUG',
409+
'django.request': {
410+
'handlers': ['mail_admins'],
411+
'level': 'ERROR',
407412
'propagate': True,
408-
},
413+
}
409414
},
410415
}
411416

idc/settings_unit_test.py

Lines changed: 52 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import sys
2323
import dotenv
2424
from socket import gethostname, gethostbyname
25+
import google.cloud.logging
2526

2627
SECURE_LOCAL_PATH = os.environ.get('SECURE_LOCAL_PATH', '')
2728

@@ -260,11 +261,40 @@
260261

261262
TEST_RUNNER = 'django.test.runner.DiscoverRunner'
262263

263-
# A sample logging configuration. The only tangible logging
264-
# performed by this configuration is to send an email to
265-
# the site admins on every HTTP 500 error when DEBUG=False.
266-
# See http://docs.djangoproject.com/en/dev/topics/logging for
267-
# more details on how to customize your logging configuration.
264+
handler_set = ['console_dev', 'console_prod']
265+
handlers = {
266+
'mail_admins': {
267+
'level': 'ERROR',
268+
'filters': ['require_debug_false'],
269+
'class': 'django.utils.log.AdminEmailHandler'
270+
},
271+
'console_dev': {
272+
'level': 'DEBUG',
273+
'filters': ['require_debug_true'],
274+
'class': 'logging.StreamHandler',
275+
'formatter': 'verbose',
276+
},
277+
'console_prod': {
278+
'level': 'DEBUG',
279+
'filters': ['require_debug_false'],
280+
'class': 'logging.StreamHandler',
281+
'formatter': 'simple',
282+
},
283+
}
284+
285+
if IS_APP_ENGINE:
286+
# We need to hook up Python logging to Google Cloud Logging for AppEngine (or nothing will be logged)
287+
client = google.cloud.logging_v2.Client()
288+
client.setup_logging()
289+
handler_set.append('stackdriver')
290+
handlers['stackdriver'] = {
291+
'level': 'DEBUG',
292+
'filters': ['require_debug_false'],
293+
'class': 'google.cloud.logging_v2.handlers.CloudLoggingHandler',
294+
'client': client,
295+
'formatter': 'verbose'
296+
}
297+
268298
LOGGING = {
269299
'version': 1,
270300
'disable_existing_loggers': False,
@@ -278,57 +308,33 @@
278308
},
279309
'formatters': {
280310
'verbose': {
281-
'format': '[%(levelname)s] @%(asctime)s in %(module)s/%(process)d/%(thread)d - %(message)s'
311+
'format': '[%(name)s] [%(levelname)s] @%(asctime)s in %(module)s/%(process)d/%(thread)d - %(message)s'
282312
},
283313
'simple': {
284-
'format': '[%(levelname)s] @%(asctime)s in %(module)s: %(message)s'
314+
'format': '[%(name)s] [%(levelname)s] @%(asctime)s in %(module)s: %(message)s'
285315
},
286316
},
287-
'handlers': {
288-
'mail_admins': {
289-
'level': 'ERROR',
290-
'filters': ['require_debug_false'],
291-
'class': 'django.utils.log.AdminEmailHandler'
292-
},
293-
'console_dev': {
294-
'level': 'DEBUG',
295-
'filters': ['require_debug_true'],
296-
'class': 'logging.StreamHandler',
297-
'formatter': 'verbose',
298-
},
299-
'console_prod': {
300-
'level': 'DEBUG',
301-
'filters': ['require_debug_false'],
302-
'class': 'logging.StreamHandler',
303-
'formatter': 'simple',
304-
},
317+
'handlers': handlers,
318+
'root': {
319+
'level': 'INFO',
320+
'handlers': handler_set
305321
},
306322
'loggers': {
323+
'': {
324+
'level': 'INFO',
325+
'handlers': handler_set,
326+
'propagate': True
327+
},
328+
'django': {
329+
'level': 'INFO',
330+
'handlers': handler_set,
331+
'propagate': False
332+
},
307333
'django.request': {
308334
'handlers': ['mail_admins'],
309335
'level': 'ERROR',
310336
'propagate': True,
311-
},
312-
'main_logger': {
313-
'handlers': ['console_dev', 'console_prod'],
314-
'level': 'DEBUG',
315-
'propagate': True,
316-
},
317-
'allauth': {
318-
'handlers': ['console_dev', 'console_prod'],
319-
'level': 'DEBUG',
320-
'propagate': True,
321-
},
322-
'google_helpers': {
323-
'handlers': ['console_dev', 'console_prod'],
324-
'level': 'DEBUG',
325-
'propagate': True,
326-
},
327-
'data_upload': {
328-
'handlers': ['console_dev', 'console_prod'],
329-
'level': 'DEBUG',
330-
'propagate': True,
331-
},
337+
}
332338
},
333339
}
334340

0 commit comments

Comments
 (0)