-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
4015 lines (3600 loc) · 198 KB
/
app.py
File metadata and controls
4015 lines (3600 loc) · 198 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import datetime
import redis
import glob
import os
import sys
import logging
import urllib.request
import requests
import re
import json
import pandas
import shutil
from uuid import UUID
import csv
import time
from operator import xor
from threading import Thread
from enum import Enum
import werkzeug.exceptions
from hubmap_sdk import EntitySdk, sdk_helper
from apscheduler.schedulers.background import BackgroundScheduler
from neo4j.exceptions import TransactionError, Neo4jError
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.triggers.date import DateTrigger
# Don't confuse urllib (Python native library) with urllib3 (3rd-party library, requests also uses urllib3)
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import argparse
from pathlib import Path
from flask import Flask, g, jsonify, abort, request, json, Response
from flask_cors import CORS
from flask_mail import Mail, Message
from dataset_helper_object import DatasetHelper
# HuBMAP commons
from hubmap_commons import neo4j_driver
from hubmap_commons.hm_auth import AuthHelper, secured
from hubmap_commons.autherror import AuthError
from hubmap_commons.exceptions import HTTPException
from hubmap_commons import string_helper
from hubmap_commons.string_helper import isBlank
from hubmap_commons import net_helper
from hubmap_commons import file_helper as commons_file_helper
# Should be deprecated/refactored but still in use
from hubmap_commons.hubmap_const import HubmapConst
# Local modules
from sample_helper import SampleHelper
from ingest_file_helper import IngestFileHelper
from file_upload_helper import UploadFileHelper
from prov_schema_helper import ProvenanceSchemaHelper
import app_manager
from dataset import Dataset
from datacite_doi_helper_object import DataCiteDoiHelper
from api.datacite_api import DataciteApiException
from app_utils.request_validation import require_json
from app_utils.error import unauthorized_error, not_found_error, internal_server_error, bad_request_error, forbidden_error
from app_utils.misc import __get_dict_prop, ResponseException
from app_utils.entity import __get_entity, get_entity_type_instanceof
from werkzeug import utils
from routes.auth import auth_blueprint
from routes.file import file_blueprint
from routes.assayclassifier import bp as assayclassifier_blueprint
from routes.validation import validation_blueprint
from routes.datasets_bulk_submit import datasets_bulk_submit_blueprint, DatasetHelper as ds_helper
from routes.privs import privs_blueprint
from ingest_validation_tools import schema_loader
from ingest_validation_tools.local_validation import table_validator
from ingest_validation_tools import validation_utils as iv_utils
# Set logging format and level (default is warning)
# All the API logging is forwarded to the uWSGI server and gets written into the log file `uwsgi-ingest-api.log`
# Log rotation is handled via logrotate on the host system with a configuration file
# Do NOT handle log file and rotation via the Python logging to avoid issues with multi-worker processes
logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
                    level=logging.INFO,
                    datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger(__name__)
# Specify the absolute path of the instance folder and use the config file relative to the instance path
app = Flask(__name__,
            instance_path=os.path.join(os.path.abspath(os.path.dirname(__file__)), 'instance'),
            instance_relative_config=True)
app.config.from_pyfile('app.cfg')
# Attach the route groups implemented in the routes/ package
app.register_blueprint(auth_blueprint)
app.register_blueprint(file_blueprint)
app.register_blueprint(assayclassifier_blueprint)
app.register_blueprint(validation_blueprint)
app.register_blueprint(datasets_bulk_submit_blueprint)
app.register_blueprint(privs_blueprint)
# Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
# Enable/disable CORS from configuration based on docker or non-docker deployment
if app.config['ENABLE_CORS']:
    CORS(app)
# Instantiate the Flask Mail instance
# Misconfiguration is logged as fatal but does not abort startup; email sending
# would simply fail later at dispatch time.
try:
    # Validate every Flask-Mail setting is present and of the expected type before
    # constructing the Mail object
    if 'MAIL_SERVER' not in app.config or not app.config['MAIL_SERVER'] or \
            'MAIL_PORT' not in app.config or not isinstance(app.config['MAIL_PORT'], int) or \
            'MAIL_USE_TLS' not in app.config or not isinstance(app.config['MAIL_USE_TLS'], bool) or \
            'MAIL_USERNAME' not in app.config or not app.config['MAIL_USERNAME'] or \
            'MAIL_PASSWORD' not in app.config or not app.config['MAIL_PASSWORD'] or \
            'MAIL_DEBUG' not in app.config or not isinstance(app.config['MAIL_DEBUG'], bool) or \
            'MAIL_DEFAULT_SENDER' not in app.config or not isinstance(app.config['MAIL_DEFAULT_SENDER'], tuple) or \
            len(app.config['MAIL_DEFAULT_SENDER']) != 2 or \
            not app.config['MAIL_DEFAULT_SENDER'][0] or not app.config['MAIL_DEFAULT_SENDER'][1]:
        logger.fatal(f"Flask Mail settings are not correct.")
    # MAIL_ADMIN_LIST must be a non-empty list with a non-empty first entry
    if 'MAIL_ADMIN_LIST' not in app.config or not isinstance(app.config['MAIL_ADMIN_LIST'], list) or \
            len(app.config['MAIL_ADMIN_LIST']) < 1 or \
            not app.config['MAIL_ADMIN_LIST'][0]:
        # Admin emails, not part of Flask-Mail configuration
        logger.fatal(f"ingest-api custom email setting for MAIL_ADMIN_LIST are not correct.")
    flask_mail = Mail(app)
except Exception as e:
    logger.fatal(f"An error occurred configuring the app to email. {str(e)}")
####################################################################################################
## Register error handlers
####################################################################################################
# Error handler for 400 Bad Request with custom error message
@app.errorhandler(400)
def http_bad_request(e):
    """Render a 400 Bad Request as a JSON body carrying the error text."""
    body = jsonify(error=str(e))
    return body, 400
# Error handler for 401 Unauthorized with custom error message
@app.errorhandler(401)
def http_unauthorized(e):
    """Render a 401 Unauthorized as a JSON body carrying the error text."""
    body = jsonify(error=str(e))
    return body, 401
# Error handler for 404 Not Found with custom error message
@app.errorhandler(404)
def http_not_found(e):
    """Render a 404 Not Found as a JSON body carrying the error text."""
    body = jsonify(error=str(e))
    return body, 404
# Error handler for 500 Internal Server Error with custom error message
@app.errorhandler(500)
def http_internal_server_error(e):
    """Render a 500 Internal Server Error as a JSON body carrying the error text."""
    body = jsonify(error=str(e))
    return body, 500
####################################################################################################
## AuthHelper initialization
####################################################################################################
# Initialize AuthHelper class and ensure singleton
# Initialize the AuthHelper singleton used throughout this module for token parsing
# and Globus group checks. Failure is logged (with traceback) but does not stop startup.
try:
    if AuthHelper.isInitialized() == False:
        auth_helper_instance = AuthHelper.create(app.config['APP_CLIENT_ID'],
                                                 app.config['APP_CLIENT_SECRET'])
        logger.info("Initialized AuthHelper class successfully :)")
    else:
        # Already created elsewhere (e.g. by an imported module); reuse the singleton
        auth_helper_instance = AuthHelper.instance()
except Exception:
    msg = "Failed to initialize the AuthHelper class"
    # Log the full stack trace, prepend a line with our message
    logger.exception(msg)
####################################################################################################
## Neo4j connection initialization
####################################################################################################
# The neo4j_driver (from commons package) is a singleton module
# This neo4j_driver_instance will be used for application-specific neo4j queries
# as well as being passed to the schema_manager
# The neo4j_driver (from commons package) is a singleton module.
# This neo4j_driver_instance will be used for application-specific neo4j queries
# as well as being passed to the schema_manager.
try:
    neo4j_driver_instance = neo4j_driver.instance(app.config['NEO4J_SERVER'],
                                                  app.config['NEO4J_USERNAME'],
                                                  app.config['NEO4J_PASSWORD'])
    logger.info("Initialized neo4j_driver module successfully :)")
except Exception:
    msg = "Failed to initialize the neo4j_driver module"
    # Log the full stack trace, prepend a line with our message
    logger.exception(msg)
# Optional backup location for metadata TSV files; None disables backups
if not 'METADATA_TSV_BACKUP_DIR' in app.config:
    logger.exception("ERROR: METADATA_TSV_BACKUP_DIR property not found in configuration file")
    tsv_backup_dir = None
else:
    tsv_backup_dir = app.config['METADATA_TSV_BACKUP_DIR']
####################################################################################################
## File upload initialization
####################################################################################################
try:
    # Initialize the UploadFileHelper class and ensure singleton
    if UploadFileHelper.is_initialized() == False:
        file_upload_helper_instance = UploadFileHelper.create(app.config['FILE_UPLOAD_TEMP_DIR'],
                                                              app.config['FILE_UPLOAD_DIR'],
                                                              app.config['UUID_WEBSERVICE_URL'])
        logger.info("Initialized UploadFileHelper class successfully :)")
        # This will delete all the temp dirs on restart
        #file_upload_helper_instance.clean_temp_dir()
    else:
        file_upload_helper_instance = UploadFileHelper.instance()
# Use a broad catch-all here
except Exception:
    msg = "Failed to initialize the UploadFileHelper class"
    # Log the full stack trace, prepend a line with our message
    logger.exception(msg)
# Admin group UUID — Globus group ids used for privileged-operation checks elsewhere in the API
data_admin_group_uuid = app.config['HUBMAP_DATA_ADMIN_GROUP_UUID']
data_curator_group_uuid = app.config['HUBMAP_DATA_CURATOR_GROUP_UUID']
# Helper that serves provenance schema information, built from app configuration
prov_schema_helper = ProvenanceSchemaHelper(app.config)
####################################################################################################
## Default and Status Routes
####################################################################################################
@app.route('/', methods=['GET'])
def index():
    """Root route: plain-text greeting confirming the service is running."""
    greeting = "Hello! This is HuBMAP Ingest API service :)"
    return greeting
# Show status of neo4j connection and optionally of the dependent web services
# to show the status of the other hubmap services that ingest-api is dependent on
# use the url parameter "?check-ws-dependencies=true
# returns a json body with the status of the neo4j service and optionally the
# status/time that it took for the dependent web services to respond
# e.g.:
# {
# "build": "adfadsfasf",
# "entity_ws": 130,
# "neo4j_connection": true,
# "search_ws_check": 127,
# "uuid_ws": 105,
# "version": "1.15.4"
# }
@app.route('/status', methods=['GET'])
def status():
    """
    Report service health: version/build info, Redis ping, neo4j connectivity,
    and (when '?check-ws-dependencies=true') response times of the uuid, entity
    and search web services.

    Returns a JSON body; HTTP 200 when everything checked is healthy, 500 when
    any check fails (failure details are included in the body).
    """
    response_code = 200
    try:
        file_build_content = (Path(__file__).absolute().parent.parent / 'BUILD').read_text().strip()
    except Exception as e:
        # Surface the read failure in the response rather than erroring out
        file_build_content = str(e)
    try:
        redis_conn = redis.from_url(app.config['REDIS_URL'])
        redis_ping_status = redis_conn.ping()
    except Exception as e:
        redis_ping_status = str(e)
    response_data = {
        # Use strip() to remove leading and trailing spaces, newlines, and tabs
        'version': (Path(__file__).absolute().parent.parent / 'VERSION').read_text().strip(),
        'redis': redis_ping_status,
        'build': file_build_content
    }
    try:
        # if ?check-ws-dependencies=true is present in the url request params
        # set a flag to check these other web services
        check_ws_calls = string_helper.isYes(request.args.get('check-ws-dependencies'))
        # Check the neo4j connection by running a trivial query; executing it without
        # an exception is the success criterion.
        # BUG FIX: the old code looped over the result and set is_connected per record,
        # then unconditionally overwrote the verdict with True, so the loop was dead
        # code (and is_connected was unbound if the query somehow returned nothing).
        try:
            with neo4j_driver_instance.session() as session:
                session.run("Match () Return 1 Limit 1")
            is_connected = True
        # the neo4j connection will often fail via exception so
        # catch it here, flag as failure and track the returned error message
        except Exception as e:
            response_code = 500
            response_data['neo4j_error'] = str(e)
            is_connected = False
        if is_connected:
            response_data['neo4j_connection'] = True
        else:
            response_code = 500
            response_data['neo4j_connection'] = False
        # if the flag was set to check ws dependencies do it now
        # for each dependency try to connect via helper which calls the
        # service's /status method
        # The helper method will return False if the connection fails or
        # an integer with the number of milliseconds that it took to get
        # the services status
        if check_ws_calls:
            uuid_ws_url = app.config['UUID_WEBSERVICE_URL'].strip()
            # Strip a trailing 'hmuuid' path segment to reach the service root
            if uuid_ws_url.endswith('hmuuid'):
                uuid_ws_url = uuid_ws_url[:len(uuid_ws_url) - 6]
            uuid_ws_check = net_helper.check_hm_ws(uuid_ws_url)
            entity_ws_check = net_helper.check_hm_ws(app.config['ENTITY_WEBSERVICE_URL'])
            search_ws_check = net_helper.check_hm_ws(app.config['SEARCH_WEBSERVICE_URL'])
            if not uuid_ws_check or not entity_ws_check or not search_ws_check:
                response_code = 500
            response_data['uuid_ws'] = uuid_ws_check
            response_data['entity_ws'] = entity_ws_check
            response_data['search_ws_check'] = search_ws_check
    # catch any unhandled exceptions
    except Exception as e:
        response_code = 500
        response_data['exception_message'] = str(e)
    finally:
        return Response(json.dumps(response_data), response_code, mimetype='application/json')
####################################################################################################
## Slack Notification
####################################################################################################
# Send an email with the specified text in the body and the specified subject line to
# the data curation/ingest staff email addresses specified in the app.cfg MAIL_ADMIN_LIST entry.
def email_admin_list(message_text, subject):
    """Email *message_text* under *subject* to the staff addresses in MAIL_ADMIN_LIST."""
    admin_msg = Message(subject=subject,
                        recipients=app.config['MAIL_ADMIN_LIST'],
                        body=message_text)
    flask_mail.send(admin_msg)
"""
Notify data curation/ingest staff of events during the data ingest process by sending a message to the
target Slack channel, with an option to email the same message to addresses in the MAIL_ADMIN_LIST value
of app.cfg. HuBMAP-Read access is required in the "old gateway" used by ingest-api, running on a PSC VM.
Input
--------
POST request body data is a JSON object containing the following fields:
message : str
The message to be sent to the channel. Required.
channel : str
The target Slack channel. Optional, with default from configuration used if not specified.
send_to_email : bool
Indication if the message should also be sent via email to addresses configured in MAIL_ADMIN_LIST.
Optional, defaulting to False when not in the JSON.
Returns
--------
dict
Dictionary with separate dictionary entries for 'Slack' and 'Email', each containing a summary of the notification.
"""
@app.route('/notify', methods=['POST'])
def notify():
    """
    Send a notification message to a Slack channel (and optionally to the admin
    email list) on behalf of the authenticated user.

    JSON body fields: 'message' (required str), 'channel' (optional str,
    defaults to SLACK_DEFAULT_CHANNEL), 'send_to_email' (optional truthy flag).
    Returns a dict with 'Slack' and 'Email' summaries of what was dispatched.
    """
    channel = app.config['SLACK_DEFAULT_CHANNEL']
    user_name = ''
    user_email = ''
    # Get user info based on token
    # At this point we should have a valid token since the gateway already checked the auth
    user_info = auth_helper_instance.getUserInfo(AuthHelper.parseAuthorizationTokens(request.headers))
    if user_info is None:
        unauthorized_error("Unable to obtain user information for groups token")
    elif isinstance(user_info, Response) and user_info.status_code in [400, 401, 403]:
        # AuthHelper signals failures by returning a flask.Response instead of a dict
        unauthorized_error(f"Unable to dispatch notifications with the groups token presented.")
    else:
        try:
            user_name = user_info['name']
            user_email = user_info['email']
        except Exception as e:
            logger.error(f"An exception occurred authorizing the user for notification dispatching. {str(e)}")
            unauthorized_error(f"An error occurred authorizing the notification. See logs.")
    # Reject requests without a JSON body / Content-Type
    require_json(request)
    json_data = request.json
    logger.debug(f"======notify() Request json:======")
    logger.debug(json_data)
    if 'channel' in json_data:
        if not isinstance(json_data['channel'], str):
            bad_request_error("The value of 'channel' must be a string")
        # Use the user provided channel rather than the configured default value
        channel = json_data['channel']
    if 'message' not in json_data:
        bad_request_error("The 'message' field is required.")
    if not isinstance(json_data['message'], str):
        bad_request_error("The value of 'message' must be a string")
    # Prefix the message with the sender's identity so the channel knows who it is from
    response = send_slack_message(f"From {user_name} ({user_email}):\n{json_data['message']}", channel)
    notification_results = {'Slack': None, 'Email': None}
    # Note: Slack API wraps the error response in the 200 response instead of using non-200 status code
    # Callers should always check the value of the 'ok' params in the response
    if response.status_code == 200:
        result = response.json()
        # 'ok' field is a boolean value
        if 'ok' in result:
            if result['ok']:
                output = {
                    "channel": channel,
                    "message": json_data['message'],
                    "user_name": user_name,
                    "user_email": user_email
                }
                logger.debug("======notify() Sent Notification Summary======")
                logger.info(output)
                notification_results['Slack'] = output
            else:
                logger.error(f"Unable to notify Slack channel: {channel} with the message: {json_data['message']}")
                logger.debug("======notify() response json from Slack API======")
                logger.debug(result)
                # https://api.slack.com/methods/chat.postMessage#errors
                if 'error' in result:
                    bad_request_error(result['error'])
                else:
                    internal_server_error("Slack API unable to process the request, 'error' param/field missing from Slack API response json")
        else:
            internal_server_error("The 'ok' param/field missing from Slack API response json")
    else:
        internal_server_error("Failed to send a request to Slack API")
    # Optionally mirror the same message to the admin email list
    if 'send_to_email' in json_data and json_data['send_to_email']:
        logger.debug(json_data['send_to_email'])
        try:
            subject_line = app.config['MAIL_SUBJECT_LINE'].format( user_name=user_name
                                                                  ,user_email=user_email)
            email_admin_list( message_text=json_data['message']
                             ,subject=subject_line)
            output = {
                "email_recipient_list": str(app.config['MAIL_ADMIN_LIST']),
                "message": json_data['message'],
                "user_name": user_name,
                "user_email": user_email
            }
            logger.debug("======notify() Sent Email Summary======")
            logger.info(output)
            notification_results['Email'] = output
        except Exception as e:
            logger.error(f"Failed to send email message. {str(e)}", exc_info=True)
            # NOTE(review): the Slack notification already went out at this point; the 400
            # reports only the email failure, with the Slack summary included for context.
            return jsonify( f"Failed to send email message, after Slack notification resulted in"
                            f" {notification_results['Slack']}"), 400
    return jsonify(notification_results)
####################################################################################################
## Internal Functions
####################################################################################################
"""
Validate the provided token when Authorization header presents
Parameters
----------
request : flask.request object
The Flask http request object
"""
def _validate_token_if_auth_header_exists(request):
    """
    When an Authorization header is present, verify the token it carries and
    abort with 401 if the token is malformed, invalid, or expired. Requests
    without an Authorization header pass through untouched.

    Parameters
    ----------
    request : flask.request object
        The Flask http request object
    """
    # HTTP header names are case-insensitive; .get() returns None when absent
    if request.headers.get('Authorization') is None:
        return
    parsed_token = auth_helper_instance.getAuthorizationTokens(request.headers)
    # commons.auth_helper returns a flask.Response (always a 401 with message)
    # when the header cannot be parsed into a token
    if isinstance(parsed_token, Response):
        # Response.data is a binary string; decode before wrapping in our 401 json
        unauthorized_error(parsed_token.get_data().decode())
    # Second argument False: validate the token itself, skip the group check
    user_info = auth_helper_instance.getUserInfo(parsed_token, False)
    if isinstance(user_info, Response):
        unauthorized_error(user_info.get_data().decode())
# Use the Flask request.args MultiDict to see if 'reindex' is a URL parameter passed in with the
# request and if it indicates reindexing should be suppressed. Default to reindexing in all other cases.
def _suppress_reindex() -> bool:
    """
    Inspect the 'reindex' URL parameter of the current request and report
    whether reindexing should be suppressed. Absent parameter means reindex
    normally (return False); any value other than true/false raises.
    """
    # N.B. This logic should be the same as that used by
    # entity-api schema_manager.py suppress_reindex()
    # https://github.com/hubmapconsortium/entity-api/blob/main/src/schema/schema_manager.py
    if 'reindex' not in request.args:
        return False
    flag = request.args.get('reindex').lower()
    if flag == 'true':
        return False
    if flag == 'false':
        return True
    raise Exception(f"The value of the 'reindex' parameter must be True or False (case-insensitive)."
                    f" '{request.args.get('reindex')}' is not recognized.")
"""
See if 'reindex-priority' is a URL parameter passed in with the request, if it is valid, and
if it is compatible with the calculated _suppress_reindex() result. Default to 1 when not specified.
Parameters
----------
request_args:
The Flask request.args passed in from application request
calc_suppress_reindex:
The value returned from the suppress_reindex() method, if previously called.
Returns
-------
int value from the enumeration ReindexPriorityLevelEnum
"""
def _get_reindex_priority(calc_suppress_reindex: bool) -> int:
    """
    See if 'reindex-priority' is a URL parameter passed in with the request, if it is valid,
    and if it is compatible with the calculated _suppress_reindex() result.

    Parameters
    ----------
    calc_suppress_reindex : bool
        The value returned from the suppress_reindex() method, if previously called.

    Returns
    -------
    int
        A value from the enumeration ReindexPriorityLevelEnum; HIGH when not specified.

    Raises
    ------
    Exception
        When a priority is given alongside suppression, is not an integer, or is
        outside the defined priority levels.
    """
    # N.B. This logic should be the same as that used by
    # ingest-api app.py _get_reindex_priority()
    # https://github.com/hubmapconsortium/ingest-api/blob/main/src/app.py
    # Define an enumeration of re-index priority level types.
    # N.B. This is the same values maintained in entity-api schema_constants.py, which
    # must be the same levels defined for the enqueue() method at
    # https://github.com/x-atlas-consortia/jobq/blob/main/src/atlas_consortia_jobq/queue.py
    class ReindexPriorityLevelEnum(Enum):
        HIGH = 1
        MEDIUM = 2
        LOW = 3
    if calc_suppress_reindex and 'reindex-priority' in request.args:
        raise Exception("Specifying a re-index priority is incompatible with suppressing re-indexing.")
    if 'reindex-priority' not in request.args:
        return ReindexPriorityLevelEnum.HIGH.value
    try:
        priority_int = int(request.args.get('reindex-priority'))
    except ValueError:
        raise Exception("The value of the 'reindex-priority' parameter must be an integer.")
    # BUG FIX: `priority_int not in ReindexPriorityLevelEnum` raises TypeError for a
    # non-member int on Python < 3.12; test membership against the member values instead.
    if priority_int not in {level.value for level in ReindexPriorityLevelEnum}:
        raise Exception(f"The value of the 'reindex-priority' parameter must be"
                        f" greater than or equal to {ReindexPriorityLevelEnum.HIGH.value} (high priority)"
                        f" and less than or equal to {ReindexPriorityLevelEnum.LOW.value} (low priority).")
    return priority_int
####################################################################################################
## Ingest API Endpoints
####################################################################################################
"""
For each element in a list of identifiers, return accessibility information appropriate
for the user authorization of the Request.
An HTTP 400 Response is returned for reasons described in the error message, such as
not providing the list of identifiers.
An HTTP 401 Response is returned when a token is presented that is not valid.
An HTTP 500 is returned for unexpected errors
Parameters
----------
request : flask.request
The flask http request object that containing the Authorization header
with a valid Globus groups token for checking group information. The
Request will have the Content-type header set to application/json. The
JSON body of the request will contain a JSON Array of strings with
UUID or HuBMAP-ID strings.
Returns
-------
json
Valid JSON for a single JSON Object containing only JSON Objects, one per
entity evaluated. This enclosing Object will have keys for each identifier
submitted with the request, whose value is a JSON Object containing
accessibility information for the entity. Each entity JSON Object will contain
"valid_id": true/false, --true if the id resolves to a HuBMAP Dataset or Upload
---------- below here only returned if valid_id == true
"access_allowed": true/false --true if the user is allowed to access the data for this entity
---------- below here only returned if access_allowed == true
"hubmap_id": "<corresponding HuBMAP ID of the requested id>",
"uuid": "<uuid of Dataset or Upload>",
"entity_type": "<Dataset or Upload>",
"file_system_path": "<full absolute file system path to the Dataset or upload>"
"""
@app.route('/entities/accessible-data-directories', methods=['POST'])
def get_accessible_data_directories():
    """
    For each identifier in the posted JSON array of strings, report accessibility
    information appropriate for the caller's authorization. Returns a JSON object
    keyed by the submitted identifiers.
    """
    helper = DatasetHelper()
    # Reject a bad token with a 401 up front rather than a 500 deeper in
    _validate_token_if_auth_header_exists(request)
    # User group information drives the per-entity accessibility determination
    access_level = auth_helper_instance.getUserDataAccessLevel(request)
    access_level['group_membership_ids'] = []
    if not request.is_json:
        bad_request_error("A json body and appropriate Content-Type header are required.")
    requested_ids = request.get_json()
    if not isinstance(requested_ids, list) or not requested_ids:
        bad_request_error('The Request payload must be a non-empty JSON Array of strings.')
    for requested_id in requested_ids:
        if not isinstance(requested_id, str):
            bad_request_error('The Request payload JSON Array must contain only identifier strings.')
    try:
        accessibility_by_id = helper.get_entity_accessibility(neo4j_driver_instance, requested_ids, user_data_access_level=access_level)
    except Neo4jError as ne:
        logger.error(str(ne.message))
        return jsonify({'Unexpected error': 'Failed to retrieve accessibility info from Neo4j. Check the logs'}), 500
    except ValueError as ve:
        logger.error(str(ve))
        return jsonify({'error': str(ve)}), 400
    except Exception as e:
        logger.error(e, exc_info=True)
        return Response("Unexpected error: " + str(e), 500)
    return jsonify(accessibility_by_id), 200
"""
Retrieve the path of Datasets or Uploads relative to the Globus endpoint mount point give from a list of entity uuids
This is a public endpoint, not authorization token is required.
Input
--------
Input is via POST request body data as a Json array of Upload or Dataset HuBMAP IDs or UUIDs.
Traditionally this would be a GET method as it isn't posting/creating anything, but we need to
use the ability to send request body data with this method, even though GET can support a
request body with data we've found that our Gateway service (AWS API Gateway) doesn't support
GET with data.
ds_uuid_list : list
ds_uuid : str
The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target dataset or upload
Example: ["HBM123.ABCD.456", "HBM939.ZYES.733", "a9382ce928b32839dbe83746f383ea8"]
Returns
--------
out_list : json array of json objects with information about the individual
entities where the json objects have the following properties:
id: the id of the dataset as sent in the input
entity_type: The type of entity ("Upload" or "Dataset")
rel_path: the path on the file system where the data for the entity sits relative to the mount point of the Globus endpoint
globus_endpoint_uuid: The Globus id of the endpoint where the data can be downloaded
Example:
[{
"id":"HBM123.ABCD.4564",
"entity_type":"Dataset",
"hubmap_id":"HBM123.ABCD.4564",
"rel_path":"/consortium/IEC Testing/db382ce928b32839dbe83746f384e354"
"globus_endpoint_uuid":"a935-ce928b328-39dbe83746f3-84bdae",
"uuid", "db382ce928b32839dbe83746f384e354"
},
{
"id":"HBM478.BYRE.7748",
"entity_type":"Dataset",
"rel_path":"/consortium/IEC Testing/db382ce928b32839dbe83746f384e354"
"globus_endpoint_uuid":"a935-ce928b328-39dbe83746f3-84bdae"
}]
"""
@app.route('/entities/file-system-rel-path', methods=['POST'])
def get_file_system_relative_path():
    """
    Resolve the Globus-relative file system paths for a JSON array of Dataset,
    Publication or Upload ids (HuBMAP IDs or UUIDs). Public endpoint, no token
    required.

    Returns a JSON array of per-entity records (id, entity_type, rel_path,
    globus_endpoint_uuid, uuid, hubmap_id) on success, or a JSON array of
    per-id error records with a 400/500 status when any id fails.
    """
    ds_uuid_list = request.json
    # Published datasets are reported at their public location unless the caller
    # explicitly requests the protected-space path
    include_protected = False
    if request.args.get('from-protected-space', '').lower().strip() == 'true':
        include_protected = True
    out_list = []
    error_id_list = []
    for ds_uuid in ds_uuid_list:
        try:
            ent_recd = {}
            ent_recd['id'] = ds_uuid
            dset = __get_entity(ds_uuid, auth_header="Bearer " + auth_helper_instance.getProcessSecret())
            ent_type_m = __get_dict_prop(dset, 'entity_type')
            ent_recd['entity_type'] = ent_type_m
            group_uuid = __get_dict_prop(dset, 'group_uuid')
            status = __get_dict_prop(dset, 'status')
            if ent_type_m is None or ent_type_m.strip() == '':
                error_id = {'id': ds_uuid, 'message': 'id not for Dataset, Publication or Upload', 'status_code': 400}
                error_id_list.append(error_id)
                # BUG FIX: skip this id; calling .lower() on a None entity type below
                # raised AttributeError and turned this 400 into a spurious 500
                continue
            ent_type = ent_type_m.lower().strip()
            ingest_helper = IngestFileHelper(app.config)
            if ent_type == 'upload':
                path = ingest_helper.get_upload_directory_relative_path(group_uuid=group_uuid, upload_uuid=dset['uuid'])
            elif get_entity_type_instanceof(ent_type, 'Dataset', auth_header="Bearer " + auth_helper_instance.getProcessSecret()):
                is_phi = __get_dict_prop(dset, 'contains_human_genetic_sequences')
                if group_uuid is None:
                    error_id = {'id': ds_uuid, 'message': 'Unable to find group uuid on dataset', 'status_code': 400}
                    error_id_list.append(error_id)
                if is_phi is None:
                    error_id = {'id': ds_uuid,
                                'message': f"contains_human_genetic_sequences is not set on {ent_type} dataset",
                                'status_code': 400}
                    error_id_list.append(error_id)
                # BUG FIX: guard status against None; status.lower() on a missing status
                # raised AttributeError and produced a 500 instead of a path
                if not include_protected and status is not None and status.lower() == 'published':
                    # Published data is addressed via its public location
                    path = ingest_helper.get_dataset_directory_relative_path({'contains_human_genetic_sequences': False, 'data_access_level': 'public', 'status': status}, group_uuid, dset['uuid'])
                else:
                    path = ingest_helper.get_dataset_directory_relative_path(dset, group_uuid, dset['uuid'])
            else:
                error_id = {'id': ds_uuid, 'message': f'Unhandled entity type, must be Upload, Publication or Dataset, '
                            f'found {ent_type_m}', 'status_code': 400}
                error_id_list.append(error_id)
                # BUG FIX: 'path' is never assigned on this branch; falling through raised
                # UnboundLocalError which was reported as a 500 instead of this 400
                continue
            ent_recd['rel_path'] = path['rel_path']
            ent_recd['globus_endpoint_uuid'] = path['globus_endpoint_uuid']
            ent_recd['uuid'] = (__get_dict_prop(dset, 'uuid'))
            ent_recd['hubmap_id'] = (__get_dict_prop(dset, 'hubmap_id'))
            out_list.append(ent_recd)
        except HTTPException as hte:
            error_id = {'id': ds_uuid, 'message': hte.get_description(), 'status_code': hte.get_status_code()}
            error_id_list.append(error_id)
        except Exception as e:
            logger.error(e, exc_info=True)
            error_id = {'id': ds_uuid, 'message': str(e), 'status_code': 500}
            error_id_list.append(error_id)
    if len(error_id_list) > 0:
        # Respond 500 if any individual failure was server-side, otherwise 400
        status_code = 400
        for each in error_id_list:
            if each['status_code'] == 500:
                status_code = 500
        return jsonify(error_id_list), status_code
    return jsonify(out_list), 200
@app.route('/uploads/<ds_uuid>/file-system-abs-path', methods=['GET'])
@app.route('/datasets/<ds_uuid>/file-system-abs-path', methods=['GET'])
def get_file_system_absolute_path(ds_uuid: str):
    """Return the absolute file-system path of a Dataset or Upload.

    ds_uuid may be a uuid or hubmap_id; it is first resolved to a canonical
    uuid via the uuid web service.  Responds with {'path': <abs path>} on
    success, or an error Response/abort on failure.
    """
    r = None
    try:
        r = requests.get(app.config['UUID_WEBSERVICE_URL'] + "/" + ds_uuid)
        r.raise_for_status()
    except Exception as e:
        # BUGFIX: if requests.get itself failed (connection error, DNS, timeout)
        # there is no response object; the original code hit UnboundLocalError
        # on r.status_code here, masking the real cause.
        if r is None:
            logger.error(e, exc_info=True)
            internal_server_error("Unable to reach the uuid web service: " + str(e))
        status_code = r.status_code
        response_text = r.text
        if status_code == 404:
            not_found_error(response_text)
        elif status_code == 500:
            internal_server_error(response_text)
        else:
            # Pass any other upstream error straight through to the caller.
            return Response(response_text, status_code)
    # Normalize to the canonical uuid returned by the uuid service.
    ds_uuid = r.json().get("uuid")
    try:
        path = get_dataset_abs_path(ds_uuid)
        return jsonify({'path': path}), 200
    except ResponseException as re:
        return re.response
    except HTTPException as hte:
        return Response(f"Error while getting file-system-abs-path for {ds_uuid}: " +
                        hte.get_description(), hte.get_status_code())
    except Exception as e:
        logger.error(e, exc_info=True)
        return Response(f"Unexpected error while retrieving entity {ds_uuid}: " + str(e), 500)
@app.route('/uploads/file-system-abs-path', methods=['POST'])
@app.route('/datasets/file-system-abs-path', methods=['POST'])
def get_mulltiple_file_system_absolute_paths():
    """Return absolute file-system paths for a JSON list of uuids/hubmap_ids.

    Request body: JSON array of uuid or hubmap_id strings.
    Response: JSON array; each element is either
    {'uuid': ..., 'path': ...} on success or
    {'uuid': ..., 'error': ...} when no entity matched the identifier.
    """
    out_list = []
    if not request.is_json:
        return Response("json request required", 400)
    uuids_list = request.json
    is_valid = validate_json_list(uuids_list)
    if not is_valid:
        bad_request_error("json must be a list of uuids")
    try:
        ingest_helper = IngestFileHelper(app.config)
        with neo4j_driver_instance.session() as neo_session:
            # SECURITY FIX: use a parameterized query instead of f-string
            # interpolating the user-supplied list into the Cypher text
            # (the old form was injection-prone and only worked because a
            # Python list repr happens to resemble a Cypher list literal).
            q = ("MATCH (entity) "
                 "WHERE entity.uuid IN $uuids OR entity.hubmap_id IN $uuids "
                 "RETURN entity.entity_type AS entity_type, "
                 "entity.group_uuid AS group_uuid, entity.contains_human_genetic_sequences as contains_human_genetic_sequences, "
                 "entity.data_access_level AS data_access_level, entity.status AS status, entity.uuid AS uuid, entity.hubmap_id AS hubmap_id")
            result = neo_session.run(q, uuids=uuids_list).data()

            # Collect every identifier (uuid and hubmap_id) that matched so we
            # can report the ones that did not.
            returned_uuids = []
            for entity in result:
                returned_uuids.append(entity['uuid'])
                if entity.get('hubmap_id'):
                    returned_uuids.append(entity['hubmap_id'])
            for uuid in uuids_list:
                if uuid not in returned_uuids:
                    out_list.append({'uuid': uuid, 'error': 'No results for given uuid'})
            if len(result) < 1:
                raise ResponseException("No result found for uuids in list", 400)
            for entity in result:
                ent_type = entity['entity_type']
                group_uuid = entity['group_uuid']
                is_phi = entity['contains_human_genetic_sequences']
                ds_uuid = entity['uuid']
                if ent_type is None or ent_type.strip() == '':
                    raise ResponseException(f"Entity with uuid:{ds_uuid} needs to be a Dataset or Upload.", 400)
                if ent_type.lower().strip() == 'upload':
                    out_list.append({'path': ingest_helper.get_upload_directory_absolute_path(group_uuid=group_uuid, upload_uuid=ds_uuid), 'uuid': ds_uuid})
                    continue
                if not get_entity_type_instanceof(ent_type, 'Dataset', auth_header=request.headers.get("AUTHORIZATION")):
                    raise ResponseException(f"Entity with uuid: {ds_uuid} is not a Dataset, Publication or upload", 400)
                if group_uuid is None:
                    raise ResponseException(f"Unable to find group uuid on dataset {ds_uuid}", 400)
                if is_phi is None:
                    raise ResponseException(f"Contains_human_genetic_sequences is not set on dataset {ds_uuid}", 400)
                path = ingest_helper.get_dataset_directory_absolute_path(entity, group_uuid, ds_uuid)
                out_list.append({'uuid': ds_uuid, 'path': path})
        return jsonify(out_list), 200
    except ResponseException as re:
        return re.response
    except HTTPException as hte:
        return Response(f"Error while getting file-system-abs-path for entities: " + hte.get_description(), hte.get_status_code())
    except Exception as e:
        logger.error(e, exc_info=True)
        return Response(f"Unexpected error while retrieving entities: " + str(e), 500)
# Passthrough method to call the mirror method on entity-api.
# This is needed by ingest-pipeline, which (for security reasons) can only
# call methods via http running on the same machine, and ingest-api will
# for the foreseeable future run on that same machine.
@app.route('/entities/<entity_uuid>', methods = ['GET'])
#@secured(groups="HuBMAP-read")
def get_entity(entity_uuid):
    """Pass-through: fetch a single entity record from entity-api and return it as JSON."""
    auth_header = request.headers.get("AUTHORIZATION")
    try:
        entity_record = __get_entity(entity_uuid, auth_header=auth_header)
        return jsonify(entity_record), 200
    except HTTPException as hte:
        return Response(hte.get_description(), hte.get_status_code())
    except Exception as e:
        logger.error(e, exc_info=True)
        return Response(f"Unexpected error while retrieving entity {entity_uuid}: " + str(e), 500)
# Create derived dataset
"""
Input JSON example with "source_dataset_uuid" being an array of uuids:
{
"source_dataset_uuid":["6e24ba7b41725e4b06630192476f8364", "hyt0tse652d3c4f22ace7f21fd64208ac"],
"derived_dataset_name":"Test derived dataset 1",
"derived_dataset_types":["QX11", "xxx"]
}
OR with "source_dataset_uuid" being a single uuid string to support past cases:
{
"source_dataset_uuid": "6e24ba7b41725e4b06630192476f8364",
"derived_dataset_name":"Test derived dataset 1",
"derived_dataset_types":["QX11", "xxx"]
}
Output JSON example:
{
"derived_dataset_uuid": "78462470866bdda77deaaebe21ae7151",
"full_path": "/hive/hubmap-dev/data/consortium/IEC Testing Group/78462470866bdda77deaaebe21ae7151",
"group_display_name": "IEC Testing Group",
"group_uuid": "5bd084c8-edc2-11e8-802f-0e368f3075e8"
}
"""
@app.route('/datasets/derived', methods=['POST'])
#@secured(groups="HuBMAP-read")
def create_derived_dataset():
    """Create a derived dataset from one or more source datasets.

    Expects a JSON body with 'source_dataset_uuids' (string or non-empty
    array), 'derived_dataset_name' (string) and 'derived_dataset_types'
    (non-empty array).  Returns the new dataset record with HTTP 201.
    """
    # Token is required
    nexus_token = None
    try:
        nexus_token = AuthHelper.parseAuthorizationTokens(request.headers)
    except Exception:
        internal_server_error("Unable to parse globus token from request header")

    require_json(request)
    json_data = request.json

    logger.info("++++++++++Calling /datasets/derived")
    logger.info("++++++++++Request:" + json.dumps(json_data))

    # All three properties must be present.
    for required_prop in ('source_dataset_uuids', 'derived_dataset_name', 'derived_dataset_types'):
        if required_prop not in json_data:
            bad_request_error(f"The '{required_prop}' property is required.")

    source_uuids = json_data['source_dataset_uuids']
    derived_types = json_data['derived_dataset_types']

    # source_dataset_uuids can either be a single uuid string OR a json array
    if not isinstance(source_uuids, (str, list)):
        bad_request_error("The 'source_dataset_uuids' must either be a json string or an array")
    # Ensure the derived_dataset_types is json array
    if not isinstance(derived_types, list):
        bad_request_error("The 'derived_dataset_types' must be a json array")
    # Ensure the arrays are not empty
    if isinstance(source_uuids, list) and len(source_uuids) == 0:
        bad_request_error("The 'source_dataset_uuids' can not be an empty array")
    if len(derived_types) == 0:
        bad_request_error("The 'derived_dataset_types' can not be an empty array")

    try:
        new_record = Dataset(app.config).create_derived_datastage(nexus_token, json_data)
        return jsonify(new_record), 201
    except HTTPException as hte:
        status_code = hte.get_status_code()
        response_text = hte.get_description()
        # Map known upstream status codes onto the matching abort helper;
        # anything else is passed straight through.
        error_dispatch = {
            400: bad_request_error,
            401: unauthorized_error,
            404: not_found_error,
            500: internal_server_error,
        }
        handler = error_dispatch.get(status_code)
        if handler is not None:
            handler(response_text)
        return Response(response_text, status_code)
    except Exception as e:
        logger.error(e, exc_info=True)
        internal_server_error("Unexpected error while creating derived dataset: " + str(e))
@app.route('/datasets', methods=['POST'])
@app.route('/publications', methods=['POST'])
def create_datastage():
    """Create a new Dataset or Publication entity via entity-api, then create
    its file-system directory.

    The entity type is derived from the request path.  URL parameters may
    suppress post-create re-indexing or set its priority; invalid parameters
    abort with 400.  Returns the new entity record as JSON.
    """
    if not request.is_json:
        return Response("json request required", 400)
    if request.path.lower() == '/datasets':
        entity_type = "dataset"
    elif request.path.lower() == '/publications':
        entity_type = "publication"
    else:
        # BUGFIX (defensive): only the two routes above map here, but without
        # this branch an unexpected path left entity_type unbound and raised
        # UnboundLocalError deep inside the try block (a misleading 500).
        bad_request_error(f"Unsupported path: {request.path}")
    try:
        dataset_request = request.json
        auth_helper = AuthHelper.configured_instance(app.config['APP_CLIENT_ID'], app.config['APP_CLIENT_SECRET'])
        auth_tokens = auth_helper.getAuthorizationTokens(request.headers)
        if isinstance(auth_tokens, Response):
            return auth_tokens
        elif isinstance(auth_tokens, str):
            token = auth_tokens
        elif 'nexus_token' in auth_tokens:
            token = auth_tokens['nexus_token']
        else:
            return Response("Valid nexus auth token required", 401)

        # Resolve the group the new entity will be written under.
        requested_group_uuid = None
        if 'group_uuid' in dataset_request:
            requested_group_uuid = dataset_request['group_uuid']

        ingest_helper = IngestFileHelper(app.config)
        requested_group_uuid = auth_helper.get_write_group_uuid(token, requested_group_uuid)
        dataset_request['group_uuid'] = requested_group_uuid

        # Check URL parameters before proceeding to any CRUD operations, halting on validation failures.
        try:
            # Check if re-indexing is to be suppressed after entity creation.
            suppress_reindex = _suppress_reindex()
            # Determine valid re-indexing priority using Request parameters.
            reindex_priority = _get_reindex_priority(calc_suppress_reindex=suppress_reindex)
        except Exception as e:
            bad_request_error(e.args[0])

        post_url = f"{commons_file_helper.ensureTrailingSlashURL(app.config['ENTITY_WEBSERVICE_URL'])}" \
                   f"entities/{entity_type}" \
                   f"{'?reindex=False' if suppress_reindex else ''}"
        # SECURITY NOTE(review): verify=False disables TLS certificate
        # verification on the call to entity-api — acceptable only if that
        # service is reached over a trusted network; confirm and prefer
        # verify=True with a proper CA bundle.
        response = requests.post(post_url
                                 , json = dataset_request
                                 , headers = {'Authorization': 'Bearer ' + token, 'X-Hubmap-Application':'ingest-api' }
                                 , verify = False)
        if response.status_code != 200:
            return Response(response.text, response.status_code)
        new_dataset = response.json()

        # Create the Globus/file-system directory for the new entity.
        ingest_helper.create_dataset_directory(new_dataset, requested_group_uuid, new_dataset['uuid'])

        return jsonify(new_dataset)
    except werkzeug.exceptions.HTTPException as hte:
        return Response(hte.description, hte.code)
    except HTTPException as hte:
        return Response(hte.get_description(), hte.get_status_code())
    except Exception as e:
        logger.error(e, exc_info=True)
        return Response("Unexpected error while creating a dataset: " + str(e) + " Check the logs", 500)
@app.route('/datasets/components', methods=['POST'])
def multiple_components():
if not request.is_json:
return Response("json request required", 400)
try:
component_request = request.json
auth_helper = AuthHelper.configured_instance(app.config['APP_CLIENT_ID'], app.config['APP_CLIENT_SECRET'])
auth_tokens = auth_helper.getAuthorizationTokens(request.headers)
if isinstance(auth_tokens, Response):
return(auth_tokens)
elif isinstance(auth_tokens, str):
token = auth_tokens
else:
return(Response("Valid globus groups token required", 401))
# Check that `dataset_link_abs_dir` exists for both datasets and that it is a valid directory
json_data_dict = request.get_json()
for dataset in json_data_dict.get('datasets'):
if 'dataset_link_abs_dir' in dataset:
if not os.path.exists(dataset['dataset_link_abs_dir']):
return Response(f"The filepath specified with 'dataset_link_abs_dir' does not exist: {dataset['dataset_link_abs_dir']}", 400)
if not os.path.isdir(dataset.get('dataset_link_abs_dir')):
return Response(f"{dataset.get('dataset_link_abs_dir')} is not a directory", 400)