Skip to content

Commit afd5283

Browse files
authored
Merge pull request #954 from hubmapconsortium/Derek-Furst/redirect-large-responses
Derek furst/redirect large responses
2 parents 25ccf51 + 654a4a1 commit afd5283

File tree

1 file changed

+148
-87
lines changed

1 file changed

+148
-87
lines changed

src/app.py

Lines changed: 148 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,8 @@ def get_provenance_metadata_by_id_for_auth_level(id):
762762
"""
763763
@app.route('/entities/<id>', methods = ['GET'])
764764
def get_entity_by_id(id):
765+
global anS3Worker
766+
765767
# Token is not required, but if an invalid token is provided,
766768
# we need to tell the client with a 401 error
767769
validate_token_if_auth_header_exists(request)
@@ -900,6 +902,14 @@ def get_entity_by_id(id):
900902
if public_entity and not user_in_hubmap_read_group(request):
901903
final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
902904

905+
# Check the size of what is to be returned through the AWS Gateway, and replace it with
906+
# a response that links to an Object in the AWS S3 Bucket, if appropriate.
907+
resp_body = json.dumps(final_result).encode('utf-8')
908+
try_resp = try_stash_response_body(resp_body)
909+
if try_resp is not None:
910+
return try_resp
911+
912+
# Return a regular response through the AWS Gateway
903913
return jsonify(final_result)
904914

905915

@@ -1616,6 +1626,8 @@ def update_entity(id):
16161626
"""
16171627
@app.route('/ancestors/<id>', methods = ['GET'])
16181628
def get_ancestors(id):
1629+
global anS3Worker
1630+
16191631
final_result = []
16201632

16211633
# Token is not required, but if an invalid token is provided,
@@ -1706,6 +1718,15 @@ def get_ancestors(id):
17061718
else:
17071719
filtered_final_result.append(ancestor)
17081720
final_result = filtered_final_result
1721+
1722+
# Check the size of what is to be returned through the AWS Gateway, and replace it with
1723+
# a response that links to an Object in the AWS S3 Bucket, if appropriate.
1724+
resp_body = json.dumps(final_result).encode('utf-8')
1725+
try_resp = try_stash_response_body(resp_body)
1726+
if try_resp is not None:
1727+
return try_resp
1728+
1729+
# Return a regular response through the AWS Gateway
17091730
return jsonify(final_result)
17101731

17111732

@@ -1784,22 +1805,11 @@ def get_descendants(id):
17841805

17851806
# Check the size of what is to be returned through the AWS Gateway, and replace it with
17861807
# a response that links to an Object in the AWS S3 Bucket, if appropriate.
1787-
try:
1788-
resp_body = json.dumps(final_result).encode('utf-8')
1789-
s3_url = anS3Worker.stash_response_body_if_big(resp_body)
1790-
if s3_url is not None:
1791-
return Response(response=s3_url
1792-
, status=303) # See Other
1793-
# The HuBMAP Commons S3Worker will return None for a URL when the response body is
1794-
# smaller than it is configured to store, so the response should be returned through
1795-
# the AWS Gateway
1796-
except Exception as s3exception:
1797-
logger.error(f"Error using anS3Worker to handle len(resp_body)="
1798-
f"{len(resp_body)}.")
1799-
logger.error(s3exception, exc_info=True)
1800-
return Response(response=f"Unexpected error storing large results in S3. See logs."
1801-
, status=500)
1802-
1808+
resp_body = json.dumps(final_result).encode('utf-8')
1809+
try_resp = try_stash_response_body(resp_body)
1810+
if try_resp is not None:
1811+
return try_resp
1812+
18031813
# Return a regular response through the AWS Gateway
18041814
return jsonify(final_result)
18051815

@@ -1824,6 +1834,7 @@ def get_descendants(id):
18241834
"""
18251835
@app.route('/parents/<id>', methods = ['GET'])
18261836
def get_parents(id):
1837+
global anS3Worker
18271838
final_result = []
18281839

18291840
# Token is not required, but if an invalid token is provided,
@@ -1915,6 +1926,14 @@ def get_parents(id):
19151926
filtered_final_result.append(parent)
19161927
final_result = filtered_final_result
19171928

1929+
# Check the size of what is to be returned through the AWS Gateway, and replace it with
1930+
# a response that links to an Object in the AWS S3 Bucket, if appropriate.
1931+
resp_body = json.dumps(final_result).encode('utf-8')
1932+
try_resp = try_stash_response_body(resp_body)
1933+
if try_resp is not None:
1934+
return try_resp
1935+
1936+
# Return a regular response through the AWS Gateway
19181937
return jsonify(final_result)
19191938

19201939

@@ -1935,6 +1954,8 @@ def get_parents(id):
19351954
"""
19361955
@app.route('/children/<id>', methods = ['GET'])
19371956
def get_children(id):
1957+
global anS3Worker
1958+
19381959
final_result = []
19391960

19401961
# Get user token from Authorization header
@@ -1989,6 +2010,14 @@ def get_children(id):
19892010
# Final result after normalization
19902011
final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
19912012

2013+
# Check the size of what is to be returned through the AWS Gateway, and replace it with
2014+
# a response that links to an Object in the AWS S3 Bucket, if appropriate.
2015+
resp_body = json.dumps(final_result).encode('utf-8')
2016+
try_resp = try_stash_response_body(resp_body)
2017+
if try_resp is not None:
2018+
return try_resp
2019+
2020+
# Return a regular response through the AWS Gateway
19922021
return jsonify(final_result)
19932022

19942023

@@ -2012,6 +2041,8 @@ def get_children(id):
20122041
"""
20132042
@app.route('/entities/<id>/siblings', methods = ['GET'])
20142043
def get_siblings(id):
2044+
global anS3Worker
2045+
20152046
final_result = []
20162047

20172048
# Token is not required, but if an invalid token is provided,
@@ -2081,39 +2112,49 @@ def get_siblings(id):
20812112
include_revisions = False
20822113
sibling_list = app_neo4j_queries.get_siblings(neo4j_driver_instance, uuid, status, property_key, include_revisions)
20832114
if property_key is not None:
2084-
return jsonify(sibling_list)
2115+
final_result = sibling_list
20852116
# Generate trigger data
20862117
# Skip some of the properties that are time-consuming to generate via triggers
20872118
# Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
20882119
# checks when the target Dataset is public but the revisions are not public
2089-
properties_to_skip = [
2090-
# Properties to skip for Sample
2091-
'direct_ancestor',
2092-
# Properties to skip for Dataset
2093-
'direct_ancestors',
2094-
'collections',
2095-
'upload',
2096-
'title',
2097-
'next_revision_uuid',
2098-
'previous_revision_uuid',
2099-
'associated_collection',
2100-
'creation_action',
2101-
'local_directory_rel_path'
2102-
]
2120+
else:
2121+
properties_to_skip = [
2122+
# Properties to skip for Sample
2123+
'direct_ancestor',
2124+
# Properties to skip for Dataset
2125+
'direct_ancestors',
2126+
'collections',
2127+
'upload',
2128+
'title',
2129+
'next_revision_uuid',
2130+
'previous_revision_uuid',
2131+
'associated_collection',
2132+
'creation_action',
2133+
'local_directory_rel_path'
2134+
]
21032135

2104-
complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, sibling_list, properties_to_skip)
2105-
# Final result after normalization
2106-
final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
2107-
filtered_final_result = []
2108-
for sibling in final_result:
2109-
sibling_entity_type = sibling.get('entity_type')
2110-
fields_to_exclude = schema_manager.get_fields_to_exclude(sibling_entity_type)
2111-
if public_entity and not user_in_hubmap_read_group(request):
2112-
filtered_sibling = schema_manager.exclude_properties_from_response(fields_to_exclude, sibling)
2113-
filtered_final_result.append(filtered_sibling)
2114-
else:
2115-
filtered_final_result.append(sibling)
2116-
final_result = filtered_final_result
2136+
complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, sibling_list, properties_to_skip)
2137+
# Final result after normalization
2138+
output = schema_manager.normalize_entities_list_for_response(complete_entities_list)
2139+
filtered_final_result = []
2140+
for sibling in output:
2141+
sibling_entity_type = sibling.get('entity_type')
2142+
fields_to_exclude = schema_manager.get_fields_to_exclude(sibling_entity_type)
2143+
if public_entity and not user_in_hubmap_read_group(request):
2144+
filtered_sibling = schema_manager.exclude_properties_from_response(fields_to_exclude, sibling)
2145+
filtered_final_result.append(filtered_sibling)
2146+
else:
2147+
filtered_final_result.append(sibling)
2148+
final_result = filtered_final_result
2149+
2150+
# Check the size of what is to be returned through the AWS Gateway, and replace it with
2151+
# a response that links to an Object in the AWS S3 Bucket, if appropriate.
2152+
resp_body = json.dumps(final_result).encode('utf-8')
2153+
try_resp = try_stash_response_body(resp_body)
2154+
if try_resp is not None:
2155+
return try_resp
2156+
2157+
# Return a regular response through the AWS Gateway
21172158
return jsonify(final_result)
21182159

21192160

@@ -2137,6 +2178,7 @@ def get_siblings(id):
21372178
"""
21382179
@app.route('/entities/<id>/tuplets', methods = ['GET'])
21392180
def get_tuplets(id):
2181+
global anS3Worker
21402182
final_result = []
21412183

21422184
# Token is not required, but if an invalid token is provided,
@@ -2196,39 +2238,49 @@ def get_tuplets(id):
21962238
bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
21972239
tuplet_list = app_neo4j_queries.get_tuplets(neo4j_driver_instance, uuid, status, property_key)
21982240
if property_key is not None:
2199-
return jsonify(tuplet_list)
2241+
final_result = tuplet_list
22002242
# Generate trigger data
22012243
# Skip some of the properties that are time-consuming to generate via triggers
22022244
# Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
22032245
# checks when the target Dataset is public but the revisions are not public
2204-
properties_to_skip = [
2205-
# Properties to skip for Sample
2206-
'direct_ancestor',
2207-
# Properties to skip for Dataset
2208-
'direct_ancestors',
2209-
'collections',
2210-
'upload',
2211-
'title',
2212-
'next_revision_uuid',
2213-
'previous_revision_uuid',
2214-
'associated_collection',
2215-
'creation_action',
2216-
'local_directory_rel_path'
2217-
]
2246+
else:
2247+
properties_to_skip = [
2248+
# Properties to skip for Sample
2249+
'direct_ancestor',
2250+
# Properties to skip for Dataset
2251+
'direct_ancestors',
2252+
'collections',
2253+
'upload',
2254+
'title',
2255+
'next_revision_uuid',
2256+
'previous_revision_uuid',
2257+
'associated_collection',
2258+
'creation_action',
2259+
'local_directory_rel_path'
2260+
]
22182261

2219-
complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, tuplet_list, properties_to_skip)
2220-
# Final result after normalization
2221-
final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
2222-
filtered_final_result = []
2223-
for tuplet in final_result:
2224-
tuple_entity_type = tuplet.get('entity_type')
2225-
fields_to_exclude = schema_manager.get_fields_to_exclude(tuple_entity_type)
2226-
if public_entity and not user_in_hubmap_read_group(request):
2227-
filtered_tuplet = schema_manager.exclude_properties_from_response(fields_to_exclude, tuplet)
2228-
filtered_final_result.append(filtered_tuplet)
2229-
else:
2230-
filtered_final_result.append(tuplet)
2231-
final_result = filtered_final_result
2262+
complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, tuplet_list, properties_to_skip)
2263+
# Final result after normalization
2264+
output = schema_manager.normalize_entities_list_for_response(complete_entities_list)
2265+
filtered_final_result = []
2266+
for tuplet in output:
2267+
tuple_entity_type = tuplet.get('entity_type')
2268+
fields_to_exclude = schema_manager.get_fields_to_exclude(tuple_entity_type)
2269+
if public_entity and not user_in_hubmap_read_group(request):
2270+
filtered_tuplet = schema_manager.exclude_properties_from_response(fields_to_exclude, tuplet)
2271+
filtered_final_result.append(filtered_tuplet)
2272+
else:
2273+
filtered_final_result.append(tuplet)
2274+
final_result = filtered_final_result
2275+
2276+
# Check the size of what is to be returned through the AWS Gateway, and replace it with
2277+
# a response that links to an Object in the AWS S3 Bucket, if appropriate.
2278+
resp_body = json.dumps(final_result).encode('utf-8')
2279+
try_resp = try_stash_response_body(resp_body)
2280+
if try_resp is not None:
2281+
return try_resp
2282+
2283+
# Return a regular response through the AWS Gateway
22322284
return jsonify(final_result)
22332285

22342286

@@ -3719,21 +3771,11 @@ def get_prov_info_for_dataset(id):
37193771
writer.writerows(dataset_prov_list)
37203772
new_tsv_file.seek(0)
37213773
resp_body = new_tsv_file.read()
3722-
3723-
# Check the size of what is to be returned through the AWS Gateway, and replace it with
3724-
# a response that links to an Object in the AWS S3 Bucket, if appropriate.
3725-
try:
3726-
s3_url = anS3Worker.stash_response_body_if_big(resp_body)
3727-
if s3_url is not None:
3728-
return Response(response=s3_url
3729-
, status=303) # See Other
3730-
except Exception as s3exception:
3731-
logger.error(f"Error using anS3Worker to handle len(resp_body)="
3732-
f"{len(resp_body)}.")
3733-
logger.error(s3exception, exc_info=True)
3734-
return Response(response=f"Unexpected error storing large results in S3. See logs."
3735-
, status=500)
3736-
3774+
3775+
try_resp = try_stash_response_body(resp_body)
3776+
if try_resp is not None:
3777+
return try_resp
3778+
37373779
# Return a regular response through the AWS Gateway
37383780
if return_json:
37393781
return jsonify(dataset_prov_list[0])
@@ -4463,6 +4505,25 @@ def validate_token_if_auth_header_exists(request):
44634505
unauthorized_error(user_info.get_data().decode())
44644506

44654507

4508+
def try_stash_response_body(resp_body):
    """
    Offer a serialized response body to the module-level anS3Worker and build
    the Flask Response to send in its place, if any.

    Parameters
    ----------
    resp_body
        The already-serialized response payload. It is handed unchanged to
        anS3Worker.stash_response_body_if_big(), and len() is taken of it
        when an error is logged.

    Returns
    -------
    flask.Response or None
        - A 303 (See Other) Response whose body is the URL returned by the
          worker, when the worker returns a URL.
        - A 500 Response with a generic message when anything in the attempt
          raises; the details are written to the log.
        - None when the worker returns no URL, meaning the caller should
          return its regular response through the AWS Gateway.
    """
    try:
        stashed_url = anS3Worker.stash_response_body_if_big(resp_body)

        # The HuBMAP Commons S3Worker will return None for a URL when the
        # response body is smaller than it is configured to store, so the
        # caller should return the response through the AWS Gateway.
        if stashed_url is None:
            return None

        # See Other: the body of this response is the URL of the stashed Object.
        return Response(response=stashed_url, status=303)
    except Exception as stash_error:
        # Any failure is logged with its traceback and reported to the
        # client as a generic 500.
        logger.error(f"Error using anS3Worker to handle len(resp_body)={len(resp_body)}.")
        logger.error(stash_error, exc_info=True)
        return Response(
            response="Unexpected error storing large results in S3. See logs.",
            status=500,
        )
4524+
4525+
4526+
44664527
"""
44674528
Get the token for internal use only
44684529

0 commit comments

Comments
 (0)