Merge pull request #954 from hubmapconsortium/Derek-Furst/redirect-large-responses

yuanzhou · web-flow · commit afd5283acdff · 2025-10-30T13:06:22.000-04:00
Derek furst/redirect large responses
diff --git a/src/app.py b/src/app.py
@@ -762,6 +762,8 @@ def get_provenance_metadata_by_id_for_auth_level(id):
 """
 @app.route('/entities/<id>', methods = ['GET'])
 def get_entity_by_id(id):
+    global anS3Worker
+
     # Token is not required, but if an invalid token provided,
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
@@ -900,6 +902,14 @@ def get_entity_by_id(id):
     if public_entity and not user_in_hubmap_read_group(request):
         final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
     
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
 
@@ -1616,6 +1626,8 @@ def update_entity(id):
 """
 @app.route('/ancestors/<id>', methods = ['GET'])
 def get_ancestors(id):
+    global anS3Worker
+
     final_result = []
 
     # Token is not required, but if an invalid token provided,
@@ -1706,6 +1718,15 @@ def get_ancestors(id):
             else:
                 filtered_final_result.append(ancestor)
         final_result = filtered_final_result
+    
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+    
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
 
@@ -1784,22 +1805,11 @@ def get_descendants(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
-
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+    
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1824,6 +1834,7 @@ def get_descendants(id):
 """
 @app.route('/parents/<id>', methods = ['GET'])
 def get_parents(id):
+    global anS3Worker
     final_result = []
 
     # Token is not required, but if an invalid token provided,
@@ -1915,6 +1926,14 @@ def get_parents(id):
                 filtered_final_result.append(parent)
         final_result = filtered_final_result
 
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+    
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
 
@@ -1935,6 +1954,8 @@ def get_parents(id):
 """
 @app.route('/children/<id>', methods = ['GET'])
 def get_children(id):
+    global anS3Worker
+
     final_result = []
 
     # Get user token from Authorization header
@@ -1989,6 +2010,14 @@ def get_children(id):
         # Final result after normalization
         final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+    
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
 
@@ -2012,6 +2041,8 @@ def get_children(id):
 """
 @app.route('/entities/<id>/siblings', methods = ['GET'])
 def get_siblings(id):
+    global anS3Worker
+
     final_result = []
 
     # Token is not required, but if an invalid token provided,
@@ -2081,39 +2112,49 @@ def get_siblings(id):
                 include_revisions = False
     sibling_list = app_neo4j_queries.get_siblings(neo4j_driver_instance, uuid, status, property_key, include_revisions)
     if property_key is not None:
-        return jsonify(sibling_list)
+        final_result =  sibling_list
     # Generate trigger data
     # Skip some of the properties that are time-consuming to generate via triggers
     # Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
     # checks when the target Dataset is public but the revisions are not public
-    properties_to_skip = [
-        # Properties to skip for Sample
-        'direct_ancestor',
-        # Properties to skip for Dataset
-        'direct_ancestors',
-        'collections',
-        'upload',
-        'title',
-        'next_revision_uuid',
-        'previous_revision_uuid',
-        'associated_collection',
-        'creation_action',
-        'local_directory_rel_path'
-    ]
+    else:
+        properties_to_skip = [
+            # Properties to skip for Sample
+            'direct_ancestor',
+            # Properties to skip for Dataset
+            'direct_ancestors',
+            'collections',
+            'upload',
+            'title',
+            'next_revision_uuid',
+            'previous_revision_uuid',
+            'associated_collection',
+            'creation_action',
+            'local_directory_rel_path'
+        ]
 
-    complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, sibling_list, properties_to_skip)
-    # Final result after normalization
-    final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
-    filtered_final_result = []
-    for sibling in final_result:
-        sibling_entity_type = sibling.get('entity_type')
-        fields_to_exclude = schema_manager.get_fields_to_exclude(sibling_entity_type)
-        if public_entity and not user_in_hubmap_read_group(request):
-            filtered_sibling = schema_manager.exclude_properties_from_response(fields_to_exclude, sibling)
-            filtered_final_result.append(filtered_sibling)
-        else:
-            filtered_final_result.append(sibling)
-    final_result = filtered_final_result
+        complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, sibling_list, properties_to_skip)
+        # Final result after normalization
+        output = schema_manager.normalize_entities_list_for_response(complete_entities_list)
+        filtered_final_result = []
+        for sibling in output:
+            sibling_entity_type = sibling.get('entity_type')
+            fields_to_exclude = schema_manager.get_fields_to_exclude(sibling_entity_type)
+            if public_entity and not user_in_hubmap_read_group(request):
+                filtered_sibling = schema_manager.exclude_properties_from_response(fields_to_exclude, sibling)
+                filtered_final_result.append(filtered_sibling)
+            else:
+                filtered_final_result.append(sibling)
+        final_result = filtered_final_result
+
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+    
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
 
@@ -2137,6 +2178,7 @@ def get_siblings(id):
 """
 @app.route('/entities/<id>/tuplets', methods = ['GET'])
 def get_tuplets(id):
+    global anS3Worker
     final_result = []
 
     # Token is not required, but if an invalid token provided,
@@ -2196,39 +2238,49 @@ def get_tuplets(id):
                 bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
     tuplet_list = app_neo4j_queries.get_tuplets(neo4j_driver_instance, uuid, status, property_key)
     if property_key is not None:
-        return jsonify(tuplet_list)
+        final_result = tuplet_list
     # Generate trigger data
     # Skip some of the properties that are time-consuming to generate via triggers
     # Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
     # checks when the target Dataset is public but the revisions are not public
-    properties_to_skip = [
-        # Properties to skip for Sample
-        'direct_ancestor',
-        # Properties to skip for Dataset
-        'direct_ancestors',
-        'collections',
-        'upload',
-        'title',
-        'next_revision_uuid',
-        'previous_revision_uuid',
-        'associated_collection',
-        'creation_action',
-        'local_directory_rel_path'
-    ]
+    else:
+        properties_to_skip = [
+            # Properties to skip for Sample
+            'direct_ancestor',
+            # Properties to skip for Dataset
+            'direct_ancestors',
+            'collections',
+            'upload',
+            'title',
+            'next_revision_uuid',
+            'previous_revision_uuid',
+            'associated_collection',
+            'creation_action',
+            'local_directory_rel_path'
+        ]
 
-    complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, tuplet_list, properties_to_skip)
-    # Final result after normalization
-    final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
-    filtered_final_result = []
-    for tuplet in final_result:
-        tuple_entity_type = tuplet.get('entity_type')
-        fields_to_exclude = schema_manager.get_fields_to_exclude(tuple_entity_type)
-        if public_entity and not user_in_hubmap_read_group(request):
-            filtered_tuplet = schema_manager.exclude_properties_from_response(fields_to_exclude, tuplet)
-            filtered_final_result.append(filtered_tuplet)
-        else:
-            filtered_final_result.append(tuplet)
-    final_result = filtered_final_result
+        complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, tuplet_list, properties_to_skip)
+        # Final result after normalization
+        output = schema_manager.normalize_entities_list_for_response(complete_entities_list)
+        filtered_final_result = []
+        for tuplet in output:
+            tuple_entity_type = tuplet.get('entity_type')
+            fields_to_exclude = schema_manager.get_fields_to_exclude(tuple_entity_type)
+            if public_entity and not user_in_hubmap_read_group(request):
+                filtered_tuplet = schema_manager.exclude_properties_from_response(fields_to_exclude, tuplet)
+                filtered_final_result.append(filtered_tuplet)
+            else:
+                filtered_final_result.append(tuplet)
+        final_result = filtered_final_result
+    
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+    
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
 
@@ -3719,21 +3771,11 @@ def get_prov_info_for_dataset(id):
         writer.writerows(dataset_prov_list)
         new_tsv_file.seek(0)
         resp_body = new_tsv_file.read()
-
-    # Check the size of what is to be returned through the AWS Gateway, and replace it with
-    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
-
+    
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+    
     # Return a regular response through the AWS Gateway
     if return_json:
         return jsonify(dataset_prov_list[0])
@@ -4463,6 +4505,25 @@ def validate_token_if_auth_header_exists(request):
             unauthorized_error(user_info.get_data().decode())
 
 
+def try_stash_response_body(resp_body):
+    try:
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+    return None
+    
+
+
 """
 Get the token for internal use only