From b461958921f00290377581e69ae462dc29408a36 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 27 Oct 2025 17:33:53 -0400
Subject: [PATCH 1/5] modified the /ancestors/, /parents/, /children/,
 /siblings/, /tuplets/, and /entities/ endpoints to return a URL to S3 if the
 size of the data exceeds 10MB

---
 src/app.py | 241 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 184 insertions(+), 57 deletions(-)

diff --git a/src/app.py b/src/app.py
index 3a9dafe3..51a4a32c 100644
--- a/src/app.py
+++ b/src/app.py
@@ -95,7 +95,7 @@
 # Read the secret key which may be submitted in HTTP Request Headers to override the lockout of
 # updates to entities with characteristics prohibiting their modification.
-LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']
+# LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']
 
 # Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled
 requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning)
 
@@ -762,6 +762,8 @@ def get_provenance_metadata_by_id_for_auth_level(id):
 """
 @app.route('/entities/<id>', methods = ['GET'])
 def get_entity_by_id(id):
+    global anS3Worker
+
     # Token is not required, but if an invalid token provided,
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
 
@@ -900,6 +902,23 @@ def get_entity_by_id(id):
     if public_entity and not user_in_hubmap_read_group(request):
         final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
 
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1616,6 +1635,8 @@ def update_entity(id):
 """
 @app.route('/ancestors/<id>', methods = ['GET'])
 def get_ancestors(id):
+    global anS3Worker
+
     final_result = []
 
     # Token is not required, but if an invalid token provided,
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
 
@@ -1706,6 +1727,26 @@ def get_ancestors(id):
         else:
             filtered_final_result.append(ancestor)
     final_result = filtered_final_result
+
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1824,6 +1865,7 @@ def get_descendants(id):
 """
 @app.route('/parents/<id>', methods = ['GET'])
 def get_parents(id):
+    global anS3Worker
     final_result = []
 
     # Token is not required, but if an invalid token provided,
 
@@ -1915,6 +1957,25 @@ def get_parents(id):
             filtered_final_result.append(parent)
     final_result = filtered_final_result
 
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1935,6 +1996,8 @@ def get_parents(id):
 """
 @app.route('/children/<id>', methods = ['GET'])
 def get_children(id):
+    global anS3Worker
+
     final_result = []
 
     # Get user token from Authorization header
 
@@ -1989,6 +2052,25 @@ def get_children(id):
     # Final result after normalization
     final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -2012,6 +2094,8 @@ def get_children(id):
 """
 @app.route('/entities/<id>/siblings', methods = ['GET'])
 def get_siblings(id):
+    global anS3Worker
+
     final_result = []
 
     # Token is not required, but if an invalid token provided,
 
@@ -2081,39 +2165,60 @@ def get_siblings(id):
         include_revisions = False
     sibling_list = app_neo4j_queries.get_siblings(neo4j_driver_instance, uuid, status, property_key, include_revisions)
     if property_key is not None:
-        return jsonify(sibling_list)
+        final_result = sibling_list
     # Generate trigger data
     # Skip some of the properties that are time-consuming to generate via triggers
     # Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
     # checks when the target Dataset is public but the revisions are not public
-    properties_to_skip = [
-        # Properties to skip for Sample
-        'direct_ancestor',
-        # Properties to skip for Dataset
-        'direct_ancestors',
-        'collections',
-        'upload',
-        'title',
-        'next_revision_uuid',
-        'previous_revision_uuid',
-        'associated_collection',
-        'creation_action',
-        'local_directory_rel_path'
-    ]
+    else:
+        properties_to_skip = [
+            # Properties to skip for Sample
+            'direct_ancestor',
+            # Properties to skip for Dataset
+            'direct_ancestors',
+            'collections',
+            'upload',
+            'title',
+            'next_revision_uuid',
+            'previous_revision_uuid',
+            'associated_collection',
+            'creation_action',
+            'local_directory_rel_path'
+        ]
 
-    complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, sibling_list, properties_to_skip)
-    # Final result after normalization
-    final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
-    filtered_final_result = []
-    for sibling in final_result:
-        sibling_entity_type = sibling.get('entity_type')
-        fields_to_exclude = schema_manager.get_fields_to_exclude(sibling_entity_type)
-        if public_entity and not user_in_hubmap_read_group(request):
-            filtered_sibling = schema_manager.exclude_properties_from_response(fields_to_exclude, sibling)
-            filtered_final_result.append(filtered_sibling)
-        else:
-            filtered_final_result.append(sibling)
-    final_result = filtered_final_result
+        complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, sibling_list, properties_to_skip)
+        # Final result after normalization
+        output = schema_manager.normalize_entities_list_for_response(complete_entities_list)
+        filtered_final_result = []
+        for sibling in output:
+            sibling_entity_type = sibling.get('entity_type')
+            fields_to_exclude = schema_manager.get_fields_to_exclude(sibling_entity_type)
+            if public_entity and not user_in_hubmap_read_group(request):
+                filtered_sibling = schema_manager.exclude_properties_from_response(fields_to_exclude, sibling)
+                filtered_final_result.append(filtered_sibling)
+            else:
+                filtered_final_result.append(sibling)
+        final_result = filtered_final_result
+
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -2137,6 +2242,7 @@ def get_siblings(id):
 """
 @app.route('/entities/<id>/tuplets', methods = ['GET'])
 def get_tuplets(id):
+    global anS3Worker
     final_result = []
 
     # Token is not required, but if an invalid token provided,
 
@@ -2196,39 +2302,60 @@ def get_tuplets(id):
         bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
     tuplet_list = app_neo4j_queries.get_tuplets(neo4j_driver_instance, uuid, status, property_key)
     if property_key is not None:
-        return jsonify(tuplet_list)
+        final_result = tuplet_list
     # Generate trigger data
     # Skip some of the properties that are time-consuming to generate via triggers
     # Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
     # checks when the target Dataset is public but the revisions are not public
-    properties_to_skip = [
-        # Properties to skip for Sample
-        'direct_ancestor',
-        # Properties to skip for Dataset
-        'direct_ancestors',
-        'collections',
-        'upload',
-        'title',
-        'next_revision_uuid',
-        'previous_revision_uuid',
-        'associated_collection',
-        'creation_action',
-        'local_directory_rel_path'
-    ]
+    else:
+        properties_to_skip = [
+            # Properties to skip for Sample
+            'direct_ancestor',
+            # Properties to skip for Dataset
+            'direct_ancestors',
+            'collections',
+            'upload',
+            'title',
+            'next_revision_uuid',
+            'previous_revision_uuid',
+            'associated_collection',
+            'creation_action',
+            'local_directory_rel_path'
+        ]
 
-    complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, tuplet_list, properties_to_skip)
-    # Final result after normalization
-    final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
-    filtered_final_result = []
-    for tuplet in final_result:
-        tuple_entity_type = tuplet.get('entity_type')
-        fields_to_exclude = schema_manager.get_fields_to_exclude(tuple_entity_type)
-        if public_entity and not user_in_hubmap_read_group(request):
-            filtered_tuplet = schema_manager.exclude_properties_from_response(fields_to_exclude, tuplet)
-            filtered_final_result.append(filtered_tuplet)
-        else:
-            filtered_final_result.append(tuplet)
-    final_result = filtered_final_result
+        complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, tuplet_list, properties_to_skip)
+        # Final result after normalization
+        output = schema_manager.normalize_entities_list_for_response(complete_entities_list)
+        filtered_final_result = []
+        for tuplet in output:
+            tuple_entity_type = tuplet.get('entity_type')
+            fields_to_exclude = schema_manager.get_fields_to_exclude(tuple_entity_type)
+            if public_entity and not user_in_hubmap_read_group(request):
+                filtered_tuplet = schema_manager.exclude_properties_from_response(fields_to_exclude, tuplet)
+                filtered_final_result.append(filtered_tuplet)
+            else:
+                filtered_final_result.append(tuplet)
+        final_result = filtered_final_result
+
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 

From 984724d7665c9c7b86c9164436f7c40dbcbbf685 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 27 Oct 2025 17:35:37 -0400
Subject: [PATCH 2/5] uncommented LOCKED_ENTITY_UPDATE_OVERRIDE_KEY, which had
 been commented out for testing

---
 src/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/app.py b/src/app.py
index 51a4a32c..a4a81c8f 100644
--- a/src/app.py
+++ b/src/app.py
@@ -95,7 +95,7 @@
 # Read the secret key which may be submitted in HTTP Request Headers to override the lockout of
 # updates to entities with characteristics prohibiting their modification.
-# LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']
+LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']
 
 # Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled
 requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning)
 

From e04e555abb725b42052dbb8fa896c7260954570c Mon Sep 17 00:00:00 2001
From: kburke <209327+kburke@users.noreply.github.com>
Date: Wed, 29 Oct 2025 16:42:22 -0400
Subject: [PATCH 3/5] Switch to api_base_image 1.2.0, including
 dual-installation of Python 3.13 for uWSGI to use.

---
 docker/entity-api/Dockerfile | 71 +++++++++++++++++++++++-------------
 docker/entity-api/start.sh   |  2 +-
 src/requirements.txt         |  2 +-
 3 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/docker/entity-api/Dockerfile b/docker/entity-api/Dockerfile
index 9d861c54..4e147389 100644
--- a/docker/entity-api/Dockerfile
+++ b/docker/entity-api/Dockerfile
@@ -1,5 +1,5 @@
 # Parent image
-FROM hubmap/api-base-image:1.1.0
+FROM hubmap/api-base-image:1.2.0
 
 LABEL description="HuBMAP Entity API Service"
 
@@ -13,45 +13,64 @@ WORKDIR /usr/src/app
 # Copy from host to image
 COPY . .
 
-# http://nginx.org/en/linux_packages.html#RHEL-CentOS
-# Set up the yum repository to install the latest mainline version of Nginx
-RUN echo $'[nginx-mainline]\n\
-name=nginx mainline repo\n\
-baseurl=http://nginx.org/packages/mainline/centos/$releasever/$basearch/\n\
-gpgcheck=1\n\
-enabled=0\n\
-gpgkey=https://nginx.org/keys/nginx_signing.key\n\
-module_hotfixes=true\n'\
->> /etc/yum.repos.d/nginx.repo
+# Set up the repository file for the mainline version of
+# nginx which dnf should use (in the legacy "yum" location).
+RUN set -eux && \
+    cat <<'EOF' > /etc/yum.repos.d/nginx.repo
+[nginx-mainline]
+name=nginx mainline repo
+baseurl=http://nginx.org/packages/mainline/centos/$releasever/$basearch/
+gpgcheck=1
+enabled=0
+gpgkey=https://nginx.org/keys/nginx_signing.key
+module_hotfixes=true
+EOF
 
 # Reduce the number of layers in image by minimizing the number of separate RUN commands
 # 1 - Install the prerequisites
 # 2 - By default, the repository for stable nginx packages is used. We would like to use mainline nginx packages
-# 3 - Install nginx (using the custom yum repo specified earlier)
+# 3 - Install nginx (using the custom dnf/yum repo specified earlier)
 # 4 - Remove the default nginx config file
 # 5 - Overwrite the nginx.conf with ours to run nginx as non-root
 # 6 - Remove the nginx directory copied from host machine (nginx/conf.d gets mounted to the container)
-# 7 - Upgrade pip (the one installed in base image may be old) and install flask app dependencies (pip3 also works)
+# 7 - Upgrade pip (the one installed in base image may be old) and install service requirements.txt packages
 # 8 - Make the start script executable
-# 9 - Clean all yum cache
-RUN yum install -y yum-utils && \
-    yum-config-manager --enable nginx-mainline && \
-    yum install -y nginx && \
-    rm /etc/nginx/conf.d/default.conf && \
-    mv nginx/nginx.conf /etc/nginx/nginx.conf && \
-    rm -rf nginx && \
-    pip install --upgrade pip -r src/requirements.txt && \
-    chmod +x start.sh && \
-    yum clean all
+# 9 - Clean the dnf/yum cache and other locations to reduce Docker Image layer size.
+# Assume the base image has upgraded dnf and installed its dnf-plugins-core
+ RUN dnf install --assumeyes dnf-plugins-core && \
+     dnf config-manager --enable nginx-mainline && \
+     dnf install --assumeyes nginx && \
+     # Push aside nginx default.conf files that may exist on the system
+     [ ! -f /etc/nginx/conf.d/default.conf ] || mv /etc/nginx/conf.d/default.conf /tmp/etc_nginx_conf.d_default.conf.ORIGINAL && \
+     [ ! -f /etc/nginx/nginx.conf ] || mv /etc/nginx/nginx.conf /tmp/etc_nginx_nginx.conf.ORIGINAL && \
+     # Install the nginx.conf file just copied into WORKDIR
+     mv nginx/nginx.conf /etc/nginx/nginx.conf && \
+     # Clean up the nginx install directory in WORKDIR
+     [ ! -d nginx ] || mv nginx /tmp/nginx_from_WORKDIR && \
+     # Push aside the verification file from the base image which will
+     # no longer report correctly once uWSGI is started for the service.
+     [ ! -f /tmp/verify_uwsgi.sh ] || mv /tmp/verify_uwsgi.sh /tmp/verify_uwsgi.sh.ORIGINAL && \
+     # Install the requirements.txt file for the service
+     pip3.13 install --no-cache-dir --upgrade pip -r src/requirements.txt && \
+     # Make the script referenced in the CMD directive below executable.
+     chmod 755 start.sh && \
+     # Clean up artifacts to slim down this layer of the Docker Image
+     dnf clean all && \
+     rm -rf /var/cache/dnf \
+            /var/log/dnf \
+            /var/log/yum \
+            /root/.cache
 
 # The EXPOSE instruction informs Docker that the container listens on the specified network ports at runtime.
 # EXPOSE does not make the ports of the container accessible to the host.
 # Here 5000 is for the uwsgi socket, 8080 for nginx
 EXPOSE 5000 8080
 
-# Set an entrypoint
-COPY entrypoint.sh /usr/local/bin/entrypoint.sh
-RUN chmod +x /usr/local/bin/entrypoint.sh
+# Set an entrypoint by moving the file copied into the WORKDIR to
+# the location referenced by the ENTRYPOINT directive below, and
+# make it executable.
+RUN mv entrypoint.sh /usr/local/bin/entrypoint.sh && \
+    chmod 755 /usr/local/bin/entrypoint.sh
 ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
 
diff --git a/docker/entity-api/start.sh b/docker/entity-api/start.sh
index 71410ad9..839c251e 100755
--- a/docker/entity-api/start.sh
+++ b/docker/entity-api/start.sh
@@ -5,4 +5,4 @@
 nginx -g 'daemon off;' &
 
 # Start uwsgi and keep it running in foreground
-uwsgi --ini /usr/src/app/src/uwsgi.ini
\ No newline at end of file
+/usr/local/python3.13/bin/uwsgi --ini /usr/src/app/src/uwsgi.ini
diff --git a/src/requirements.txt b/src/requirements.txt
index ae3fa02e..6e110fec 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -12,7 +12,7 @@ nested-lookup==0.2.22
 
 # The commons package requires requests>=2.22.0 and PyYAML>=5.3.1
 requests==2.32.3
-PyYAML==5.4.1
+PyYAML==6.0.3
 
 # Use the published package from PyPI as default
 # Use the branch name of commons from github for testing new changes made in commons from different branch

From 654a4a1eb6ea877f027bbff5a1449207f937bac9 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Thu, 30 Oct 2025 11:32:28 -0400
Subject: [PATCH 4/5] split out the try/except handling of S3 responses into
 its own helper function and replaced its usage in the 8 places it occurred

---
 src/app.py | 178 +++++++++++++++++------------------------------
 1 file changed, 56 insertions(+), 122 deletions(-)

diff --git a/src/app.py b/src/app.py
index a4a81c8f..87c65fd4 100644
--- a/src/app.py
+++ b/src/app.py
@@ -902,21 +902,12 @@ def get_entity_by_id(id):
     if public_entity and not user_in_hubmap_read_group(request):
         final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
 
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
 
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1730,21 +1721,10 @@ def get_ancestors(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
 
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1825,22 +1805,11 @@ def get_descendants(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
-
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1959,22 +1928,11 @@ def get_parents(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
-
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -2054,21 +2012,10 @@ def get_children(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
 
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -2202,21 +2149,10 @@ def get_siblings(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
 
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -2339,21 +2275,10 @@ def get_tuplets(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
 
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -3846,21 +3771,11 @@ def get_prov_info_for_dataset(id):
         writer.writerows(dataset_prov_list)
     new_tsv_file.seek(0)
     resp_body = new_tsv_file.read()
-
-    # Check the size of what is to be returned through the AWS Gateway, and replace it with
-    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
-
+
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+
     # Return a regular response through the AWS Gateway
     if return_json:
         return jsonify(dataset_prov_list[0])
 
@@ -4590,6 +4505,25 @@ def validate_token_if_auth_header_exists(request):
         unauthorized_error(user_info.get_data().decode())
 
 
+def try_stash_response_body(resp_body):
+    try:
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+    return None
+
+
 """
 Get the token for internal use only
 

From 3dbdc157a5f5b2565ed910cab3186cfb59ca16fa Mon Sep 17 00:00:00 2001
From: kburke <209327+kburke@users.noreply.github.com>
Date: Thu, 30 Oct 2025 14:19:30 -0400
Subject: [PATCH 5/5] Switch to nginx-stable release, turn off cache during
 build

---
 docker/docker-development.sh |  2 +-
 docker/entity-api/Dockerfile | 20 +++++++++-----------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/docker/docker-development.sh b/docker/docker-development.sh
index 05febd50..d28334e1 100755
--- a/docker/docker-development.sh
+++ b/docker/docker-development.sh
@@ -106,7 +106,7 @@ else
         cp ../VERSION entity-api
         cp ../BUILD entity-api
 
-        docker compose -f docker-compose.yml -f docker-compose.development.yml -p entity-api build
+        docker compose -f docker-compose.yml -f docker-compose.development.yml -p entity-api build --no-cache
     elif [ "$1" = "start" ]; then
         docker compose -f docker-compose.yml -f docker-compose.development.yml -p entity-api up -d
     elif [ "$1" = "stop" ]; then
diff --git a/docker/entity-api/Dockerfile b/docker/entity-api/Dockerfile
index 4e147389..9ab08a92 100644
--- a/docker/entity-api/Dockerfile
+++ b/docker/entity-api/Dockerfile
@@ -13,22 +13,22 @@ WORKDIR /usr/src/app
 # Copy from host to image
 COPY . .
 
-# Set up the repository file for the mainline version of
+# Set up the repository file for the stable version of
 # nginx which dnf should use (in the legacy "yum" location).
 RUN set -eux && \
     cat <<'EOF' > /etc/yum.repos.d/nginx.repo
-[nginx-mainline]
-name=nginx mainline repo
-baseurl=http://nginx.org/packages/mainline/centos/$releasever/$basearch/
+[nginx-stable]
+name=nginx stable repo
+baseurl=http://nginx.org/packages/centos/$releasever/$basearch/
 gpgcheck=1
-enabled=0
+enabled=1
 gpgkey=https://nginx.org/keys/nginx_signing.key
 module_hotfixes=true
 EOF
 
 # Reduce the number of layers in image by minimizing the number of separate RUN commands
 # 1 - Install the prerequisites
-# 2 - By default, the repository for stable nginx packages is used. We would like to use mainline nginx packages
+# 2 - By default, the repository for stable nginx packages is used.
 # 3 - Install nginx (using the custom dnf/yum repo specified earlier)
 # 4 - Remove the default nginx config file
 # 5 - Overwrite the nginx.conf with ours to run nginx as non-root
@@ -37,9 +37,7 @@ EOF
 # 8 - Make the start script executable
 # 9 - Clean the dnf/yum cache and other locations to reduce Docker Image layer size.
 # Assume the base image has upgraded dnf and installed its dnf-plugins-core
- RUN dnf install --assumeyes dnf-plugins-core && \
-     dnf config-manager --enable nginx-mainline && \
-     dnf install --assumeyes nginx && \
+ RUN dnf install --assumeyes nginx && \
      # Push aside nginx default.conf files that may exist on the system
      [ ! -f /etc/nginx/conf.d/default.conf ] || mv /etc/nginx/conf.d/default.conf /tmp/etc_nginx_conf.d_default.conf.ORIGINAL && \
      [ ! -f /etc/nginx/nginx.conf ] || mv /etc/nginx/nginx.conf /tmp/etc_nginx_nginx.conf.ORIGINAL && \
@@ -53,7 +51,7 @@ EOF
     # Install the requirements.txt file for the service
     pip3.13 install --no-cache-dir --upgrade pip -r src/requirements.txt && \
     # Make the script referenced in the CMD directive below executable.
-    chmod 755 start.sh && \
+    chmod a+x start.sh && \
     # Clean up artifacts to slim down this layer of the Docker Image
     dnf clean all && \
     rm -rf /var/cache/dnf \
@@ -70,7 +68,7 @@ EXPOSE 5000 8080
 # Set an entrypoint by moving the file copied into the WORKDIR to
 # the location referenced by the ENTRYPOINT directive below, and
 # make it executable.
 RUN mv entrypoint.sh /usr/local/bin/entrypoint.sh && \
-    chmod 755 /usr/local/bin/entrypoint.sh
+    chmod a+x /usr/local/bin/entrypoint.sh
 ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
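Reviewer note on the new 303 behavior: the endpoints above return the presigned S3
URL in the response *body* of the 303 See Other (there is no Location header), so
callers cannot rely on automatic redirect following and must fetch the stashed
Object themselves. The sketch below illustrates a client under that contract; the
base URL and UUID are placeholders, not values from this patch set, and it uses the
requests package already pinned in src/requirements.txt.

    import json
    import requests

    # Hypothetical values for illustration; substitute a real entity-api
    # deployment URL and a real entity UUID.
    BASE_URL = 'https://entity.api.example.org'
    entity_uuid = 'ffff0000ffff0000ffff0000ffff0000'

    resp = requests.get(f'{BASE_URL}/ancestors/{entity_uuid}')
    if resp.status_code == 303:
        # The 303 body carries a time-limited presigned URL to the Object the
        # service stashed in its S3 Bucket, so retrieve the JSON from there.
        ancestors = requests.get(resp.text).json()
    else:
        resp.raise_for_status()
        # Small results still come back directly through the AWS Gateway.
        ancestors = resp.json()
    print(json.dumps(ancestors)[:200])

The same handling applies to each endpoint touched by PATCH 1 and PATCH 4, since
they all route their serialized results through try_stash_response_body() before
falling back to a regular jsonify() response.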