From b461958921f00290377581e69ae462dc29408a36 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 27 Oct 2025 17:33:53 -0400
Subject: [PATCH 1/5] modified the /ancestors/, /parents/, /children/,
 /siblings/, /tuplets/, and /entities/ endpoints to return a URL to S3 if the
 size of the data exceeds 10MB

---
 src/app.py | 241 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 184 insertions(+), 57 deletions(-)

diff --git a/src/app.py b/src/app.py
index 3a9dafe3..51a4a32c 100644
--- a/src/app.py
+++ b/src/app.py
@@ -95,7 +95,7 @@
 # Read the secret key which may be submitted in HTTP Request Headers to override the lockout of
 # updates to entities with characteristics prohibiting their modification.
-LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']
+# LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']
 
 # Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled
 requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning)
 
@@ -762,6 +762,8 @@ def get_provenance_metadata_by_id_for_auth_level(id):
 """
 @app.route('/entities/<id>', methods = ['GET'])
 def get_entity_by_id(id):
+    global anS3Worker
+
     # Token is not required, but if an invalid token provided,
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
 
@@ -900,6 +902,23 @@ def get_entity_by_id(id):
     if public_entity and not user_in_hubmap_read_group(request):
         final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
 
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1616,6 +1635,8 @@ def update_entity(id):
 """
 @app.route('/ancestors/<id>', methods = ['GET'])
 def get_ancestors(id):
+    global anS3Worker
+
     final_result = []
 
     # Token is not required, but if an invalid token provided,
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
 
@@ -1706,6 +1727,26 @@ def get_ancestors(id):
         else:
             filtered_final_result.append(ancestor)
     final_result = filtered_final_result
+
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1824,6 +1865,7 @@ def get_descendants(id):
 """
 @app.route('/parents/<id>', methods = ['GET'])
 def get_parents(id):
+    global anS3Worker
     final_result = []
 
     # Token is not required, but if an invalid token provided,
 
@@ -1915,6 +1957,25 @@ def get_parents(id):
             filtered_final_result.append(parent)
     final_result = filtered_final_result
 
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1935,6 +1996,8 @@ def get_parents(id):
 """
 @app.route('/children/<id>', methods = ['GET'])
 def get_children(id):
+    global anS3Worker
+
     final_result = []
 
     # Get user token from Authorization header
 
@@ -1989,6 +2052,25 @@ def get_children(id):
     # Final result after normalization
     final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -2012,6 +2094,8 @@ def get_children(id):
 """
 @app.route('/entities/<id>/siblings', methods = ['GET'])
 def get_siblings(id):
+    global anS3Worker
+
     final_result = []
 
     # Token is not required, but if an invalid token provided,
 
@@ -2081,39 +2165,60 @@ def get_siblings(id):
         include_revisions = False
     sibling_list = app_neo4j_queries.get_siblings(neo4j_driver_instance, uuid, status, property_key, include_revisions)
     if property_key is not None:
-        return jsonify(sibling_list)
+        final_result = sibling_list
     # Generate trigger data
     # Skip some of the properties that are time-consuming to generate via triggers
     # Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
     # checks when the target Dataset is public but the revisions are not public
-    properties_to_skip = [
-        # Properties to skip for Sample
-        'direct_ancestor',
-        # Properties to skip for Dataset
-        'direct_ancestors',
-        'collections',
-        'upload',
-        'title',
-        'next_revision_uuid',
-        'previous_revision_uuid',
-        'associated_collection',
-        'creation_action',
-        'local_directory_rel_path'
-    ]
+    else:
+        properties_to_skip = [
+            # Properties to skip for Sample
+            'direct_ancestor',
+            # Properties to skip for Dataset
+            'direct_ancestors',
+            'collections',
+            'upload',
+            'title',
+            'next_revision_uuid',
+            'previous_revision_uuid',
+            'associated_collection',
+            'creation_action',
+            'local_directory_rel_path'
+        ]
 
-    complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, sibling_list, properties_to_skip)
-    # Final result after normalization
-    final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
-    filtered_final_result = []
-    for sibling in final_result:
-        sibling_entity_type = sibling.get('entity_type')
-        fields_to_exclude = schema_manager.get_fields_to_exclude(sibling_entity_type)
-        if public_entity and not user_in_hubmap_read_group(request):
-            filtered_sibling = schema_manager.exclude_properties_from_response(fields_to_exclude, sibling)
-            filtered_final_result.append(filtered_sibling)
-        else:
-            filtered_final_result.append(sibling)
-    final_result = filtered_final_result
+        complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, sibling_list, properties_to_skip)
+        # Final result after normalization
+        output = schema_manager.normalize_entities_list_for_response(complete_entities_list)
+        filtered_final_result = []
+        for sibling in output:
+            sibling_entity_type = sibling.get('entity_type')
+            fields_to_exclude = schema_manager.get_fields_to_exclude(sibling_entity_type)
+            if public_entity and not user_in_hubmap_read_group(request):
+                filtered_sibling = schema_manager.exclude_properties_from_response(fields_to_exclude, sibling)
+                filtered_final_result.append(filtered_sibling)
+            else:
+                filtered_final_result.append(sibling)
+        final_result = filtered_final_result
+
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -2137,6 +2242,7 @@ def get_siblings(id):
 """
 @app.route('/entities/<id>/tuplets', methods = ['GET'])
 def get_tuplets(id):
+    global anS3Worker
     final_result = []
 
     # Token is not required, but if an invalid token provided,
 
@@ -2196,39 +2302,60 @@ def get_tuplets(id):
         bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
     tuplet_list = app_neo4j_queries.get_tuplets(neo4j_driver_instance, uuid, status, property_key)
     if property_key is not None:
-        return jsonify(tuplet_list)
+        final_result = tuplet_list
     # Generate trigger data
     # Skip some of the properties that are time-consuming to generate via triggers
     # Also skip next_revision_uuid and previous_revision_uuid for Dataset to avoid additional
     # checks when the target Dataset is public but the revisions are not public
-    properties_to_skip = [
-        # Properties to skip for Sample
-        'direct_ancestor',
-        # Properties to skip for Dataset
-        'direct_ancestors',
-        'collections',
-        'upload',
-        'title',
-        'next_revision_uuid',
-        'previous_revision_uuid',
-        'associated_collection',
-        'creation_action',
-        'local_directory_rel_path'
-    ]
+    else:
+        properties_to_skip = [
+            # Properties to skip for Sample
+            'direct_ancestor',
+            # Properties to skip for Dataset
+            'direct_ancestors',
+            'collections',
+            'upload',
+            'title',
+            'next_revision_uuid',
+            'previous_revision_uuid',
+            'associated_collection',
+            'creation_action',
+            'local_directory_rel_path'
+        ]
 
-    complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, tuplet_list, properties_to_skip)
-    # Final result after normalization
-    final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
-    filtered_final_result = []
-    for tuplet in final_result:
-        tuple_entity_type = tuplet.get('entity_type')
-        fields_to_exclude = schema_manager.get_fields_to_exclude(tuple_entity_type)
-        if public_entity and not user_in_hubmap_read_group(request):
-            filtered_tuplet = schema_manager.exclude_properties_from_response(fields_to_exclude, tuplet)
-            filtered_final_result.append(filtered_tuplet)
-        else:
-            filtered_final_result.append(tuplet)
-    final_result = filtered_final_result
+        complete_entities_list = schema_manager.get_complete_entities_list(request.args, token, tuplet_list, properties_to_skip)
+        # Final result after normalization
+        output = schema_manager.normalize_entities_list_for_response(complete_entities_list)
+        filtered_final_result = []
+        for tuplet in output:
+            tuple_entity_type = tuplet.get('entity_type')
+            fields_to_exclude = schema_manager.get_fields_to_exclude(tuple_entity_type)
+            if public_entity and not user_in_hubmap_read_group(request):
+                filtered_tuplet = schema_manager.exclude_properties_from_response(fields_to_exclude, tuplet)
+                filtered_final_result.append(filtered_tuplet)
+            else:
+                filtered_final_result.append(tuplet)
+        final_result = filtered_final_result
+
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    try:
+        resp_body = json.dumps(final_result).encode('utf-8')
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+
+    # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 

From 984724d7665c9c7b86c9164436f7c40dbcbbf685 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Mon, 27 Oct 2025 17:35:37 -0400
Subject: [PATCH 2/5] uncommented LOCKED_ENTITY_UPDATE_OVERRIDE_KEY, which had
 been commented out for testing

---
 src/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/app.py b/src/app.py
index 51a4a32c..a4a81c8f 100644
--- a/src/app.py
+++ b/src/app.py
@@ -95,7 +95,7 @@
 # Read the secret key which may be submitted in HTTP Request Headers to override the lockout of
 # updates to entities with characteristics prohibiting their modification.
-# LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']
+LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']
 
 # Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled
 requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning)
 

From e04e555abb725b42052dbb8fa896c7260954570c Mon Sep 17 00:00:00 2001
From: kburke <209327+kburke@users.noreply.github.com>
Date: Wed, 29 Oct 2025 16:42:22 -0400
Subject: [PATCH 3/5] Switch to api_base_image 1.2.0, including
 dual-installation of Python 3.13 for uWSGI to use.

---
 docker/entity-api/Dockerfile | 71 +++++++++++++++++++++++-------------
 docker/entity-api/start.sh   |  2 +-
 src/requirements.txt         |  2 +-
 3 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/docker/entity-api/Dockerfile b/docker/entity-api/Dockerfile
index 9d861c54..4e147389 100644
--- a/docker/entity-api/Dockerfile
+++ b/docker/entity-api/Dockerfile
@@ -1,5 +1,5 @@
 # Parent image
-FROM hubmap/api-base-image:1.1.0
+FROM hubmap/api-base-image:1.2.0
 
 LABEL description="HuBMAP Entity API Service"
 
@@ -13,45 +13,64 @@ WORKDIR /usr/src/app
 # Copy from host to image
 COPY . .
 
-# http://nginx.org/en/linux_packages.html#RHEL-CentOS
-# Set up the yum repository to install the latest mainline version of Nginx
-RUN echo $'[nginx-mainline]\n\
-name=nginx mainline repo\n\
-baseurl=http://nginx.org/packages/mainline/centos/$releasever/$basearch/\n\
-gpgcheck=1\n\
-enabled=0\n\
-gpgkey=https://nginx.org/keys/nginx_signing.key\n\
-module_hotfixes=true\n'\
->> /etc/yum.repos.d/nginx.repo
+# Set up the repository file for the mainline version of
+# nginx which dnf should use (in the legacy "yum" location).
+RUN set -eux && \
+    cat <<'EOF' > /etc/yum.repos.d/nginx.repo
+[nginx-mainline]
+name=nginx mainline repo
+baseurl=http://nginx.org/packages/mainline/centos/$releasever/$basearch/
+gpgcheck=1
+enabled=0
+gpgkey=https://nginx.org/keys/nginx_signing.key
+module_hotfixes=true
+EOF
 
 # Reduce the number of layers in image by minimizing the number of separate RUN commands
 # 1 - Install the prerequisites
 # 2 - By default, the repository for stable nginx packages is used. We would like to use mainline nginx packages
-# 3 - Install nginx (using the custom yum repo specified earlier)
+# 3 - Install nginx (using the custom dnf/yum repo specified earlier)
 # 4 - Remove the default nginx config file
 # 5 - Overwrite the nginx.conf with ours to run nginx as non-root
 # 6 - Remove the nginx directory copied from host machine (nginx/conf.d gets mounted to the container)
-# 7 - Upgrade pip (the one installed in base image may be old) and install flask app dependencies (pip3 also works)
+# 7 - Upgrade pip (the one installed in base image may be old) and install service requirements.txt packages
 # 8 - Make the start script executable
-# 9 - Clean all yum cache
-RUN yum install -y yum-utils && \
-    yum-config-manager --enable nginx-mainline && \
-    yum install -y nginx && \
-    rm /etc/nginx/conf.d/default.conf && \
-    mv nginx/nginx.conf /etc/nginx/nginx.conf && \
-    rm -rf nginx && \
-    pip install --upgrade pip -r src/requirements.txt && \
-    chmod +x start.sh && \
-    yum clean all
+# 9 - Clean the dnf/yum cache and other locations to reduce Docker Image layer size.
+# Assume the base image has upgraded dnf and installed its dnf-plugins-core
+ RUN dnf install --assumeyes dnf-plugins-core && \
+     dnf config-manager --enable nginx-mainline && \
+     dnf install --assumeyes nginx && \
+     # Push aside nginx default.conf files that may exist on the system
+     [ ! -f /etc/nginx/conf.d/default.conf ] || mv /etc/nginx/conf.d/default.conf /tmp/etc_nginx_conf.d_default.conf.ORIGINAL && \
+     [ ! -f /etc/nginx/nginx.conf ] || mv /etc/nginx/nginx.conf /tmp/etc_nginx_nginx.conf.ORIGINAL && \
+     # Install the nginx.conf file just copied into WORKDIR
+     mv nginx/nginx.conf /etc/nginx/nginx.conf && \
+     # Clean up the nginx install directory in WORKDIR
+     [ ! -d nginx ] || mv nginx /tmp/nginx_from_WORKDIR && \
+     # Push aside the verification file from the base image which will
+     # no longer report correctly once uWSGI is started for the service.
+     [ ! -f /tmp/verify_uwsgi.sh ] || mv /tmp/verify_uwsgi.sh /tmp/verify_uwsgi.sh.ORIGINAL && \
+     # Install the requirements.txt file for the service
+     pip3.13 install --no-cache-dir --upgrade pip -r src/requirements.txt && \
+     # Make the script referenced in the CMD directive below executable.
+     chmod 755 start.sh && \
+     # Clean up artifacts to slim down this layer of the Docker Image
+     dnf clean all && \
+     rm -rf /var/cache/dnf \
+            /var/log/dnf \
+            /var/log/yum \
+            /root/.cache
 
 # The EXPOSE instruction informs Docker that the container listens on the specified network ports at runtime.
 # EXPOSE does not make the ports of the container accessible to the host.
 # Here 5000 is for the uwsgi socket, 8080 for nginx
 EXPOSE 5000 8080
 
-# Set an entrypoint
-COPY entrypoint.sh /usr/local/bin/entrypoint.sh
-RUN chmod +x /usr/local/bin/entrypoint.sh
+# Set an entrypoint by moving the file copied into the WORKDIR to
+# the location referenced by the ENTRYPOINT directive below, and
+# make it executable.
+RUN mv entrypoint.sh /usr/local/bin/entrypoint.sh && \
+    chmod 755 /usr/local/bin/entrypoint.sh
 ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
 
diff --git a/docker/entity-api/start.sh b/docker/entity-api/start.sh
index 71410ad9..839c251e 100755
--- a/docker/entity-api/start.sh
+++ b/docker/entity-api/start.sh
@@ -5,4 +5,4 @@
 nginx -g 'daemon off;' &
 
 # Start uwsgi and keep it running in foreground
-uwsgi --ini /usr/src/app/src/uwsgi.ini
\ No newline at end of file
+/usr/local/python3.13/bin/uwsgi --ini /usr/src/app/src/uwsgi.ini
diff --git a/src/requirements.txt b/src/requirements.txt
index ae3fa02e..6e110fec 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -12,7 +12,7 @@ nested-lookup==0.2.22
 
 # The commons package requires requests>=2.22.0 and PyYAML>=5.3.1
 requests==2.32.3
-PyYAML==5.4.1
+PyYAML==6.0.3
 
 # Use the published package from PyPI as default
 # Use the branch name of commons from github for testing new changes made in commons from different branch

From 654a4a1eb6ea877f027bbff5a1449207f937bac9 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt
Date: Thu, 30 Oct 2025 11:32:28 -0400
Subject: [PATCH 4/5] split out the try/except handling of S3 responses into
 its own helper function and replaced its usage in the 8 places it occurred

---
 src/app.py | 178 +++++++++++++++++------------------------------
 1 file changed, 56 insertions(+), 122 deletions(-)

diff --git a/src/app.py b/src/app.py
index a4a81c8f..87c65fd4 100644
--- a/src/app.py
+++ b/src/app.py
@@ -902,21 +902,12 @@ def get_entity_by_id(id):
     if public_entity and not user_in_hubmap_read_group(request):
         final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
 
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
+    # Check the size of what is to be returned through the AWS Gateway, and replace it with
+    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
 
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1730,21 +1721,10 @@ def get_ancestors(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
 
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1825,22 +1805,11 @@ def get_descendants(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
-
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -1959,22 +1928,11 @@ def get_parents(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
-
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -2054,21 +2012,10 @@ def get_children(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
 
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -2202,21 +2149,10 @@ def get_siblings(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
 
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -2339,21 +2275,10 @@ def get_tuplets(id):
 
     # Check the size of what is to be returned through the AWS Gateway, and replace it with
     # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        resp_body = json.dumps(final_result).encode('utf-8')
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
-        # smaller than it is configured to store, so the response should be returned through
-        # the AWS Gateway
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
+    resp_body = json.dumps(final_result).encode('utf-8')
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
 
     # Return a regular response through the AWS Gateway
     return jsonify(final_result)
 
@@ -3846,21 +3771,11 @@ def get_prov_info_for_dataset(id):
         writer.writerows(dataset_prov_list)
     new_tsv_file.seek(0)
     resp_body = new_tsv_file.read()
-
-    # Check the size of what is to be returned through the AWS Gateway, and replace it with
-    # a response that links to an Object in the AWS S3 Bucket, if appropriate.
-    try:
-        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
-        if s3_url is not None:
-            return Response(response=s3_url
-                            , status=303)  # See Other
-    except Exception as s3exception:
-        logger.error(f"Error using anS3Worker to handle len(resp_body)="
-                     f"{len(resp_body)}.")
-        logger.error(s3exception, exc_info=True)
-        return Response(response=f"Unexpected error storing large results in S3. See logs."
-                        , status=500)
-
+
+    try_resp = try_stash_response_body(resp_body)
+    if try_resp is not None:
+        return try_resp
+
     # Return a regular response through the AWS Gateway
     if return_json:
         return jsonify(dataset_prov_list[0])
 
@@ -4590,6 +4505,25 @@ def validate_token_if_auth_header_exists(request):
         unauthorized_error(user_info.get_data().decode())
 
 
+def try_stash_response_body(resp_body):
+    try:
+        s3_url = anS3Worker.stash_response_body_if_big(resp_body)
+        if s3_url is not None:
+            return Response(response=s3_url
+                            , status=303)  # See Other
+        # The HuBMAP Commons S3Worker will return None for a URL when the response body is
+        # smaller than it is configured to store, so the response should be returned through
+        # the AWS Gateway
+    except Exception as s3exception:
+        logger.error(f"Error using anS3Worker to handle len(resp_body)="
+                     f"{len(resp_body)}.")
+        logger.error(s3exception, exc_info=True)
+        return Response(response=f"Unexpected error storing large results in S3. See logs."
+                        , status=500)
+    return None
+
+
 """
 Get the token for internal use only
 

From 3dbdc157a5f5b2565ed910cab3186cfb59ca16fa Mon Sep 17 00:00:00 2001
From: kburke <209327+kburke@users.noreply.github.com>
Date: Thu, 30 Oct 2025 14:19:30 -0400
Subject: [PATCH 5/5] Switch to nginx-stable release, turn off cache during
 build

---
 docker/docker-development.sh |  2 +-
 docker/entity-api/Dockerfile | 20 +++++++++-----------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/docker/docker-development.sh b/docker/docker-development.sh
index 05febd50..d28334e1 100755
--- a/docker/docker-development.sh
+++ b/docker/docker-development.sh
@@ -106,7 +106,7 @@ else
         cp ../VERSION entity-api
         cp ../BUILD entity-api
 
-        docker compose -f docker-compose.yml -f docker-compose.development.yml -p entity-api build
+        docker compose -f docker-compose.yml -f docker-compose.development.yml -p entity-api build --no-cache
     elif [ "$1" = "start" ]; then
         docker compose -f docker-compose.yml -f docker-compose.development.yml -p entity-api up -d
     elif [ "$1" = "stop" ]; then
diff --git a/docker/entity-api/Dockerfile b/docker/entity-api/Dockerfile
index 4e147389..9ab08a92 100644
--- a/docker/entity-api/Dockerfile
+++ b/docker/entity-api/Dockerfile
@@ -13,22 +13,22 @@ WORKDIR /usr/src/app
 # Copy from host to image
 COPY . .
 
-# Set up the repository file for the mainline version of
+# Set up the repository file for the stable version of
 # nginx which dnf should use (in the legacy "yum" location).
 RUN set -eux && \
     cat <<'EOF' > /etc/yum.repos.d/nginx.repo
-[nginx-mainline]
-name=nginx mainline repo
-baseurl=http://nginx.org/packages/mainline/centos/$releasever/$basearch/
+[nginx-stable]
+name=nginx stable repo
+baseurl=http://nginx.org/packages/centos/$releasever/$basearch/
 gpgcheck=1
-enabled=0
+enabled=1
 gpgkey=https://nginx.org/keys/nginx_signing.key
 module_hotfixes=true
 EOF
 
 # Reduce the number of layers in image by minimizing the number of separate RUN commands
 # 1 - Install the prerequisites
-# 2 - By default, the repository for stable nginx packages is used. We would like to use mainline nginx packages
+# 2 - By default, the repository for stable nginx packages is used.
 # 3 - Install nginx (using the custom dnf/yum repo specified earlier)
 # 4 - Remove the default nginx config file
 # 5 - Overwrite the nginx.conf with ours to run nginx as non-root
@@ -37,9 +37,7 @@ EOF
 # 8 - Make the start script executable
 # 9 - Clean the dnf/yum cache and other locations to reduce Docker Image layer size.
 # Assume the base image has upgraded dnf and installed its dnf-plugins-core
- RUN dnf install --assumeyes dnf-plugins-core && \
-     dnf config-manager --enable nginx-mainline && \
-     dnf install --assumeyes nginx && \
+ RUN dnf install --assumeyes nginx && \
      # Push aside nginx default.conf files that may exist on the system
      [ ! -f /etc/nginx/conf.d/default.conf ] || mv /etc/nginx/conf.d/default.conf /tmp/etc_nginx_conf.d_default.conf.ORIGINAL && \
      [ ! -f /etc/nginx/nginx.conf ] || mv /etc/nginx/nginx.conf /tmp/etc_nginx_nginx.conf.ORIGINAL && \
@@ -53,7 +51,7 @@ EOF
     # Install the requirements.txt file for the service
     pip3.13 install --no-cache-dir --upgrade pip -r src/requirements.txt && \
     # Make the script referenced in the CMD directive below executable.
-    chmod 755 start.sh && \
+    chmod a+x start.sh && \
     # Clean up artifacts to slim down this layer of the Docker Image
     dnf clean all && \
     rm -rf /var/cache/dnf \
@@ -70,7 +68,7 @@ EXPOSE 5000 8080
 # Set an entrypoint by moving the file copied into the WORKDIR to
 # the location referenced by the ENTRYPOINT directive below, and
 # make it executable.
 RUN mv entrypoint.sh /usr/local/bin/entrypoint.sh && \
-    chmod 755 /usr/local/bin/entrypoint.sh
+    chmod a+x /usr/local/bin/entrypoint.sh
 ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
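Reviewer note on the new 303 behavior: the endpoints above return the presigned S3
URL in the response *body* of the 303 See Other (there is no Location header), so
callers cannot rely on automatic redirect following and must fetch the stashed
Object themselves. The sketch below illustrates a client under that contract; the
base URL and UUID are placeholders, not values from this patch set, and it uses the
requests package already pinned in src/requirements.txt.

    import json
    import requests

    # Hypothetical values for illustration; substitute a real entity-api
    # deployment URL and a real entity UUID.
    BASE_URL = 'https://entity.api.example.org'
    entity_uuid = 'ffff0000ffff0000ffff0000ffff0000'

    resp = requests.get(f'{BASE_URL}/ancestors/{entity_uuid}')
    if resp.status_code == 303:
        # The 303 body carries a time-limited presigned URL to the Object the
        # service stashed in its S3 Bucket, so retrieve the JSON from there.
        ancestors = requests.get(resp.text).json()
    else:
        resp.raise_for_status()
        # Small results still come back directly through the AWS Gateway.
        ancestors = resp.json()
    print(json.dumps(ancestors)[:200])

The same handling applies to each endpoint touched by PATCH 1 and PATCH 4, since
they all route their serialized results through try_stash_response_body() before
falling back to a regular jsonify() response.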