From 68829e2eb9bdf400c19c84a27fab6f028d2aabaf Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Tue, 30 Sep 2025 18:59:35 -0400 Subject: [PATCH 1/9] Bump version to 2.6.1 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index e70b4523..6a6a3d8e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.6.0 +2.6.1 From bd11d07655f00f7f11e2ce693bb14ff40edc8201 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 1 Oct 2025 19:52:13 -0400 Subject: [PATCH 2/9] Initial implementation of property exclusion from URL --- src/app.py | 111 ++++++++++++++++++++-------- src/schema/schema_manager.py | 115 ++++++++++++++++++++++++++++- src/schema/schema_neo4j_queries.py | 14 +++- src/schema/schema_triggers.py | 23 +++++- 4 files changed, 227 insertions(+), 36 deletions(-) diff --git a/src/app.py b/src/app.py index f118f000..d4cb9f19 100644 --- a/src/app.py +++ b/src/app.py @@ -774,11 +774,82 @@ def get_entity_by_id(id): # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists entity_dict = query_target_entity(id, token) normalized_entity_type = entity_dict['entity_type'] + + # These are the top-level fields and nested fields defined in the schema yaml fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type) + + + ###### + + # Only support defined query string parameters for filtering purposes + # 'property' was initially introduced to return a single field + # 'exclude' is newly added as a short-term workaround otherwise AWS API Gateway + # returns 500 error when the large paylod >10 MB + # When both 'property' and 'exclude' are specified in the URL, 'property' dominates + # since the final result is a single field value - Zhou 10/1/2025 + supported_qs_params = ['property', 'exclude'] + + triggered_properties_to_skip = [] + neo4j_properties_to_skip = [] + + if bool(request.args): + # First make sure the user provided query string params are valid + for param in request.args: + if param not in 
supported_qs_params: + bad_request_error(f"Only the following URL query string parameters (case-sensitive) are supported: {COMMA_SEPARATOR.join(supported_qs_params)}") + + # Return a single property key and value using ?property= + if 'property' in request.args: + single_property_key = request.args.get('property') + + # Single property key that is immediately avaibale in Neo4j without running any triggers + # The `data_access_level` property is available in all entities Donor/Sample/Dataset + # and this filter is being used by gateway to check the data_access_level for file assets + # The `status` property is only available in Dataset and being used by search-api for revision + supported_property_keys = ['data_access_level', 'status'] + + # Validate the target property + if single_property_key not in supported_property_keys: + bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(supported_property_keys)}") + + if single_property_key == 'status' and \ + not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): + bad_request_error(f"Only Dataset or Publication supports 'status' property key in the query string") + + # Response with the property value directly + # Don't use jsonify() on string value + return entity_dict[single_property_key] + + # Exclude fields—either top-level or nested—specified by the user via the URL query string, + # using the format `?exclude=a.b,a.c,x`, where: + # - `x` is a top-level property + # - `a.b` and `a.c` are nested fields (dot-notated) + # + # Note: This is not the most efficient approach, as exclusion is performed after the Neo4j query + # rather than within it. However, it leverages the existing `exclude_properties_from_response()` + # function for simplicity and maintainability. 
- Zhou 10/1/2025 + if 'exclude' in request.args: + properties_to_exclude_str = request.args.get('exclude') + + if properties_to_exclude_str is not None: + flat_list = [item.strip() for item in properties_to_exclude_str.split(",")] + + logger.info(f"User specified flat_list: {flat_list}") + + # Determine which properties to exclude from triggers and which to exclude directly from the resulting Neo4j `entity_dict` + triggered_properties_to_skip, neo4j_properties_to_skip = schema_manager.determine_property_exclusion_type(normalized_entity_type, flat_list) + else: + bad_request_error("Must specify the properties to exclude in the form of exclude=[a, b, c, d.e]") + + ###### + + # Get the generated complete entity result from cache if exists # Otherwise re-generate on the fly - complete_dict = schema_manager.get_complete_entity_result(request, token, entity_dict) + # NOTE: top-level properties in `triggered_properties_to_skip` will skip the trigger methods + # Nested properties like `direct_ancestors.files` will be handled by the trigger method - Zhou 10/1/2025 + complete_dict = schema_manager.get_complete_entity_result(request, token, entity_dict, triggered_properties_to_skip) # Determine if the entity is publicly visible base on its data, only. # To verify if a Collection is public, it is necessary to have its Datasets, which @@ -813,37 +884,19 @@ def get_entity_by_id(id): forbidden_error(f"The requested {normalized_entity_type} has non-public data." 
f" A Globus token with access permission is required.") + ######## + # Remove the top-level properties that are directly available in the resulting Neo4j `entity_dict` + for item in neo4j_properties_to_skip: + complete_dict.pop(item) + # Also normalize the result based on schema final_result = schema_manager.normalize_entity_result_for_response(complete_dict) - # Result filtering based on query string - # The `data_access_level` property is available in all entities Donor/Sample/Dataset - # and this filter is being used by gateway to check the data_access_level for file assets - # The `status` property is only available in Dataset and being used by search-api for revision - result_filtering_accepted_property_keys = ['data_access_level', 'status'] - - if bool(request.args): - property_key = request.args.get('property') - - if property_key is not None: - # Validate the target property - if property_key not in result_filtering_accepted_property_keys: - bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}") - - if property_key == 'status' and \ - not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): - bad_request_error(f"Only Dataset or Publication supports 'status' property key in the query string") - - # Response with the property value directly - # Don't use jsonify() on string value - return complete_dict[property_key] - else: - bad_request_error("The specified query string is not supported. 
Use '?property=' to filter the result") - else: - # Response with the dict - if public_entity and not user_in_hubmap_read_group(request): - final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result) - return jsonify(final_result) + # Response with the dict + if public_entity and not user_in_hubmap_read_group(request): + final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result) + + return jsonify(final_result) """ diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 7be00fb3..b49e372b 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -305,7 +305,7 @@ def get_fields_to_exclude(normalized_class=None): Parameters ---------- excluded_fields : list - A list of the fields to be excluded + A JSON list of the fields to be excluded, may have nested fields output_dict : dictionary A dictionary representing the data to be modified @@ -349,6 +349,116 @@ def delete_nested_field(data, nested_path): return output_dict +""" +Transform a flat list of dot-notated strings into a hybrid list that: +- keeps plain strings as-is +- converts entries with dot-notation (like 'direct_ancestors.files') into a dictionary, grouping by the prefix + +Example: ['a.b', 'a.c', 'x'] -> ['x', {'a': ['b', 'c']}] + +Used by `GET /entities/?exclude=a.b, a.c, x` to build a JSON list +that can be futher processed by `exclude_properties_from_response()`. + +Parameters +---------- +flat_list : list + A flat list of strings, dot-notated strings are optional and can be used to indicate nested fields + Example: ['a.b', 'a.c', 'x'] + +Returns +------- +list + A list mixing strings and grouped dicts +""" +def flatten_and_group_dot_notation_fields(flat_list): + output_list = [] + grouped_dict = {} + + for item in flat_list: + if '.' 
in item: + prefix, field = item.split('.', 1) + grouped_dict.setdefault(prefix, []).append(field) + else: + output_list.append(item) + + # Add grouped items as dictionaries + for prefix, fields in grouped_dict.items(): + output_list.append({prefix: fields}) + + return output_list + + +""" +Transform a flat list of dot-notated strings into a hybrid list that: +- keeps plain strings as-is +- converts entries with dot-notation (like 'direct_ancestors.files') into a dictionary, grouping by the prefix + +Example: ['a.b', 'a.c', 'x'] -> ['x', {'a': ['b', 'c']}] + +Used by `GET /entities/?exclude=a.b, a.c, x` to build a JSON list +that can be futher processed by `exclude_properties_from_response()`. + +Parameters +---------- +flat_list : list + A flat list of strings, dot-notated strings are optional and can be used to indicate nested fields + Example: ['a.b', 'a.c', 'x'] + +Returns +------- +list + A list mixing strings and grouped dicts +""" +def determine_property_exclusion_type(normalized_entity_type, flat_list): + global _schema + + triggered_properties_to_skip = [] + neo4j_properties_to_skip = [] + + properties = _schema['ENTITIES'][normalized_entity_type]['properties'] + + top_level_list = [] + + for item in flat_list: + # Only target at properties don't use the dot notation + if '.' not in item: + top_level_list.append(item) + + + for item in top_level_list: + if item in properties and 'on_read_trigger' in properties[item]: + triggered_properties_to_skip.append(item) + else: + neo4j_properties_to_skip.append(item) + + + return triggered_properties_to_skip, neo4j_properties_to_skip + + +""" +Use the Flask request.args MultiDict to see if 'reindex' is a URL parameter passed in with the +request and if it indicates reindexing should be supressed. Default to reindexing in all other cases. 
+ +Parameters +---------- +request: Flask request object + The instance of Flask request passed in from application request + +Returns +------- +bool +""" +def get_fields_to_exclude_from_query_string(request): + properties_to_exclude_str = request.args.get('exclude') + + properties_to_exclude_list = [item.strip() for item in properties_to_exclude_str.split(",")] + + # Transform the flat JSON string list to a Python list mixing strings and grouped dicts + prepared_list = flatten_and_group_dot_notation_fields(properties_to_exclude_list) + + return properties_to_exclude_list + + """ Generating triggered data based on the target events and methods @@ -396,6 +506,8 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req # decides the ordering of which trigger method gets to run first properties = schema_section[normalized_class]['properties'] + logger.info(f"Skipping triggered data generation for the following properties: {properties_to_skip}") + # Set each property value and put all resulting data into a dictionary for: # before_create_trigger|before_update_trigger|on_read_trigger # No property value to be set for: after_create_trigger|after_update_trigger @@ -2001,7 +2113,6 @@ def convert_str_literal(data_str): data = ast.literal_eval(data_str) if isinstance(data, (list, dict)): - logger.info(f"The input string literal has been converted to {type(data)} successfully") return data else: logger.info(f"The input string literal is not list or dict after evaluation, return the original string input") diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 738ce81d..575ec6a8 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -572,7 +572,7 @@ def get_uploads(neo4j_driver, uuid, property_key = None): list A unique list of uuids of source entities """ -def get_dataset_direct_ancestors(neo4j_driver, uuid, property_key = None): +def get_dataset_direct_ancestors(neo4j_driver, 
uuid, property_key = None, properties_to_exclude = []): results = [] if property_key: @@ -580,9 +580,15 @@ def get_dataset_direct_ancestors(neo4j_driver, uuid, property_key = None): f"WHERE t.uuid = '{uuid}' " f"RETURN apoc.coll.toSet(COLLECT(s.{property_key})) AS {record_field_name}") else: - query = (f"MATCH (s:Entity)-[:ACTIVITY_INPUT]->(a:Activity)-[:ACTIVITY_OUTPUT]->(t:Dataset) " - f"WHERE t.uuid = '{uuid}' " - f"RETURN apoc.coll.toSet(COLLECT(s)) AS {record_field_name}") + if properties_to_exclude: + query = (f"MATCH (s:Entity)-[:ACTIVITY_INPUT]->(a:Activity)-[:ACTIVITY_OUTPUT]->(t:Dataset) " + f"WHERE t.uuid = '{uuid}' " + f"WITH apoc.coll.toSet(COLLECT(s)) AS uniqueDirectAncestors " + f"RETURN [a IN uniqueDirectAncestors | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {properties_to_exclude}))] AS {record_field_name}") + else: + query = (f"MATCH (s:Entity)-[:ACTIVITY_INPUT]->(a:Activity)-[:ACTIVITY_OUTPUT]->(t:Dataset) " + f"WHERE t.uuid = '{uuid}' " + f"RETURN apoc.coll.toSet(COLLECT(s)) AS {record_field_name}") logger.info("======get_dataset_direct_ancestors() query======") logger.debug(query) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 4445718e..ac3020a8 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -987,7 +987,28 @@ def get_dataset_direct_ancestors(property_key, normalized_type, request, user_to logger.info(f"Executing 'get_dataset_direct_ancestors()' trigger method on uuid: {existing_data_dict['uuid']}") - direct_ancestors_list = schema_neo4j_queries.get_dataset_direct_ancestors(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) + # Get all the mixed fields either top-level or nested from the original query string in request URL + all_properties_to_exclude = schema_manager.get_fields_to_exclude_from_query_string(request) + + logger.info(f"all_properties_to_exclude: {all_properties_to_exclude}") + + # Find the specific sub list, depth is 
limited to 2 + # For example, direct_ancestors.files is supported, but direct_ancestors.metadata.acquisition_id is not - Zhou 10/1/2025 + neo4j_properties_to_exclude = [] + + parsed_fields = schema_manager.flatten_and_group_dot_notation_fields(all_properties_to_exclude) + + for item in parsed_fields: + # Find the depth 2 properties (top-level to this triggered entity) + if isinstance(item, dict) and property_key in item: + neo4j_properties_to_exclude = item[property_key] + + logger.info(f"neo4j_properties_to_exclude: {neo4j_properties_to_exclude}") + + # Stop after finding the first match + break + + direct_ancestors_list = schema_neo4j_queries.get_dataset_direct_ancestors(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'], property_key = None, properties_to_exclude = neo4j_properties_to_exclude) # Get rid of the entity node properties that are not defined in the yaml schema # as well as the ones defined as `exposed: false` in the yaml schema From 671d3e9ce373b5a35ae6356b35e5798ade239646 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 1 Oct 2025 23:24:58 -0400 Subject: [PATCH 3/9] Handle exclusion by types --- src/app.py | 59 ++++++++++--------- src/schema/schema_manager.py | 106 ++++++++++++++++++++-------------- src/schema/schema_triggers.py | 21 ++++--- 3 files changed, 104 insertions(+), 82 deletions(-) diff --git a/src/app.py b/src/app.py index d4cb9f19..e7854975 100644 --- a/src/app.py +++ b/src/app.py @@ -778,20 +778,20 @@ def get_entity_by_id(id): # These are the top-level fields and nested fields defined in the schema yaml fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type) - - - ###### - # Only support defined query string parameters for filtering purposes - # 'property' was initially introduced to return a single field - # 'exclude' is newly added as a short-term workaround otherwise AWS API Gateway - # returns 500 error when the large paylod >10 MB + # 'property' was initially introduced to return one 
of the single fields ['data_access_level', 'status'] + # 'exclude' is newly added to reduce the large paylod caused by certain fields (`direct_ancestors.files` for instance) # When both 'property' and 'exclude' are specified in the URL, 'property' dominates # since the final result is a single field value - Zhou 10/1/2025 supported_qs_params = ['property', 'exclude'] - triggered_properties_to_skip = [] - neo4j_properties_to_skip = [] + # There are three types of properties that can be excluded from the GET response + # - properties generated by trigger methods + # - properties returned as part of Neo4j node properties + # - properties returned by Neo4j but nested + triggered_top_properties_to_skip = [] + neo4j_top_properties_to_skip = [] + neo4j_nested_properties_to_skip = [] if bool(request.args): # First make sure the user provided query string params are valid @@ -823,33 +823,26 @@ def get_entity_by_id(id): # Exclude fields—either top-level or nested—specified by the user via the URL query string, # using the format `?exclude=a.b,a.c,x`, where: - # - `x` is a top-level property - # - `a.b` and `a.c` are nested fields (dot-notated) + # - `x` is a top-level property of the target entity + # - `a.b` and `a.c` are nested fields in a dot-notated form (b and c could be from a different entity type) # # Note: This is not the most efficient approach, as exclusion is performed after the Neo4j query # rather than within it. However, it leverages the existing `exclude_properties_from_response()` # function for simplicity and maintainability. 
- Zhou 10/1/2025 - if 'exclude' in request.args: - properties_to_exclude_str = request.args.get('exclude') - - if properties_to_exclude_str is not None: - flat_list = [item.strip() for item in properties_to_exclude_str.split(",")] - - logger.info(f"User specified flat_list: {flat_list}") - - # Determine which properties to exclude from triggers and which to exclude directly from the resulting Neo4j `entity_dict` - triggered_properties_to_skip, neo4j_properties_to_skip = schema_manager.determine_property_exclusion_type(normalized_entity_type, flat_list) - else: - bad_request_error("Must specify the properties to exclude in the form of exclude=[a, b, c, d.e]") - - ###### - + try: + all_properties_to_exclude = schema_manager.get_all_fields_to_exclude_from_query_string(request) + + # Determine which top-level properties to exclude from triggers and which to exclude directly from the resulting Neo4j `entity_dict` + # Also get nested properties that are directly returned from Neo4j, which will be handled differently + triggered_top_properties_to_skip, neo4j_top_properties_to_skip, neo4j_nested_properties_to_skip = schema_manager.determine_property_exclusion_type(normalized_entity_type, all_properties_to_exclude) + except Exception as e: + bad_request_error(e) # Get the generated complete entity result from cache if exists # Otherwise re-generate on the fly - # NOTE: top-level properties in `triggered_properties_to_skip` will skip the trigger methods + # NOTE: top-level properties in `triggered_top_properties_to_skip` will skip the trigger methods # Nested properties like `direct_ancestors.files` will be handled by the trigger method - Zhou 10/1/2025 - complete_dict = schema_manager.get_complete_entity_result(request, token, entity_dict, triggered_properties_to_skip) + complete_dict = schema_manager.get_complete_entity_result(request, token, entity_dict, triggered_top_properties_to_skip) # Determine if the entity is publicly visible base on its data, only. 
# To verify if a Collection is public, it is necessary to have its Datasets, which @@ -884,14 +877,20 @@ def get_entity_by_id(id): forbidden_error(f"The requested {normalized_entity_type} has non-public data." f" A Globus token with access permission is required.") - ######## # Remove the top-level properties that are directly available in the resulting Neo4j `entity_dict` - for item in neo4j_properties_to_skip: + # Due to the use of entity cache from `query_target_entity()`, we don't want to exclude the `neo4j_top_properties_to_skip` + # from actual Neo4j query. And it's not s performance concern neither. - Zhou 10/1/2025 + for item in neo4j_top_properties_to_skip: complete_dict.pop(item) # Also normalize the result based on schema final_result = schema_manager.normalize_entity_result_for_response(complete_dict) + # In addition, there may be nested fields like `ingest_metadata.dag_provenance_list` (for Dataset) + # where that `ingest_metadata` is an actual Neo4j node property containing `dag_provenance_list` + # For such cases, we can't handle via Neo4j query. Instead, exclude at Python app level. 
- Zhou 10/1/2025 + final_result = schema_manager.exclude_properties_from_response(neo4j_nested_properties_to_skip, final_result) + # Response with the dict if public_entity and not user_in_hubmap_read_group(request): final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index b49e372b..fc9de819 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -349,6 +349,40 @@ def delete_nested_field(data, nested_path): return output_dict +""" +Use the Flask request.args MultiDict to see if 'exclude' is a URL parameter passed in with the +request and parse the comma-separated properties to be excluded from final response + +For now, only support one dot for nested fields (depth 2) + +Parameters +---------- +request: Flask request object + The instance of Flask request passed in from application request + +Returns +------- +list + A flat list of strings containing top-level and/or nested dot-notated properties + Example: ['a.b', 'a.c', 'x'] +""" +def get_all_fields_to_exclude_from_query_string(request): + all_properties_to_exclude = [] + + if 'exclude' in request.args: + # Treat query string value as case-insensitive + properties_to_exclude_str = request.args.get('exclude').lower() + + if properties_to_exclude_str is not None: + all_properties_to_exclude = [item.strip() for item in properties_to_exclude_str.split(",")] + + logger.info(f"User specified properties to exclude in request URL: {all_properties_to_exclude}") + else: + raise Exception(f"The value of the 'exclude' query string arameter can not be empty and must be similar to the form of 'a, b, c, d.e, ...' 
(case-insensitive).") + + return all_properties_to_exclude + + """ Transform a flat list of dot-notated strings into a hybrid list that: - keeps plain strings as-is @@ -368,13 +402,14 @@ def delete_nested_field(data, nested_path): Returns ------- list - A list mixing strings and grouped dicts + A list mixing strings and grouped dicts, like ['x', {'a': ['b', 'c']}] """ -def flatten_and_group_dot_notation_fields(flat_list): +def group_dot_notation_fields(flat_list): output_list = [] grouped_dict = {} for item in flat_list: + # For now, only support one dot for nested fields (depth 2) if '.' in item: prefix, field = item.split('.', 1) grouped_dict.setdefault(prefix, []).append(field) @@ -389,17 +424,17 @@ def flatten_and_group_dot_notation_fields(flat_list): """ -Transform a flat list of dot-notated strings into a hybrid list that: -- keeps plain strings as-is -- converts entries with dot-notation (like 'direct_ancestors.files') into a dictionary, grouping by the prefix - -Example: ['a.b', 'a.c', 'x'] -> ['x', {'a': ['b', 'c']}] +Group properties by exclusion type -Used by `GET /entities/?exclude=a.b, a.c, x` to build a JSON list -that can be futher processed by `exclude_properties_from_response()`. 
+Example: ['a.b', 'a.c', 'x', 'y'] where +- x and y are top-level properties +- x is Neo4j node property, and y is generated via trigger method +- a.b and a.c are nested properties while a is a top-level property of either type Parameters ---------- +normalized_entity_type : str + One of the normalized entity types: Dataset, Collection, Sample, Donor, Upload, Publication flat_list : list A flat list of strings, dot-notated strings are optional and can be used to indicate nested fields Example: ['a.b', 'a.c', 'x'] @@ -407,56 +442,39 @@ def flatten_and_group_dot_notation_fields(flat_list): Returns ------- list - A list mixing strings and grouped dicts + Three lists - one for triggered properties and one for Neo4j node properties + Example for Dataset: ['direct_ancestors', 'title'], ['dataset_type'], ['ingest_metadata.dag_provenance_list'] """ def determine_property_exclusion_type(normalized_entity_type, flat_list): global _schema - triggered_properties_to_skip = [] - neo4j_properties_to_skip = [] - - properties = _schema['ENTITIES'][normalized_entity_type]['properties'] - + triggered_top_properties_to_skip = [] + neo4j_top_properties_to_skip = [] + neo4j_nested_properties_to_skip =[] top_level_list = [] + second_level_list = [] + properties = _schema['ENTITIES'][normalized_entity_type]['properties'] + # First find the top-level properties for item in flat_list: - # Only target at properties don't use the dot notation if '.' 
not in item: top_level_list.append(item) + else: + second_level_list.append(item) - + # Only care about the properties defined in schema yaml for item in top_level_list: if item in properties and 'on_read_trigger' in properties[item]: - triggered_properties_to_skip.append(item) + triggered_top_properties_to_skip.append(item) else: - neo4j_properties_to_skip.append(item) - - - return triggered_properties_to_skip, neo4j_properties_to_skip - - -""" -Use the Flask request.args MultiDict to see if 'reindex' is a URL parameter passed in with the -request and if it indicates reindexing should be supressed. Default to reindexing in all other cases. - -Parameters ----------- -request: Flask request object - The instance of Flask request passed in from application request - -Returns -------- -bool -""" -def get_fields_to_exclude_from_query_string(request): - properties_to_exclude_str = request.args.get('exclude') - - properties_to_exclude_list = [item.strip() for item in properties_to_exclude_str.split(",")] + neo4j_top_properties_to_skip.append(item) - # Transform the flat JSON string list to a Python list mixing strings and grouped dicts - prepared_list = flatten_and_group_dot_notation_fields(properties_to_exclude_list) + # # In addition, there may be nested fields like `ingest_metadata.dag_provenance_list` (for Dataset) + # where that `ingest_metadata` is an actual Neo4j node property containing `dag_provenance_list` + # For such cases, exclude via `exclude_properties_from_response()` at Python app level. 
+ neo4j_nested_properties_to_skip = group_dot_notation_fields(second_level_list) - return properties_to_exclude_list + return triggered_top_properties_to_skip, neo4j_top_properties_to_skip, neo4j_nested_properties_to_skip """ diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index ac3020a8..f1210c54 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -987,23 +987,28 @@ def get_dataset_direct_ancestors(property_key, normalized_type, request, user_to logger.info(f"Executing 'get_dataset_direct_ancestors()' trigger method on uuid: {existing_data_dict['uuid']}") - # Get all the mixed fields either top-level or nested from the original query string in request URL - all_properties_to_exclude = schema_manager.get_fields_to_exclude_from_query_string(request) - - logger.info(f"all_properties_to_exclude: {all_properties_to_exclude}") + # Get all the user specified fields either top-level or nested from the original query string in request URL + try: + all_properties_to_exclude = schema_manager.get_all_fields_to_exclude_from_query_string(request) + except Exception as e: + raise Exception(e) # Find the specific sub list, depth is limited to 2 + # We only care about the Neo4j node properties as we don't run nested triggers inside a trigger method # For example, direct_ancestors.files is supported, but direct_ancestors.metadata.acquisition_id is not - Zhou 10/1/2025 neo4j_properties_to_exclude = [] + grouped_fields = schema_manager.group_dot_notation_fields(all_properties_to_exclude) - parsed_fields = schema_manager.flatten_and_group_dot_notation_fields(all_properties_to_exclude) - - for item in parsed_fields: + for item in grouped_fields: # Find the depth 2 properties (top-level to this triggered entity) if isinstance(item, dict) and property_key in item: + for field in item[property_key]: + if not isinstance(field, str): + item[property_key].pop(field) + neo4j_properties_to_exclude = item[property_key] - 
logger.info(f"neo4j_properties_to_exclude: {neo4j_properties_to_exclude}") + logger.info(f"User specified neo4j properties to exclude in request URL: {neo4j_properties_to_exclude}") # Stop after finding the first match break From 01efb5f677546e7fd487a11c38f08d545c5d9ef6 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 1 Oct 2025 23:47:01 -0400 Subject: [PATCH 4/9] Apply to sample.direct_ancestor trigger --- src/schema/schema_neo4j_queries.py | 19 +++++--- src/schema/schema_triggers.py | 75 +++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 28 deletions(-) diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 575ec6a8..43ce4a29 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -566,6 +566,8 @@ def get_uploads(neo4j_driver, uuid, property_key = None): The uuid of target entity property_key : str A target property key for result filtering +properties_to_exclude : list + A list of node properties to exclude from result Returns ------- @@ -1557,28 +1559,33 @@ def count_attached_published_datasets(neo4j_driver, entity_type, uuid): The uuid of target entity property_key : str A target property key for result filtering +properties_to_exclude : list + A list of node properties to exclude from result Returns ------- dict The parent dict, can either be a Sample or Donor """ -def get_sample_direct_ancestor(neo4j_driver, uuid, property_key = None): +def get_sample_direct_ancestor(neo4j_driver, uuid, property_key = None, properties_to_exclude = []): result = {} if property_key: query = (f"MATCH (s:Sample)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) " # Filter out the Lab entity if it's the ancestor f"WHERE s.uuid='{uuid}' AND parent.entity_type <> 'Lab' " - # COLLECT() returns a list - # apoc.coll.toSet() reruns a set containing unique nodes f"RETURN parent.{property_key} AS {record_field_name}") else: - query = (f"MATCH 
(s:Sample)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) " + if properties_to_exclude: + query = (f"MATCH (s:Sample)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) " + # Filter out the Lab entity if it's the ancestor + f"WHERE s.uuid='{uuid}' AND parent.entity_type <> 'Lab' " + f"WITH parent AS p " + f"RETURN apoc.create.vNode(labels(p), apoc.map.removeKeys(properties(p), {properties_to_exclude})) AS {record_field_name}") + else: + query = (f"MATCH (s:Sample)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(parent:Entity) " # Filter out the Lab entity if it's the ancestor f"WHERE s.uuid='{uuid}' AND parent.entity_type <> 'Lab' " - # COLLECT() returns a list - # apoc.coll.toSet() reruns a set containing unique nodes f"RETURN parent AS {record_field_name}") logger.info("======get_sample_direct_ancestor() query======") diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index f1210c54..6a4dd750 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -988,30 +988,10 @@ def get_dataset_direct_ancestors(property_key, normalized_type, request, user_to logger.info(f"Executing 'get_dataset_direct_ancestors()' trigger method on uuid: {existing_data_dict['uuid']}") # Get all the user specified fields either top-level or nested from the original query string in request URL - try: - all_properties_to_exclude = schema_manager.get_all_fields_to_exclude_from_query_string(request) - except Exception as e: - raise Exception(e) - # Find the specific sub list, depth is limited to 2 # We only care about the Neo4j node properties as we don't run nested triggers inside a trigger method # For example, direct_ancestors.files is supported, but direct_ancestors.metadata.acquisition_id is not - Zhou 10/1/2025 - neo4j_properties_to_exclude = [] - grouped_fields = schema_manager.group_dot_notation_fields(all_properties_to_exclude) - - for item in grouped_fields: - # Find the depth 2 properties 
(top-level to this triggered entity) - if isinstance(item, dict) and property_key in item: - for field in item[property_key]: - if not isinstance(field, str): - item[property_key].pop(field) - - neo4j_properties_to_exclude = item[property_key] - - logger.info(f"User specified neo4j properties to exclude in request URL: {neo4j_properties_to_exclude}") - - # Stop after finding the first match - break + neo4j_properties_to_exclude = _get_neo4j_properties_to_exclude(property_key, request) direct_ancestors_list = schema_neo4j_queries.get_dataset_direct_ancestors(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'], property_key = None, properties_to_exclude = neo4j_properties_to_exclude) @@ -2029,7 +2009,13 @@ def get_sample_direct_ancestor(property_key, normalized_type, request, user_toke logger.info(f"Executing 'get_sample_direct_ancestor()' trigger method on uuid: {existing_data_dict['uuid']}") - direct_ancestor_dict = schema_neo4j_queries.get_sample_direct_ancestor(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) + # Get all the user specified fields either top-level or nested from the original query string in request URL + # Find the specific sub list, depth is limited to 2 + # We only care about the Neo4j node properties as we don't run nested triggers inside a trigger method + # For example, direct_ancestors.files is supported, but direct_ancestors.metadata.acquisition_id is not - Zhou 10/1/2025 + neo4j_properties_to_exclude = _get_neo4j_properties_to_exclude(property_key, request) + + direct_ancestor_dict = schema_neo4j_queries.get_sample_direct_ancestor(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'], property_key = None, properties_to_exclude = neo4j_properties_to_exclude) # Get rid of the entity node properties that are not defined in the yaml schema # as well as the ones defined as `exposed: false` in the yaml schema @@ -2695,3 +2681,48 @@ def _get_age_age_units_race_sex_phrase(age:str=None, 
age_units:str='units', race return f"{age}-{age_units}-old {race} {sex}" +""" +Parse the original user request to determine the Neo4j properties to exclude from trigger generated data + +Parameters +---------- +property_key : str + The target property key of the value to be generated +request: Flask request object + The instance of Flask request passed in from application request + +Returns +------- +list: A list containing Neo4j node properties to exclude +""" +def _get_neo4j_properties_to_exclude(property_key, request): + neo4j_properties_to_exclude = [] + + # Get all the user specified fields either top-level or nested from the original query string in request URL + try: + all_properties_to_exclude = schema_manager.get_all_fields_to_exclude_from_query_string(request) + except Exception as e: + raise Exception(e) + + # Find the specific sub list, depth is limited to 2 + # We only care about the Neo4j node properties as we don't run nested triggers inside a trigger method + # For example, direct_ancestors.files is supported, but direct_ancestors.metadata.acquisition_id is not - Zhou 10/1/2025 + grouped_fields = schema_manager.group_dot_notation_fields(all_properties_to_exclude) + + for item in grouped_fields: + # Find the depth 2 properties (top-level to this triggered entity) + if isinstance(item, dict) and property_key in item: + for field in item[property_key]: + if not isinstance(field, str): + item[property_key].pop(field) + + neo4j_properties_to_exclude = item[property_key] + + logger.info(f"User specified neo4j properties to exclude in request URL: {neo4j_properties_to_exclude}") + + # Stop after finding the first match + break + + return neo4j_properties_to_exclude + + From 111e80a705b0259920563f90362f83cf89217f52 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Thu, 2 Oct 2025 01:02:49 -0400 Subject: [PATCH 5/9] Fix to exclusion types and more trigger filters --- src/app.py | 8 ++++--- src/schema/schema_manager.py | 36 +++++++++++++++++++++--------- 
src/schema/schema_neo4j_queries.py | 32 +++++++++++++++++++------- src/schema/schema_triggers.py | 16 +++++++++++-- 4 files changed, 69 insertions(+), 23 deletions(-) diff --git a/src/app.py b/src/app.py index e7854975..a032a026 100644 --- a/src/app.py +++ b/src/app.py @@ -887,9 +887,11 @@ def get_entity_by_id(id): final_result = schema_manager.normalize_entity_result_for_response(complete_dict) # In addition, there may be nested fields like `ingest_metadata.dag_provenance_list` (for Dataset) - # where that `ingest_metadata` is an actual Neo4j node property containing `dag_provenance_list` - # For such cases, we can't handle via Neo4j query. Instead, exclude at Python app level. - Zhou 10/1/2025 - final_result = schema_manager.exclude_properties_from_response(neo4j_nested_properties_to_skip, final_result) + # where `ingest_metadata` is an actual Neo4j node string property containing `dag_provenance_list` + # For such cases, we can't handle via simple Neo4j query. Instead, exclude at Python app level. 
+ # NOTE: need to convert the `neo4j_nested_properties_to_skip` to a format that can be used by + # `exclude_properties_from_response()` - Zhou 10/1/2025 + final_result = schema_manager.exclude_properties_from_response(schema_manager.group_dot_notation_fields(neo4j_nested_properties_to_skip), final_result) # Response with the dict if public_entity and not user_in_hubmap_read_group(request): diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index fc9de819..d8c41e6d 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -443,7 +443,11 @@ def group_dot_notation_fields(flat_list): ------- list Three lists - one for triggered properties and one for Neo4j node properties - Example for Dataset: ['direct_ancestors', 'title'], ['dataset_type'], ['ingest_metadata.dag_provenance_list'] + + Example for Dataset: + - triggered_top_properties_to_skip: ['direct_ancestors.files', 'direct_ancestors.ingest_metadata', 'upload.title'] + - neo4j_top_properties_to_skip: ['data_access_level'] + - neo4j_nested_properties_to_skip: ['status_history.status'] """ def determine_property_exclusion_type(normalized_entity_type, flat_list): global _schema @@ -455,7 +459,7 @@ def determine_property_exclusion_type(normalized_entity_type, flat_list): second_level_list = [] properties = _schema['ENTITIES'][normalized_entity_type]['properties'] - # First find the top-level properties + # First find the top-level properties without using dot-notation for item in flat_list: if '.' 
not in item: top_level_list.append(item) @@ -464,16 +468,28 @@ def determine_property_exclusion_type(normalized_entity_type, flat_list): # Only care about the properties defined in schema yaml for item in top_level_list: - if item in properties and 'on_read_trigger' in properties[item]: - triggered_top_properties_to_skip.append(item) - else: - neo4j_top_properties_to_skip.append(item) + if item in properties: + if 'on_read_trigger' in properties[item]: + triggered_top_properties_to_skip.append(item) + else: + neo4j_top_properties_to_skip.append(item) + + # Nested second-level properties, such as `direct_ancestors.files`, belong to `triggered_top_properties_to_skip` + # `ingest_metadata.dag_provenance_list` belongs to `neo4j_nested_properties_to_skip` + for item in second_level_list: + prefix = item.split('.')[0] + if prefix in properties: + if 'on_read_trigger' in properties[prefix]: + triggered_top_properties_to_skip.append(item) + else: + neo4j_nested_properties_to_skip.append(item) - # # In addition, there may be nested fields like `ingest_metadata.dag_provenance_list` (for Dataset) - # where that `ingest_metadata` is an actual Neo4j node property containing `dag_provenance_list` - # For such cases, exclude via `exclude_properties_from_response()` at Python app level. 
- neo4j_nested_properties_to_skip = group_dot_notation_fields(second_level_list) + logger.info(f"Determined property exclusion type - triggered_top_properties_to_skip: {triggered_top_properties_to_skip}") + logger.info(f"Determined property exclusion type - neo4j_top_properties_to_skip: {neo4j_top_properties_to_skip}") + logger.info(f"Determined property exclusion type - neo4j_nested_properties_to_skip: {neo4j_nested_properties_to_skip}") + # NOTE: Will need to convert the `neo4j_nested_properties_to_skip` to a format that can be used by + # `exclude_properties_from_response()` - Zhou 10/1/2025 return triggered_top_properties_to_skip, neo4j_top_properties_to_skip, neo4j_nested_properties_to_skip diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 43ce4a29..223ab58f 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -1083,13 +1083,15 @@ def get_collection_associated_datasets(neo4j_driver, uuid, property_key = None): The uuid of dataset or publication property_key : str A target property key for result filtering +properties_to_exclude : list + A list of node properties to exclude from result Returns ------- list A list of collection uuids """ -def get_dataset_collections(neo4j_driver, uuid, property_key = None): +def get_dataset_collections(neo4j_driver, uuid, property_key = None, properties_to_exclude = []): results = [] if property_key: @@ -1097,9 +1099,15 @@ def get_dataset_collections(neo4j_driver, uuid, property_key = None): f"WHERE e.uuid = '{uuid}' " f"RETURN apoc.coll.toSet(COLLECT(c.{property_key})) AS {record_field_name}") else: - query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) " - f"WHERE e.uuid = '{uuid}' " - f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}") + if properties_to_exclude: + query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) " + f"WHERE e.uuid = '{uuid}' " + f"WITH apoc.coll.toSet(COLLECT(c)) AS uniqueCollections " + f"RETURN [c 
IN uniqueCollections | apoc.create.vNode(labels(c), apoc.map.removeKeys(properties(c), {properties_to_exclude}))] AS {record_field_name}") + else: + query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) " + f"WHERE e.uuid = '{uuid}' " + f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}") logger.info("======get_dataset_collections() query======") logger.debug(query) @@ -1164,18 +1172,26 @@ def get_publication_associated_collection(neo4j_driver, uuid): The neo4j database connection pool uuid : str The uuid of dataset +properties_to_exclude : list + A list of node properties to exclude from result Returns ------- dict A Upload dict """ -def get_dataset_upload(neo4j_driver, uuid): +def get_dataset_upload(neo4j_driver, uuid, properties_to_exclude = []): result = {} - query = (f"MATCH (e:Entity)-[:IN_UPLOAD]->(s:Upload) " - f"WHERE e.uuid = '{uuid}' " - f"RETURN s AS {record_field_name}") + if properties_to_exclude: + query = (f"MATCH (e:Entity)-[:IN_UPLOAD]->(s:Upload) " + f"WHERE e.uuid = '{uuid}' " + f"WITH s AS up " + f"RETURN apoc.create.vNode(labels(up), apoc.map.removeKeys(properties(up), {properties_to_exclude})) AS {record_field_name}") + else: + query = (f"MATCH (e:Entity)-[:IN_UPLOAD]->(s:Upload) " + f"WHERE e.uuid = '{uuid}' " + f"RETURN s AS {record_field_name}") logger.info("======get_dataset_upload() query======") logger.debug(query) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 6a4dd750..c44fe255 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -781,7 +781,13 @@ def get_dataset_collections(property_key, normalized_type, request, user_token, logger.info(f"Executing 'get_dataset_collections()' trigger method on uuid: {existing_data_dict['uuid']}") - collections_list = schema_neo4j_queries.get_dataset_collections(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) + # Get all the user specified fields either top-level or nested from the original query 
string in request URL + # Find the specific sub list, depth is limited to 2 + # We only care about the Neo4j node properties as we don't run nested triggers inside a trigger method + # For example, direct_ancestors.files is supported, but direct_ancestors.metadata.acquisition_id is not - Zhou 10/1/2025 + neo4j_properties_to_exclude = _get_neo4j_properties_to_exclude(property_key, request) + + collections_list = schema_neo4j_queries.get_dataset_collections(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'], property_key = None, properties_to_exclude = neo4j_properties_to_exclude) # Get rid of the entity node properties that are not defined in the yaml schema # as well as the ones defined as `exposed: false` in the yaml schema @@ -859,7 +865,13 @@ def get_dataset_upload(property_key, normalized_type, request, user_token, exist logger.info(f"Executing 'get_dataset_upload()' trigger method on uuid: {existing_data_dict['uuid']}") - upload_dict = schema_neo4j_queries.get_dataset_upload(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) + # Get all the user specified fields either top-level or nested from the original query string in request URL + # Find the specific sub list, depth is limited to 2 + # We only care about the Neo4j node properties as we don't run nested triggers inside a trigger method + # For example, direct_ancestors.files is supported, but direct_ancestors.metadata.acquisition_id is not - Zhou 10/1/2025 + neo4j_properties_to_exclude = _get_neo4j_properties_to_exclude(property_key, request) + + upload_dict = schema_neo4j_queries.get_dataset_upload(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'], properties_to_exclude = neo4j_properties_to_exclude) # Get rid of the entity node properties that are not defined in the yaml schema # as well as the ones defined as `exposed: false` in the yaml schema From f6af9a52bb3da816ecfdb07e32b995a9f3687bcd Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Thu, 2 
Oct 2025 01:15:49 -0400 Subject: [PATCH 6/9] Enhanced validation --- src/schema/schema_manager.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index d8c41e6d..44733fd8 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -373,12 +373,18 @@ def get_all_fields_to_exclude_from_query_string(request): # Treat query string value as case-insensitive properties_to_exclude_str = request.args.get('exclude').lower() - if properties_to_exclude_str is not None: + if properties_to_exclude_str: all_properties_to_exclude = [item.strip() for item in properties_to_exclude_str.split(",")] logger.info(f"User specified properties to exclude in request URL: {all_properties_to_exclude}") else: - raise Exception(f"The value of the 'exclude' query string arameter can not be empty and must be similar to the form of 'a, b, c, d.e, ...' (case-insensitive).") + raise Exception("The value of the 'exclude' query string parameter can not be empty and must be similar to the form of 'a, b, c, d.e, ...' (case-insensitive).") + + # A bit more validation to limit to depth 2 + for item in all_properties_to_exclude: + if '.' in item: + if len(item.split('.')) > 2: + raise Exception("Only single dot-separated keys are allowed in `exclude` (e.g., a.b). 
Keys with multiple dots like a.b.c are not supported.") return all_properties_to_exclude From 2666f49012d1ca7d51355f90e24cfa60c71557fb Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Thu, 2 Oct 2025 11:05:19 -0400 Subject: [PATCH 7/9] Update to use commons 2.1.21 --- src/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/requirements.txt b/src/requirements.txt index 6026e84d..ae3fa02e 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -18,7 +18,7 @@ PyYAML==5.4.1 # Use the branch name of commons from github for testing new changes made in commons from different branch # Default is main branch specified in docker-compose.development.yml if not set # git+https://github.com/hubmapconsortium/commons.git@${COMMONS_BRANCH}#egg=hubmap-commons -hubmap-commons==2.1.19 +hubmap-commons==2.1.21 # For unit test nose2==0.10.0 From 9f4f106bf29f893629ead576ba73327f7d8c8c9e Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Thu, 2 Oct 2025 11:47:14 -0400 Subject: [PATCH 8/9] Support exclude to upload.datasets and collection.datasets with lowercase validation --- src/app.py | 6 ++--- src/schema/schema_manager.py | 14 +++++++--- src/schema/schema_neo4j_queries.py | 42 +++++++++++++++++++++++------- src/schema/schema_triggers.py | 16 ++++++++++-- 4 files changed, 59 insertions(+), 19 deletions(-) diff --git a/src/app.py b/src/app.py index a032a026..240a13c7 100644 --- a/src/app.py +++ b/src/app.py @@ -786,9 +786,9 @@ def get_entity_by_id(id): supported_qs_params = ['property', 'exclude'] # There are three types of properties that can be excluded from the GET response - # - properties generated by trigger methods - # - properties returned as part of Neo4j node properties - # - properties returned by Neo4j but nested + # - top-level properties generated by trigger methods + # - top-level properties returned as part of Neo4j node properties + # - second-level properties returned by Neo4j but nested and can't be skipped in Cypher query 
triggered_top_properties_to_skip = [] neo4j_top_properties_to_skip = [] neo4j_nested_properties_to_skip = [] diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 44733fd8..ef3d7590 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -370,21 +370,27 @@ def get_all_fields_to_exclude_from_query_string(request): all_properties_to_exclude = [] if 'exclude' in request.args: - # Treat query string value as case-insensitive - properties_to_exclude_str = request.args.get('exclude').lower() + # The query string values are case-sensitive as the property keys in schema yaml are case-sensitive + properties_to_exclude_str = request.args.get('exclude') if properties_to_exclude_str: + # Must be all lowercase values + has_upper = any(c.isupper() for c in properties_to_exclude_str) + + if has_upper: + raise Exception("All the properties specified in 'exclude' query string in URL must be lowercase.") + all_properties_to_exclude = [item.strip() for item in properties_to_exclude_str.split(",")] logger.info(f"User specified properties to exclude in request URL: {all_properties_to_exclude}") else: - raise Exception("The value of the 'exclude' query string parameter can not be empty and must be similar to the form of 'a, b, c, d.e, ...' (case-insensitive).") + raise Exception("The value of the 'exclude' query string parameter can not be empty and must be similar to the form of 'a, b, c, d.e, ...' (case-sensitive).") # A bit more validation to limit to depth 2 for item in all_properties_to_exclude: if '.' in item: if len(item.split('.')) > 2: - raise Exception("Only single dot-separated keys are allowed in `exclude` (e.g., a.b). Keys with multiple dots like a.b.c are not supported.") + raise Exception("Only single dot-separated keys are allowed in 'exclude' (e.g., a.b). 
Keys with multiple dots like a.b.c are not supported.") return all_properties_to_exclude diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 223ab58f..5add3874 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -1215,20 +1215,32 @@ def get_dataset_upload(neo4j_driver, uuid, properties_to_exclude = []): The neo4j database connection pool uuid : str The uuid of collection +properties_to_exclude : list + A list of node properties to exclude from result Returns ------- list The list containing associated dataset dicts """ -def get_collection_datasets(neo4j_driver, uuid): +def get_collection_datasets(neo4j_driver, uuid, properties_to_exclude = []): results = [] fields_to_omit = SchemaConstants.OMITTED_FIELDS - query = (f"MATCH (e:Dataset)-[:IN_COLLECTION]->(c:Collection) " - f"WHERE c.uuid = '{uuid}' " - f"WITH COLLECT(DISTINCT e) AS uniqueDataset " - f"RETURN [a IN uniqueDataset | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") + + + if properties_to_exclude: + merged_list = properties_to_exclude + fields_to_omit + + query = (f"MATCH (e:Dataset)-[:IN_COLLECTION]->(c:Collection) " + f"WHERE c.uuid = '{uuid}' " + f"WITH COLLECT(DISTINCT e) AS uniqueDataset " + f"RETURN [a IN uniqueDataset | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {merged_list}))] AS {record_field_name}") + else: + query = (f"MATCH (e:Dataset)-[:IN_COLLECTION]->(c:Collection) " + f"WHERE c.uuid = '{uuid}' " + f"WITH COLLECT(DISTINCT e) AS uniqueDataset " + f"RETURN [a IN uniqueDataset | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_collection_datasets() query======") logger.debug(query) @@ -1424,13 +1436,15 @@ def unlink_datasets_from_upload(neo4j_driver, upload_uuid, dataset_uuids_list): The uuid of Upload property_key : str A target property key for result 
filtering +properties_to_exclude : list + A list of node properties to exclude from result Returns ------- list The list containing associated dataset dicts """ -def get_upload_datasets(neo4j_driver, uuid, property_key = None): +def get_upload_datasets(neo4j_driver, uuid, property_key = None, properties_to_exclude = []): results = [] fields_to_omit = SchemaConstants.OMITTED_FIELDS if property_key: @@ -1440,10 +1454,18 @@ def get_upload_datasets(neo4j_driver, uuid, property_key = None): # apoc.coll.toSet() reruns a set containing unique nodes f"RETURN apoc.coll.toSet(COLLECT(e.{property_key})) AS {record_field_name}") else: - query = (f"MATCH (e:Dataset)-[:IN_UPLOAD]->(s:Upload) " - f"WHERE s.uuid = '{uuid}' " - f"WITH COLLECT(DISTINCT e) AS uniqueUploads " - f"RETURN [a IN uniqueUploads | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") + if properties_to_exclude: + merged_list = properties_to_exclude + fields_to_omit + + query = (f"MATCH (e:Dataset)-[:IN_UPLOAD]->(s:Upload) " + f"WHERE s.uuid = '{uuid}' " + f"WITH COLLECT(DISTINCT e) AS uniqueUploads " + f"RETURN [a IN uniqueUploads | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {merged_list}))] AS {record_field_name}") + else: + query = (f"MATCH (e:Dataset)-[:IN_UPLOAD]->(s:Upload) " + f"WHERE s.uuid = '{uuid}' " + f"WITH COLLECT(DISTINCT e) AS uniqueUploads " + f"RETURN [a IN uniqueUploads | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_upload_datasets() query======") logger.debug(query) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index c44fe255..f5299595 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -708,8 +708,14 @@ def get_collection_datasets(property_key, normalized_type, request, user_token, raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 
'get_collection_datasets()' trigger method.") logger.info(f"Executing 'get_collection_datasets()' trigger method on uuid: {existing_data_dict['uuid']}") + + # Get all the user specified fields either top-level or nested from the original query string in request URL + # Find the specific sub list, depth is limited to 2 + # We only care about the Neo4j node properties as we don't run nested triggers inside a trigger method + # For example, direct_ancestors.files is supported, but direct_ancestors.metadata.acquisition_id is not - Zhou 10/1/2025 + neo4j_properties_to_exclude = _get_neo4j_properties_to_exclude(property_key, request) - datasets_list = schema_neo4j_queries.get_collection_datasets(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) + datasets_list = schema_neo4j_queries.get_collection_datasets(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'], properties_to_exclude = neo4j_properties_to_exclude) # Get rid of the entity node properties that are not defined in the yaml schema # as well as the ones defined as `exposed: false` in the yaml schema @@ -2294,7 +2300,13 @@ def get_upload_datasets(property_key, normalized_type, request, user_token, exis logger.info(f"Executing 'get_upload_datasets()' trigger method on uuid: {existing_data_dict['uuid']}") - datasets_list = schema_neo4j_queries.get_upload_datasets(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) + # Get all the user specified fields either top-level or nested from the original query string in request URL + # Find the specific sub list, depth is limited to 2 + # We only care about the Neo4j node properties as we don't run nested triggers inside a trigger method + # For example, direct_ancestors.files is supported, but direct_ancestors.metadata.acquisition_id is not - Zhou 10/1/2025 + neo4j_properties_to_exclude = _get_neo4j_properties_to_exclude(property_key, request) + + datasets_list = 
schema_neo4j_queries.get_upload_datasets(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid'], property_key = None, properties_to_exclude = neo4j_properties_to_exclude) # Get rid of the entity node properties that are not defined in the yaml schema # as well as the ones defined as `exposed: false` in the yaml schema From 7e411f14f81f2118dfc6abb3243767a16fec7471 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Thu, 2 Oct 2025 14:52:39 -0400 Subject: [PATCH 9/9] Add validation on prohibited properties --- src/app.py | 4 +++- src/schema/schema_manager.py | 15 ++++++++++++--- src/schema/schema_triggers.py | 2 ++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/app.py b/src/app.py index 240a13c7..22013627 100644 --- a/src/app.py +++ b/src/app.py @@ -835,8 +835,10 @@ def get_entity_by_id(id): # Determine which top-level properties to exclude from triggers and which to exclude directly from the resulting Neo4j `entity_dict` # Also get nested properties that are directly returned from Neo4j, which will be handled differently triggered_top_properties_to_skip, neo4j_top_properties_to_skip, neo4j_nested_properties_to_skip = schema_manager.determine_property_exclusion_type(normalized_entity_type, all_properties_to_exclude) - except Exception as e: + except ValueError as e: bad_request_error(e) + except Exception as e: + internal_server_error(e) # Get the generated complete entity result from cache if exists # Otherwise re-generate on the fly diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index ef3d7590..0f50201c 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -378,19 +378,28 @@ def get_all_fields_to_exclude_from_query_string(request): has_upper = any(c.isupper() for c in properties_to_exclude_str) if has_upper: - raise Exception("All the properties specified in 'exclude' query string in URL must be lowercase.") + raise ValueError("All the properties specified in 'exclude' query string in 
URL must be lowercase.") all_properties_to_exclude = [item.strip() for item in properties_to_exclude_str.split(",")] logger.info(f"User specified properties to exclude in request URL: {all_properties_to_exclude}") else: - raise Exception("The value of the 'exclude' query string parameter can not be empty and must be similar to the form of 'a, b, c, d.e, ...' (case-sensitive).") + raise ValueError("The value of the 'exclude' query string parameter can not be empty and must be similar to the form of 'a, b, c, d.e, ...' (case-sensitive).") # A bit more validation to limit to depth 2 for item in all_properties_to_exclude: if '.' in item: if len(item.split('.')) > 2: - raise Exception("Only single dot-separated keys are allowed in 'exclude' (e.g., a.b). Keys with multiple dots like a.b.c are not supported.") + raise ValueError("Only single dot-separated keys are allowed in 'exclude' (e.g., a.b). Keys with multiple dots like a.b.c are not supported.") + + # More validation - ensure prohibited properties are not accepted + # These two properties are required internally by `normalize_entity_result_for_response()` + prohibited_properties = ['uuid', 'entity_type'] + second_level_list = [] + + for item in all_properties_to_exclude: + if item in prohibited_properties or item.split('.')[1] in prohibited_properties: + raise ValueError(f"Entity property '{item}' is not allowed in the 'exclude' query parameter.") return all_properties_to_exclude diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index f5299595..87211de4 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -2725,6 +2725,8 @@ def _get_neo4j_properties_to_exclude(property_key, request): # Get all the user specified fields either top-level or nested from the original query string in request URL try: all_properties_to_exclude = schema_manager.get_all_fields_to_exclude_from_query_string(request) + except ValueError as e: + raise ValueError(e) except Exception as e: 
raise Exception(e)