Skip to content

Commit 29470ff

Browse files
authored
Merge pull request #901 from hubmapconsortium/Derek-Furst/support-public-version
reworked get_accessibility_data_dict and its accompanying helper func…
2 parents fa7b9e4 + ef8440b commit 29470ff

File tree

2 files changed

+148
-142
lines changed

2 files changed

+148
-142
lines changed

src/app.py

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import werkzeug.exceptions
2121
from hubmap_sdk import EntitySdk, sdk_helper
2222
from apscheduler.schedulers.background import BackgroundScheduler
23-
from neo4j.exceptions import TransactionError
23+
from neo4j.exceptions import TransactionError, Neo4jError
2424
from apscheduler.triggers.interval import IntervalTrigger
2525
from apscheduler.triggers.date import DateTrigger
2626
# Don't confuse urllib (Python native library) with urllib3 (3rd-party library, requests also uses urllib3)
@@ -588,22 +588,14 @@ class ReindexPriorityLevelEnum(Enum):
588588
def get_accessible_data_directories():
589589
dataset_helper = DatasetHelper()
590590

591-
# If not token is provided or an invalid token is provided, return a 401 error.
592-
if request.headers.get('Authorization') is None:
593-
unauthorized_error('A valid token must be provided.')
594-
595591
# If an invalid token provided, we need to tell the client with a 401 error, rather
596592
# than a 500 error later if the token is not good.
597593
_validate_token_if_auth_header_exists(request)
598594

599-
# Get user token from Authorization header
600-
# Get the user token from Authorization header
601-
user_token = auth_helper_instance.getAuthorizationTokens(request.headers)
602-
603595
# Get user group information which will be used to determine accessibility on
604596
# a per-entity basis.
605597
user_data_access_level = auth_helper_instance.getUserDataAccessLevel(request)
606-
598+
user_data_access_level['group_membership_ids'] = []
607599
if not request.is_json:
608600
bad_request_error("A json body and appropriate Content-Type header are required.")
609601
json_payload = request.get_json()
@@ -613,22 +605,18 @@ def get_accessible_data_directories():
613605
if not isinstance(identifier, str):
614606
bad_request_error('The Request payload JSON Array must contain only identifier strings.')
615607

616-
payload_accessibility_dict = {}
617-
for identifier in json_payload:
618-
try:
619-
identifier_accessibility_dict = dataset_helper.get_entity_accessibility(identifier
620-
, user_token
621-
, user_data_access_level=user_data_access_level)
622-
payload_accessibility_dict[identifier] = identifier_accessibility_dict
623-
except (HTTPException, sdk_helper.HTTPException) as he:
624-
return jsonify({'error': he.get_description()}), he.get_status_code()
625-
except ValueError as ve:
626-
logger.error(str(ve))
627-
return jsonify({'error': str(ve)}), 400
628-
except Exception as e:
629-
logger.error(e, exc_info=True)
630-
return Response("Unexpected error: " + str(e), 500)
631-
return jsonify(payload_accessibility_dict), 200
608+
try:
609+
identifier_accessibility_dict = dataset_helper.get_entity_accessibility(neo4j_driver_instance, json_payload, user_data_access_level=user_data_access_level)
610+
except Neo4jError as ne:
611+
logger.error(str(ne.message))
612+
return jsonify({'Unexpected error': 'Failed to retrieve accessibility info from Neo4j. Check the logs'}), 500
613+
except ValueError as ve:
614+
logger.error(str(ve))
615+
return jsonify({'error': str(ve)}), 400
616+
except Exception as e:
617+
logger.error(e, exc_info=True)
618+
return Response("Unexpected error: " + str(e), 500)
619+
return jsonify(identifier_accessibility_dict), 200
632620

633621
"""
634622
Retrieve the path of Datasets or Uploads relative to the Globus endpoint mount point, given a list of entity uuids

src/dataset_helper_object.py

Lines changed: 134 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import requests
66
import logging
77
from flask import Flask
8+
from neo4j.exceptions import Neo4jError
89
from hubmap_commons.hubmap_const import HubmapConst
910
from hubmap_sdk import EntitySdk, SearchSdk, sdk_helper
1011
from pandas.core.array_algos.take import take_nd
@@ -120,129 +121,146 @@ def verify_dataset_title_info(self, dataset_uuid: str, user_token: str) -> array
120121

121122
return rslt
122123

123-
# entity_id - UUID or HM_ID
124-
# user_token - The authorization token for the user, which is used to generate an appropriate
125-
# description of the user's access to the entity.
124+
# neo4j_driver - The driver instance for neo4j
125+
# json_payload - A list of ids (HM_ID or UUID)
126126
# user_data_access_level - Data access level information for the user, notably including
127127
# Globus Group membership information.
128128
#
129-
# Returns a JSON Object containing accessibility information for the entity.
130-
#
131-
def get_entity_accessibility(self, entity_id: str, user_token: str, user_data_access_level: dict = None) -> dict:
132-
entity_api = EntitySdk(token=user_token, service_url=_entity_api_url)
129+
# Returns a Dict of Dicts where each of the dicts inside is keyed by its original id given
130+
# in the json_payload and contains information about the accessibility of that directory
131+
# including its globus url.
132+
def get_entity_accessibility(self, neo4j_driver, json_payload, user_data_access_level) -> dict:
133133
supported_entity_type_list = ['Dataset', 'Upload']
134+
accessibility_dicts = {}
134135

135-
# Grab the entity from the entity-api service.
136-
try:
137-
sdk_entity = entity_api.get_entity_by_id(entity_id)
138-
except sdk_helper.HTTPException as he:
139-
# Determine if this entity_id should be shown as inaccessible in an
140-
# HTTP 200 Response. Otherwise, let the HTTPException be processed
141-
if he.status_code == 404:
142-
# We will log when the user is checking on entities which are inaccessible.
143-
logger.debug(f"User accessibility retrieval of non-valid {entity_id}"
144-
f" resulted in {he.status_code} exception he={str(he)}")
145-
# Create a simple dict when entity_id is not for an existing entity
146-
return {'valid_id': False}
147-
elif he.status_code == 403:
148-
# We will log when the user is checking on entities which are inaccessible.
149-
logger.debug(f"User accessibility retrieval of valid, inaccessible {entity_id}"
150-
f" resulted in {he.status_code} exception he={str(he)}")
151-
# Create a simple dict when entity_id is not for an existing entity
152-
return {'valid_id': True
153-
, 'access_allowed': False}
136+
query = (
137+
"MATCH (e:Entity) "
138+
"WHERE e.uuid IN $ids OR e.hubmap_id IN $ids "
139+
"RETURN COLLECT({"
140+
"uuid: e.uuid, "
141+
"hubmap_id: e.hubmap_id, "
142+
"entity_type: e.entity_type, "
143+
"status: e.status, "
144+
"group_name: e.group_name, "
145+
"group_uuid: e.group_uuid, "
146+
"contains_human_genetic_sequences: e.contains_human_genetic_sequences, "
147+
"data_access_level: e.data_access_level"
148+
"}) AS entities"
149+
)
150+
151+
with neo4j_driver.session() as session:
152+
result = session.run(query, ids=json_payload)
153+
record = result.single()
154+
entities = record["entities"] if record else []
155+
156+
requested_ids = set(json_payload)
157+
matched_ids = set()
158+
for e in entities:
159+
if e.get("uuid"):
160+
matched_ids.add(e["uuid"])
161+
if e.get("hubmap_id"):
162+
matched_ids.add(e["hubmap_id"])
163+
if e.get("uuid") in requested_ids:
164+
e['original_id'] = e.get("uuid")
154165
else:
155-
raise he
156-
except Exception as e:
157-
msg = f"Unable to get data to determine accessibility of '{entity_id}'"
158-
logger.exception(msg)
159-
raise Exception(msg)
160-
161-
entity_dict = vars(sdk_entity)
162-
if entity_dict['entity_type'] not in supported_entity_type_list:
163-
return {'valid_id': False}
164-
165-
# Make sure all expected elements for the business requirements are in the returned entity.
166-
# Need to determine entity "visibility" using the same rules found in the
167-
168-
missing_entity_elements = []
169-
if 'entity_type' not in entity_dict:
170-
missing_entity_elements.append('entity_type')
171-
if 'uuid' not in entity_dict:
172-
missing_entity_elements.append('uuid')
173-
if 'hubmap_id' not in entity_dict:
174-
missing_entity_elements.append('hubmap_id')
175-
if 'status' not in entity_dict:
176-
missing_entity_elements.append('status')
177-
if 'group_name' not in entity_dict:
178-
missing_entity_elements.append('group_name')
179-
if 'group_uuid' not in entity_dict:
180-
missing_entity_elements.append('group_uuid')
181-
if 'contains_human_genetic_sequences' not in entity_dict and \
182-
entity_dict['entity_type'] == 'Dataset':
183-
missing_entity_elements.append('contains_human_genetic_sequences')
184-
if 'data_access_level' not in entity_dict and \
185-
entity_dict['entity_type'] == 'Dataset':
186-
missing_entity_elements.append('data_access_level')
187-
if missing_entity_elements:
188-
logger.error(f"Unexpected format for '{entity_id}'"
189-
f" , missing {str(missing_entity_elements)}"
190-
f" from entity={str(entity_dict)}.")
191-
raise Exception(f"Data error determining accessibility of '{entity_id}'")
192-
193-
if entity_dict['entity_type'] == 'Dataset':
194-
user_access_allowed = (entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_PUBLIC)
195-
if not user_access_allowed:
196-
user_access_allowed = (entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_CONSORTIUM) and \
197-
user_data_access_level['data_access_level'] in [
198-
HubmapConst.ACCESS_LEVEL_CONSORTIUM \
199-
, HubmapConst.ACCESS_LEVEL_PROTECTED]
200-
if not user_access_allowed:
201-
user_access_allowed = (entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_PROTECTED) and \
202-
(user_data_access_level['data_access_level'] in [
203-
HubmapConst.ACCESS_LEVEL_PROTECTED] \
204-
or entity_dict['group_uuid'] in user_data_access_level['group_membership_ids'])
205-
206-
if entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_PROTECTED:
166+
e['original_id'] = e.get("hubmap_id")
167+
168+
invalid_ids = list(requested_ids - matched_ids)
169+
170+
for invalid in invalid_ids:
171+
accessibility_dicts[invalid] = {'valid_id': False}
172+
for entity_dict in entities:
173+
174+
if entity_dict['entity_type'] not in supported_entity_type_list:
175+
accessibility_dicts[entity_dict['original_id']] = {'valid_id': False}
176+
177+
# Make sure all expected elements for the business requirements are in the returned entity.
178+
# Need to determine entity "visibility" using the same rules found in the
179+
180+
missing_entity_elements = []
181+
if 'entity_type' not in entity_dict:
182+
missing_entity_elements.append('entity_type')
183+
if 'uuid' not in entity_dict:
184+
missing_entity_elements.append('uuid')
185+
if 'hubmap_id' not in entity_dict:
186+
missing_entity_elements.append('hubmap_id')
187+
if 'status' not in entity_dict:
188+
missing_entity_elements.append('status')
189+
if 'group_name' not in entity_dict:
190+
missing_entity_elements.append('group_name')
191+
if 'group_uuid' not in entity_dict:
192+
missing_entity_elements.append('group_uuid')
193+
if 'contains_human_genetic_sequences' not in entity_dict and \
194+
entity_dict['entity_type'] == 'Dataset':
195+
missing_entity_elements.append('contains_human_genetic_sequences')
196+
if 'data_access_level' not in entity_dict and \
197+
entity_dict['entity_type'] == 'Dataset':
198+
missing_entity_elements.append('data_access_level')
199+
200+
if missing_entity_elements:
201+
logger.error(f"Unexpected format for '{entity_dict['original_id']}'"
202+
f" , missing {str(missing_entity_elements)}"
203+
f" from entity={str(entity_dict)}.")
204+
# Report the id the caller supplied; 'original_id' is the key set on each entity above.
raise Exception(f"Data error determining accessibility of '{entity_dict['original_id']}'")
205+
206+
if entity_dict['entity_type'] == 'Dataset':
207+
user_access_allowed = (entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_PUBLIC)
208+
if not user_access_allowed:
209+
user_access_allowed = (entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_CONSORTIUM) and \
210+
user_data_access_level['data_access_level'] in [
211+
HubmapConst.ACCESS_LEVEL_CONSORTIUM \
212+
, HubmapConst.ACCESS_LEVEL_PROTECTED]
213+
if not user_access_allowed:
214+
user_access_allowed = (entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_PROTECTED) and \
215+
(user_data_access_level['data_access_level'] in [
216+
HubmapConst.ACCESS_LEVEL_PROTECTED] \
217+
or entity_dict['group_uuid'] in user_data_access_level['group_membership_ids'])
218+
219+
if (entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_PROTECTED) and not user_access_allowed and entity_dict.get('status').lower() == 'published':
220+
abs_path = os.path.join(_globus_public_endpoint_filepath
221+
, entity_dict['uuid'])
222+
elif (entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_PROTECTED):
223+
abs_path = os.path.join(_globus_protected_endpoint_filepath
224+
, entity_dict['group_name']
225+
, entity_dict['uuid'])
226+
elif entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_CONSORTIUM:
227+
abs_path = os.path.join(_globus_consortium_endpoint_filepath
228+
, entity_dict['group_name']
229+
, entity_dict['uuid'])
230+
elif entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_PUBLIC:
231+
abs_path = os.path.join(_globus_public_endpoint_filepath
232+
, entity_dict['uuid'])
233+
else:
234+
raise Exception(f"Unexpected error for {entity_dict['original_id']} of type"
235+
f" {entity_dict['entity_type']} with data access level"
236+
f" {entity_dict['data_access_level']}.")
237+
238+
entity_accessibility_dict = {'valid_id': True, 'access_allowed': user_access_allowed}
239+
if entity_dict.get('status').lower() == 'published':
240+
entity_accessibility_dict['access_allowed'] = True
241+
if user_access_allowed or entity_dict.get('status').lower() == 'published':
242+
entity_accessibility_dict['hubmap_id'] = entity_dict['hubmap_id']
243+
entity_accessibility_dict['uuid'] = entity_dict['uuid']
244+
entity_accessibility_dict['entity_type'] = entity_dict['entity_type']
245+
entity_accessibility_dict['file_system_path'] = abs_path
246+
accessibility_dicts[entity_dict['original_id']] = entity_accessibility_dict
247+
elif entity_dict['entity_type'] == 'Upload':
248+
user_access_allowed = (user_data_access_level['data_access_level'] in [
249+
HubmapConst.ACCESS_LEVEL_PROTECTED]
250+
or entity_dict['group_uuid'] in user_data_access_level['group_membership_ids'])
207251
abs_path = os.path.join(_globus_protected_endpoint_filepath
208252
, entity_dict['group_name']
209253
, entity_dict['uuid'])
210-
elif entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_CONSORTIUM:
211-
abs_path = os.path.join(_globus_consortium_endpoint_filepath
212-
, entity_dict['group_name']
213-
, entity_dict['uuid'])
214-
elif entity_dict['data_access_level'] == HubmapConst.ACCESS_LEVEL_PUBLIC:
215-
abs_path = os.path.join(_globus_public_endpoint_filepath
216-
, entity_dict['uuid'])
254+
255+
entity_accessibility_dict = { 'valid_id': True
256+
, 'access_allowed': user_access_allowed}
257+
if user_access_allowed:
258+
entity_accessibility_dict['hubmap_id'] = entity_dict['hubmap_id']
259+
entity_accessibility_dict['uuid'] = entity_dict['uuid']
260+
entity_accessibility_dict['entity_type'] = entity_dict['entity_type']
261+
entity_accessibility_dict['file_system_path'] = abs_path
262+
accessibility_dicts[entity_dict['original_id']] = entity_accessibility_dict
217263
else:
218-
raise Exception(f"Unexpected error for {entity_id} of type"
219-
f" {entity_dict['entity_type']} with data access level"
220-
f" {entity_dict['data_access_level']}.")
221-
222-
entity_accessibility_dict = {'valid_id': True
223-
, 'access_allowed': user_access_allowed}
224-
if user_access_allowed:
225-
entity_accessibility_dict['hubmap_id'] = entity_dict['hubmap_id']
226-
entity_accessibility_dict['uuid'] = entity_dict['uuid']
227-
entity_accessibility_dict['entity_type'] = entity_dict['entity_type']
228-
entity_accessibility_dict['file_system_path'] = abs_path
229-
return entity_accessibility_dict
230-
elif entity_dict['entity_type'] == 'Upload':
231-
user_access_allowed = (user_data_access_level['data_access_level'] in [
232-
HubmapConst.ACCESS_LEVEL_PROTECTED]
233-
or entity_dict['group_uuid'] in user_data_access_level['group_membership_ids'])
234-
abs_path = os.path.join(_globus_protected_endpoint_filepath
235-
, entity_dict['group_name']
236-
, entity_dict['uuid'])
237-
238-
entity_accessibility_dict = { 'valid_id': True
239-
, 'access_allowed': user_access_allowed}
240-
if user_access_allowed:
241-
entity_accessibility_dict['hubmap_id'] = entity_dict['hubmap_id']
242-
entity_accessibility_dict['uuid'] = entity_dict['uuid']
243-
entity_accessibility_dict['entity_type'] = entity_dict['entity_type']
244-
entity_accessibility_dict['file_system_path'] = abs_path
245-
return entity_accessibility_dict
246-
else:
247-
raise Exception(f"Unexpected error for {entity_id} of type"
248-
f" {entity_dict['entity_type']}.")
264+
raise Exception(f"Unexpected error for {entity_dict['original_id']} of type"
265+
f" {entity_dict['entity_type']}.")
266+
return accessibility_dicts

0 commit comments

Comments
 (0)