Skip to content

Commit 7d054e0

Browse files
max-zilla, tcnicholl, and marini
authored
Private extractor support (#991)
* add owners list to models * add owner check to lookup * add logic to filter inaccessible extractors * add permission checks to edit endpoints * new endpoints, rename Owners to Access * Update heartbeat_listener.py * update heartbeat logic * Further clean up heartbeat listener * set alive status * typo fix * Permission modification endpoints * Clean up permissions logic * run codegen * Fix process logic if no process rules given * enforce permissions in more places * fix pytest * clean up dataset permission logic * revert admin_mode logic checks * clean up test logic * Update authorization_deps.py * ObjectId -> PydanticObjectId * add missing comma * Fix failing tests, codegen * import Union * fix missing import * Update ListenersService.ts * fixing an error i was seeing that left the listener page totally blank dataset_id was being sent in by the component as 'true' when it should be 'null' and then the 'true' should have gone after * fixing registration of extractors * Fixed listener query when checking if a user is in the user list. The query was not matching a user since it was an `equality` query on an `array` and not a `in` query. * Reverting prior listener user matching query since it was not the issue and it broke the query. Not sure what happened. --------- Co-authored-by: toddn <[email protected]> Co-authored-by: Luigi Marini <[email protected]>
1 parent 11a4860 commit 7d054e0

37 files changed

+1456
-310
lines changed

backend/app/deps/authorization_deps.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -423,8 +423,8 @@ class ListenerAuthorization:
423423
For more info see https://fastapi.tiangolo.com/advanced/advanced-dependencies/.
424424
Regular users are not allowed to run non-active listeners"""
425425

426-
# def __init__(self, optional_arg: str = None):
427-
# self.optional_arg = optional_arg
426+
# def __init__(self, role: str = "viewer"):
427+
# self.role = role
428428

429429
async def __call__(
430430
self,
@@ -438,10 +438,32 @@ async def __call__(
438438
if admin and admin_mode:
439439
return True
440440

441-
# Else check if listener is active or current user is the creator of the extractor
442441
if (
443442
listener := await EventListenerDB.get(PydanticObjectId(listener_id))
444443
) is not None:
444+
# If listener has access restrictions, evaluate them against requesting user
445+
if listener.access is not None:
446+
group_q = await GroupDB.find(
447+
Or(
448+
GroupDB.creator == current_user,
449+
GroupDB.users.email == current_user,
450+
),
451+
).to_list()
452+
user_groups = [g.id for g in group_q]
453+
454+
valid_modificaiton = (
455+
(admin and admin_mode)
456+
or (listener.creator and listener.creator.email == current_user)
457+
or (listener.access.owner == current_user)
458+
or (current_user in listener.access.users)
459+
or (not set(user_groups).isdisjoint(listener.access.groups))
460+
)
461+
if not valid_modificaiton:
462+
raise HTTPException(
463+
status_code=403,
464+
detail=f"User `{current_user} does not have permission on listener `{listener_id}`",
465+
)
466+
445467
if listener.active is True or (
446468
listener.creator and listener.creator.email == current_user
447469
):

backend/app/heartbeat_listener_sync.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import json
22
import logging
3-
43
import pika
4+
from beanie import PydanticObjectId
5+
56
from app.config import settings
67
from app.models.listeners import EventListenerDB, EventListenerOut, ExtractorInfo
78
from app.models.search import SearchCriteria
@@ -29,22 +30,32 @@ def callback(ch, method, properties, body):
2930
extractor_db = EventListenerDB(
3031
**extractor_info, properties=ExtractorInfo(**extractor_info)
3132
)
33+
owner = msg["owner"]
34+
if owner is not None:
35+
extractor_db.access = {"owner": owner}
3236

3337
mongo_client = MongoClient(settings.MONGODB_URL)
3438
db = mongo_client[settings.MONGO_DATABASE]
3539

3640
# check to see if extractor already exists
37-
existing_extractor = db["listeners"].find_one({"name": msg["queue"]})
41+
if owner is None:
42+
existing_extractor = EventListenerDB.find_one(
43+
EventListenerDB.name == msg["queue"], EventListenerDB.access == None
44+
)
45+
else:
46+
existing_extractor = EventListenerDB.find_one(
47+
EventListenerDB.name == msg["queue"], EventListenerDB.access.owner == owner
48+
)
3849
if existing_extractor is not None:
3950
# Update existing listener
4051
existing_version = existing_extractor["version"]
4152
new_version = extractor_db.version
4253
if version.parse(new_version) > version.parse(existing_version):
4354
# if this is a new version, add it to the database
44-
new_extractor = db["listeners"].insert_one(extractor_db.to_mongo())
45-
found = db["listeners"].find_one({"_id": new_extractor.inserted_id})
55+
new_extractor = EventListenerDB.insert_one(extractor_db.to_mongo())
56+
found = EventListenerDB.get(PydanticObjectId(new_extractor.inserted_id))
4657
# TODO - for now we are not deleting an older version of the extractor, just adding a new one
47-
# removed = db["listeners"].delete_one({"_id": existing_extractor["_id"]})
58+
# removed = EventListenerDB.delete_one(EventListenerDB.id == PydanticObjectId(existing_extractor["_id"]))
4859
extractor_out = EventListenerOut.from_mongo(found)
4960
logger.info(
5061
"%s updated from %s to %s"
@@ -53,8 +64,8 @@ def callback(ch, method, properties, body):
5364
return extractor_out
5465
else:
5566
# Register new listener
56-
new_extractor = db["listeners"].insert_one(extractor_db.to_mongo())
57-
found = db["listeners"].find_one({"_id": new_extractor.inserted_id})
67+
new_extractor = EventListenerDB.insert_one(extractor_db.to_mongo())
68+
found = EventListenerDB.get(PydanticObjectId(new_extractor.inserted_id))
5869
extractor_out = EventListenerOut.from_mongo(found)
5970
logger.info("New extractor registered: " + extractor_name)
6071

@@ -97,7 +108,7 @@ def callback(ch, method, properties, body):
97108
FeedListener(listener_id=extractor_out.id, automatic=True)
98109
],
99110
)
100-
db["feeds"].insert_one(new_feed.to_mongo())
111+
FeedDB.insert_one(new_feed.to_mongo())
101112

102113
return extractor_out
103114

backend/app/models/listeners.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
from typing import List, Optional, Union
44

55
import pymongo
6+
from beanie import Document, PydanticObjectId, View
7+
from pydantic import AnyUrl, BaseModel, Field
8+
69
from app.config import settings
710
from app.models.authorization import AuthorizationDB
811
from app.models.mongomodel import MongoDBRef
912
from app.models.users import UserOut
10-
from beanie import Document, PydanticObjectId, View
11-
from pydantic import AnyUrl, BaseModel, Field
1213

1314

1415
class Repository(BaseModel):
@@ -37,6 +38,17 @@ class ExtractorInfo(BaseModel):
3738
categories: Optional[List[str]] = []
3839
parameters: Optional[dict] = None
3940
version: Optional[str] = "1.0"
41+
unique_key: Optional[str] = None
42+
43+
44+
class AccessList(BaseModel):
45+
"""Container object for lists of user emails/group IDs/dataset IDs that can submit to listener.
46+
The singular owner is the primary who can modify other lists."""
47+
48+
owner: str
49+
users: List[str] = []
50+
groups: List[PydanticObjectId] = []
51+
datasets: List[PydanticObjectId] = []
4052

4153

4254
class EventListenerBase(BaseModel):
@@ -45,6 +57,7 @@ class EventListenerBase(BaseModel):
4557
name: str
4658
version: str = "1.0"
4759
description: str = ""
60+
access: Optional[AccessList] = None
4861

4962

5063
class EventListenerIn(EventListenerBase):
@@ -68,7 +81,7 @@ class EventListenerDB(Document, EventListenerBase):
6881
created: datetime = Field(default_factory=datetime.now)
6982
modified: datetime = Field(default_factory=datetime.now)
7083
lastAlive: datetime = None
71-
alive: Optional[bool] = None # made up field to indicate if extractor is alive
84+
alive: Optional[bool] = None
7285
active: bool = False
7386
properties: Optional[ExtractorInfo] = None
7487

backend/app/routers/authorization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ async def get_dataset_roles(
437437
roles = DatasetRoles(dataset_id=str(dataset.id))
438438

439439
async for auth in AuthorizationDB.find(
440-
AuthorizationDB.dataset_id == ObjectId(dataset_id)
440+
AuthorizationDB.dataset_id == PydanticObjectId(dataset_id)
441441
):
442442
# First, fetch all groups that have a role on the dataset
443443
group_user_counts = {}

backend/app/routers/datasets.py

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -322,18 +322,18 @@ async def get_dataset_files(
322322
) is not None:
323323
if authenticated or public or (admin and admin_mode):
324324
query = [
325-
FileDBViewList.dataset_id == ObjectId(dataset_id),
325+
FileDBViewList.dataset_id == PydanticObjectId(dataset_id),
326326
]
327327
else:
328328
query = [
329-
FileDBViewList.dataset_id == ObjectId(dataset_id),
329+
FileDBViewList.dataset_id == PydanticObjectId(dataset_id),
330330
Or(
331331
FileDBViewList.creator.email == user_id,
332332
FileDBViewList.auth.user_ids == user_id,
333333
),
334334
]
335335
if folder_id is not None:
336-
query.append(FileDBViewList.folder_id == ObjectId(folder_id))
336+
query.append(FileDBViewList.folder_id == PydanticObjectId(folder_id))
337337

338338
files_and_count = (
339339
await FileDBViewList.find(*query)
@@ -403,7 +403,7 @@ async def patch_dataset(
403403

404404
if dataset_info.status is not None:
405405
query = [
406-
FileDBViewList.dataset_id == ObjectId(dataset_id),
406+
FileDBViewList.dataset_id == PydanticObjectId(dataset_id),
407407
]
408408
files_views = await FileDBViewList.find(*query).to_list()
409409
for file_view in files_views:
@@ -709,18 +709,20 @@ async def get_dataset_folders(
709709
) is not None:
710710
if authenticated or public:
711711
query = [
712-
FolderDBViewList.dataset_id == ObjectId(dataset_id),
712+
FolderDBViewList.dataset_id == PydanticObjectId(dataset_id),
713713
]
714714
else:
715715
query = [
716-
FolderDBViewList.dataset_id == ObjectId(dataset_id),
716+
FolderDBViewList.dataset_id == PydanticObjectId(dataset_id),
717717
Or(
718718
FolderDBViewList.creator.email == user_id,
719719
FolderDBViewList.auth.user_ids == user_id,
720720
),
721721
]
722722
if parent_folder is not None:
723-
query.append(FolderDBViewList.parent_folder == ObjectId(parent_folder))
723+
query.append(
724+
FolderDBViewList.parent_folder == PydanticObjectId(parent_folder)
725+
)
724726
else:
725727
query.append(FolderDBViewList.parent_folder == None) # noqa: E711
726728

@@ -768,11 +770,11 @@ async def get_dataset_folders_and_files(
768770
) is not None:
769771
if authenticated or public or (admin and admin_mode):
770772
query = [
771-
FolderFileViewList.dataset_id == ObjectId(dataset_id),
773+
FolderFileViewList.dataset_id == PydanticObjectId(dataset_id),
772774
]
773775
else:
774776
query = [
775-
FolderFileViewList.dataset_id == ObjectId(dataset_id),
777+
FolderFileViewList.dataset_id == PydanticObjectId(dataset_id),
776778
Or(
777779
FolderFileViewList.creator.email == user_id,
778780
FolderFileViewList.auth.user_ids == user_id,
@@ -790,8 +792,8 @@ async def get_dataset_folders_and_files(
790792
else:
791793
query.append(
792794
Or(
793-
FolderFileViewList.folder_id == ObjectId(folder_id),
794-
FolderFileViewList.parent_folder == ObjectId(folder_id),
795+
FolderFileViewList.folder_id == PydanticObjectId(folder_id),
796+
FolderFileViewList.parent_folder == PydanticObjectId(folder_id),
795797
)
796798
)
797799

@@ -838,15 +840,17 @@ async def delete_folder(
838840
if (await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
839841
if (folder := await FolderDB.get(PydanticObjectId(folder_id))) is not None:
840842
# delete current folder and files
841-
async for file in FileDB.find(FileDB.folder_id == ObjectId(folder_id)):
843+
async for file in FileDB.find(
844+
FileDB.folder_id == PydanticObjectId(folder_id)
845+
):
842846
await remove_file_entry(file.id, fs, es)
843847

844848
# recursively delete child folder and files
845849
async def _delete_nested_folders(parent_folder_id):
846850
while (
847851
await FolderDB.find_one(
848-
FolderDB.dataset_id == ObjectId(dataset_id),
849-
FolderDB.parent_folder == ObjectId(parent_folder_id),
852+
FolderDB.dataset_id == PydanticObjectId(dataset_id),
853+
FolderDB.parent_folder == PydanticObjectId(parent_folder_id),
850854
)
851855
) is not None:
852856
async for subfolder in FolderDB.find(
@@ -1201,7 +1205,7 @@ async def download_dataset(
12011205

12021206
# Write dataset metadata if found
12031207
metadata = await MetadataDB.find(
1204-
MetadataDB.resource.resource_id == ObjectId(dataset_id)
1208+
MetadataDB.resource.resource_id == PydanticObjectId(dataset_id)
12051209
).to_list()
12061210
if len(metadata) > 0:
12071211
datasetmetadata_path = os.path.join(
@@ -1220,7 +1224,7 @@ async def download_dataset(
12201224
file_count = 0
12211225

12221226
async for file in FileDBViewList.find(
1223-
FileDBViewList.dataset_id == ObjectId(dataset_id)
1227+
FileDBViewList.dataset_id == PydanticObjectId(dataset_id)
12241228
):
12251229
# find the bytes id
12261230
# if it's working draft file_id == origin_id
@@ -1254,7 +1258,7 @@ async def download_dataset(
12541258
bag_size += current_file_size
12551259

12561260
metadata = await MetadataDB.find(
1257-
MetadataDB.resource.resource_id == ObjectId(dataset_id)
1261+
MetadataDB.resource.resource_id == PydanticObjectId(dataset_id)
12581262
).to_list()
12591263
if len(metadata) > 0:
12601264
metadata_filename = file_name + "_metadata.json"

0 commit comments

Comments
 (0)