
Commit 9c64e8b

tcnicholl and marini authored
464 datasets visible to logged in users with AUTHENTICATED status (#466)
* logged in users can now see 'public' datasets
* remove print statements
* public datasets visible
* we can view file, but does not show up in dataset view
* files visible for public datasets
* fixing indentation
* formatting
* adding new modal for dataset, just a copy of existing one, will change
* we can now see the right status, button works, but status does not change yet
* status change now works, page reloads and shows new status
* change status instead of make public
* import DatasetStatus
* formatting
* codegen
* anonymous user added
* as todos, anonymous user is here. not sure if this is a good solution.
* dataset page does not load if not logged in, uncaught error in console, but no redirect to sign in
* now redirects, does not let you see the page
* reverting
* adding a check file status
* using checkfilestatus in files
* check public status for dataset metadata
* fixing capital PUBLIC
* formatting
* matching main
* new dependency uses anonymous user added to dataset router, methods that should allow public access
* anonymous user can access public datasets
* formatting
* using get user or anonymous in method
* formatting
* TODO for later
* isAuthorized checks if dataset can be accessed without login; getCurrEmail breaks the view when not logged in
* ispublic method works, but page reloads on dataset
* does not work on page
* cleanup after merge; currently not using anonymous user, will find some other way
* no more anonymous user
* no more anonymous user
* no more anonymous user
* remove unused imports that were deleted in merge
* sharing tab disabled for public only access
* add metadata button removed
* the delete button is gone if user is viewer or dataset is public
* edit button now removed for all kinds of metadata
* need to pass role to widget
* MERGE public files not showing up right bug - file that should be in folder is showing up at root level for some reason
* error if no role on file, return public auth if does have role on file
* we will now see public datasets in explore
* formatting
* fixing package log to match main
* fixing changes lost in merge; we can change status, new status reflected on other users explore pages
* add metadata button disabled for users who see it as public dataset
* fixing conflict
* adding datasetRole, fixing indent
* remove published, not currently an option
* no share or submit extractor public datasets
* using 'authenticated' instead of 'public' for datasets all authenticated users can view
* formatting
* changing name, fits better auth
* Moved submission button to bottom dialog actions in line with other popups and renamed to Update.

---------

Co-authored-by: Luigi Marini <[email protected]>
1 parent: b0b560c

18 files changed: +549 additions, −85 deletions

backend/app/deps/authorization_deps.py

Lines changed: 105 additions & 5 deletions
@@ -5,7 +5,7 @@
 
 from app.keycloak_auth import get_current_username
 from app.models.authorization import RoleType, AuthorizationDB
-from app.models.datasets import DatasetDB
+from app.models.datasets import DatasetDB, DatasetStatus
 from app.models.files import FileOut, FileDB
 from app.models.groups import GroupOut, GroupDB
 from app.models.metadata import MetadataDB

@@ -40,6 +40,24 @@ async def get_role_by_file(
                 AuthorizationDB.user_ids == current_user,
             ),
         )
+        if authorization is None:
+            if (
+                dataset := await DatasetDB.get(PydanticObjectId(file.dataset_id))
+            ) is not None:
+                if dataset.status == DatasetStatus.AUTHENTICATED.name:
+                    auth_dict = {
+                        "creator": dataset.author.email,
+                        "dataset_id": file.dataset_id,
+                        "user_ids": [current_user],
+                        "role": RoleType.VIEWER,
+                    }
+                    authenticated_auth = AuthorizationDB(**auth_dict)
+                    return authenticated_auth
+                else:
+                    raise HTTPException(
+                        status_code=403,
+                        detail=f"User `{current_user} does not have role on file {file_id}",
+                    )
         return authorization.role
     raise HTTPException(status_code=404, detail=f"File {file_id} not found")
 

@@ -96,6 +114,28 @@ async def get_role_by_group(
     raise HTTPException(status_code=404, detail=f"Group {group_id} not found")
 
 
+async def is_public_dataset(
+    dataset_id: str,
+) -> bool:
+    """Checks if a dataset is public."""
+    if (dataset_out := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
+        if dataset_out.status == DatasetStatus.PUBLIC:
+            return True
+        else:
+            return False
+
+
+async def is_authenticated_dataset(
+    dataset_id: str,
+) -> bool:
+    """Checks if a dataset is authenticated."""
+    if (dataset_out := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
+        if dataset_out.status == DatasetStatus.AUTHENTICATED:
+            return True
+        else:
+            return False
+
+
 class Authorization:
     """We use class dependency so that we can provide the `permission` parameter to the dependency.
     For more info see https://fastapi.tiangolo.com/advanced/advanced-dependencies/."""

@@ -125,10 +165,24 @@ async def __call__(
                     detail=f"User `{current_user} does not have `{self.role}` permission on dataset {dataset_id}",
                 )
         else:
-            raise HTTPException(
-                status_code=403,
-                detail=f"User `{current_user} does not have `{self.role}` permission on dataset {dataset_id}",
-            )
+            if (
+                current_dataset := await DatasetDB.get(PydanticObjectId(dataset_id))
+            ) is not None:
+                if (
+                    current_dataset.status == DatasetStatus.AUTHENTICATED.name
+                    and self.role == "viewer"
+                ):
+                    return True
+                else:
+                    raise HTTPException(
+                        status_code=403,
+                        detail=f"User `{current_user} does not have `{self.role}` permission on dataset {dataset_id}",
+                    )
+            else:
+                raise HTTPException(
+                    status_code=404,
+                    detail=f"The dataset {dataset_id} is not found",
+                )
 
 
 class FileAuthorization:

@@ -251,6 +305,52 @@ async def __call__(
     raise HTTPException(status_code=404, detail=f"Group {group_id} not found")
 
 
+class CheckStatus:
+    """We use class dependency so that we can provide the `permission` parameter to the dependency.
+    For more info see https://fastapi.tiangolo.com/advanced/advanced-dependencies/."""
+
+    def __init__(self, status: str):
+        self.status = status
+
+    async def __call__(
+        self,
+        dataset_id: str,
+    ):
+        if (dataset := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
+            if dataset.status == self.status:
+                return True
+            else:
+                return False
+        else:
+            return False
+
+
+class CheckFileStatus:
+    """We use class dependency so that we can provide the `permission` parameter to the dependency.
+    For more info see https://fastapi.tiangolo.com/advanced/advanced-dependencies/."""
+
+    def __init__(self, status: str):
+        self.status = status
+
+    async def __call__(
+        self,
+        file_id: str,
+    ):
+        if (file_out := await FileDB.get(PydanticObjectId(file_id))) is not None:
+            dataset_id = file_out.dataset_id
+            if (
+                dataset := await DatasetDB.get(PydanticObjectId(dataset_id))
+            ) is not None:
+                if dataset.status == self.status:
+                    return True
+                else:
+                    return False
+            else:
+                return False
+        else:
+            return False
+
+
 def access(user_role: RoleType, role_required: RoleType) -> bool:
     """Enforce implied role hierarchy OWNER > EDITOR > UPLOADER > VIEWER"""
     if user_role == RoleType.OWNER:
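
The `Authorization`, `CheckStatus`, and `CheckFileStatus` classes above are FastAPI class dependencies: each is instantiated with a parameter (a role name or a status string) and the resulting object is handed to `Depends`, so the check runs before the route body. Below is a minimal sketch of how such dependencies are typically wired into a route; the endpoint path and handler name are illustrative assumptions, not part of this commit.

# Hypothetical route: path and handler name are illustrative only.
from fastapi import APIRouter, Depends

from app.deps.authorization_deps import Authorization, CheckStatus
from app.models.datasets import DatasetStatus

router = APIRouter()


@router.get("/datasets/{dataset_id}/visibility-example")
async def dataset_visibility_example(
    dataset_id: str,
    # Raises 403/404 before the body runs unless the caller has at least VIEWER
    # access (or the dataset has AUTHENTICATED status, per the change above).
    allowed: bool = Depends(Authorization("viewer")),
    # True only when the dataset's status equals the value passed to CheckStatus.
    is_public: bool = Depends(CheckStatus(DatasetStatus.PUBLIC.name)),
):
    return {"dataset_id": dataset_id, "viewable": allowed, "public": is_public}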
Lines changed: 128 additions & 0 deletions (new file)

@@ -0,0 +1,128 @@
import logging
import pika
import json
from packaging import version
from pymongo import MongoClient

from app.config import settings
from app.models.search import SearchCriteria
from app.routers.feeds import FeedIn, FeedListener, FeedOut, FeedDB, associate_listener
from app.models.listeners import EventListenerDB, EventListenerOut, ExtractorInfo

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def callback(ch, method, properties, body):
    """This method receives messages from RabbitMQ and processes them.
    the extractor info is parsed from the message and if the extractor is new
    or is a later version, the db is updated.
    """
    msg = json.loads(body.decode("utf-8"))

    extractor_info = msg["extractor_info"]
    extractor_name = extractor_info["name"]
    extractor_db = EventListenerDB(
        **extractor_info, properties=ExtractorInfo(**extractor_info)
    )

    mongo_client = MongoClient(settings.MONGODB_URL)
    db = mongo_client[settings.MONGO_DATABASE]

    # check to see if extractor alredy exists
    existing_extractor = db["listeners"].find_one({"name": msg["queue"]})
    if existing_extractor is not None:
        # Update existing listener
        existing_version = existing_extractor["version"]
        new_version = extractor_db.version
        if version.parse(new_version) > version.parse(existing_version):
            # if this is a new version, add it to the database
            new_extractor = db["listeners"].insert_one(extractor_db.to_mongo())
            found = db["listeners"].find_one({"_id": new_extractor.inserted_id})
            # TODO - for now we are not deleting an older version of the extractor, just adding a new one
            # removed = db["listeners"].delete_one({"_id": existing_extractor["_id"]})
            extractor_out = EventListenerOut.from_mongo(found)
            logger.info(
                "%s updated from %s to %s"
                % (extractor_name, existing_version, new_version)
            )
            return extractor_out
    else:
        # Register new listener
        new_extractor = db["listeners"].insert_one(extractor_db.to_mongo())
        found = db["listeners"].find_one({"_id": new_extractor.inserted_id})
        extractor_out = EventListenerOut.from_mongo(found)
        logger.info("New extractor registered: " + extractor_name)

        # Assign MIME-based listener if needed
        if extractor_out.properties and extractor_out.properties.process:
            process = extractor_out.properties.process
            if "file" in process:
                # Create a MIME-based feed for this v1 extractor
                criteria_list = []
                for mime in process["file"]:
                    main_type = mime.split("/")[0] if mime.find("/") > -1 else mime
                    sub_type = mime.split("/")[1] if mime.find("/") > -1 else None
                    if sub_type:
                        if sub_type == "*":
                            # If a wildcard, just match on main type
                            criteria_list.append(
                                SearchCriteria(
                                    field="content_type_main", value=main_type
                                )
                            )
                        else:
                            # Otherwise match the whole string
                            criteria_list.append(
                                SearchCriteria(field="content_type", value=mime)
                            )
                    else:
                        criteria_list.append(
                            SearchCriteria(field="content_type", value=mime)
                        )

                # TODO: Who should the author be for an auto-generated feed? Currently None.
                new_feed = FeedDB(
                    name=extractor_name,
                    search={
                        "index_name": "file",
                        "criteria": criteria_list,
                        "mode": "or",
                    },
                    listeners=[
                        FeedListener(listener_id=extractor_out.id, automatic=True)
                    ],
                )
                db["feeds"].insert_one(new_feed.to_mongo())

        return extractor_out


def listen_for_heartbeats():
    """

    this method runs continuously listening for extractor heartbeats send over rabbitmq

    """
    credentials = pika.PlainCredentials(settings.RABBITMQ_USER, settings.RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        settings.RABBITMQ_HOST, 5672, "/", credentials
    )
    connection = pika.BlockingConnection(parameters)
    channel = connection.channel()

    channel.exchange_declare(
        exchange=settings.HEARTBEAT_EXCHANGE, exchange_type="fanout", durable=True
    )
    result = channel.queue_declare(queue="", exclusive=True)
    queue_name = result.method.queue
    channel.queue_bind(exchange=settings.HEARTBEAT_EXCHANGE, queue=queue_name)

    logger.info(" [*] Waiting for heartbeats. To exit press CTRL+C")
    channel.basic_consume(queue=queue_name, on_message_callback=callback, auto_ack=True)
    channel.start_consuming()


if __name__ == "__main__":
    listen_for_heartbeats()
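
The listener above declares a durable fanout exchange, binds an exclusive queue to it, and expects each heartbeat to be a JSON body carrying a `queue` name and an `extractor_info` block (at minimum a `name` and `version`, judging from `callback`). Below is a minimal sketch of publishing one such heartbeat for local testing; the extractor name, version, and `process` filter are made-up values, not a documented message format.

# Hypothetical test publisher: message fields are inferred from callback() above.
import json

import pika

from app.config import settings

credentials = pika.PlainCredentials(settings.RABBITMQ_USER, settings.RABBITMQ_PASS)
connection = pika.BlockingConnection(
    pika.ConnectionParameters(settings.RABBITMQ_HOST, 5672, "/", credentials)
)
channel = connection.channel()
# Match the declaration in listen_for_heartbeats() so either side can start first.
channel.exchange_declare(
    exchange=settings.HEARTBEAT_EXCHANGE, exchange_type="fanout", durable=True
)

heartbeat = {
    "queue": "ncsa.wordcount",  # assumed extractor queue name
    "extractor_info": {
        "name": "ncsa.wordcount",
        "version": "2.0",
        "description": "Counts words in text files",
        "process": {"file": ["text/*"]},
    },
}
channel.basic_publish(
    exchange=settings.HEARTBEAT_EXCHANGE, routing_key="", body=json.dumps(heartbeat)
)
connection.close()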

backend/app/main.py

Lines changed: 1 addition & 4 deletions
@@ -122,10 +122,7 @@
     dependencies=[Depends(get_current_username)],
 )
 api_router.include_router(
-    metadata_datasets.router,
-    prefix="/datasets",
-    tags=["metadata"],
-    dependencies=[Depends(get_current_username)],
+    metadata_datasets.router, prefix="/datasets", tags=["metadata"]
 )
 api_router.include_router(
     folders.router,

backend/app/models/datasets.py

Lines changed: 3 additions & 0 deletions
@@ -19,6 +19,7 @@ def _generate_next_value_(name, start, count, last_values):
 class DatasetStatus(AutoName):
     PRIVATE = auto()
     PUBLIC = auto()
+    AUTHENTICATED = auto()
     DEFAULT = auto()
     TRIAL = auto()
 

@@ -35,6 +36,7 @@ class DatasetIn(DatasetBase):
 class DatasetPatch(BaseModel):
     name: Optional[str]
     description: Optional[str]
+    status: Optional[str]
 
 
 class DatasetDB(Document, DatasetBase):

@@ -63,6 +65,7 @@ class DatasetDBViewList(View, DatasetBase):
     modified: datetime = Field(default_factory=datetime.utcnow)
     auth: List[AuthorizationDB]
     thumbnail_id: Optional[PydanticObjectId] = None
+    status: Optional[str]
 
     class Settings:
         source = DatasetDB
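
With `AUTHENTICATED` added to `DatasetStatus` and `status` exposed on `DatasetPatch`, changing a dataset's visibility reduces to patching a single field. A minimal sketch of building such a patch from these models (the route that consumes it is not shown in this commit):

# Sketch only: shows how the new status value moves through the Pydantic models.
from app.models.datasets import DatasetPatch, DatasetStatus

# Make a dataset visible to every logged-in user.
patch = DatasetPatch(status=DatasetStatus.AUTHENTICATED.name)

# Only the fields explicitly set are applied; name and description stay untouched.
update = patch.dict(exclude_unset=True)
assert update == {"status": "AUTHENTICATED"}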

backend/app/routers/authorization.py

Lines changed: 18 additions & 4 deletions
@@ -3,7 +3,6 @@
 from bson import ObjectId
 from fastapi import APIRouter, Depends
 from fastapi.exceptions import HTTPException
-
 from app.dependencies import get_elasticsearchclient
 from app.deps.authorization_deps import (
     Authorization,

@@ -25,6 +24,7 @@
     DatasetRoles,
     DatasetDB,
     DatasetOut,
+    DatasetStatus,
 )
 from app.models.groups import GroupDB
 from app.models.pyobjectid import PyObjectId

@@ -80,9 +80,23 @@ async def get_dataset_role(
             ),
         )
     ) is None:
-        raise HTTPException(
-            status_code=404, detail=f"No authorization found for dataset: {dataset_id}"
-        )
+        if (
+            current_dataset := await DatasetDB.get(PydanticObjectId(dataset_id))
+        ) is not None:
+            if current_dataset.status == DatasetStatus.AUTHENTICATED.name:
+                public_authorization_in = {
+                    "dataset_id": PydanticObjectId(dataset_id),
+                    "role": RoleType.VIEWER,
+                }
+                authorization = AuthorizationDB(
+                    **public_authorization_in, creator=current_dataset.creator.email
+                )
+                return authorization.dict()
+            else:
+                raise HTTPException(
+                    status_code=404,
+                    detail=f"No authorization found for dataset: {dataset_id}",
+                )
     else:
         return auth_db.dict()
 
0 commit comments
