Skip to content

Commit c6ffa05

Browse files
authored
1066 index folders (#1090)
* index and remove index folder * add folder to search result * sort by date; last date first * update dataset will trigger reindex of folders
1 parent 2c2d76f commit c6ffa05

File tree

9 files changed

+242
-10
lines changed

9 files changed

+242
-10
lines changed

backend/app/routers/datasets.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,12 @@
4141
from app.routers.files import add_file_entry, add_local_file_entry, remove_file_entry
4242
from app.routers.licenses import delete_license
4343
from app.search.connect import delete_document_by_id
44-
from app.search.index import index_dataset, index_file
44+
from app.search.index import (
45+
index_dataset,
46+
index_file,
47+
index_folder,
48+
remove_folder_index,
49+
)
4550
from beanie import PydanticObjectId
4651
from beanie.odm.operators.update.general import Inc
4752
from beanie.operators import And, Or
@@ -341,6 +346,13 @@ async def edit_dataset(
341346

342347
# Update entry to the dataset index
343348
await index_dataset(es, DatasetOut(**dataset.dict()), update=True)
349+
350+
# Update the folders index, since each folder entry is indexed with its dataset's downloads and status
351+
async for folder in FolderDB.find(
352+
FolderDB.dataset_id == PydanticObjectId(dataset_id)
353+
):
354+
await index_folder(es, FolderOut(**folder.dict()), update=True)
355+
344356
return dataset.dict()
345357
raise HTTPException(status_code=404, detail=f"Dataset {dataset_id} not found")
346358

@@ -379,6 +391,13 @@ async def patch_dataset(
379391

380392
# Update entry to the dataset index
381393
await index_dataset(es, DatasetOut(**dataset.dict()), update=True)
394+
395+
# Update the folders index, since each folder entry is indexed with its dataset's downloads and status
396+
async for folder in FolderDB.find(
397+
FolderDB.dataset_id == PydanticObjectId(dataset_id)
398+
):
399+
await index_folder(es, FolderOut(**folder.dict()), update=True)
400+
382401
return dataset.dict()
383402

384403

@@ -423,6 +442,7 @@ async def add_folder(
423442
dataset_id: str,
424443
folder_in: FolderIn,
425444
user=Depends(get_current_user),
445+
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
426446
allow: bool = Depends(Authorization("uploader")),
427447
):
428448
if (await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
@@ -436,6 +456,7 @@ async def add_folder(
436456
**folder_in.dict(), creator=user, dataset_id=PydanticObjectId(dataset_id)
437457
)
438458
await new_folder.insert()
459+
await index_folder(es, FolderOut(**new_folder.dict()))
439460
return new_folder.dict()
440461
raise HTTPException(status_code=404, detail=f"Dataset {dataset_id} not found")
441462

@@ -595,9 +616,11 @@ async def _delete_nested_folders(parent_folder_id):
595616
await remove_file_entry(file.id, fs, es)
596617
await _delete_nested_folders(subfolder.id)
597618
await subfolder.delete()
619+
await remove_folder_index(subfolder.id, es)
598620

599621
await _delete_nested_folders(folder_id)
600622
await folder.delete()
623+
await remove_folder_index(folder.id, es)
601624
return {"deleted": folder_id}
602625
else:
603626
raise HTTPException(status_code=404, detail=f"Folder {folder_id} not found")
@@ -623,6 +646,7 @@ async def patch_folder(
623646
dataset_id: str,
624647
folder_id: str,
625648
folder_info: FolderPatch,
649+
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
626650
user=Depends(get_current_user),
627651
allow: bool = Depends(Authorization("editor")),
628652
):
@@ -640,6 +664,8 @@ async def patch_folder(
640664
folder.parent_folder = folder_info.parent_folder
641665
folder.modified = datetime.datetime.utcnow()
642666
await folder.save()
667+
await index_folder(es, FolderOut(**folder.dict()), update=True)
668+
643669
return folder.dict()
644670
else:
645671
raise HTTPException(status_code=404, detail=f"Folder {folder_id} not found")
@@ -894,6 +920,7 @@ async def create_dataset_from_zip(
894920
@router.get("/{dataset_id}/download", response_model=DatasetOut)
895921
async def download_dataset(
896922
dataset_id: str,
923+
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
897924
user=Depends(get_current_user),
898925
fs: Minio = Depends(dependencies.get_fs),
899926
allow: bool = Depends(Authorization("viewer")),
@@ -1043,6 +1070,15 @@ async def download_dataset(
10431070
response.headers["Content-Disposition"] = "attachment; filename=%s" % zip_name
10441071
# Increment download count
10451072
await dataset.update(Inc({DatasetDB.downloads: 1}))
1073+
1074+
# reindex
1075+
await index_dataset(es, DatasetOut(**dataset.dict()), update=True)
1076+
# Update the folders index, since each folder entry is indexed with its dataset's downloads and status
1077+
async for folder in FolderDB.find(
1078+
FolderDB.dataset_id == PydanticObjectId(dataset_id)
1079+
):
1080+
await index_folder(es, FolderOut(**folder.dict()), update=True)
1081+
10461082
return response
10471083
raise HTTPException(status_code=404, detail=f"Dataset {dataset_id} not found")
10481084

backend/app/routers/files.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ async def download_file(
290290
file_id: str,
291291
version: Optional[int] = None,
292292
increment: Optional[bool] = True,
293+
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
293294
fs: Minio = Depends(dependencies.get_fs),
294295
allow: bool = Depends(FileAuthorization("viewer")),
295296
):
@@ -340,6 +341,10 @@ async def download_file(
340341
if increment:
341342
# Increment download count
342343
await file.update(Inc({FileDB.downloads: 1}))
344+
345+
# reindex
346+
await index_file(es, FileOut(**file.dict()), update=True)
347+
343348
return response
344349

345350
else:
@@ -351,6 +356,7 @@ async def download_file_url(
351356
file_id: str,
352357
version: Optional[int] = None,
353358
expires_in_seconds: Optional[int] = 3600,
359+
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
354360
external_fs: Minio = Depends(dependencies.get_external_fs),
355361
allow: bool = Depends(FileAuthorization("viewer")),
356362
):
@@ -392,6 +398,9 @@ async def download_file_url(
392398
# Increment download count
393399
await file.update(Inc({FileDB.downloads: 1}))
394400

401+
# reindex
402+
await index_file(es, FileOut(**file.dict()), update=True)
403+
395404
# return presigned url
396405
return {"presigned_url": presigned_url}
397406
else:

backend/app/routers/public_datasets.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@
1414
from app.models.folders import FolderDB, FolderDBViewList, FolderOut
1515
from app.models.metadata import MetadataDB, MetadataDefinitionDB, MetadataOut
1616
from app.models.pages import Paged, _construct_page_metadata, _get_page_query
17+
from app.search.index import index_dataset, index_folder
1718
from beanie import PydanticObjectId
1819
from beanie.odm.operators.update.general import Inc
1920
from beanie.operators import And, Or
2021
from bson import ObjectId, json_util
22+
from elasticsearch import Elasticsearch
2123
from fastapi import APIRouter, Depends, Form, HTTPException
2224
from fastapi.responses import StreamingResponse
2325
from fastapi.security import HTTPBearer
@@ -217,6 +219,7 @@ async def get_dataset_metadata(
217219
@router.get("/{dataset_id}/download", response_model=DatasetOut)
218220
async def download_dataset(
219221
dataset_id: str,
222+
es: Elasticsearch = Depends(dependencies.get_elasticsearchclient),
220223
fs: Minio = Depends(dependencies.get_fs),
221224
):
222225
if (dataset := await DatasetDB.get(PydanticObjectId(dataset_id))) is not None:
@@ -370,6 +373,15 @@ async def download_dataset(
370373
)
371374
# Increment download count
372375
await dataset.update(Inc({DatasetDB.downloads: 1}))
376+
377+
# reindex
378+
await index_dataset(es, DatasetOut(**dataset.dict()), update=True)
379+
# Update the folders index, since each folder entry is indexed with its dataset's downloads and status
380+
async for folder in FolderDB.find(
381+
FolderDB.dataset_id == PydanticObjectId(dataset_id)
382+
):
383+
await index_folder(es, FolderOut(**folder.dict()), update=True)
384+
373385
return response
374386
else:
375387
raise HTTPException(

backend/app/search/index.py

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1-
from typing import List, Optional
1+
from typing import List, Optional, Union
22

33
from app.config import settings
44
from app.models.authorization import AuthorizationDB
5-
from app.models.datasets import DatasetOut
5+
from app.models.datasets import DatasetDB, DatasetOut
66
from app.models.files import FileDB, FileOut
7+
from app.models.folders import FolderOut
78
from app.models.metadata import MetadataDB
89
from app.models.search import ElasticsearchEntry
910
from app.models.thumbnails import ThumbnailDB
10-
from app.search.connect import insert_record, update_record
11+
from app.search.connect import delete_document_by_id, insert_record, update_record
1112
from beanie import PydanticObjectId
1213
from bson import ObjectId
1314
from elasticsearch import Elasticsearch, NotFoundError
15+
from fastapi import HTTPException
1416

1517

1618
async def index_dataset(
@@ -112,6 +114,50 @@ async def index_file(
112114
insert_record(es, settings.elasticsearch_index, doc, file.id)
113115

114116

117+
async def index_folder(
    es: Elasticsearch,
    folder: FolderOut,
    user_ids: Optional[List[str]] = None,
    update: bool = False,
):
    """Insert or update the Elasticsearch entry for a folder.

    The entry is built from the folder's own fields plus the ``downloads``
    and ``status`` of the dataset the folder belongs to, so that dataset is
    looked up first.

    Args:
        es: Elasticsearch client handed to ``insert_record``/``update_record``.
        folder: Folder to index; ``folder.id`` is passed to the record helpers.
        user_ids: Accepted for the caller's convenience but not referenced
            anywhere in this function body.
        update: When true, attempt ``update_record`` and fall back to
            ``insert_record`` if ``NotFoundError`` is raised. When false,
            call ``insert_record`` directly.

    Raises:
        HTTPException: 404 when no dataset matches ``folder.dataset_id``.
    """
    parent_dataset = await DatasetDB.find_one(
        DatasetDB.id == PydanticObjectId(folder.dataset_id)
    )
    if parent_dataset is None:
        raise HTTPException(
            status_code=404, detail="Orphan folder doesn't belong to any dataset."
        )

    entry = ElasticsearchEntry(
        resource_type="folder",
        name=folder.name,
        creator=folder.creator.email,
        created=folder.created,
        dataset_id=str(folder.dataset_id),
        folder_id=str(folder.id),
        # Download count and status are taken from the parent dataset.
        downloads=parent_dataset.downloads,
        status=parent_dataset.status,
    ).dict()

    if not update:
        insert_record(es, settings.elasticsearch_index, entry, folder.id)
        return

    try:
        update_record(es, settings.elasticsearch_index, {"doc": entry}, folder.id)
    except NotFoundError:
        # Nothing to update for this folder yet, so insert the entry instead.
        insert_record(es, settings.elasticsearch_index, entry, folder.id)
155+
156+
157+
async def remove_folder_index(folderId: Union[str, ObjectId], es: Elasticsearch):
    """Remove a folder's entry from the configured Elasticsearch index.

    Args:
        folderId: Folder id; converted to ``str`` before the delete call.
        es: Elasticsearch client handed to ``delete_document_by_id``.
    """
    document_id = str(folderId)
    delete_document_by_id(es, settings.elasticsearch_index, document_id)
159+
160+
115161
async def index_thumbnail(
116162
es: Elasticsearch,
117163
thumbnail_id: str,

frontend/src/components/search/PublicSearch.tsx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,18 +164,19 @@ export function PublicSearch() {
164164
{luceneOn ? (
165165
<ReactiveList
166166
componentId="results"
167-
dataField="_score"
167+
dataField="created"
168168
size={20}
169169
pagination={true}
170170
react={{
171171
and: ["string-searchbox"],
172172
}}
173173
render={({ data }) => <PublicSearchResult data={data} />}
174+
sortBy="desc"
174175
/>
175176
) : (
176177
<ReactiveList
177178
componentId="results"
178-
dataField="_score"
179+
dataField="created"
179180
size={20}
180181
pagination={true}
181182
react={{
@@ -189,6 +190,7 @@ export function PublicSearch() {
189190
render={({ data }) => {
190191
return <PublicSearchResult data={data} />;
191192
}}
193+
sortBy="desc"
192194
/>
193195
)}
194196
</ReactiveBase>

frontend/src/components/search/PublicSearchResult.tsx

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { parseDate } from "../../utils/common";
1414
import { theme } from "../../theme";
1515

1616
import parse from "html-react-parser";
17+
import FolderIcon from "@mui/icons-material/Folder";
1718

1819
// Function to parse the elastic search parameter
1920
// If it contains HTML tags like <mark>, it removes them
@@ -52,6 +53,28 @@ function buildDatasetResult(item) {
5253
);
5354
}
5455

56+
// Render a single folder search hit: a folder icon, a bold link to the
// folder inside its public dataset page, and a "Created by ... at ..." line.
function buildFolderResult(item) {
  // Link target: the dataset page, with the hit's _id as the `folder` query parameter.
  const folderHref = `/public_datasets/${item.dataset_id}?folder=${item._id}`;
  const folderName = parseString(item.name);
  const creatorName = parseString(item.creator);
  const createdAt = parseDate(item.created);

  return (
    <>
      <ListItemAvatar sx={{ color: theme.palette.primary.main }}>
        <FolderIcon />
      </ListItemAvatar>
      <Box sx={{ marginTop: "5px" }}>
        <MuiLink
          component={Link}
          to={folderHref}
          sx={{ fontWeight: "bold", fontSize: "18px" }}
        >
          {folderName}
        </MuiLink>
        <Typography variant="body2" color={theme.palette.info.main}>
          Created by {creatorName} at {createdAt}
        </Typography>
      </Box>
    </>
  );
}
77+
5578
function buildFileResult(item) {
5679
return (
5780
<>
@@ -93,7 +116,11 @@ export function PublicSearchResult(props) {
93116
<ListItem alignItems="flex-start" key={item._id}>
94117
{item.resource_type === "dataset"
95118
? buildDatasetResult(item)
96-
: buildFileResult(item)}
119+
: item.resource_type === "file"
120+
? buildFileResult(item)
121+
: item.resource_type === "folder"
122+
? buildFolderResult(item)
123+
: null}
97124
</ListItem>
98125
))}
99126
</List>

frontend/src/components/search/Search.tsx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,18 +204,19 @@ export function Search() {
204204
{luceneOn ? (
205205
<ReactiveList
206206
componentId="results"
207-
dataField="_score"
207+
dataField="created"
208208
size={20}
209209
pagination={true}
210210
react={{
211211
and: ["string-searchbox"],
212212
}}
213213
render={({ data }) => <SearchResult data={data} />}
214+
sortBy="desc"
214215
/>
215216
) : (
216217
<ReactiveList
217218
componentId="results"
218-
dataField="_score"
219+
dataField="created"
219220
size={20}
220221
pagination={true}
221222
react={{
@@ -230,6 +231,7 @@ export function Search() {
230231
render={({ data }) => {
231232
return <SearchResult data={data} />;
232233
}}
234+
sortBy="desc"
233235
/>
234236
)}
235237
</ReactiveBase>

0 commit comments

Comments
 (0)