Skip to content

Commit 526d757

Browse files
committed
WIP ✨ (backend) Add document search view.
TODO : unit tests with OIDC auth Signed-off-by: Fabre Florian <[email protected]>
1 parent a01ade3 commit 526d757

File tree

6 files changed

+263
-9
lines changed

6 files changed

+263
-9
lines changed

src/backend/core/api/serializers.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,3 +801,16 @@ class MoveDocumentSerializer(serializers.Serializer):
801801
choices=enums.MoveNodePositionChoices.choices,
802802
default=enums.MoveNodePositionChoices.LAST_CHILD,
803803
)
804+
805+
806+
class FindDocumentSerializer(serializers.Serializer):
    """Serializer validating the query parameters of a Find search request."""

    # Text to search for; the parameter is mandatory.
    q = serializers.CharField(required=True)

    def validate_q(self, value):
        """
        Ensure the search text is not blank.

        Return `value` unchanged, or raise a `ValidationError` when it only
        contains whitespace.
        """
        if not value.strip():
            raise serializers.ValidationError("Text field cannot be empty.")

        return value

src/backend/core/api/viewsets.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from django.db.models.functions import Left, Length
2020
from django.http import Http404, StreamingHttpResponse
2121
from django.urls import reverse
22+
from django.utils.decorators import method_decorator
2223
from django.utils.functional import cached_property
2324
from django.utils.text import capfirst, slugify
2425
from django.utils.translation import gettext_lazy as _
@@ -29,6 +30,7 @@
2930
from csp.constants import NONE
3031
from csp.decorators import csp_update
3132
from lasuite.malware_detection import malware_detection
33+
from lasuite.oidc_login.decorators import refresh_oidc_access_token
3234
from rest_framework import filters, status, viewsets
3335
from rest_framework import response as drf_response
3436
from rest_framework.permissions import AllowAny
@@ -37,6 +39,7 @@
3739
from core import authentication, choices, enums, models
3840
from core.services.ai_services import AIService
3941
from core.services.collaboration_services import CollaborationService
42+
from core.services.search_indexers import FindDocumentIndexer
4043
from core.tasks.mail import send_ask_for_access_mail
4144
from core.utils import extract_attachments, filter_descendants
4245

@@ -48,6 +51,12 @@
4851
# pylint: disable=too-many-ancestors
4952

5053

54+
class ServiceUnavailable(drf.exceptions.APIException):
    """API exception answering with an HTTP 503 "Service unavailable." error."""

    status_code = status.HTTP_503_SERVICE_UNAVAILABLE
    default_detail = "Service unavailable."
    default_code = "service_unavailable"
58+
59+
5160
class NestedGenericViewSet(viewsets.GenericViewSet):
5261
"""
5362
A generic Viewset aims to be used in a nested route context.
@@ -367,6 +376,7 @@ class DocumentViewSet(
367376
list_serializer_class = serializers.ListDocumentSerializer
368377
trashbin_serializer_class = serializers.ListDocumentSerializer
369378
tree_serializer_class = serializers.ListDocumentSerializer
379+
search_serializer_class = serializers.ListDocumentSerializer
370380

371381
def get_queryset(self):
372382
"""Get queryset performing all annotation and filtering on the document tree structure."""
@@ -980,10 +990,32 @@ def duplicate(self, request, *args, **kwargs):
980990
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
981991
)
982992

983-
# TODO
984-
# @drf.decorators.action(detail=False, methods=["get"])
985-
# def search(self, request, *args, **kwargs):
986-
# index.search()
993+
@drf.decorators.action(detail=False, methods=["get"], url_path="search")
@method_decorator(refresh_oidc_access_token)
def search(self, request, *args, **kwargs):
    """
    Search documents through the Find indexer.

    The `q` query parameter is validated with `FindDocumentSerializer`, then
    forwarded to `FindDocumentIndexer.search()` together with the requesting
    user and the "oidc_access_token" value stored in the session.

    Raises `ServiceUnavailable` (HTTP 503) when the indexer raises a
    `RuntimeError`. Otherwise the indexer result is rendered through
    `get_response_for_queryset()`.
    """
    # NOTE(review): presumably kept fresh by `refresh_oidc_access_token`;
    # it is None when the session holds no token.
    access_token = request.session.get("oidc_access_token")

    serializer = serializers.FindDocumentSerializer(data=request.query_params)
    serializer.is_valid(raise_exception=True)

    indexer = FindDocumentIndexer()
    try:
        queryset = indexer.search(
            # "q" is required, so it is always present once validation passed
            text=serializer.validated_data["q"],
            user=request.user,
            token=access_token,
        )
    except RuntimeError as err:
        # Chain the original error so that the root cause stays in tracebacks
        raise ServiceUnavailable() from err

    return self.get_response_for_queryset(
        queryset,
        context={
            "request": request,
        },
    )
9871019

9881020
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
9891021
def versions_list(self, request, *args, **kwargs):

src/backend/core/services/search_indexers.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,6 @@ def search(self, text, user, token):
135135
"services": ["docs"],
136136
}, token=token)
137137

138-
print(response)
139-
140138
return self.format_response(response)
141139

142140
@abstractmethod
@@ -207,7 +205,7 @@ def search_query(self, data, token) -> requests.Response:
207205

208206
if not url:
209207
raise RuntimeError(
210-
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before indexing."
208+
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before search."
211209
)
212210

213211
try:
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
"""
2+
Tests for Documents API endpoint in impress's core app: search
3+
"""
4+
import responses
5+
6+
import pytest
7+
from faker import Faker
8+
from rest_framework.test import APIClient
9+
10+
from core import factories, models
11+
12+
fake = Faker()
13+
pytestmark = pytest.mark.django_db
14+
15+
16+
@pytest.mark.parametrize("role", models.LinkRoleChoices.values)
@pytest.mark.parametrize("reach", models.LinkReachChoices.values)
def test_api_documents_search_anonymous(reach, role):
    """
    Anonymous users searching documents should get an empty page of results
    whatever the link reach and link role of the existing document.
    """
    factories.DocumentFactory(link_reach=reach, link_role=role)
    anonymous_client = APIClient()

    response = anonymous_client.get(
        "/api/v1.0/documents/search/", data={"q": "alpha"}
    )

    assert response.status_code == 200

    empty_page = {"count": 0, "next": None, "previous": None, "results": []}
    assert response.json() == empty_page
34+
35+
36+
def test_api_documents_search_endpoint_is_none(settings):
    """
    Searching as an authenticated user should answer 503 when
    SEARCH_INDEXER_QUERY_URL is not set.
    """
    settings.SEARCH_INDEXER_QUERY_URL = None

    user = factories.UserFactory()

    client = APIClient()
    client.force_login(user)

    # Send the request with the logged-in client: a fresh APIClient() would
    # query the endpoint anonymously and leave `user` unused.
    # NOTE(review): confirm that the OIDC token refresh decorator lets
    # `force_login` sessions reach the view.
    response = client.get("/api/v1.0/documents/search/", data={"q": "alpha"})

    assert response.status_code == 503
    assert response.json() == {"detail": "Service unavailable."}
51+
52+
53+
@responses.activate
def test_api_documents_search_invalid_params(settings):
    """A search request without the `q` parameter should be rejected with a 400."""
    settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"

    user = factories.UserFactory()

    client = APIClient()
    client.force_login(user)

    # Send the request with the logged-in client: a fresh APIClient() would
    # query the endpoint anonymously and leave `user` unused.
    # NOTE(review): confirm that the OIDC token refresh decorator lets
    # `force_login` sessions reach the view.
    response = client.get("/api/v1.0/documents/search/")

    assert response.status_code == 400
    assert response.json() == {"q": ["This field is required."]}
69+
70+
71+
@responses.activate
def test_api_documents_search_format(settings):
    """Check the pagination envelope and the document fields returned by the search view."""
    find_url = "http://find/api/v1.0/search"
    settings.SEARCH_INDEXER_QUERY_URL = find_url

    user = factories.UserFactory()

    client = APIClient()
    client.force_login(user)

    user_a, user_b, user_c = factories.UserFactory.create_batch(3)
    document = factories.DocumentFactory(
        title="alpha",
        users=(user_a, user_c),
        link_traces=(user, user_b),
    )
    access = factories.UserDocumentAccessFactory(document=document, user=user)

    # Stub the Find service so that it reports the document as the only hit
    responses.add(
        responses.POST,
        find_url,
        json=[{"_id": str(document.pk)}],
        status=200,
    )

    response = client.get("/api/v1.0/documents/search/", data={"q": "alpha"})

    assert response.status_code == 200

    payload = response.json()
    documents = payload.pop("results")
    assert payload == {"count": 1, "next": None, "previous": None}

    def as_api_datetime(value):
        """Format a datetime the way the expected payload spells it (UTC as "Z")."""
        return value.isoformat().replace("+00:00", "Z")

    assert len(documents) == 1
    assert documents[0] == {
        "id": str(document.id),
        "abilities": document.get_abilities(user),
        "ancestors_link_reach": None,
        "ancestors_link_role": None,
        "computed_link_reach": document.computed_link_reach,
        "computed_link_role": document.computed_link_role,
        "created_at": as_api_datetime(document.created_at),
        "creator": str(document.creator.id),
        "depth": 1,
        "excerpt": document.excerpt,
        "link_reach": document.link_reach,
        "link_role": document.link_role,
        "nb_accesses_ancestors": 3,
        "nb_accesses_direct": 3,
        "numchild": 0,
        "path": document.path,
        "title": document.title,
        "updated_at": as_api_datetime(document.updated_at),
        "user_role": access.role,
    }

src/backend/core/tests/test_services_search_indexers.py

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
"""Tests for Documents search indexers"""
22

3+
from functools import partial
34
from unittest.mock import patch
45

56
import pytest
67

7-
from core import factories, utils
8-
from core.services.search_indexers import FindDocumentIndexer
8+
from django.contrib.auth.models import AnonymousUser
9+
10+
from core import factories, models, utils
11+
from core.services.search_indexers import FindDocumentIndexer, get_visited_document_ids_of
912

1013
pytestmark = pytest.mark.django_db
1114

@@ -258,3 +261,79 @@ def test_push_uses_correct_url_and_data(mock_post, settings):
258261
assert args[0] == settings.SEARCH_INDEXER_URL
259262
assert kwargs.get("json") == sample_data
260263
assert kwargs.get("timeout") == 10
264+
265+
266+
def test_get_visited_document_ids_of():
    """
    get_visited_document_ids_of() should list the ids of the documents visited
    by the user, leaving out the ones on which the user has its own access.
    """
    user = factories.UserFactory()
    other = factories.UserFactory()

    # Nothing has been visited yet, for an anonymous user or a registered one
    assert get_visited_document_ids_of(AnonymousUser()) == []
    assert get_visited_document_ids_of(user) == []

    doc1, doc2, _ = factories.DocumentFactory.create_batch(3)

    # Only the first two documents get a link trace; the third stays unvisited
    for visited in (doc1, doc2):
        models.LinkTrace.objects.create(document=visited, user=user, is_masked=False)

    expected_ids = sorted([str(doc1.pk), str(doc2.pk)])
    assert sorted(get_visited_document_ids_of(user)) == expected_ids

    factories.UserDocumentAccessFactory(user=other, document=doc1)
    factories.UserDocumentAccessFactory(user=user, document=doc2)

    # The user now has an access on the second document, so only the first remains
    assert get_visited_document_ids_of(user) == [str(doc1.pk)]
293+
294+
295+
@patch("requests.post")
def test_services_search_indexers_search(mock_post, settings):
    """
    search() should post the query text, the visited document ids and the
    "docs" service to SEARCH_INDEXER_QUERY_URL, using the token as bearer.
    """
    user = factories.UserFactory()
    indexer = FindDocumentIndexer()

    mock_post.return_value.raise_for_status.return_value = None  # No error

    doc1, doc2, _ = factories.DocumentFactory.create_batch(3)

    trace_visit = partial(models.LinkTrace.objects.create, user=user, is_masked=False)
    trace_visit(document=doc1)
    trace_visit(document=doc2)

    indexer.search("alpha", user=user, token="mytoken")

    call_args, call_kwargs = mock_post.call_args

    assert call_args[0] == settings.SEARCH_INDEXER_QUERY_URL

    posted = call_kwargs.get("json")
    assert posted["q"] == "alpha"
    assert sorted(posted["visited"]) == sorted([str(doc1.pk), str(doc2.pk)])
    assert posted["services"] == ["docs"]

    assert call_kwargs.get("headers") == {"Authorization": "Bearer mytoken"}
    assert call_kwargs.get("timeout") == 10
323+
324+
325+
def test_search_query_raises_error_if_search_endpoint_is_none(settings):
    """
    Indexer should raise RuntimeError when searching while
    SEARCH_INDEXER_QUERY_URL is None.
    """
    settings.SEARCH_INDEXER_QUERY_URL = None
    indexer = FindDocumentIndexer()
    user = factories.UserFactory()

    with pytest.raises(RuntimeError) as exc_info:
        indexer.search("alpha", user=user, token="mytoken")

    # The search path reports "... before search." (not "... before indexing.")
    assert (
        "SEARCH_INDEXER_QUERY_URL must be set in Django settings before search."
        in str(exc_info.value)
    )

src/backend/impress/settings.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ class Base(Configuration):
109109
SEARCH_INDEXER_SECRET = values.Value(
110110
default=None, environ_name="SEARCH_INDEXER_SECRET", environ_prefix=None
111111
)
112+
SEARCH_INDEXER_QUERY_URL = values.Value(
113+
default=None, environ_name="SEARCH_INDEXER_QUERY_URL", environ_prefix=None
114+
)
112115

113116
# Static files (CSS, JavaScript, Images)
114117
STATIC_URL = "/static/"

0 commit comments

Comments
 (0)