Skip to content

Commit 5323429

Browse files
committed
♻️(core) simplify data flow
Remove the DB access when returning indexer search results.

Signed-off-by: charles <[email protected]>
1 parent 4200948 commit 5323429

File tree

5 files changed

+153
-27
lines changed

5 files changed

+153
-27
lines changed

src/backend/core/api/viewsets.py

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,41 +1204,30 @@ def _search_simple(self, request, text):
12041204
},
12051205
)
12061206

1207-
def _search_with_indexer(self, indexer, request, params):
1207+
@staticmethod
1208+
def _search_with_indexer(indexer, request, params):
12081209
"""
1209-
Returns a list of documents matching the query according to the configured indexer.
1210+
Returns a list of documents matching the query (q) according to the configured indexer.
12101211
"""
1211-
text = params.validated_data["q"]
1212-
path = (
1213-
params.validated_data["path"] if "path" in params.validated_data else None
1214-
)
12151212
queryset = models.Document.objects.all()
12161213

1217-
# Retrieve the documents ids according to indexer.
12181214
results = indexer.search(
1219-
text=text,
1215+
q=params.validated_data["q"],
12201216
token=request.session.get("oidc_access_token"),
1221-
path=path,
1217+
path=(
1218+
params.validated_data["path"]
1219+
if "path" in params.validated_data
1220+
else None
1221+
),
12221222
visited=get_visited_document_ids_of(queryset, request.user),
12231223
)
12241224

1225-
docs_by_uuid = {str(d.pk): d for d in queryset.filter(pk__in=results)}
1226-
ordered_docs = [docs_by_uuid[id] for id in results if id in docs_by_uuid]
1227-
1228-
serializer = self.get_serializer(
1229-
ordered_docs,
1230-
many=True,
1231-
context={
1232-
"request": request,
1233-
},
1234-
)
1235-
12361225
return drf_response.Response(
12371226
{
1238-
"count": len(serializer.data),
1227+
"count": len(results),
12391228
"next": None,
12401229
"previous": None,
1241-
"results": serializer.data,
1230+
"results": results,
12421231
}
12431232
)
12441233

src/backend/core/services/search_indexers.py

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -185,15 +185,15 @@ def push(self, data):
185185
"""
186186

187187
# pylint: disable-next=too-many-arguments,too-many-positional-arguments
188-
def search(self, text, token, visited=(), nb_results=None, path=None):
188+
def search(self, q, token, visited=(), nb_results=None, path=None):
189189
"""
190190
Search for documents in Find app.
191191
Ensure the same default ordering as "Docs" list : -updated_at
192192
193193
Returns ids of the documents
194194
195195
Args:
196-
text (str): Text search content.
196+
q (str): Text search content.
197197
token (str): OIDC Authentication token.
198198
visited (list, optional):
199199
List of ids of active public documents with LinkTrace
@@ -205,9 +205,9 @@ def search(self, text, token, visited=(), nb_results=None, path=None):
205205
The path to filter documents.
206206
"""
207207
nb_results = nb_results or self.search_limit
208-
response = self.search_query(
208+
results = self.search_query(
209209
data={
210-
"q": text,
210+
"q": q,
211211
"visited": visited,
212212
"services": ["docs"],
213213
"nb_results": nb_results,
@@ -218,7 +218,7 @@ def search(self, text, token, visited=(), nb_results=None, path=None):
218218
token=token,
219219
)
220220

221-
return [d["_id"] for d in response]
221+
return results
222222

223223
@abstractmethod
224224
def search_query(self, data, token) -> dict:
@@ -234,6 +234,51 @@ class SearchIndexer(BaseDocumentIndexer):
234234
Document indexer that pushes documents to La Suite Find app.
235235
"""
236236

237+
# pylint: disable-next=too-many-arguments,too-many-positional-arguments
def search(self, q, token, visited=(), nb_results=None, path=None):
    """
    Search documents in Find and format the hits.

    The query itself is delegated to the parent ``search`` with the same
    arguments. Each returned hit is flattened into a single dictionary
    holding the fields of its "_source", plus:
    - "id": the "_id" of the hit
    - "title": the title extracted from "_source" by ``get_title``

    Returns:
        list: One dictionary per hit, in the order returned by the parent
            ``search``.
    """
    hits = super().search(q, token, visited, nb_results, path)
    return [
        {
            # "_source" is unpacked first: for duplicate keys the last
            # entry wins, so the computed "id" and "title" can not be
            # overwritten by same-named fields of "_source".
            **hit["_source"],
            "id": hit["_id"],
            "title": self.get_title(hit["_source"]),
        }
        for hit in hits
    ]
248+
249+
@staticmethod
def get_title(source):
    """
    Extract the title from a search result source dictionary.

    Resolution order:
    - First non-empty localized title, i.e. the value of a key like
      "title.<some_extension>"
    - Fallback to plain "title" field if no usable localized title is found
    - Returns empty string if no title field exists

    Args:
        source (dict): The _source dictionary from a search hit

    Returns:
        str: The extracted title or empty string if not found

    Example:
        >>> get_title({"title.fr": "Bonjour", "id": 1})
        'Bonjour'
        >>> get_title({"title": "Hello", "id": 1})
        'Hello'
        >>> get_title({"title.fr": "", "title": "Hello"})
        'Hello'
        >>> get_title({"id": 1})
        ''
    """
    # Test each value rather than the list itself: a list holding only an
    # empty localized title is truthy and would otherwise hide a usable
    # plain "title".
    for localized_title in utils.get_value_by_pattern(source, r"^title\."):
        if localized_title:
            return localized_title

    return source.get("title", "")
281+
237282
def serialize_document(self, document, accesses):
238283
"""
239284
Convert a Document to the JSON format expected by La Suite Find.

src/backend/core/tests/test_services_search_indexers.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,3 +633,51 @@ def test_services_search_indexers_search_nb_results(mock_post, indexer_settings)
633633

634634
assert args[0] == indexer_settings.SEARCH_INDEXER_QUERY_URL
635635
assert kwargs.get("json")["nb_results"] == 109
636+
637+
638+
def test_search_indexer_get_title_with_localized_field():
    """A localized "title.<extension>" key provides the title."""
    hit_source = {"title.extension": "Bonjour", "id": 1, "content": "test"}

    assert SearchIndexer.get_title(hit_source) == "Bonjour"
644+
645+
646+
def test_search_indexer_get_title_with_multiple_localized_fields():
    """With several localized titles, the value of one of them is returned."""
    hit_source = {"title.extension": "Bonjour", "title.en": "Hello", "id": 1}
    title = SearchIndexer.get_title(hit_source)

    assert title in ["Bonjour", "Hello"]
652+
653+
654+
def test_search_indexer_get_title_fallback_to_plain_title():
    """Without any localized field, the plain 'title' field is used."""
    hit_source = {"title": "Hello World", "id": 1}

    assert SearchIndexer.get_title(hit_source) == "Hello World"
660+
661+
662+
def test_search_indexer_get_title_no_title_field():
    """A source without any title field yields an empty string."""
    hit_source = {"id": 1, "content": "test"}

    assert SearchIndexer.get_title(hit_source) == ""
668+
669+
670+
def test_search_indexer_get_title_with_empty_localized_title():
    """An empty localized title must not hide the plain 'title' field."""
    hit_source = {"title.extension": "", "title": "Fallback Title", "id": 1}

    assert SearchIndexer.get_title(hit_source) == "Fallback Title"
676+
677+
678+
def test_search_indexer_get_title_with_multiple_extension():
    """A title key carrying several dotted extensions is still recognized."""
    hit_source = {
        "title.extension_1.extension_2": "Bonjour",
        "id": 1,
        "content": "test",
    }

    assert SearchIndexer.get_title(hit_source) == "Bonjour"

src/backend/core/tests/test_utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,27 @@ def test_utils_get_ancestor_to_descendants_map_multiple_paths():
100100
"000100020005": {"000100020005"},
101101
"00010003": {"00010003"},
102102
}
103+
104+
105+
def test_utils_get_value_by_pattern_matching_key():
    """The value of the single key matching the pattern is returned in a list."""
    mapping = {"title.extension": "Bonjour", "id": 1, "content": "test"}

    assert utils.get_value_by_pattern(mapping, r"^title\.") == ["Bonjour"]
111+
112+
113+
def test_utils_get_value_by_pattern_multiple_matches():
    """Every key matching the pattern contributes its value."""
    mapping = {"title.extension_1": "Bonjour", "title.extension_2": "Hello", "id": 1}
    values = utils.get_value_by_pattern(mapping, r"^title\.")

    assert set(values) == {"Bonjour", "Hello"}
119+
120+
121+
def test_utils_get_value_by_pattern_no_match():
    """No matching key gives an empty list."""
    mapping = {"name": "Test", "id": 1}

    assert utils.get_value_by_pattern(mapping, r"^title\.") == []

src/backend/core/utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,26 @@
1010
from core import enums
1111

1212

13+
def get_value_by_pattern(data, pattern):
    r"""
    Get all values from keys matching a regex pattern in a dictionary.

    The pattern is applied with ``match``, i.e. it is tested from the
    beginning of each key.

    Args:
        data (dict): Source dictionary to search
        pattern (str): Regex pattern to match against keys

    Returns:
        list: List of values for all matching keys, in the iteration order
            of the dictionary; empty list if no matches

    Example:
        >>> get_value_by_pattern({"title.fr": "Bonjour", "id": 1}, r"^title\.")
        ['Bonjour']
        >>> get_value_by_pattern({"title.fr": "Bonjour", "title.en": "Hello"}, r"^title\.")
        ['Bonjour', 'Hello']
    """
    # Compile once: the pattern is the same for every key.
    key_matches = re.compile(pattern).match
    return [value for key, value in data.items() if key_matches(key)]
31+
32+
1333
def get_ancestor_to_descendants_map(paths, steplen):
1434
"""
1535
Given a list of document paths, return a mapping of ancestor_path -> set of descendant_paths.

0 commit comments

Comments
 (0)