Skip to content

Commit f5df740

Browse files
committed
[Fixes #13641] Multilang: index - tests
1 parent 54e2b5b commit f5df740

File tree

6 files changed

+366
-5
lines changed

6 files changed

+366
-5
lines changed

geonode/indexing/manager.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class TSVectorIndexManager:
1414
def __init__(self):
1515
self.LANGUAGES = multi.get_2letters_languages()
1616

17-
def _gather_fields_values(self, jsonschema: dict, jsoninstance: dict):
17+
def _gather_fields_values(self, jsoninstance: dict):
1818
ml_fields = {}
1919
non_ml_fields = {}
2020

@@ -44,9 +44,9 @@ def _gather_fields_values(self, jsonschema: dict, jsoninstance: dict):
4444

4545
return non_ml_fields, ml_fields
4646

47-
def update_index(self, resource_id, jsonschema: dict, jsoninstance: dict):
47+
def update_index(self, resource_id, jsoninstance: dict):
4848

49-
non_ml_fields, ml_fields = self._gather_fields_values(jsonschema, jsoninstance)
49+
non_ml_fields, ml_fields = self._gather_fields_values(jsoninstance)
5050

5151
# 3rd loop: create indexes
5252
for index_name, index_fields in settings.METADATA_INDEXES.items():
@@ -81,7 +81,7 @@ def update_index(self, resource_id, jsonschema: dict, jsoninstance: dict):
8181

8282
ml_text = " ".join(filter(None, (ml_fields[f][lang] for f in index_fields if f in ml_fields)))
8383
vector = Func(
84-
Value(f"{ml_text} {non_ml_text}"),
84+
Value(" ".join(filter(None, [ml_text, non_ml_text]))),
8585
function="to_tsvector",
8686
template=f"%(function)s('{multi.get_pg_language(lang)}', %(expressions)s)",
8787
)

geonode/indexing/tests/__init__.py

Whitespace-only changes.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"title": {
3+
"type": "string",
4+
"title": "Title",
5+
"description": "name by which the cited resource is known",
6+
"maxLength": 255,
7+
"geonode:handler": "fake",
8+
"geonode:required": true
9+
},
10+
"abstract": {
11+
"type": "string",
12+
"title": "Abstract",
13+
"description": "brief narrative summary of the content of the resource(s)",
14+
"maxLength": 2000,
15+
"ui:options": {
16+
"widget": "textarea",
17+
"rows": 5
18+
},
19+
"geonode:handler": "fake",
20+
"geonode:required": true
21+
},
22+
"license": {
23+
"type": "object",
24+
"title": "License",
25+
"description": "license of the dataset",
26+
"maxLength": 255,
27+
"properties": {
28+
"id": {
29+
"type": "string"
30+
},
31+
"label": {
32+
"type": "string"
33+
}
34+
},
35+
"geonode:handler": "fake"
36+
}
37+
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#########################################################################
2+
#
3+
# Copyright (C) 2024 OSGeo
4+
#
5+
# This program is free software: you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
#
18+
#########################################################################
19+
20+
import os
21+
import logging
22+
from types import SimpleNamespace
23+
24+
from unittest.mock import patch
25+
26+
from django.test import override_settings
27+
28+
from geonode.base.models import ResourceBase
29+
from geonode.metadata.handlers.multilang import MultiLangHandler
30+
from geonode.metadata.handlers.sparse import SparseHandler, SparseFieldRegistry
31+
from geonode.metadata.manager import MetadataManager
32+
from geonode.metadata.tests.handlers import FakeHandler, LoaderHandler
33+
34+
from geonode.tests.base import GeoNodeBaseTestSupport
35+
36+
37+
logger = logging.getLogger(__name__)
38+
39+
40+
class IndexingInvocationTests(GeoNodeBaseTestSupport):
    """
    Check that saving metadata through the MetadataManager invokes the index manager.
    """

    def setUp(self):
        # NOTE(review): super().setUp() is not invoked here; presumably deliberate -- confirm
        pass

    def tearDown(self):
        super().tearDown()

    def create_metadata_manager(self):
        """
        Build a MetadataManager using a reduced, explicit set of handlers.

        The schema is loaded from data/minimal_schema.json, located next to this module.
        The sparse and the multilang handlers share the same SparseFieldRegistry.

        :return: the tuple (metadata manager, sparse field registry)
        """
        sr = SparseFieldRegistry()
        mm = MetadataManager()
        schema_file = os.path.join(os.path.dirname(__file__), "data/minimal_schema.json")
        # no "base" handler is registered here: the test asserts its absence
        mm.handlers = {
            "loader": LoaderHandler(schemafile=schema_file),
            "fake": FakeHandler(),
            "sparse": SparseHandler(registry=sr),
            "multilang": MultiLangHandler(registry=sr),
        }
        mm.post_init()
        return mm, sr

    @patch("geonode.base.models.ResourceBase.get_real_instance_class")
    @patch("geonode.indexing.manager.TSVectorIndexManager.update_index")
    @patch("geonode.metadata.handlers.sparse.SparseHandler.update_resource")
    def test_indexmanager_invocation(self, mock_sparse_update, mock_update_index, mock_get_real_instance_class):
        """
        The index manager should be called when the metadata are saved
        """
        # Stacked @patch decorators inject their mocks bottom-up: the decorator closest
        # to the function provides the first mock argument, the topmost one the last.
        with override_settings(
            LANGUAGE_CODE="it",
            LANGUAGES=[("en", "English"), ("it", "Italiano")],
            MULTILANG_FIELDS=["title"],
        ):
            mm, _ = self.create_metadata_manager()
            self.assertNotIn("base", mm.handlers)
            instance = {
                "title": "whatever",
                "title_multilang_it": "title_it",
                "title_multilang_en": None,
                "abstract": "abstract_fake",
                "license": "license_fake",
            }

            resource = ResourceBase()
            # minimal stand-in for a request: only "data" and "user" are provided
            fake_req = SimpleNamespace(data=instance, user=None)
            mm.update_schema_instance(resource, fake_req)

            mock_update_index.assert_called_once()
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
#########################################################################
2+
#
3+
# Copyright (C) 2024 OSGeo
4+
#
5+
# This program is free software: you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
#
18+
#########################################################################
19+
20+
import logging
21+
22+
from unittest.mock import patch, ANY
23+
24+
from django.test import override_settings
25+
26+
from geonode.indexing.manager import TSVectorIndexManager
27+
from geonode.tests.base import GeoNodeBaseTestSupport
28+
29+
30+
logger = logging.getLogger(__name__)
31+
32+
33+
class IndexingTests(GeoNodeBaseTestSupport):
    """
    Tests for TSVectorIndexManager.update_index.

    ResourceIndex.objects.update_or_create is mocked in every test, and the checks are
    performed on the arguments it is called with.
    """

    def setUp(self):
        # NOTE(review): super().setUp() is not invoked here; presumably deliberate -- confirm
        pass

    def tearDown(self):
        super().tearDown()

    @patch("geonode.indexing.models.ResourceIndex.objects.update_or_create")
    def test_no_multilang(self, mock_uoc):
        """
        If no multilang fields, indexes should be created with lang=None
        """
        with override_settings(
            LANGUAGE_CODE="en",
            LANGUAGES=[("en", "English"), ("it", "Italiano")],
            MULTILANG_FIELDS=[],
            METADATA_INDEXES={
                "idx1": ["title"],
                "idx2": ["title", "f2"],
            },
        ):
            instance = {
                "title": "TheTitle",
                "f2": "data2",
            }

            expected_calls = (
                ({"defaults": ANY, "resource_id": 0, "lang": None, "name": "idx1"}, "TheTitle"),
                ({"defaults": ANY, "resource_id": 0, "lang": None, "name": "idx2"}, "TheTitle data2"),
            )

            self._run_index_test(instance, mock_uoc, expected_calls)

    @patch("geonode.indexing.models.ResourceIndex.objects.update_or_create")
    def test_multilang_title(self, mock_uoc):
        """
        Only the title is multilang
        """
        with override_settings(
            LANGUAGE_CODE="en",
            LANGUAGES=[("en", "English"), ("it", "Italiano")],
            MULTILANG_FIELDS=["title"],
            METADATA_INDEXES={
                "idx1": ["title"],
                "idx12": ["title", "f2"],
                "idx123": ["title", "f2", "f3"],
            },
        ):
            instance = {
                "title": "TheTitleBase",
                "title_multilang_en": "TheTitle",
                "title_multilang_it": "IlTitolo",
                "f2": "v2",
                "f3": "v3",
            }

            expected_calls = (
                ({"defaults": ANY, "resource_id": 0, "lang": "en", "name": "idx1"}, "TheTitle"),
                ({"defaults": ANY, "resource_id": 0, "lang": "it", "name": "idx1"}, "IlTitolo"),
                ({"defaults": ANY, "resource_id": 0, "lang": "en", "name": "idx12"}, "TheTitle v2"),
                ({"defaults": ANY, "resource_id": 0, "lang": "it", "name": "idx12"}, "IlTitolo v2"),
                ({"defaults": ANY, "resource_id": 0, "lang": "en", "name": "idx123"}, "TheTitle v2 v3"),
                ({"defaults": ANY, "resource_id": 0, "lang": "it", "name": "idx123"}, "IlTitolo v2 v3"),
            )

            self._run_index_test(instance, mock_uoc, expected_calls)

    @patch("geonode.indexing.models.ResourceIndex.objects.update_or_create")
    def test_multilang_title_missing(self, mock_uoc):
        """
        Title is multilang, one translation is missing.
        The index for the missing language should include the default field and all the translated entries
        """
        with override_settings(
            LANGUAGE_CODE="en",
            LANGUAGES=[("en", "English"), ("it", "Italiano")],
            MULTILANG_FIELDS=["title"],
            METADATA_INDEXES={
                "idx1": ["title"],
                "idx12": ["title", "f2"],
                "idx123": ["title", "f2", "f3"],
            },
        ):
            instance = {
                "title": "TheTitleBase",
                "title_multilang_en": "TheTitle",
                "title_multilang_it": None,  # this is the missing translation
                "f2": "v2",
                "f3": "v3",
            }

            expected_calls = (
                ({"defaults": ANY, "resource_id": 0, "lang": "en", "name": "idx1"}, "TheTitle"),
                ({"defaults": ANY, "resource_id": 0, "lang": "it", "name": "idx1"}, "TheTitle TheTitleBase"),
                ({"defaults": ANY, "resource_id": 0, "lang": "en", "name": "idx12"}, "TheTitle v2"),
                ({"defaults": ANY, "resource_id": 0, "lang": "it", "name": "idx12"}, "TheTitle TheTitleBase v2"),
                ({"defaults": ANY, "resource_id": 0, "lang": "en", "name": "idx123"}, "TheTitle v2 v3"),
                ({"defaults": ANY, "resource_id": 0, "lang": "it", "name": "idx123"}, "TheTitle TheTitleBase v2 v3"),
            )

            self._run_index_test(instance, mock_uoc, expected_calls)

    @patch("geonode.indexing.models.ResourceIndex.objects.update_or_create")
    def test_multilang_single_secondary_field(self, mock_uoc):
        """
        The multilang field is a secondary field.
        In a multilang context, indexes created with only non multilang fields should be created with lang=None.
        """
        with override_settings(
            LANGUAGE_CODE="en",
            LANGUAGES=[("en", "English"), ("it", "Italiano")],
            MULTILANG_FIELDS=["f3"],
            METADATA_INDEXES={
                "idx1": ["title"],
                "idx12": ["title", "f2"],
                "idx123": ["title", "f2", "f3"],
            },
        ):
            instance = {
                "title": "TheTitle",
                "f2": "v2",
                "f3": "v3",
                "f3_multilang_en": "v3_EN",
                "f3_multilang_it": "v3_IT",
            }

            expected_calls = (
                ({"defaults": ANY, "resource_id": 0, "lang": None, "name": "idx1"}, "TheTitle"),
                ({"defaults": ANY, "resource_id": 0, "lang": None, "name": "idx12"}, "TheTitle v2"),
                ({"defaults": ANY, "resource_id": 0, "lang": "it", "name": "idx123"}, "v3_IT TheTitle v2"),
                ({"defaults": ANY, "resource_id": 0, "lang": "en", "name": "idx123"}, "v3_EN TheTitle v2"),
            )

            self._run_index_test(instance, mock_uoc, expected_calls)

    @patch("geonode.indexing.models.ResourceIndex.objects.update_or_create")
    def test_multilang_with_secondary_lang_missing(self, mock_uoc):
        """
        The multilang field is a secondary field, with one translation missing.
        In a multilang context, indexes created with only non multilang fields should be created with lang=None.
        """
        with override_settings(
            LANGUAGE_CODE="en",
            LANGUAGES=[("en", "English"), ("it", "Italiano")],
            MULTILANG_FIELDS=["f3"],
            METADATA_INDEXES={
                "idx1": ["title"],
                "idx12": ["title", "f2"],
                "idx123": ["title", "f2", "f3"],
            },
        ):
            instance = {
                "title": "TheTitle",
                "f2": "v2",
                "f3": "v3",
                "f3_multilang_en": "v3_multilang_en",
                "f3_multilang_it": None,
            }

            expected_calls = (
                ({"defaults": ANY, "resource_id": 0, "lang": None, "name": "idx1"}, "TheTitle"),
                ({"defaults": ANY, "resource_id": 0, "lang": None, "name": "idx12"}, "TheTitle v2"),
                ({"defaults": ANY, "resource_id": 0, "lang": "en", "name": "idx123"}, "v3_multilang_en TheTitle v2"),
                ({"defaults": ANY, "resource_id": 0, "lang": "it", "name": "idx123"}, "TheTitle v2"),
            )

            self._run_index_test(instance, mock_uoc, expected_calls)

    def _run_index_test(self, instance, mock_uoc, expected_calls):
        """
        Test the calls to update_or_create
        :param instance: the metadata instance handed to the index manager
        :param mock_uoc: the mock replacing ResourceIndex.objects.update_or_create
        :param expected_calls: pairs made of the explicit params as dict and a string containing the indexed data
        """
        im = TSVectorIndexManager()
        im.update_index(0, instance)
        call_args_list = mock_uoc.call_args_list

        self.assertEqual(len(expected_calls), mock_uoc.call_count)
        for args, idx_data in expected_calls:
            # one subtest per expected call, so that a failure reports which index/lang is
            # wrong and the remaining expectations are still checked
            with self.subTest(name=args["name"], lang=args["lang"]):
                # check the explicit call's params
                mock_uoc.assert_any_call(**args)
                # check the string to be indexed
                self._assert_tsvector_value(idx_data, args, call_args_list)

    def _assert_tsvector_value(self, idx_data, args, call_args_list):
        """
        Check the text handed to the vector expression of a recorded update_or_create call
        :param idx_data: the expected indexed text; when None the content check is skipped
        :param args: the expected call params; "lang" and "name" are used to select the recorded call
        :param call_args_list: the calls recorded by the update_or_create mock
        """
        called = None
        for _, call_kwargs in call_args_list:
            if args["lang"] == call_kwargs["lang"] and args["name"] == call_kwargs["name"]:
                called = call_kwargs
                break
        if called is None:
            # should not happen, since the caller already ran assert_any_call on these params
            self.fail(f"Call not found {args}")

        vector = called["defaults"]["vector"]
        # the text to be indexed is read from the first source expression of the vector
        value = vector.source_expressions[0].value
        if idx_data is not None:
            self.assertEqual(idx_data, value, f"Bad index content for {args}")
        else:
            logger.info("Skipping test for index value for %s", args)
            logger.debug("VECTOR VALUE is '%s'", value)

0 commit comments

Comments
 (0)