Skip to content

Commit 253bebd

Browse files
bambriz, Copilot, and simorenoh
authored
Add tests for Multi-language support for Full-Text Search (Azure#44254)
* update multi language support tests for vector search
* update changelog and tests
  Adding skips for the tests, as the pipeline is not set up for multi-language support
* Update sdk/cosmos/azure-cosmos/tests/test_full_text_policy.py
  Co-authored-by: Copilot <[email protected]>
* Update sdk/cosmos/azure-cosmos/tests/test_full_text_policy_async.py
  Co-authored-by: Copilot <[email protected]>
* Update test_full_text_policy.py
* fix spelling check errors
  Ignores spell check for words used as string literals for multi language support
* Update sdk/cosmos/azure-cosmos/CHANGELOG.md
  Co-authored-by: Simon Moreno <[email protected]>
* Update CHANGELOG.md
* Update _version.py
---------
Co-authored-by: Copilot <[email protected]>
Co-authored-by: Simon Moreno <[email protected]>
1 parent 60556ac commit 253bebd

File tree

3 files changed

+653
-1
lines changed

3 files changed

+653
-1
lines changed

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#### Bugs Fixed
1010

1111
#### Other Changes
12+
* Added tests for multi-language support for full text search. See [PR 44254](https://github.com/Azure/azure-sdk-for-python/pull/44254)
13+
1214

1315
### 4.15.0b2 (2025-12-16)
1416

@@ -751,4 +753,3 @@ Version 4.0.0b1 is the first preview of our efforts to create a user-friendly an
751753
## 1.0.1
752754

753755
- Supports proxy connection
754-

sdk/cosmos/azure-cosmos/tests/test_full_text_policy.py

Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,35 @@ class TestFullTextPolicy(unittest.TestCase):
1818
masterKey = test_config.TestConfig.masterKey
1919
connectionPolicy = test_config.TestConfig.connectionPolicy
2020

21+
# Shared fixture data for every multi-language test: one row per language, laid out as
# (display name, language code, sample abstract, search term contained in that abstract).
# cspell:ignore Ceci démonstration français Dies Beispiel Deutsch Questo esempio italiano
# cspell:ignore Este exemplo português Brasil Portugal Esta demostración español
_language_fixtures = (
    ("EnglishUS", "en-US", "This is a test in English.", "English"),
    ("FrenchFR", "fr-FR", "Ceci est une démonstration en français.", "démonstration"),
    ("GermanDE", "de-DE", "Dies ist ein Beispiel auf Deutsch.", "Beispiel"),
    ("ItalianIT", "it-IT", "Questo è un esempio in italiano.", "esempio"),
    ("PortugueseBR", "pt-BR", "Este é um exemplo em português do Brasil.", "exemplo"),
    ("PortuguesePT", "pt-PT", "Este é um exemplo em português de Portugal.", "exemplo"),
    ("SpanishES", "es-ES", "Esta es una demostración en español.", "demostración"),
)
# Display name -> language code used in the full text policy.
supported_languages = {name: code for name, code, _abstract, _term in _language_fixtures}
# Language code -> sample document text written in that language.
language_abstracts = {code: abstract for _name, code, abstract, _term in _language_fixtures}
# Language code -> word to search for in the matching sample abstract.
search_terms = {code: term for _name, code, _abstract, term in _language_fixtures}
49+
2150
@classmethod
2251
def setUpClass(cls):
2352
if (cls.masterKey == '[YOUR_KEY_HERE]' or
@@ -284,6 +313,308 @@ def test_fail_create_full_text_indexing_policy(self):
284313
assert e.status_code == 400
285314
assert "Missing path in full-text index specification at index (0)" in e.http_error_message
286315

316+
@pytest.mark.skip(reason="Testing pipeline is not yet set up for full text multi-language support")
def test_supported_languages_in_full_text_policy(self):
    """Replace a container's full text policy once for every supported language.

    The container is created with ``en-US`` as both the default and the path language. Each
    code in ``supported_languages`` is then applied through ``replace_container``, and the
    policy read back from the replaced container must equal the policy that was sent.
    The container is deleted even when an assertion fails.
    """
    full_text_policy = {
        "defaultLanguage": "en-US",
        "fullTextPaths": [
            {"path": "/abstract", "language": "en-US"}
        ]
    }
    container = self.test_db.create_container(
        id='full_text_container' + str(uuid.uuid4()),
        partition_key=PartitionKey(path="/id"),
        full_text_policy=full_text_policy
    )
    try:
        for lang in self.supported_languages.values():
            updated_policy = {
                "defaultLanguage": lang,
                "fullTextPaths": [
                    {"path": "/abstract", "language": lang}
                ]
            }
            replaced_container = self.test_db.replace_container(
                container=container.id,
                partition_key=PartitionKey(path="/id"),
                full_text_policy=updated_policy
            )
            properties = replaced_container.read()
            # Name the language in the failure so the offending locale is visible in the report.
            assert properties["fullTextPolicy"] == updated_policy, (
                f"Full text policy mismatch after switching to language '{lang}'"
            )
    finally:
        self.test_db.delete_container(container.id)
348+
349+
@pytest.mark.skip(reason="Testing pipeline is not yet set up for full text multi-language support")
def test_default_language_fallback(self):
    """Search a path that declares no language of its own, for every supported default language.

    The full text path ``/abstract`` carries no ``language`` entry, so only ``defaultLanguage``
    changes between iterations. For each language the policy is replaced and read back, a
    document with that language's sample abstract is inserted, and a full text query for the
    language's search term must return that document. The container is deleted even when an
    assertion fails.
    """
    full_text_policy = {
        "defaultLanguage": "en-US",
        "fullTextPaths": [
            {"path": "/abstract"}
        ]
    }
    container = self.test_db.create_container(
        id='full_text_container' + str(uuid.uuid4()),
        partition_key=PartitionKey(path="/id"),
        full_text_policy=full_text_policy
    )
    # Documents from earlier iterations stay in the container, and pt-BR and pt-PT share the
    # search term "exemplo". A TOP 1 query could therefore return the other language's document,
    # so ask for as many rows as the container can ever hold during this test.
    max_results = len(self.supported_languages)
    try:
        for language_code in self.supported_languages.values():
            # Replace the container's full text policy for each language
            updated_policy = {
                "defaultLanguage": language_code,
                "fullTextPaths": [
                    {"path": "/abstract"}
                ]
            }
            replaced_container = self.test_db.replace_container(
                container=container.id,
                partition_key=PartitionKey(path="/id"),
                full_text_policy=updated_policy
            )
            properties = replaced_container.read()
            assert properties["fullTextPolicy"] == updated_policy, (
                f"Full text policy mismatch after switching default language to '{language_code}'"
            )
            # Insert and verify item
            item = {
                "id": str(uuid.uuid4()),
                "abstract": self.language_abstracts[language_code],
            }
            container.create_item(body=item)
            search_term = self.search_terms[language_code]
            query = (
                f"SELECT TOP {max_results} * FROM c WHERE FullTextContains(c.abstract, '{search_term}') "
                f"ORDER BY RANK FullTextScore(c.abstract, '{search_term}')"
            )
            results = list(container.query_items(query, enable_cross_partition_query=True))
            assert len(results) > 0, f"No full text results for '{search_term}' ({language_code})"
            assert any(result["abstract"] == item["abstract"] for result in results), (
                f"Inserted '{language_code}' document was not returned for '{search_term}'"
            )
    finally:
        self.test_db.delete_container(container.id)
395+
396+
# Skipped until testing pipeline is set up for full text multi-language support
@pytest.mark.skip
def test_mismatched_default_and_path_languages(self):
    """Replace the policy so that the path language (fr-FR) differs from the default (en-US).

    The policy read back from the replaced container must equal the mismatched policy that
    was submitted. The container is removed in all cases.
    """
    # Start from a container where default and path language are both English.
    initial_policy = {
        "defaultLanguage": "en-US",
        "fullTextPaths": [{"path": "/abstract", "language": "en-US"}],
    }
    created = self.test_db.create_container(
        id='full_text_container' + str(uuid.uuid4()),
        partition_key=PartitionKey(path="/id"),
        full_text_policy=initial_policy,
    )

    try:
        # Same default language, but the indexed path now declares French.
        mismatched_policy = {
            "defaultLanguage": "en-US",
            "fullTextPaths": [{"path": "/abstract", "language": "fr-FR"}],
        }
        stored = self.test_db.replace_container(
            container=created.id,
            partition_key=PartitionKey(path="/id"),
            full_text_policy=mismatched_policy,
        ).read()
        assert stored["fullTextPolicy"] == mismatched_policy
    finally:
        # Always clean up the container created for this test.
        self.test_db.delete_container(created.id)
437+
438+
# Skipped until testing pipeline is set up for full text multi-language support
@pytest.mark.skip
def test_replace_full_text_policy_with_different_languages(self):
    """Switch both the default and the path language to each supported language in turn.

    After every replacement the policy read back from the container must equal the one sent.
    The container is removed in all cases.
    """

    def policy_for(code):
        # Policy in which the default language and the /abstract path language agree.
        return {
            "defaultLanguage": code,
            "fullTextPaths": [{"path": "/abstract", "language": code}],
        }

    # The container starts out fully English.
    initial_policy = policy_for("en-US")
    target = self.test_db.create_container(
        id='full_text_container' + str(uuid.uuid4()),
        partition_key=PartitionKey(path="/id"),
        full_text_policy=initial_policy,
    )

    try:
        for code in self.supported_languages.values():
            expected = policy_for(code)
            stored = self.test_db.replace_container(
                container=target.id,
                partition_key=PartitionKey(path="/id"),
                full_text_policy=expected,
            ).read()
            assert stored["fullTextPolicy"] == expected
    finally:
        # Always clean up the container created for this test.
        self.test_db.delete_container(target.id)
480+
481+
# Skipped until testing pipeline is set up for full text multi-language support
@pytest.mark.skip
def test_replace_full_text_policy_with_different_path_languages(self):
    """Vary only the path language across all supported languages; the default stays en-US.

    After every replacement the policy read back from the container must equal the one sent.
    The container is removed in all cases.
    """
    english = "en-US"
    seed_policy = {
        "defaultLanguage": english,
        "fullTextPaths": [{"path": "/abstract", "language": english}],
    }
    sandbox = self.test_db.create_container(
        id='full_text_container' + str(uuid.uuid4()),
        partition_key=PartitionKey(path="/id"),
        full_text_policy=seed_policy,
    )

    try:
        # One candidate policy per supported language; only the path language changes.
        candidate_policies = [
            {
                "defaultLanguage": "en-US",
                "fullTextPaths": [{"path": "/abstract", "language": path_language}],
            }
            for path_language in self.supported_languages.values()
        ]
        for candidate in candidate_policies:
            replaced = self.test_db.replace_container(
                container=sandbox.id,
                partition_key=PartitionKey(path="/id"),
                full_text_policy=candidate,
            )
            read_back = replaced.read()
            assert read_back["fullTextPolicy"] == candidate
    finally:
        # Always clean up the container created for this test.
        self.test_db.delete_container(sandbox.id)
523+
524+
@pytest.mark.skip(reason="Testing pipeline is not yet set up for full text multi-language support")
def test_multi_path_multi_language_policy(self):
    """Create one container whose full text policy has a separate path per supported language.

    Each language gets its own property (``abstract_<code>`` with ``-`` replaced by ``_`` and
    lower-cased). The test inserts one document per language, checks that the policy read back
    lists every path with its language, and runs a full text query per language that must
    return that language's sample abstract. The container is deleted even when an assertion
    fails.
    """
    # Language code -> document property name; computed once and reused by every step below.
    field_by_language = {
        lang_code: f"abstract_{lang_code.replace('-', '_').lower()}"
        for lang_code in self.supported_languages.values()
    }
    full_text_paths_multi = [
        {"path": f"/{field}", "language": lang_code}
        for lang_code, field in field_by_language.items()
    ]
    full_text_policy_multi = {
        "defaultLanguage": "en-US",
        "fullTextPaths": full_text_paths_multi
    }
    container = self.test_db.create_container(
        id='full_text_container_multi_' + str(uuid.uuid4()),
        partition_key=PartitionKey(path="/id"),
        full_text_policy=full_text_policy_multi
    )
    try:
        # Insert one item per language, each with its own path
        for lang_code, field in field_by_language.items():
            item = {
                "id": str(uuid.uuid4()),
                field: self.language_abstracts[lang_code],
            }
            container.create_item(body=item)
        # Verify the fullTextPolicy has the correct language for each path
        properties = container.read()
        returned_paths = properties["fullTextPolicy"]["fullTextPaths"]
        # Guard against a vacuous pass: every configured path/language must come back exactly once.
        assert len(returned_paths) == len(full_text_paths_multi)
        assert {entry["language"] for entry in returned_paths} == set(field_by_language)
        for path_entry in returned_paths:
            lang = path_entry["language"]
            assert lang in self.language_abstracts
            assert path_entry["path"] == f"/{field_by_language[lang]}"
        # Perform a full-text search for each language
        for lang_code, field in field_by_language.items():
            search_term = self.search_terms[lang_code]
            query = (
                f"SELECT TOP 1 * FROM c WHERE FullTextContains(c.{field}, "
                f"'{search_term}') "
                f"ORDER BY RANK FullTextScore(c.{field}, "
                f"'{search_term}')"
            )
            results = list(container.query_items(query, enable_cross_partition_query=True))
            assert len(results) > 0, f"No full text results for '{search_term}' ({lang_code})"
            assert results[0][field] == self.language_abstracts[lang_code]
    finally:
        self.test_db.delete_container(container.id)
575+
576+
@pytest.mark.skip(reason="Testing pipeline is not yet set up for full text multi-language support")
def test_unsupported_language_in_full_text_policy(self):
    """Replacing the policy with an unknown path language must be rejected with HTTP 400.

    The container is created with a valid ``en-US`` policy, then ``replace_container`` is
    called with the path language ``unsupported-LANG``. The call is expected to raise
    ``CosmosHttpResponseError`` with status 400 and a message naming the unsupported language.
    The container is deleted even when an assertion fails.
    """
    # Create the container with English as the default language
    full_text_policy = {
        "defaultLanguage": "en-US",
        "fullTextPaths": [
            {
                "path": "/abstract",
                "language": "en-US"
            }
        ]
    }
    container = self.test_db.create_container(
        id='full_text_container' + str(uuid.uuid4()),
        partition_key=PartitionKey(path="/id"),
        full_text_policy=full_text_policy
    )
    try:
        # Replace the full-text policy with an unsupported language
        updated_policy = {
            "defaultLanguage": "en-US",
            "fullTextPaths": [
                {
                    "path": "/abstract",
                    "language": "unsupported-LANG"
                }
            ]
        }
        # pytest.raises fails the test by itself when no error is raised, so no explicit
        # pytest.fail() call is needed after the request.
        with pytest.raises(exceptions.CosmosHttpResponseError) as exc_info:
            self.test_db.replace_container(
                container=container.id,
                partition_key=PartitionKey(path="/id"),
                full_text_policy=updated_policy
            )
        error = exc_info.value
        assert error.status_code == 400
        assert "The Full Text Policy contains an unsupported language" in error.http_error_message
    finally:
        self.test_db.delete_container(container.id)
617+
287618

288619
if __name__ == '__main__':
289620
unittest.main()

0 commit comments

Comments
 (0)