qdrant · joein · Nov 12, 2025 · Nov 10, 2025 · Nov 10, 2025 · Nov 11, 2025
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
@@ -24,6 +24,20 @@ jobs:
           - ubuntu-latest
           - macos-latest
           - windows-latest
+        exclude:
+          # Exclude 3.10–3.12 for macOS and Windows
+          - os: macos-latest
+            python-version: '3.10.x'
+          - os: macos-latest
+            python-version: '3.11.x'
+          - os: macos-latest
+            python-version: '3.12.x'
+          - os: windows-latest
+            python-version: '3.10.x'
+          - os: windows-latest
+            python-version: '3.11.x'
+          - os: windows-latest
+            python-version: '3.12.x'
 
     runs-on: ${{ matrix.os }}
 

diff --git a/fastembed/sparse/bm42.py b/fastembed/sparse/bm42.py
@@ -31,9 +31,17 @@
     ),
 ]
 
-MODEL_TO_LANGUAGE = {
+
+_MODEL_TO_LANGUAGE = {
     "Qdrant/bm42-all-minilm-l6-v2-attentions": "english",
 }
+MODEL_TO_LANGUAGE = {
+    model_name.lower(): language for model_name, language in _MODEL_TO_LANGUAGE.items()
+}
+
+
+def get_language_by_model_name(model_name: str) -> str:  # case-insensitive lookup
+    return MODEL_TO_LANGUAGE[model_name.lower()]  # keys are lowercased; KeyError if unmapped
 
 
 class Bm42(SparseTextEmbeddingBase, OnnxTextModel[SparseEmbedding]):
@@ -124,7 +132,7 @@ def __init__(
         self.special_tokens_ids: set[int] = set()
         self.punctuation = set(string.punctuation)
         self.stopwords = set(self._load_stopwords(self._model_dir))
-        self.stemmer = SnowballStemmer(MODEL_TO_LANGUAGE[model_name])
+        self.stemmer = SnowballStemmer(get_language_by_model_name(self.model_name))
         self.alpha = alpha
 
         if not self.lazy_load:

diff --git a/fastembed/sparse/minicoil.py b/fastembed/sparse/minicoil.py
@@ -46,9 +46,16 @@
     ),
 ]
 
-MODEL_TO_LANGUAGE = {
+_MODEL_TO_LANGUAGE = {
     "Qdrant/minicoil-v1": "english",
 }
+MODEL_TO_LANGUAGE = {
+    model_name.lower(): language for model_name, language in _MODEL_TO_LANGUAGE.items()
+}
+
+
+def get_language_by_model_name(model_name: str) -> str:  # case-insensitive lookup
+    return MODEL_TO_LANGUAGE[model_name.lower()]  # keys are lowercased; KeyError if unmapped
 
 
 class MiniCOIL(SparseTextEmbeddingBase, OnnxTextModel[SparseEmbedding]):
@@ -156,7 +163,7 @@ def load_onnx_model(self) -> None:
         self.special_tokens_ids = set(self.special_token_to_id.values())
         self.stopwords = set(self._load_stopwords(self._model_dir))
 
-        stemmer = SnowballStemmer(MODEL_TO_LANGUAGE[self.model_name])
+        stemmer = SnowballStemmer(get_language_by_model_name(self.model_name))
 
         self.vocab_resolver = VocabResolver(
             tokenizer=VocabTokenizer(self.tokenizer),

diff --git a/tests/test_attention_embeddings.py b/tests/test_attention_embeddings.py
@@ -1,4 +1,5 @@
 import os
+from contextlib import contextmanager
 
 import numpy as np
 import pytest
@@ -7,98 +8,119 @@
 from tests.utils import delete_model_cache
 
 
-@pytest.mark.parametrize("model_name", ["Qdrant/bm42-all-minilm-l6-v2-attentions", "Qdrant/bm25"])
-def test_attention_embeddings(model_name: str) -> None:
-    is_ci = os.getenv("CI")
-    model = SparseTextEmbedding(model_name=model_name)
-
-    output = list(
-        model.query_embed(
-            [
-                "I must not fear. Fear is the mind-killer.",
-            ]
-        )
-    )
-
-    assert len(output) == 1
-
-    for result in output:
-        assert len(result.indices) == len(result.values)
-        assert np.allclose(result.values, np.ones(len(result.values)))
-
-    quotes = [
-        "I must not fear. Fear is the mind-killer.",
-        "All animals are equal, but some animals are more equal than others.",
-        "It was a pleasure to burn.",
-        "The sky above the port was the color of television, tuned to a dead channel.",
-        "In the beginning, the universe was created."
-        " This has made a lot of people very angry and been widely regarded as a bad move.",
-        "It's a truth universally acknowledged that a zombie in possession of brains must be in want of more brains.",
-        "War is peace. Freedom is slavery. Ignorance is strength.",
-        "We're not in Infinity; we're in the suburbs.",
-        "I was a thousand times more evil than thou!",
-        "History is merely a list of surprises... It can only prepare us to be surprised yet again.",
-        ".",  # Empty string
-    ]
-
-    output = list(model.embed(quotes))
-
-    assert len(output) == len(quotes)
-
-    for result in output[:-1]:
-        assert len(result.indices) == len(result.values)
-        assert len(result.indices) > 0
-
-    assert len(output[-1].indices) == 0
-
-    # Test support for unknown languages
-    output = list(
-        model.query_embed(
-            [
-                "привет мир!",
-            ]
-        )
-    )
+_MODELS_TO_CACHE = ("Qdrant/bm42-all-minilm-l6-v2-attentions", "Qdrant/bm25")
+MODELS_TO_CACHE = tuple([x.lower() for x in _MODELS_TO_CACHE])
 
-    assert len(output) == 1
 
-    for result in output:
-        assert len(result.indices) == len(result.values)
-        assert len(result.indices) == 2
+@pytest.fixture(scope="module")
+def model_cache():  # yields get_model, a context manager that reuses loaded models
+    is_ci = os.getenv("CI")
+    cache = {}  # lowercased model name -> SparseTextEmbedding instance
+
+    @contextmanager
+    def get_model(model_name: str):
+        lowercase_model_name = model_name.lower()
+        if lowercase_model_name not in cache:
+            cache[lowercase_model_name] = SparseTextEmbedding(lowercase_model_name)
+        yield cache[lowercase_model_name]  # code below is skipped if the with-body raises
+        if lowercase_model_name not in MODELS_TO_CACHE:  # not kept for reuse
+            print("deleting model")
+            model_inst = cache.pop(lowercase_model_name)
+            if is_ci:
+                delete_model_cache(model_inst.model._model_dir)
+            del model_inst
+
+    yield get_model
 
     if is_ci:
-        delete_model_cache(model.model._model_dir)
+        for model in cache.values():  # teardown: clean up models still cached
+            delete_model_cache(model.model._model_dir)
+    cache.clear()
 
 
 @pytest.mark.parametrize("model_name", ["Qdrant/bm42-all-minilm-l6-v2-attentions", "Qdrant/bm25"])
-def test_parallel_processing(model_name: str) -> None:
-    is_ci = os.getenv("CI")
+def test_attention_embeddings(model_cache, model_name: str) -> None:
+    with model_cache(model_name) as model:
+        output = list(
+            model.query_embed(
+                [
+                    "I must not fear. Fear is the mind-killer.",
+                ]
+            )
+        )
 
-    model = SparseTextEmbedding(model_name=model_name)
+        assert len(output) == 1
+
+        for result in output:
+            assert len(result.indices) == len(result.values)
+            assert np.allclose(result.values, np.ones(len(result.values)))
+
+        quotes = [
+            "I must not fear. Fear is the mind-killer.",
+            "All animals are equal, but some animals are more equal than others.",
+            "It was a pleasure to burn.",
+            "The sky above the port was the color of television, tuned to a dead channel.",
+            "In the beginning, the universe was created."
+            " This has made a lot of people very angry and been widely regarded as a bad move.",
+            "It's a truth universally acknowledged that a zombie in possession of brains must be in want of more brains.",
+            "War is peace. Freedom is slavery. Ignorance is strength.",
+            "We're not in Infinity; we're in the suburbs.",
+            "I was a thousand times more evil than thou!",
+            "History is merely a list of surprises... It can only prepare us to be surprised yet again.",
+            ".",  # Empty string
+        ]
+
+        output = list(model.embed(quotes))
+
+        assert len(output) == len(quotes)
+
+        for result in output[:-1]:
+            assert len(result.indices) == len(result.values)
+            assert len(result.indices) > 0
+
+        assert len(output[-1].indices) == 0
+
+        # Test support for unknown languages
+        output = list(
+            model.query_embed(
+                [
+                    "привет мир!",
+                ]
+            )
+        )
 
-    docs = ["hello world", "attention embedding", "Mangez-vous vraiment des grenouilles?"] * 100
-    embeddings = list(model.embed(docs, batch_size=10, parallel=2))
+        assert len(output) == 1
 
-    embeddings_2 = list(model.embed(docs, batch_size=10, parallel=None))
+        for result in output:
+            assert len(result.indices) == len(result.values)
+            assert len(result.indices) == 2
 
-    embeddings_3 = list(model.embed(docs, batch_size=10, parallel=0))
 
-    assert len(embeddings) == len(docs)
+@pytest.mark.parametrize("model_name", ["Qdrant/bm42-all-minilm-l6-v2-attentions", "Qdrant/bm25"])
+def test_parallel_processing(model_cache, model_name: str) -> None:
+    with model_cache(model_name) as model:
+        docs = [
+            "hello world",
+            "attention embedding",
+            "Mangez-vous vraiment des grenouilles?",
+        ] * 100
+        embeddings = list(model.embed(docs, batch_size=10, parallel=2))
 
-    for emb_1, emb_2, emb_3 in zip(embeddings, embeddings_2, embeddings_3):
-        assert np.allclose(emb_1.indices, emb_2.indices)
-        assert np.allclose(emb_1.indices, emb_3.indices)
-        assert np.allclose(emb_1.values, emb_2.values)
-        assert np.allclose(emb_1.values, emb_3.values)
+        embeddings_2 = list(model.embed(docs, batch_size=10, parallel=None))
 
-    if is_ci:
-        delete_model_cache(model.model._model_dir)
+        embeddings_3 = list(model.embed(docs, batch_size=10, parallel=0))
 
+        assert len(embeddings) == len(docs)
+
+        for emb_1, emb_2, emb_3 in zip(embeddings, embeddings_2, embeddings_3):
+            assert np.allclose(emb_1.indices, emb_2.indices)
+            assert np.allclose(emb_1.indices, emb_3.indices)
+            assert np.allclose(emb_1.values, emb_2.values)
+            assert np.allclose(emb_1.values, emb_3.values)
 
-@pytest.mark.parametrize("model_name", ["Qdrant/bm25"])
-def test_multilanguage(model_name: str) -> None:
-    is_ci = os.getenv("CI")
 
+@pytest.mark.parametrize("model_name", ["Qdrant/bm25"])
+def test_multilanguage(model_cache, model_name: str) -> None:
     docs = ["Mangez-vous vraiment des grenouilles?", "Je suis au lit"]
 
     model = SparseTextEmbedding(model_name=model_name, language="french")
@@ -109,39 +131,30 @@ def test_multilanguage(model_name: str) -> None:
     assert embeddings[1].values.shape == (1,)
     assert embeddings[1].indices.shape == (1,)
 
-    model = SparseTextEmbedding(model_name=model_name, language="english")
-    embeddings = list(model.embed(docs))[:2]
-    assert embeddings[0].values.shape == (5,)
-    assert embeddings[0].indices.shape == (5,)
+    with model_cache(model_name) as model:  # language = "english"
+        embeddings = list(model.embed(docs))[:2]
+        assert embeddings[0].values.shape == (5,)
+        assert embeddings[0].indices.shape == (5,)
 
-    assert embeddings[1].values.shape == (4,)
-    assert embeddings[1].indices.shape == (4,)
-
-    if is_ci:
-        delete_model_cache(model.model._model_dir)
+        assert embeddings[1].values.shape == (4,)
+        assert embeddings[1].indices.shape == (4,)
 
 
 @pytest.mark.parametrize("model_name", ["Qdrant/bm25"])
-def test_special_characters(model_name: str) -> None:
-    is_ci = os.getenv("CI")
-
-    docs = [
-        "Über den größten Flüssen Österreichs äußern sich Experten häufig: Öko-Systeme müssen geschützt werden!",
-        "L'élève français s'écrie : « Où est mon crayon ? J'ai besoin de finir cet exercice avant la récréation!",
-        "Într-o zi însorită, Ștefan și Ioana au mâncat mămăligă cu brânză și au băut țuică la cabană.",
-        "Üzgün öğretmen öğrencilere seslendi: Lütfen gürültü yapmayın, sınavınızı bitirmeye çalışıyorum!",
-        "Ο Ξενοφών είπε: «Ψάχνω για ένα ωραίο δώρο για τη γιαγιά μου. Ίσως ένα φυτό ή ένα βιβλίο;»",
-        "Hola! ¿Cómo estás? Estoy muy emocionado por el cumpleaños de mi hermano, ¡va a ser increíble! También quiero comprar un pastel de chocolate con fresas y un regalo especial: un libro titulado «Cien años de soledad",
-    ]
-
-    model = SparseTextEmbedding(model_name=model_name, language="english")
-    embeddings = list(model.embed(docs))
-    for idx, shape in enumerate([14, 18, 15, 10, 15]):
-        assert embeddings[idx].values.shape == (shape,)
-        assert embeddings[idx].indices.shape == (shape,)
-
-    if is_ci:
-        delete_model_cache(model.model._model_dir)
+def test_special_characters(model_cache, model_name: str) -> None:
+    with model_cache(model_name) as model:
+        docs = [
+            "Über den größten Flüssen Österreichs äußern sich Experten häufig: Öko-Systeme müssen geschützt werden!",
+            "L'élève français s'écrie : « Où est mon crayon ? J'ai besoin de finir cet exercice avant la récréation!",
+            "Într-o zi însorită, Ștefan și Ioana au mâncat mămăligă cu brânză și au băut țuică la cabană.",
+            "Üzgün öğretmen öğrencilere seslendi: Lütfen gürültü yapmayın, sınavınızı bitirmeye çalışıyorum!",
+            "Ο Ξενοφών είπε: «Ψάχνω για ένα ωραίο δώρο για τη γιαγιά μου. Ίσως ένα φυτό ή ένα βιβλίο;»",
+            "Hola! ¿Cómo estás? Estoy muy emocionado por el cumpleaños de mi hermano, ¡va a ser increíble! También quiero comprar un pastel de chocolate con fresas y un regalo especial: un libro titulado «Cien años de soledad",
+        ]
+        embeddings = list(model.embed(docs))
+        for idx, shape in enumerate([14, 18, 15, 10, 15]):
+            assert embeddings[idx].values.shape == (shape,)
+            assert embeddings[idx].indices.shape == (shape,)
 
 
 @pytest.mark.parametrize("model_name", ["Qdrant/bm42-all-minilm-l6-v2-attentions"])

diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py
@@ -70,9 +70,13 @@ def test_text_custom_model():
     assert embeddings.shape == (2, dim)
 
     assert np.allclose(embeddings[0, : canonical_vector.shape[0]], canonical_vector, atol=1e-3)
+
     if is_ci:
         delete_model_cache(model.model._model_dir)
 
+    CustomTextEmbedding.SUPPORTED_MODELS.clear()
+    CustomTextEmbedding.POSTPROCESSING_MAPPING.clear()
+
 
 def test_cross_encoder_custom_model():
     is_ci = os.getenv("CI")
@@ -110,6 +114,8 @@ def test_cross_encoder_custom_model():
     if is_ci:
         delete_model_cache(model.model._model_dir)
 
+    CustomTextCrossEncoder.SUPPORTED_MODELS.clear()
+
 
 def test_mock_add_custom_models():
     dim = 5
@@ -169,6 +175,9 @@ def test_mock_add_custom_models():
         )
         assert np.allclose(post_processed_output, expected_output[model_name], atol=1e-3)
 
+    CustomTextEmbedding.SUPPORTED_MODELS.clear()
+    CustomTextEmbedding.POSTPROCESSING_MAPPING.clear()
+
 
 def test_do_not_add_existing_model():
     existing_base_model = "sentence-transformers/all-MiniLM-L6-v2"
@@ -203,6 +212,9 @@ def test_do_not_add_existing_model():
             size_in_gb=0.47,
         )
 
+    CustomTextEmbedding.SUPPORTED_MODELS.clear()
+    CustomTextEmbedding.POSTPROCESSING_MAPPING.clear()
+
 
 def test_do_not_add_existing_cross_encoder():
     existing_base_model = "Xenova/ms-marco-MiniLM-L-6-v2"
@@ -227,3 +239,5 @@ def test_do_not_add_existing_cross_encoder():
             sources=ModelSource(hf=custom_model_name),
             size_in_gb=0.08,
         )
+
+    CustomTextCrossEncoder.SUPPORTED_MODELS.clear()