qdrant · joein · Nov 12, 2024 · Nov 11, 2024 · Nov 11, 2024 · Nov 11, 2024
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
@@ -42,4 +42,4 @@ jobs:
 
       - name: Run pytest
         run: |
-          poetry run pytest
+          poetry run pytest 
diff --git a/tests/test_attention_embeddings.py b/tests/test_attention_embeddings.py
@@ -1,10 +1,10 @@
 import os
-import shutil
 
 import numpy as np
 import pytest
 
 from fastembed import SparseTextEmbedding
+from tests.utils import delete_model_cache
 
 
 @pytest.mark.parametrize("model_name", ["Qdrant/bm42-all-minilm-l6-v2-attentions", "Qdrant/bm25"])
@@ -67,7 +67,7 @@ def test_attention_embeddings(model_name):
         assert len(result.indices) == 2
 
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize("model_name", ["Qdrant/bm42-all-minilm-l6-v2-attentions", "Qdrant/bm25"])
@@ -92,7 +92,7 @@ def test_parallel_processing(model_name):
         assert np.allclose(emb_1.values, emb_3.values)
 
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize("model_name", ["Qdrant/bm25"])
@@ -118,7 +118,7 @@ def test_multilanguage(model_name):
     assert embeddings[1].indices.shape == (4,)
 
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize("model_name", ["Qdrant/bm25"])
@@ -141,7 +141,7 @@ def test_special_characters(model_name):
         assert embeddings[idx].indices.shape == (shape,)
 
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize("model_name", ["Qdrant/bm42-all-minilm-l6-v2-attentions"])

diff --git a/tests/test_image_onnx_embeddings.py b/tests/test_image_onnx_embeddings.py
@@ -1,5 +1,4 @@
 import os
-import shutil
 from io import BytesIO
 
 import numpy as np
@@ -9,6 +8,7 @@
 
 from fastembed import ImageEmbedding
 from tests.config import TEST_MISC_DIR
+from tests.utils import delete_model_cache
 
 CANONICAL_VECTOR_VALUES = {
     "Qdrant/clip-ViT-B-32-vision": np.array([-0.0098, 0.0128, -0.0274, 0.002, -0.0059]),
@@ -54,7 +54,7 @@ def test_embedding():
         assert np.allclose(embeddings[1], embeddings[2]), model_desc["model"]
 
         if is_ci:
-            shutil.rmtree(model.model._model_dir)
+            delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize("n_dims,model_name", [(512, "Qdrant/clip-ViT-B-32-vision")])
@@ -74,7 +74,7 @@ def test_batch_embedding(n_dims, model_name):
 
     assert embeddings.shape == (len(test_images) * n_images, n_dims)
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize("n_dims,model_name", [(512, "Qdrant/clip-ViT-B-32-vision")])
@@ -102,11 +102,12 @@ def test_parallel_processing(n_dims, model_name):
     assert np.allclose(embeddings, embeddings_2, atol=1e-3)
     assert np.allclose(embeddings, embeddings_3, atol=1e-3)
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize("model_name", ["Qdrant/clip-ViT-B-32-vision"])
 def test_lazy_load(model_name):
+    is_ci = os.getenv("CI")
     model = ImageEmbedding(model_name=model_name, lazy_load=True)
     assert not hasattr(model.model, "model")
     images = [
@@ -115,3 +116,5 @@ def test_lazy_load(model_name):
     ]
     list(model.embed(images))
     assert hasattr(model.model, "model")
+    if is_ci:
+        delete_model_cache(model.model._model_dir)
diff --git a/tests/test_late_interaction_embeddings.py b/tests/test_late_interaction_embeddings.py
@@ -1,12 +1,12 @@
 import os
-import shutil
 
 import pytest
 import numpy as np
 
 from fastembed.late_interaction.late_interaction_text_embedding import (
     LateInteractionTextEmbedding,
 )
+from tests.utils import delete_model_cache
 
 # vectors are abridged and rounded for brevity
 CANONICAL_COLUMN_VALUES = {
@@ -167,7 +167,7 @@ def test_batch_embedding():
             assert np.allclose(value[:, :abridged_dim], expected_result, atol=2e-3)
 
         if is_ci:
-            shutil.rmtree(model.model._model_dir)
+            delete_model_cache(model.model._model_dir)
 
 
 def test_single_embedding():
@@ -182,7 +182,7 @@ def test_single_embedding():
         assert np.allclose(result[:, :abridged_dim], expected_result, atol=2e-3)
 
         if is_ci:
-            shutil.rmtree(model.model._model_dir)
+            delete_model_cache(model.model._model_dir)
 
 
 def test_single_embedding_query():
@@ -197,7 +197,7 @@ def test_single_embedding_query():
         assert np.allclose(result[:, :abridged_dim], expected_result, atol=2e-3)
 
         if is_ci:
-            shutil.rmtree(model.model._model_dir)
+            delete_model_cache(model.model._model_dir)
 
 
 def test_parallel_processing():
@@ -219,14 +219,16 @@ def test_parallel_processing():
     assert np.allclose(embeddings, embeddings_3, atol=1e-3)
 
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize(
     "model_name",
     ["colbert-ir/colbertv2.0"],
 )
 def test_lazy_load(model_name):
+    is_ci = os.getenv("CI")
+
     model = LateInteractionTextEmbedding(model_name=model_name, lazy_load=True)
     assert not hasattr(model.model, "model")
 
@@ -239,3 +241,6 @@ def test_lazy_load(model_name):
 
     model = LateInteractionTextEmbedding(model_name=model_name, lazy_load=True)
     list(model.passage_embed(docs))
+
+    if is_ci:
+        delete_model_cache(model.model._model_dir)
diff --git a/tests/test_sparse_embeddings.py b/tests/test_sparse_embeddings.py
@@ -1,11 +1,11 @@
 import os
-import shutil
 
 import pytest
 import numpy as np
 
 from fastembed.sparse.bm25 import Bm25
 from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
+from tests.utils import delete_model_cache
 
 CANONICAL_COLUMN_VALUES = {
     "prithvida/Splade_PP_en_v1": {
@@ -61,7 +61,7 @@ def test_batch_embedding():
         for i, value in enumerate(result.values):
             assert pytest.approx(value, abs=0.001) == expected_result["values"][i]
         if is_ci:
-            shutil.rmtree(model.model._model_dir)
+            delete_model_cache(model.model._model_dir)
 
 
 def test_single_embedding():
@@ -77,7 +77,7 @@ def test_single_embedding():
             for i, value in enumerate(result.values):
                 assert pytest.approx(value, abs=0.001) == expected_result["values"][i]
         if is_ci:
-            shutil.rmtree(model.model._model_dir)
+            delete_model_cache(model.model._model_dir)
 
 
 def test_parallel_processing():
@@ -107,7 +107,7 @@ def test_parallel_processing():
         assert np.allclose(sparse_embedding.values, sparse_embedding_all.values, atol=1e-3)
 
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.fixture
@@ -116,7 +116,7 @@ def bm25_instance():
     model = Bm25("Qdrant/bm25", language="english")
     yield model
     if ci:
-        shutil.rmtree(model._model_dir)
+        delete_model_cache(model._model_dir)
 
 
 def test_stem_with_stopwords_and_punctuation(bm25_instance):
@@ -150,11 +150,13 @@ def test_stem_case_insensitive_stopwords(bm25_instance):
     expected = ["quick", "brown", "fox", "test", "sentenc"]
     assert result == expected, f"Expected {expected}, but got {result}"
 
+
 @pytest.mark.parametrize(
     "model_name",
     ["prithivida/Splade_PP_en_v1"],
 )
 def test_lazy_load(model_name):
+    is_ci = os.getenv("CI")
     model = SparseTextEmbedding(model_name=model_name, lazy_load=True)
     assert not hasattr(model.model, "model")
 
@@ -167,3 +169,6 @@ def test_lazy_load(model_name):
 
     model = SparseTextEmbedding(model_name=model_name, lazy_load=True)
     list(model.passage_embed(docs))
+
+    if is_ci:
+        delete_model_cache(model.model._model_dir)
diff --git a/tests/test_text_cross_encoder.py b/tests/test_text_cross_encoder.py
@@ -2,9 +2,9 @@
 
 import numpy as np
 import pytest
-import shutil
 
 from fastembed.rerank.cross_encoder import TextCrossEncoder
+from tests.utils import delete_model_cache
 
 CANONICAL_SCORE_VALUES = {
     "Xenova/ms-marco-MiniLM-L-6-v2": np.array([8.500708, -2.541011]),
@@ -32,7 +32,7 @@ def test_rerank():
             scores, canonical_scores, atol=1e-3
         ), f"Model: {model_name}, Scores: {scores}, Expected: {canonical_scores}"
         if is_ci:
-            shutil.rmtree(model.model._model_dir)
+            delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize(
@@ -55,17 +55,21 @@ def test_batch_rerank(model_name):
         scores, canonical_scores, atol=1e-3
     ), f"Model: {model_name}, Scores: {scores}, Expected: {canonical_scores}"
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize(
     "model_name",
     ["Xenova/ms-marco-MiniLM-L-6-v2"],
 )
 def test_lazy_load(model_name):
+    is_ci = os.getenv("CI")
     model = TextCrossEncoder(model_name=model_name, lazy_load=True)
     assert not hasattr(model.model, "model")
     query = "What is the capital of France?"
     documents = ["Paris is the capital of France.", "Berlin is the capital of Germany."]
     list(model.rerank(query, documents))
     assert hasattr(model.model, "model")
+
+    if is_ci:
+        delete_model_cache(model.model._model_dir)
diff --git a/tests/test_text_onnx_embeddings.py b/tests/test_text_onnx_embeddings.py
@@ -1,10 +1,10 @@
 import os
-import shutil
 
 import numpy as np
 import pytest
 
 from fastembed.text.text_embedding import TextEmbedding
+from tests.utils import delete_model_cache
 
 CANONICAL_VECTOR_VALUES = {
     "BAAI/bge-small-en": np.array([-0.0232, -0.0255, 0.0174, -0.0639, -0.0006]),
@@ -85,7 +85,7 @@ def test_embedding():
             embeddings[0, : canonical_vector.shape[0]], canonical_vector, atol=1e-3
         ), model_desc["model"]
         if is_ci:
-            shutil.rmtree(model.model._model_dir)
+            delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize(
@@ -102,7 +102,7 @@ def test_batch_embedding(n_dims, model_name):
 
     assert embeddings.shape == (200, n_dims)
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize(
@@ -128,14 +128,15 @@ def test_parallel_processing(n_dims, model_name):
     assert np.allclose(embeddings, embeddings_3, atol=1e-3)
 
     if is_ci:
-        shutil.rmtree(model.model._model_dir)
+        delete_model_cache(model.model._model_dir)
 
 
 @pytest.mark.parametrize(
     "model_name",
     ["BAAI/bge-small-en-v1.5"],
 )
 def test_lazy_load(model_name):
+    is_ci = os.getenv("CI")
     model = TextEmbedding(model_name=model_name, lazy_load=True)
     assert not hasattr(model.model, "model")
     docs = ["hello world", "flag embedding"]
@@ -147,3 +148,6 @@ def test_lazy_load(model_name):
 
     model = TextEmbedding(model_name=model_name, lazy_load=True)
     list(model.passage_embed(docs))
+
+    if is_ci:
+        delete_model_cache(model.model._model_dir)
diff --git a/tests/utils.py b/tests/utils.py
@@ -0,0 +1,32 @@
+import shutil
+import traceback
+
+from pathlib import Path
+from typing import Union
+
+
+def delete_model_cache(model_dir: Union[str, Path]) -> None:
+    """Remove a cached model from disk.
+
+    For a HuggingFace hub download model_dir is a snapshots dir; removing only that frees no
+    disk space because the data is in the blobs dir, so the "models--*" dir two levels up goes.
+    Otherwise (e.g. a model downloaded from GCS) model_dir itself is removed.
+
+    Args:
+        model_dir (Union[str, Path]): The path to the model cache directory.
+    """
+
+    def report_failure(func, path, exc_info):
+        print("Failed to remove: ", path)
+        print("Exception: ", exc_info)
+        traceback.print_exception(*exc_info)
+
+    target = Path(model_dir) if isinstance(model_dir, str) else model_dir
+    cache_root = target.parent.parent
+    if cache_root.name.startswith("models--"):
+        target = cache_root
+
+    if not target.exists():
+        return
+    # todo: PermissionDenied is raised on blobs removal in Windows, with blobs > 2GB
+    shutil.rmtree(target, onerror=report_failure)