Skip to content

Commit 9cbabb5

Browse files
committed
Merge branch 'main' into add_active_parameter
2 parents 7896319 + b968433 commit 9cbabb5

File tree

164 files changed

+2179
-786
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

164 files changed

+2179
-786
lines changed

docs/overview/create_available_benchmarks.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
"""Updates the available benchmarks markdown file."""
22

33
from pathlib import Path
4-
from typing import cast
4+
from typing import TYPE_CHECKING, cast
55

66
from prettify_list import pretty_long_list
77
from slugify import slugify_anchor
88

99
import mteb
10-
from mteb.get_tasks import MTEBTasks
10+
11+
if TYPE_CHECKING:
12+
from mteb.get_tasks import MTEBTasks
1113

1214
benchmark_entry = """
1315
#### {benchmark_name}
@@ -38,7 +40,7 @@
3840
def create_table(benchmark: mteb.Benchmark) -> str:
3941
"""Create a markdown table of tasks in the benchmark."""
4042
tasks = benchmark.tasks
41-
tasks = cast(MTEBTasks, tasks)
43+
tasks = cast("MTEBTasks", tasks)
4244
df = tasks.to_dataframe(["name", "type", "modalities", "languages"])
4345

4446
# add links to task names:

docs/overview/create_available_models.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
"""Updates the available models markdown files."""
22

3+
from __future__ import annotations
4+
35
from pathlib import Path
6+
from typing import TYPE_CHECKING
47

58
from prettify_list import pretty_long_list
69

710
import mteb
8-
from mteb.models import ModelMeta
11+
12+
if TYPE_CHECKING:
13+
from mteb.models import ModelMeta
914

1015
model_entry = """
1116
#### {model_name_w_link}

docs/overview/create_available_tasks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def task_category_to_string(category: str) -> str:
8383

8484

8585
def create_aggregate_table(task: AbsTaskAggregate) -> str:
86-
tasks = cast(MTEBTasks, MTEBTasks(task.metadata.tasks))
86+
tasks = cast("MTEBTasks", MTEBTasks(task.metadata.tasks))
8787
df = tasks.to_dataframe(["name", "type", "modalities", "languages"])
8888
df["name"] = df.apply(
8989
lambda row: (

docs/usage/loading_results.md

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@ For instance, if you are selecting the best model for semantic text similarity (
77

88
```python
99
import mteb
10-
from mteb.cache import ResultCache
1110

1211
tasks = mteb.get_tasks(tasks=["STS12"])
1312
model_names = ["intfloat/multilingual-e5-large"]
1413

15-
cache = ResultCache("~/.cache/mteb")
14+
cache = mteb.ResultCache("~/.cache/mteb")
1615
results = cache.load_results(models=model_names, tasks=tasks)
1716
```
1817

@@ -36,16 +35,16 @@ All previously submitted results are available in the results [repository](https://gith
3635
You can download this using:
3736

3837
```python
39-
from mteb.cache import ResultCache
38+
import mteb
4039

41-
cache = ResultCache()
40+
cache = mteb.ResultCache()
4241
cache.download_from_remote() # download results from the remote repository
4342
```
4443

4544
From here, you can work with the cache as usual. For instance, if you are selecting the best model for your French and English retrieval task on legal documents, you could fetch the relevant tasks and create a dataframe of the results using the following code:
4645

4746
```python
48-
from mteb.cache import ResultCache
47+
import mteb
4948

5049
# select your tasks
5150
tasks = mteb.get_tasks(task_types=["Retrieval"], languages=["eng", "fra"], domains=["Legal"])
@@ -56,7 +55,7 @@ model_names = [
5655
]
5756

5857

59-
cache = ResultCache()
58+
cache = mteb.ResultCache()
6059
cache.download_from_remote() # download results from the remote repository. Might take a while the first time.
6160

6261
results = cache.load_results(
@@ -88,11 +87,10 @@ If you loaded results for a specific benchmark, you can get the aggregated bench
8887

8988
```python
9089
import mteb
91-
from mteb.cache import ResultCache
9290

9391
# Load results for a specific benchmark
9492
benchmark = mteb.get_benchmark("MTEB(eng, v2)")
95-
cache = ResultCache()
93+
cache = mteb.ResultCache()
9694
cache.download_from_remote() # download results from the remote repository
9795
results = cache.load_results(
9896
models=["intfloat/e5-small", "intfloat/multilingual-e5-small"],

docs/whats_new.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,12 @@ results = mteb.evaluate(model, tasks)
3737
### Better local and online caching
3838
The new [`mteb.ResultCache`][mteb.cache.ResultCache] makes managing the cache notably easier:
3939
```py
40-
from mteb.cache import ResultCache
40+
import mteb
4141

4242
model = ...
4343
tasks = ...
4444

45-
cache = ResultCache(cache_path="~/.cache/mteb") # default
45+
cache = mteb.ResultCache(cache_path="~/.cache/mteb") # default
4646

4747
# simple evaluate with cache
4848
results = mteb.evaluate(model, tasks, cache=cache) # only runs if results not in cache
@@ -169,9 +169,9 @@ We've added a lot of new documentation to make it easier to get started with MTE
169169
The new `ResultCache` also makes it easier to load, inspect and compare both local and online results:
170170

171171
```py
172-
from mteb.cache import ResultCache
172+
import mteb
173173

174-
cache = ResultCache(cache_path="~/.cache/mteb") # default
174+
cache = mteb.ResultCache(cache_path="~/.cache/mteb") # default
175175
cache.download_from_remote() # download the latest results from the remote repository
176176

177177
# load both local and online results

mteb/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from mteb import types
44
from mteb.abstasks import AbsTask
55
from mteb.abstasks.task_metadata import TaskMetadata
6+
from mteb.cache import ResultCache
67
from mteb.deprecated_evaluator import MTEB
78
from mteb.evaluate import evaluate
89
from mteb.filter_tasks import filter_tasks
@@ -33,6 +34,7 @@
3334
"CrossEncoderProtocol",
3435
"EncoderProtocol",
3536
"IndexEncoderSearchProtocol",
37+
"ResultCache",
3638
"SearchProtocol",
3739
"SentenceTransformerEncoderWrapper",
3840
"TaskMetadata",

mteb/_create_dataloaders.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,28 @@
1+
from __future__ import annotations
2+
13
import logging
24
import warnings
3-
from collections.abc import Callable
4-
from typing import Any, cast
5+
from typing import TYPE_CHECKING, Any, cast
56

67
import torch
78
from datasets import Dataset, Image
89
from torch.utils.data import DataLoader, default_collate
910

10-
from mteb.abstasks.task_metadata import TaskMetadata
1111
from mteb.types import (
12-
BatchedInput,
13-
Conversation,
1412
ConversationTurn,
1513
PromptType,
16-
QueryDatasetType,
1714
)
18-
from mteb.types._encoder_io import CorpusInput, ImageInput, QueryInput, TextInput
15+
16+
if TYPE_CHECKING:
17+
from collections.abc import Callable
18+
19+
from mteb.abstasks.task_metadata import TaskMetadata
20+
from mteb.types import (
21+
BatchedInput,
22+
Conversation,
23+
QueryDatasetType,
24+
)
25+
from mteb.types._encoder_io import CorpusInput, ImageInput, QueryInput, TextInput
1926

2027
logger = logging.getLogger(__name__)
2128

@@ -128,7 +135,7 @@ def _convert_conv_history_to_query(
128135
conversation = row["text"]
129136
# if it's a list of strings, just join them
130137
if isinstance(conversation, list) and isinstance(conversation[0], str):
131-
conversation_ = cast(list[str], conversation)
138+
conversation_ = cast("list[str]", conversation)
132139
conv_str = "; ".join(conversation_)
133140
current_conversation = [
134141
ConversationTurn(role="user", content=message) for message in conversation_
@@ -173,7 +180,7 @@ def _convert_conv_history_to_query(
173180

174181
row["text"] = conv_str
175182
row["conversation"] = current_conversation
176-
return cast(dict[str, str | list[ConversationTurn]], row)
183+
return cast("dict[str, str | list[ConversationTurn]]", row)
177184

178185

179186
def _create_dataloader_for_queries_conversation(

mteb/_evaluators/any_sts_evaluator.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
1+
from __future__ import annotations
2+
13
import logging
2-
from typing import TypedDict
4+
from typing import TYPE_CHECKING, TypedDict
35

4-
from datasets import Dataset
56
from sklearn.metrics.pairwise import (
67
paired_cosine_distances,
78
paired_euclidean_distances,
89
paired_manhattan_distances,
910
)
1011

1112
from mteb._create_dataloaders import create_dataloader
12-
from mteb.abstasks.task_metadata import TaskMetadata
13-
from mteb.models import EncoderProtocol
1413
from mteb.similarity_functions import compute_pairwise_similarity
15-
from mteb.types import EncodeKwargs, PromptType
1614

1715
from .evaluator import Evaluator
1816

17+
if TYPE_CHECKING:
18+
from datasets import Dataset
19+
20+
from mteb.abstasks.task_metadata import TaskMetadata
21+
from mteb.models import EncoderProtocol
22+
from mteb.types import EncodeKwargs, PromptType
23+
1924
logger = logging.getLogger(__name__)
2025

2126

mteb/_evaluators/clustering_evaluator.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1+
from __future__ import annotations
2+
13
import logging
4+
from typing import TYPE_CHECKING
25

3-
from datasets import Dataset
46
from sklearn import cluster
57

68
from mteb._create_dataloaders import create_dataloader
7-
from mteb.abstasks.task_metadata import TaskMetadata
8-
from mteb.models import EncoderProtocol
9-
from mteb.types import EncodeKwargs
109

1110
from .evaluator import Evaluator
1211

12+
if TYPE_CHECKING:
13+
from datasets import Dataset
14+
15+
from mteb.abstasks.task_metadata import TaskMetadata
16+
from mteb.models import EncoderProtocol
17+
from mteb.types import EncodeKwargs
18+
1319
logger = logging.getLogger(__name__)
1420

1521

mteb/_evaluators/evaluator.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1+
from __future__ import annotations
2+
13
from abc import ABC, abstractmethod
2-
from collections.abc import Iterable, Mapping
3-
from typing import Any
4+
from typing import TYPE_CHECKING, Any
45

56
from mteb.abstasks.abstask import _set_seed
6-
from mteb.models import EncoderProtocol
7-
from mteb.types import EncodeKwargs
7+
8+
if TYPE_CHECKING:
9+
from collections.abc import Iterable, Mapping
10+
11+
from mteb.models import EncoderProtocol
12+
from mteb.types import EncodeKwargs
813

914

1015
class Evaluator(ABC):

0 commit comments

Comments
 (0)