|
| 1 | +from mteb.models.model_meta import ModelMeta |
| 2 | +from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader |
| 3 | + |
# Registry metadata for emillykkejensen/EmbeddingGemma-Scandi-300m, loaded
# through the generic Sentence Transformers loader.
embedding_gemma_300m_scandi = ModelMeta(
    loader=sentence_transformers_loader,  # type: ignore
    name="emillykkejensen/EmbeddingGemma-Scandi-300m",
    languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
    open_weights=True,
    # Revision is pinned so the registered entry always refers to one exact
    # checkpoint on the Hub.
    revision="9f3307b9f601db564a9190cb475324d128dcfe86",
    release_date="2025-10-17",
    n_parameters=307_581_696,
    memory_usage_mb=578,
    embed_dim=768,
    max_tokens=2048,
    license="apache-2.0",
    reference="https://huggingface.co/emillykkejensen/EmbeddingGemma-Scandi-300m",
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=True,
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
    training_datasets=set(),
    similarity_fn_name="cosine",  # type: ignore[arg-type]
    # Base checkpoint this model was fine-tuned from. The previous value
    # pointed back at this model's own name, which records no lineage.
    # NOTE(review): confirm the base model id against the model card.
    adapted_from="google/embeddinggemma-300m",
)
| 25 | + |
| 26 | + |
# Registry metadata for emillykkejensen/Qwen3-Embedding-Scandi-0.6B.
qwen_scandi = ModelMeta(
    # --- identity ---------------------------------------------------------
    name="emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
    revision="cf1e7ba36ebd3d605549d8f02930a18e17b54513",
    release_date="2025-10-17",
    reference="https://huggingface.co/emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
    adapted_from="Qwen/Qwen3-Embedding-0.6B",
    license="apache-2.0",
    open_weights=True,
    # --- loading ----------------------------------------------------------
    loader=sentence_transformers_loader,  # type: ignore
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=True,
    similarity_fn_name="cosine",  # type: ignore[arg-type]
    # --- size and limits --------------------------------------------------
    n_parameters=595_776_512,
    memory_usage_mb=2272,
    embed_dim=1024,
    max_tokens=32_768,
    # --- languages and training data --------------------------------------
    languages=[
        "dan-Latn",
        "swe-Latn",
        "nor-Latn",
        "nob-Latn",
        "nno-Latn",
    ],
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
    training_datasets=set(),
)
| 48 | + |
| 49 | + |
# Registry metadata for emillykkejensen/mmBERTscandi-base-embedding, loaded
# through the generic Sentence Transformers loader.
mmbert_scandi = ModelMeta(
    loader=sentence_transformers_loader,  # type: ignore
    name="emillykkejensen/mmBERTscandi-base-embedding",
    languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
    open_weights=True,
    # Revision is pinned so the registered entry always refers to one exact
    # checkpoint on the Hub.
    revision="82d74c7a5d8e1ddf31b132865df2d16b2b0294ee",
    release_date="2025-10-17",
    n_parameters=306_939_648,
    memory_usage_mb=1171,
    embed_dim=768,
    max_tokens=8192,
    license="apache-2.0",
    # Must link to this model's own page; it previously carried the Qwen3
    # entry's URL (copy-paste slip).
    reference="https://huggingface.co/emillykkejensen/mmBERTscandi-base-embedding",
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=True,
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
    training_datasets=set(),
    similarity_fn_name="cosine",  # type: ignore[arg-type]
    adapted_from="jonasaise/scandmmBERT-base-scandinavian",
)
0 commit comments