Skip to content

Commit 1ad433f

Browse files
model: Added emillykkes scandi models (#3521)
* model: Added EmbeddingGemma-Scandi-300m ``` from sentence_transformers import SentenceTransformer # test that it loads with sentence transformers model = SentenceTransformer("emillykkejensen/EmbeddingGemma-Scandi-300m") # OSError: Can't load the model for 'emillykkejensen/EmbeddingGemma-Scandi-300m'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'emillykkejensen/EmbeddingGemma-Scandi-300m' is the correct path to a directory containing a file named pytorch_model.bin, tf_model.h5, model.ckpt or flax_model.msgpack. ``` Can't get the model to load * added other models * fix remaining metadata issue
1 parent 940f897 commit 1ad433f

File tree

1 file changed

+70
-0
lines changed

1 file changed

+70
-0
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from mteb.models.model_meta import ModelMeta
2+
from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
3+
4+
# Registry metadata for emillykkejensen/EmbeddingGemma-Scandi-300m, loaded
# through the generic Sentence Transformers loader.
embedding_gemma_300m_scandi = ModelMeta(
    loader=sentence_transformers_loader,  # type: ignore
    name="emillykkejensen/EmbeddingGemma-Scandi-300m",
    # Danish, Swedish, Norwegian (macrolanguage), Bokmål and Nynorsk.
    languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
    open_weights=True,
    # Pinned Hugging Face commit so results stay reproducible.
    revision="9f3307b9f601db564a9190cb475324d128dcfe86",
    release_date="2025-10-17",
    n_parameters=307_581_696,
    embed_dim=768,
    max_tokens=2048,
    license="apache-2.0",
    reference="https://huggingface.co/emillykkejensen/EmbeddingGemma-Scandi-300m",
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=True,
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
    training_datasets=set(),
    similarity_fn_name="cosine",  # type: ignore[arg-type]
    # A model cannot be adapted from itself; point at the upstream base.
    # NOTE(review): base checkpoint assumed to be google/embeddinggemma-300m
    # — confirm against the model card.
    adapted_from="google/embeddinggemma-300m",
    memory_usage_mb=578,
)
25+
26+
27+
# Registry metadata for emillykkejensen/Qwen3-Embedding-Scandi-0.6B.
qwen_scandi = ModelMeta(
    # --- identity and provenance ---
    name="emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
    revision="cf1e7ba36ebd3d605549d8f02930a18e17b54513",
    release_date="2025-10-17",
    reference="https://huggingface.co/emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
    adapted_from="Qwen/Qwen3-Embedding-0.6B",
    license="apache-2.0",
    open_weights=True,
    # --- loading ---
    loader=sentence_transformers_loader,  # type: ignore
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=True,
    similarity_fn_name="cosine",  # type: ignore[arg-type]
    # --- size and capacity ---
    n_parameters=595_776_512,
    memory_usage_mb=2272,
    embed_dim=1024,
    max_tokens=32768,
    # --- languages and training data ---
    languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
    training_datasets=set(),
)
48+
49+
50+
# Registry metadata for emillykkejensen/mmBERTscandi-base-embedding, loaded
# through the generic Sentence Transformers loader.
mmbert_scandi = ModelMeta(
    loader=sentence_transformers_loader,  # type: ignore
    name="emillykkejensen/mmBERTscandi-base-embedding",
    # Danish, Swedish, Norwegian (macrolanguage), Bokmål and Nynorsk.
    languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
    open_weights=True,
    # Pinned Hugging Face commit so results stay reproducible.
    revision="82d74c7a5d8e1ddf31b132865df2d16b2b0294ee",
    release_date="2025-10-17",
    n_parameters=306_939_648,
    memory_usage_mb=1171,
    embed_dim=768,
    max_tokens=8192,
    license="apache-2.0",
    # The reference must be this model's own page, matching `name`.
    reference="https://huggingface.co/emillykkejensen/mmBERTscandi-base-embedding",
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=True,
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
    training_datasets=set(),
    similarity_fn_name="cosine",  # type: ignore[arg-type]
    adapted_from="jonasaise/scandmmBERT-base-scandinavian",
)

0 commit comments

Comments
 (0)