Skip to content

Commit 3ab29f4

Browse files
authored
model: Vietnamese model for VN-MTEB (#4187)
* [ADD] Vietnamese model for VN-MTEB * [ADD] Vietnamese model for VN-MTEB (rename variable)
1 parent bce3fac commit 3ab29f4

File tree

1 file changed

+85
-1
lines changed

1 file changed

+85
-1
lines changed

mteb/models/model_implementations/vi_vn_models.py

Lines changed: 85 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,10 +57,62 @@
5757
adapted_from="BAAI/bge-m3",
5858
)
5959

60+
# Registry entry for GreenNode's Vietnamese embedding model that is adapted
# from KaLM-embedding-multilingual-mini-instruct-v2.5.
greennode_embedding_kalm_mini_instruct_vn_v1 = ModelMeta(
    # --- Identity and provenance ---
    name="GreenNode/GreenNode-Embedding-KaLM-Mini-Instruct-VN-V1",
    revision="c123a4b0ef40ed847cb5122ff2c70ffc92129f3a",
    release_date="2026-02-26",
    reference="https://huggingface.co/GreenNode/GreenNode-Embedding-KaLM-Mini-Instruct-VN-V1",
    adapted_from="KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5",
    license="cc-by-4.0",
    open_weights=True,
    languages=["vie-Latn"],
    # --- Loading ---
    loader=sentence_transformers_loader,
    framework=["Sentence Transformers", "PyTorch", "safetensors"],
    # NOTE(review): the base checkpoint's name contains "instruct" while
    # instructions are disabled here -- confirm this is intentional.
    use_instructions=False,
    # --- Size and shape ---
    model_type=["dense"],
    n_parameters=494_032_768,
    n_embedding_parameters=136_134_656,
    memory_usage_mb=1885,
    embed_dim=896,
    max_tokens=32768,
    similarity_fn_name="cosine",
    # --- Training provenance ---
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/GreenNode/GreenNode-Table-Markdown-Retrieval-VN",
    # Shares the training-data declaration with the other GreenNode entries
    # (the constant is defined outside this block).
    training_datasets=greennode_embedding_large_vn_v1_training_data,
)
85+
86+
# Registry entry for GreenNode's Vietnamese embedding model that is adapted
# from intfloat/multilingual-e5-large.
greennode_embedding_e5_large_vn_v1 = ModelMeta(
    # --- Identity and provenance ---
    name="GreenNode/GreenNode-Embedding-E5-Large-VN-V1",
    revision="a15ad86a2a4c80f168210a17cc5d540a52354113",
    release_date="2026-02-26",
    reference="https://huggingface.co/GreenNode/GreenNode-Embedding-E5-Large-VN-V1",
    adapted_from="intfloat/multilingual-e5-large",
    license="cc-by-4.0",
    open_weights=True,
    languages=["vie-Latn"],
    # --- Loading ---
    loader=sentence_transformers_loader,
    framework=["Sentence Transformers", "PyTorch", "safetensors"],
    use_instructions=False,
    # --- Size and shape ---
    model_type=["dense"],
    # NOTE(review): this looks like a rounded figure, unlike the exact counts
    # used by the neighbouring entries -- confirm against the checkpoint.
    n_parameters=560_000_000,
    n_embedding_parameters=256_002_048,
    memory_usage_mb=2136,
    embed_dim=1024,
    max_tokens=512,
    similarity_fn_name="cosine",
    # --- Training provenance ---
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/GreenNode/GreenNode-Table-Markdown-Retrieval-VN",
    # Shares the training-data declaration with the other GreenNode entries
    # (the constant is defined outside this block).
    training_datasets=greennode_embedding_large_vn_v1_training_data,
)
111+
60112
aiteamvn_vietnamese_embeddings = ModelMeta(
61113
name="AITeamVN/Vietnamese_Embedding",
62114
model_type=["dense"],
63-
revision="fcbbb905e6c3757d421aaa5db6fd7c53d038f6fb",
115+
revision="dea33aa1ab339f38d66ae0a40e6c40e0a9249568",
64116
release_date="2024-03-17",
65117
languages=[
66118
"vie-Latn",
@@ -121,6 +173,38 @@
121173
}""",
122174
)
123175

176+
# Registry entry for the Halong Vietnamese embedding model, adapted from
# intfloat/multilingual-e5-base.
#
# NOTE(review): the variable keeps its `hiieu_` prefix so existing imports keep
# working, while `name` points at the `contextboxai` namespace. `reference`
# previously pointed at `hiieu/halong_embedding`, disagreeing with `name`; it
# is aligned with `name` here because `name` is the identifier paired with the
# pinned revision. Confirm the canonical namespace on the Hub.
hiieu_halong_embedding = ModelMeta(
    # --- Identity and provenance ---
    name="contextboxai/halong_embedding",
    revision="b57776031035f70ed2030d2e35ecc533eb0f8f71",
    release_date="2024-07-06",
    reference="https://huggingface.co/contextboxai/halong_embedding",
    adapted_from="intfloat/multilingual-e5-base",
    license="apache-2.0",
    open_weights=True,
    languages=["vie-Latn"],
    # --- Loading ---
    loader=sentence_transformers_loader,
    framework=["Sentence Transformers", "PyTorch", "safetensors"],
    use_instructions=False,
    # --- Size and shape ---
    model_type=["dense"],
    n_parameters=278_043_648,
    n_embedding_parameters=192_001_536,
    memory_usage_mb=1061,
    embed_dim=768,
    max_tokens=514,
    similarity_fn_name="cosine",
    # --- Training provenance (nothing declared for this model) ---
    public_training_code=None,
    public_training_data=None,
    training_datasets=None,
    citation="""@misc{HalongEmbedding,
title={HalongEmbedding: A Vietnamese Text Embedding},
author={Ngo Hieu},
year={2024},
publisher={Huggingface},
}""",
)
207+
124208
sup_simcse_vietnamese_phobert_base_ = ModelMeta(
125209
name="VoVanPhuc/sup-SimCSE-VietNamese-phobert-base",
126210
model_type=["dense"],

0 commit comments

Comments
 (0)