|
57 | 57 | adapted_from="BAAI/bge-m3", |
58 | 58 | ) |
59 | 59 |
|
# Registry entry for GreenNode's Vietnamese embedding model adapted from
# KaLM-embedding-multilingual-mini-instruct-v2.5.
greennode_embedding_kalm_mini_instruct_vn_v1 = ModelMeta(
    # --- Identity and loading ---
    name="GreenNode/GreenNode-Embedding-KaLM-Mini-Instruct-VN-V1",
    revision="c123a4b0ef40ed847cb5122ff2c70ffc92129f3a",
    release_date="2026-02-26",
    reference="https://huggingface.co/GreenNode/GreenNode-Embedding-KaLM-Mini-Instruct-VN-V1",
    loader=sentence_transformers_loader,
    framework=["Sentence Transformers", "PyTorch", "safetensors"],
    # --- Model characteristics ---
    model_type=["dense"],
    languages=["vie-Latn"],
    n_parameters=494_032_768,
    n_embedding_parameters=136_134_656,
    memory_usage_mb=1885,
    embed_dim=896,
    max_tokens=32768,
    similarity_fn_name="cosine",
    use_instructions=False,
    # --- Licensing and openness ---
    license="cc-by-4.0",
    open_weights=True,
    # --- Training provenance ---
    adapted_from="KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5",
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/GreenNode/GreenNode-Table-Markdown-Retrieval-VN",
    # Shares the dataset collection declared for the GreenNode large VN v1 model.
    training_datasets=greennode_embedding_large_vn_v1_training_data,
)
| 85 | + |
# Registry entry for GreenNode's Vietnamese embedding model adapted from
# intfloat/multilingual-e5-large.
greennode_embedding_e5_large_vn_v1 = ModelMeta(
    # --- Identity and loading ---
    name="GreenNode/GreenNode-Embedding-E5-Large-VN-V1",
    revision="a15ad86a2a4c80f168210a17cc5d540a52354113",
    release_date="2026-02-26",
    reference="https://huggingface.co/GreenNode/GreenNode-Embedding-E5-Large-VN-V1",
    loader=sentence_transformers_loader,
    framework=["Sentence Transformers", "PyTorch", "safetensors"],
    # --- Model characteristics ---
    model_type=["dense"],
    languages=["vie-Latn"],
    n_parameters=560_000_000,
    n_embedding_parameters=256_002_048,
    memory_usage_mb=2136,
    embed_dim=1024,
    max_tokens=512,
    similarity_fn_name="cosine",
    use_instructions=False,
    # --- Licensing and openness ---
    license="cc-by-4.0",
    open_weights=True,
    # --- Training provenance ---
    adapted_from="intfloat/multilingual-e5-large",
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/GreenNode/GreenNode-Table-Markdown-Retrieval-VN",
    # Shares the dataset collection declared for the GreenNode large VN v1 model.
    training_datasets=greennode_embedding_large_vn_v1_training_data,
)
| 111 | + |
60 | 112 | aiteamvn_vietnamese_embeddings = ModelMeta( |
61 | 113 | name="AITeamVN/Vietnamese_Embedding", |
62 | 114 | model_type=["dense"], |
63 | | - revision="fcbbb905e6c3757d421aaa5db6fd7c53d038f6fb", |
| 115 | + revision="dea33aa1ab339f38d66ae0a40e6c40e0a9249568", |
64 | 116 | release_date="2024-03-17", |
65 | 117 | languages=[ |
66 | 118 | "vie-Latn", |
|
121 | 173 | }""", |
122 | 174 | ) |
123 | 175 |
|
# Registry entry for the Halong Vietnamese embedding model, adapted from
# intfloat/multilingual-e5-base.
# The variable keeps its historical `hiieu_` prefix so existing imports of this
# name continue to work, while `name` carries the `contextboxai/` repo id.
hiieu_halong_embedding = ModelMeta(
    name="contextboxai/halong_embedding",
    model_type=["dense"],
    revision="b57776031035f70ed2030d2e35ecc533eb0f8f71",
    release_date="2024-07-06",
    languages=[
        "vie-Latn",
    ],
    loader=sentence_transformers_loader,
    use_instructions=False,
    open_weights=True,
    n_parameters=278_043_648,
    n_embedding_parameters=192_001_536,
    memory_usage_mb=1061,
    embed_dim=768,
    license="apache-2.0",
    max_tokens=514,
    # Kept in sync with `name`, matching the other entries in this file where
    # the reference URL is the Hugging Face page of the same repo id.
    # NOTE(review): confirm `contextboxai/halong_embedding` is the canonical
    # location for revision b5777603.
    reference="https://huggingface.co/contextboxai/halong_embedding",
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch", "safetensors"],
    public_training_code=None,
    public_training_data=None,
    training_datasets=None,
    adapted_from="intfloat/multilingual-e5-base",
    citation="""@misc{HalongEmbedding,
title={HalongEmbedding: A Vietnamese Text Embedding},
author={Ngo Hieu},
year={2024},
publisher={Huggingface},
}""",
)
| 207 | + |
124 | 208 | sup_simcse_vietnamese_phobert_base_ = ModelMeta( |
125 | 209 | name="VoVanPhuc/sup-SimCSE-VietNamese-phobert-base", |
126 | 210 | model_type=["dense"], |
|
0 commit comments