@@ -54,6 +54,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
54
54
| [ConGen-IndoBERT Lite Base](https://huggingface.co/LazarusNLP/congen-indobert-lite-base) | 12M | [IndoBERT Lite Base](https://huggingface.co/indobenchmark/indobert-lite-base-p1) | [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | [Wikipedia](https://huggingface.co/datasets/LazarusNLP/wikipedia_id_20230520) | |
55
55
| [ConGen-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-indobert-base) | 125M | [IndoBERT Base](https://huggingface.co/indobenchmark/indobert-base-p1) | [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | [Wikipedia](https://huggingface.co/datasets/LazarusNLP/wikipedia_id_20230520) | |
56
56
| [ConGen-SimCSE-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-simcse-indobert-base) | 125M | [SimCSE-IndoBERT Base](https://huggingface.co/LazarusNLP/simcse-indobert-base) | [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | [Wikipedia](https://huggingface.co/datasets/LazarusNLP/wikipedia_id_20230520) | |
57
+ | [ConGen-Indo-e5 Small](https://huggingface.co/LazarusNLP/congen-indo-e5-small) | 118M | [multilingual-e5-small](https://huggingface.co/intfloat/multilingual-e5-small) | [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | [Wikipedia](https://huggingface.co/datasets/LazarusNLP/wikipedia_id_20230520) | |
57
58
| [SCT-IndoBERT Base](https://huggingface.co/LazarusNLP/sct-indobert-base) | 125M | [IndoBERT Base](https://huggingface.co/indobenchmark/indobert-base-p1) | [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | [Wikipedia](https://huggingface.co/datasets/LazarusNLP/wikipedia_id_20230520) | |
58
59
| [S-IndoBERT Base mMARCO](https://huggingface.co/LazarusNLP/s-indobert-base-mmarco) | 125M | [IndoBERT Base](https://huggingface.co/indobenchmark/indobert-base-p1) | N/A | [mMARCO](https://huggingface.co/datasets/unicamp-dl/mmarco) | ✅ |
59
60
| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 134M | [DistilBERT Base Multilingual](https://huggingface.co/distilbert-base-multilingual-cased) | mUSE | See: [SBERT](https://www.sbert.net/docs/pretrained_models.html#model-overview) | ✅ |
@@ -76,6 +77,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
76
77
| [ConGen-IndoBERT Lite Base](https://huggingface.co/LazarusNLP/congen-indobert-lite-base) | 79.97 |
77
78
| [ConGen-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-indobert-base) | 80.47 |
78
79
| [ConGen-SimCSE-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-simcse-indobert-base) | 81.16 |
80
+ | [ConGen-Indo-e5 Small](https://huggingface.co/LazarusNLP/congen-indo-e5-small) | 80.94 |
79
81
| [SCT-IndoBERT Base](https://huggingface.co/LazarusNLP/sct-indobert-base) | 74.56 |
80
82
| [S-IndoBERT Base mMARCO](https://huggingface.co/LazarusNLP/s-indobert-base-mmarco) | 72.95 |
81
83
| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 75.08 |
@@ -94,6 +96,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
94
96
| [ConGen-IndoBERT Lite Base](https://huggingface.co/LazarusNLP/congen-indobert-lite-base) | 46.04 | 59.06 | 51.01 |
95
97
| [ConGen-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-indobert-base) | 45.93 | 58.58 | 49.95 |
96
98
| [ConGen-SimCSE-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-simcse-indobert-base) | 45.83 | 58.27 | 49.91 |
99
+ | [ConGen-Indo-e5 Small](https://huggingface.co/LazarusNLP/congen-indo-e5-small) | 55.00 | 66.74 | 58.95 |
97
100
| [SCT-IndoBERT Base](https://huggingface.co/LazarusNLP/sct-indobert-base) | 40.41 | 47.29 | 40.68 |
98
101
| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 41.35 | 54.93 | 48.79 |
99
102
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | 52.81 | 65.07 | 57.97 |
@@ -109,6 +112,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
109
112
| [ConGen-IndoBERT Lite Base](https://huggingface.co/LazarusNLP/congen-indobert-lite-base) | 75.22 | 81.55 | 84.13 |
110
113
| [ConGen-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-indobert-base) | 73.09 | 80.32 | 83.29 |
111
114
| [ConGen-SimCSE-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-simcse-indobert-base) | 72.38 | 79.37 | 82.51 |
115
+ | [ConGen-Indo-e5 Small](https://huggingface.co/LazarusNLP/congen-indo-e5-small) | 84.60 | 89.30 | 91.27 |
112
116
| [SCT-IndoBERT Base](https://huggingface.co/LazarusNLP/sct-indobert-base) | 76.81 | 83.16 | 85.87 |
113
117
| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 70.44 | 77.94 | 81.56 |
114
118
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | 81.41 | 87.05 | 89.44 |
@@ -126,6 +130,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
126
130
| [ConGen-IndoBERT Lite Base](https://huggingface.co/LazarusNLP/congen-indobert-lite-base) | 62.41 | 60.94 |
127
131
| [ConGen-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-indobert-base) | 61.14 | 60.02 |
128
132
| [ConGen-SimCSE-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-simcse-indobert-base) | 60.93 | 59.50 |
133
+ | [ConGen-Indo-e5 Small](https://huggingface.co/LazarusNLP/congen-indo-e5-small) | 62.92 | 60.18 |
129
134
| [SCT-IndoBERT Base](https://huggingface.co/LazarusNLP/sct-indobert-base) | 55.66 | 54.48 |
130
135
| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 55.99 | 52.44 |
131
136
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | 65.43 | 63.55 |
@@ -141,6 +146,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
141
146
| [ConGen-IndoBERT Lite Base](https://huggingface.co/LazarusNLP/congen-indobert-lite-base) | 67.25 | 66.53 |
142
147
| [ConGen-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-indobert-base) | 67.72 | 67.32 |
143
148
| [ConGen-SimCSE-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-simcse-indobert-base) | 67.12 | 66.64 |
149
+ | [ConGen-Indo-e5 Small](https://huggingface.co/LazarusNLP/congen-indo-e5-small) | 66.92 | 66.29 |
144
150
| [SCT-IndoBERT Base](https://huggingface.co/LazarusNLP/sct-indobert-base) | 61.89 | 60.97 |
145
151
| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 65.25 | 63.45 |
146
152
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | 70.72 | 70.58 |
@@ -156,6 +162,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
156
162
| [ConGen-IndoBERT Lite Base](https://huggingface.co/LazarusNLP/congen-indobert-lite-base) | 58.18 | 58.84 |
157
163
| [ConGen-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-indobert-base) | 57.04 | 57.06 |
158
164
| [ConGen-SimCSE-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-simcse-indobert-base) | 59.54 | 60.37 |
165
+ | [ConGen-Indo-e5 Small](https://huggingface.co/LazarusNLP/congen-indo-e5-small) | 60.00 | 60.52 |
159
166
| [SCT-IndoBERT Base](https://huggingface.co/LazarusNLP/sct-indobert-base) | 61.13 | 61.70 |
160
167
| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 63.63 | 64.13 |
161
168
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | 63.18 | 63.78 |
@@ -171,6 +178,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
171
178
| [ConGen-IndoBERT Lite Base](https://huggingface.co/LazarusNLP/congen-indobert-lite-base) | 81.2 | 75.59 |
172
179
| [ConGen-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-indobert-base) | 85.4 | 82.12 |
173
180
| [ConGen-SimCSE-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-simcse-indobert-base) | 83.0 | 78.74 |
181
+ | [ConGen-Indo-e5 Small](https://huggingface.co/LazarusNLP/congen-indo-e5-small) | 84.2 | 80.21 |
174
182
| [SCT-IndoBERT Base](https://huggingface.co/LazarusNLP/sct-indobert-base) | 82.0 | 76.92 |
175
183
| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 78.8 | 73.64 |
176
184
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | 89.6 | **86.56** |
@@ -188,6 +196,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
188
196
| [ConGen-IndoBERT Lite Base](https://huggingface.co/LazarusNLP/congen-indobert-lite-base) | 69.44 | 53.74 |
189
197
| [ConGen-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-indobert-base) | 71.14 | 56.35 |
190
198
| [ConGen-SimCSE-IndoBERT Base](https://huggingface.co/LazarusNLP/congen-simcse-indobert-base) | 70.80 | 56.59 |
199
+ | [ConGen-Indo-e5 Small](https://huggingface.co/LazarusNLP/congen-indo-e5-small) | 70.51 | 55.67 |
191
200
| [SCT-IndoBERT Base](https://huggingface.co/LazarusNLP/sct-indobert-base) | 59.82 | 53.41 |
192
201
| [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2) | 58.48 | 50.50 |
193
202
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | **74.87** | **57.96** |
0 commit comments