@@ -58,6 +58,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
58
58
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 125M | [ IndoBERT Base] ( https://huggingface.co/indobenchmark/indobert-base-p1 ) | [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | [ Wikipedia] ( https://huggingface.co/datasets/LazarusNLP/wikipedia_id_20230520 ) | |
59
59
| [ S-IndoBERT Base mMARCO] ( https://huggingface.co/LazarusNLP/s-indobert-base-mmarco ) | 125M | [ IndoBERT Base] ( https://huggingface.co/indobenchmark/indobert-base-p1 ) | N/A | [ mMARCO] ( https://huggingface.co/datasets/unicamp-dl/mmarco ) | ✅ |
60
60
| [ all-IndoBERT Base] ( https://huggingface.co/LazarusNLP/all-indobert-base ) | 125M | [ IndoBERT Base] ( https://huggingface.co/indobenchmark/indobert-base-p1 ) | N/A | See: [ 🤗] ( https://huggingface.co/LazarusNLP/all-indobert-base ) | ✅ |
61
+ | [ all-IndoBERT Base p2] ( https://huggingface.co/LazarusNLP/all-indobert-base-p2 ) | 125M | [ IndoBERT Base p2] ( https://huggingface.co/indobenchmark/indobert-base-p2 ) | N/A | See: [ 🤗] ( https://huggingface.co/LazarusNLP/all-indobert-base-p2 ) | ✅ |
61
62
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 134M | [ DistilBERT Base Multilingual] ( https://huggingface.co/distilbert-base-multilingual-cased ) | mUSE | See: [ SBERT] ( https://www.sbert.net/docs/pretrained_models.html#model-overview ) | ✅ |
62
63
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 125M | [ XLM-RoBERTa Base] ( https://huggingface.co/xlm-roberta-base ) | [ paraphrase-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2 ) | See: [ SBERT] ( https://www.sbert.net/docs/pretrained_models.html#model-overview ) | ✅ |
63
64
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 118M | [ Multilingual-MiniLM-L12-H384] ( https://huggingface.co/microsoft/Multilingual-MiniLM-L12-H384 ) | See: [ arXiv] ( https://arxiv.org/abs/2212.03533 ) | See: [ 🤗] ( https://huggingface.co/intfloat/multilingual-e5-small ) | ✅ |
@@ -82,6 +83,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
82
83
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 74.56 |
83
84
| [ S-IndoBERT Base mMARCO] ( https://huggingface.co/LazarusNLP/s-indobert-base-mmarco ) | 72.95 |
84
85
| [ all-IndoBERT Base] ( https://huggingface.co/LazarusNLP/all-indobert-base ) | 73.84 |
86
+ | [ all-IndoBERT Base p2] ( https://huggingface.co/LazarusNLP/all-indobert-base-p2 ) | 73.45 |
85
87
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 75.08 |
86
88
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | **83.83** |
87
89
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 78.89 |
@@ -101,6 +103,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
101
103
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 55.00 | 66.74 | 58.95 |
102
104
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 40.41 | 47.29 | 40.68 |
103
105
| [ all-IndoBERT Base] ( https://huggingface.co/LazarusNLP/all-indobert-base ) | 65.52 | 75.92 | 70.13 |
106
+ | [ all-IndoBERT Base p2] ( https://huggingface.co/LazarusNLP/all-indobert-base-p2 ) | 60.62 | 71.95 | 66.31 |
104
107
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 41.35 | 54.93 | 48.79 |
105
108
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 52.81 | 65.07 | 57.97 |
106
109
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 70.20 | 79.61 | 74.80 |
@@ -118,6 +121,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
118
121
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 84.60 | 89.30 | 91.27 |
119
122
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 76.81 | 83.16 | 85.87 |
120
123
| [ all-IndoBERT Base] ( https://huggingface.co/LazarusNLP/all-indobert-base ) | 88.14 | 91.47 | 92.91 |
124
+ | [ all-IndoBERT Base p2] ( https://huggingface.co/LazarusNLP/all-indobert-base-p2 ) | 87.78 | 91.14 | 92.58 |
121
125
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 70.44 | 77.94 | 81.56 |
122
126
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 81.41 | 87.05 | 89.44 |
123
127
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 91.50 | 94.34 | 95.39 |
@@ -137,6 +141,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
137
141
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 62.92 | 60.18 |
138
142
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 55.66 | 54.48 |
139
143
| [ all-IndoBERT Base] ( https://huggingface.co/LazarusNLP/all-indobert-base ) | 58.40 | 57.21 |
144
+ | [ all-IndoBERT Base p2] ( https://huggingface.co/LazarusNLP/all-indobert-base-p2 ) | 60.36 | 59.29 |
140
145
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 55.99 | 52.44 |
141
146
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 65.43 | 63.55 |
142
147
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 64.16 | 61.33 |
@@ -154,6 +159,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
154
159
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 66.92 | 66.29 |
155
160
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 61.89 | 60.97 |
156
161
| [ all-IndoBERT Base] ( https://huggingface.co/LazarusNLP/all-indobert-base ) | 66.37 | 66.31 |
162
+ | [ all-IndoBERT Base p2] ( https://huggingface.co/LazarusNLP/all-indobert-base-p2 ) | 68.90 | 68.88 |
157
163
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 65.25 | 63.45 |
158
164
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 70.72 | 70.58 |
159
165
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 67.92 | 67.23 |
@@ -171,6 +177,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
171
177
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 60.00 | 60.52 |
172
178
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 61.13 | 61.70 |
173
179
| [ all-IndoBERT Base] ( https://huggingface.co/LazarusNLP/all-indobert-base ) | 57.27 | 57.47 |
180
+ | [ all-IndoBERT Base p2] ( https://huggingface.co/LazarusNLP/all-indobert-base-p2 ) | 57.04 | 57.14 |
174
181
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 63.63 | 64.13 |
175
182
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 63.18 | 63.78 |
176
183
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 64.54 | 65.04 |
@@ -188,6 +195,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
188
195
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 84.2 | 80.21 |
189
196
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 82.0 | 76.92 |
190
197
| [ all-IndoBERT Base] ( https://huggingface.co/LazarusNLP/all-indobert-base ) | 84.4 | 79.79 |
198
+ | [ all-IndoBERT Base p2] ( https://huggingface.co/LazarusNLP/all-indobert-base-p2 ) | 84.8 | 80.03 |
191
199
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 78.8 | 73.64 |
192
200
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | 89.6 | **86.56** |
193
201
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 83.6 | 79.51 |
@@ -207,6 +215,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
207
215
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 70.51 | 55.67 |
208
216
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 59.82 | 53.41 |
209
217
| [ all-IndoBERT Base] ( https://huggingface.co/LazarusNLP/all-indobert-base ) | 72.01 | 56.79 |
218
+ | [ all-IndoBERT Base p2] ( https://huggingface.co/LazarusNLP/all-indobert-base-p2 ) | 69.32 | 54.76 |
210
219
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 58.48 | 50.50 |
211
220
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | **74.87** | **57.96** |
212
221
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 63.97 | 51.85 |
0 commit comments