@@ -57,6 +57,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
57
57
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 118M | [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | [ Wikipedia] ( https://huggingface.co/datasets/LazarusNLP/wikipedia_id_20230520 ) | |
58
58
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 125M | [ IndoBERT Base] ( https://huggingface.co/indobenchmark/indobert-base-p1 ) | [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | [ Wikipedia] ( https://huggingface.co/datasets/LazarusNLP/wikipedia_id_20230520 ) | |
59
59
| [ S-IndoBERT Base mMARCO] ( https://huggingface.co/LazarusNLP/s-indobert-base-mmarco ) | 125M | [ IndoBERT Base] ( https://huggingface.co/indobenchmark/indobert-base-p1 ) | N/A | [ mMARCO] ( https://huggingface.co/datasets/unicamp-dl/mmarco ) | ✅ |
60
+ | [all-IndoBERT Base](https://huggingface.co/LazarusNLP/all-indobert-base) | 125M | [IndoBERT Base](https://huggingface.co/indobenchmark/indobert-base-p1) | N/A | See: [🤗](https://huggingface.co/LazarusNLP/all-indobert-base) | ✅ |
60
61
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 134M | [ DistilBERT Base Multilingual] ( https://huggingface.co/distilbert-base-multilingual-cased ) | mUSE | See: [ SBERT] ( https://www.sbert.net/docs/pretrained_models.html#model-overview ) | ✅ |
61
62
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 125M | [ XLM-RoBERTa Base] ( https://huggingface.co/xlm-roberta-base ) | [ paraphrase-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2 ) | See: [ SBERT] ( https://www.sbert.net/docs/pretrained_models.html#model-overview ) | ✅ |
62
63
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 118M | [ Multilingual-MiniLM-L12-H384] ( https://huggingface.co/microsoft/Multilingual-MiniLM-L12-H384 ) | See: [ arXiv] ( https://arxiv.org/abs/2212.03533 ) | See: [ 🤗] ( https://huggingface.co/intfloat/multilingual-e5-small ) | ✅ |
@@ -80,6 +81,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
80
81
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 80.94 |
81
82
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 74.56 |
82
83
| [ S-IndoBERT Base mMARCO] ( https://huggingface.co/LazarusNLP/s-indobert-base-mmarco ) | 72.95 |
84
+ | [all-IndoBERT Base](https://huggingface.co/LazarusNLP/all-indobert-base) | 73.84 |
83
85
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 75.08 |
84
86
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | **83.83** |
85
87
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 78.89 |
@@ -98,6 +100,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
98
100
| [ ConGen-SimCSE-IndoBERT Base] ( https://huggingface.co/LazarusNLP/congen-simcse-indobert-base ) | 45.83 | 58.27 | 49.91 |
99
101
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 55.00 | 66.74 | 58.95 |
100
102
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 40.41 | 47.29 | 40.68 |
103
+ | [all-IndoBERT Base](https://huggingface.co/LazarusNLP/all-indobert-base) | 65.52 | 75.92 | 70.13 |
101
104
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 41.35 | 54.93 | 48.79 |
102
105
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 52.81 | 65.07 | 57.97 |
103
106
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 70.20 | 79.61 | 74.80 |
@@ -114,6 +117,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
114
117
| [ ConGen-SimCSE-IndoBERT Base] ( https://huggingface.co/LazarusNLP/congen-simcse-indobert-base ) | 72.38 | 79.37 | 82.51 |
115
118
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 84.60 | 89.30 | 91.27 |
116
119
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 76.81 | 83.16 | 85.87 |
120
+ | [all-IndoBERT Base](https://huggingface.co/LazarusNLP/all-indobert-base) | 88.14 | 91.47 | 92.91 |
117
121
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 70.44 | 77.94 | 81.56 |
118
122
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 81.41 | 87.05 | 89.44 |
119
123
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 91.50 | 94.34 | 95.39 |
@@ -132,6 +136,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
132
136
| [ ConGen-SimCSE-IndoBERT Base] ( https://huggingface.co/LazarusNLP/congen-simcse-indobert-base ) | 60.93 | 59.50 |
133
137
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 62.92 | 60.18 |
134
138
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 55.66 | 54.48 |
139
+ | [all-IndoBERT Base](https://huggingface.co/LazarusNLP/all-indobert-base) | 58.40 | 57.21 |
135
140
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 55.99 | 52.44 |
136
141
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 65.43 | 63.55 |
137
142
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 64.16 | 61.33 |
@@ -148,6 +153,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
148
153
| [ ConGen-SimCSE-IndoBERT Base] ( https://huggingface.co/LazarusNLP/congen-simcse-indobert-base ) | 67.12 | 66.64 |
149
154
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 66.92 | 66.29 |
150
155
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 61.89 | 60.97 |
156
+ | [all-IndoBERT Base](https://huggingface.co/LazarusNLP/all-indobert-base) | 66.37 | 66.31 |
151
157
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 65.25 | 63.45 |
152
158
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 70.72 | 70.58 |
153
159
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 67.92 | 67.23 |
@@ -164,6 +170,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
164
170
| [ ConGen-SimCSE-IndoBERT Base] ( https://huggingface.co/LazarusNLP/congen-simcse-indobert-base ) | 59.54 | 60.37 |
165
171
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 60.00 | 60.52 |
166
172
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 61.13 | 61.70 |
173
+ | [all-IndoBERT Base](https://huggingface.co/LazarusNLP/all-indobert-base) | 57.27 | 57.47 |
167
174
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 63.63 | 64.13 |
168
175
| [ paraphrase-multilingual-mpnet-base-v2] ( https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2 ) | 63.18 | 63.78 |
169
176
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 64.54 | 65.04 |
@@ -180,6 +187,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
180
187
| [ ConGen-SimCSE-IndoBERT Base] ( https://huggingface.co/LazarusNLP/congen-simcse-indobert-base ) | 83.0 | 78.74 |
181
188
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 84.2 | 80.21 |
182
189
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 82.0 | 76.92 |
190
+ | [all-IndoBERT Base](https://huggingface.co/LazarusNLP/all-indobert-base) | 84.4 | 79.79 |
183
191
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 78.8 | 73.64 |
184
192
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | 89.6 | **86.56** |
185
193
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 83.6 | 79.51 |
@@ -198,6 +206,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
198
206
| [ ConGen-SimCSE-IndoBERT Base] ( https://huggingface.co/LazarusNLP/congen-simcse-indobert-base ) | 70.80 | 56.59 |
199
207
| [ ConGen-Indo-e5 Small] ( https://huggingface.co/LazarusNLP/congen-indo-e5-small ) | 70.51 | 55.67 |
200
208
| [ SCT-IndoBERT Base] ( https://huggingface.co/LazarusNLP/sct-indobert-base ) | 59.82 | 53.41 |
209
+ | [all-IndoBERT Base](https://huggingface.co/LazarusNLP/all-indobert-base) | 72.01 | 56.79 |
201
210
| [ distiluse-base-multilingual-cased-v2] ( https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2 ) | 58.48 | 50.50 |
202
211
| [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) | **74.87** | **57.96** |
203
212
| [ multilingual-e5-small] ( https://huggingface.co/intfloat/multilingual-e5-small ) | 63.97 | 51.85 |
0 commit comments