Commit f83ada2

Minor Doc Fixes
1 parent 6321d9f commit f83ada2

2 files changed: +2 additions, -132 deletions


README.md

Lines changed: 1 addition & 66 deletions
````diff
@@ -44,7 +44,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S

 [SCT: An Efficient Self-Supervised Cross-View Training For Sentence Embedding](https://github.com/mrpeerat/SCT) is another unsupervised technique to train a sentence embedding model. It is very similar to ConGen in its knowledge distillation methodology, but also supports a self-supervised training procedure without a teacher model. The original paper proposes back-translation as its data augmentation technique, but we implemented single-word deletion and found it to perform better than our back-translated corpus. We used the [official SCT implementation](https://github.com/mrpeerat/SCT), which was written on top of the Sentence Transformers library.

-## Models
+## Pretrained Models

 | Model | #params | Base/Student Model | Teacher Model | Train Dataset | Supervised |
 | --------------------------------------------------------------------------------------------------------------------------- | :-----: | --------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------ | :--------: |
@@ -234,71 +234,6 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
 | [multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) | 60.25 | 50.91 |
 | [multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) | 61.39 | 51.62 |

-## References
-
-```bibtex
-@misc{Thai-Sentence-Vector-Benchmark-2022,
-  author       = {Limkonchotiwat, Peerat},
-  title        = {Thai-Sentence-Vector-Benchmark},
-  year         = {2022},
-  publisher    = {GitHub},
-  journal      = {GitHub repository},
-  howpublished = {\url{https://github.com/mrpeerat/Thai-Sentence-Vector-Benchmark}}
-}
-```
-
-```bibtex
-@inproceedings{reimers-2019-sentence-bert,
-  title     = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
-  author    = "Reimers, Nils and Gurevych, Iryna",
-  booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
-  month     = "11",
-  year      = "2019",
-  publisher = "Association for Computational Linguistics",
-  url       = "https://arxiv.org/abs/1908.10084",
-}
-```
-
-```bibtex
-@inproceedings{gao2021simcse,
-  title     = {{SimCSE}: Simple Contrastive Learning of Sentence Embeddings},
-  author    = {Gao, Tianyu and Yao, Xingcheng and Chen, Danqi},
-  booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
-  year      = {2021}
-}
-```
-
-```bibtex
-@inproceedings{limkonchotiwat-etal-2022-congen,
-  title     = "{ConGen}: Unsupervised Control and Generalization Distillation For Sentence Representation",
-  author    = "Limkonchotiwat, Peerat and
-    Ponwitayarat, Wuttikorn and
-    Lowphansirikul, Lalita and
-    Udomcharoenchaikit, Can and
-    Chuangsuwanich, Ekapol and
-    Nutanong, Sarana",
-  booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
-  year      = "2022",
-  publisher = "Association for Computational Linguistics",
-}
-```
-
-```bibtex
-@article{10.1162/tacl_a_00620,
-  author  = {Limkonchotiwat, Peerat and Ponwitayarat, Wuttikorn and Lowphansirikul, Lalita and Udomcharoenchaikit, Can and Chuangsuwanich, Ekapol and Nutanong, Sarana},
-  title   = "{An Efficient Self-Supervised Cross-View Training For Sentence Embedding}",
-  journal = {Transactions of the Association for Computational Linguistics},
-  volume  = {11},
-  pages   = {1572-1587},
-  year    = {2023},
-  month   = {12},
-  issn    = {2307-387X},
-  doi     = {10.1162/tacl_a_00620},
-  url     = {https://doi.org/10.1162/tacl\_a\_00620},
-  eprint  = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00620/2196817/tacl\_a\_00620.pdf},
-}
-```
-
 ## Credits

 Indonesian Sentence Embeddings is developed with love by:
````
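The SCT paragraph quoted in the diff above mentions single-word deletion as the data augmentation that outperformed the back-translated corpus. As an illustration only (this commit changes documentation, not code), a minimal sketch of such an augmentation could look like the following; the function name and example sentence are hypothetical and not taken from the repository:

```python
import random


def single_word_deletion(sentence: str) -> str:
    """Return a copy of `sentence` with one randomly chosen word removed."""
    words = sentence.split()
    if len(words) <= 1:
        # Too short to augment; return the sentence unchanged.
        return sentence
    drop = random.randrange(len(words))
    return " ".join(word for i, word in enumerate(words) if i != drop)


if __name__ == "__main__":
    random.seed(0)
    kalimat = "model ini memetakan sebuah kalimat menjadi vektor padat"
    print(single_word_deletion(kalimat))  # one word dropped at random
```

In a ConGen/SCT-style distillation setup, such a perturbed copy would typically serve as the alternative "view" of the original sentence during training.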

docs/index.md

Lines changed: 1 addition & 66 deletions
````diff
@@ -44,7 +44,7 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S

 [SCT: An Efficient Self-Supervised Cross-View Training For Sentence Embedding](https://github.com/mrpeerat/SCT) is another unsupervised technique to train a sentence embedding model. It is very similar to ConGen in its knowledge distillation methodology, but also supports a self-supervised training procedure without a teacher model. The original paper proposes back-translation as its data augmentation technique, but we implemented single-word deletion and found it to perform better than our back-translated corpus. We used the [official SCT implementation](https://github.com/mrpeerat/SCT), which was written on top of the Sentence Transformers library.

-## Models
+## Pretrained Models

 | Model | #params | Base/Student Model | Teacher Model | Train Dataset | Supervised |
 | --------------------------------------------------------------------------------------------------------------------------- | :-----: | --------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------ | :--------: |
@@ -234,71 +234,6 @@ Like SimCSE, [ConGen: Unsupervised Control and Generalization Distillation For S
 | [multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) | 60.25 | 50.91 |
 | [multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) | 61.39 | 51.62 |

-## References
-
-```bibtex
-@misc{Thai-Sentence-Vector-Benchmark-2022,
-  author       = {Limkonchotiwat, Peerat},
-  title        = {Thai-Sentence-Vector-Benchmark},
-  year         = {2022},
-  publisher    = {GitHub},
-  journal      = {GitHub repository},
-  howpublished = {\url{https://github.com/mrpeerat/Thai-Sentence-Vector-Benchmark}}
-}
-```
-
-```bibtex
-@inproceedings{reimers-2019-sentence-bert,
-  title     = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
-  author    = "Reimers, Nils and Gurevych, Iryna",
-  booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
-  month     = "11",
-  year      = "2019",
-  publisher = "Association for Computational Linguistics",
-  url       = "https://arxiv.org/abs/1908.10084",
-}
-```
-
-```bibtex
-@inproceedings{gao2021simcse,
-  title     = {{SimCSE}: Simple Contrastive Learning of Sentence Embeddings},
-  author    = {Gao, Tianyu and Yao, Xingcheng and Chen, Danqi},
-  booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
-  year      = {2021}
-}
-```
-
-```bibtex
-@inproceedings{limkonchotiwat-etal-2022-congen,
-  title     = "{ConGen}: Unsupervised Control and Generalization Distillation For Sentence Representation",
-  author    = "Limkonchotiwat, Peerat and
-    Ponwitayarat, Wuttikorn and
-    Lowphansirikul, Lalita and
-    Udomcharoenchaikit, Can and
-    Chuangsuwanich, Ekapol and
-    Nutanong, Sarana",
-  booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
-  year      = "2022",
-  publisher = "Association for Computational Linguistics",
-}
-```
-
-```bibtex
-@article{10.1162/tacl_a_00620,
-  author  = {Limkonchotiwat, Peerat and Ponwitayarat, Wuttikorn and Lowphansirikul, Lalita and Udomcharoenchaikit, Can and Chuangsuwanich, Ekapol and Nutanong, Sarana},
-  title   = "{An Efficient Self-Supervised Cross-View Training For Sentence Embedding}",
-  journal = {Transactions of the Association for Computational Linguistics},
-  volume  = {11},
-  pages   = {1572-1587},
-  year    = {2023},
-  month   = {12},
-  issn    = {2307-387X},
-  doi     = {10.1162/tacl_a_00620},
-  url     = {https://doi.org/10.1162/tacl\_a\_00620},
-  eprint  = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00620/2196817/tacl\_a\_00620.pdf},
-}
-```
-
 ## Credits

 Indonesian Sentence Embeddings is developed with love by:
````
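The evaluation rows visible in the diff reference Hugging Face checkpoints such as intfloat/multilingual-e5-base, and the documentation already builds on the Sentence Transformers library. As a usage sketch (not part of this commit), the snippet below loads one of the tabulated models; the Indonesian example sentences are made up, and the "query:"/"passage:" prefixes follow the multilingual-e5 model card rather than anything stated in the diff:

```python
# pip install sentence-transformers
from sentence_transformers import SentenceTransformer, util

# Checkpoint name taken from the evaluation table; the E5 family expects
# "query: " / "passage: " prefixes according to its model card.
model = SentenceTransformer("intfloat/multilingual-e5-base")

sentences = [
    "query: cara membuat nasi goreng",  # "how to make fried rice"
    "passage: Nasi goreng dibuat dengan menumis nasi, bawang, dan kecap manis.",
    "passage: Kereta api berangkat dari stasiun setiap pagi.",
]

# L2-normalized embeddings, so the dot product equals cosine similarity.
embeddings = model.encode(sentences, normalize_embeddings=True)

# Similarity between the query and each of the two passages.
print(util.cos_sim(embeddings[0], embeddings[1:]))
```

Other checkpoints from the table can be swapped in by changing the model name; whether an input prefix is needed depends on the individual model card.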
