This repository provides everything necessary to fine-tune the model, evaluate it, and export it to ONNX.
You can import the notebook into Google Colab, or any notebook environment with access to a GPU, to fine-tune the model yourself.
You can evaluate the model's performance on a dataset using the provided evaluate.py script.
pip install -r requirements.txt
python evaluate.py \
--model-type transformers \
--model-name gravitee-io/distilbert-multilingual-toxicity-classifier \
--dataset-name gravitee-io/textdetox-multilingual-toxicity-dataset \
--batch-size 16
You can export the model to ONNX, and optionally quantize it, using the provided convert_to_onnx.py script.
python convert_to_onnx.py \
--model-name gravitee-io/distilbert-multilingual-toxicity-classifier \
--export-onnx \
--export-directory /path/to/your/model/dir \
--quantize-onnx
You can refer to the Hugging Face model card for performance and usage details.
This model is licensed under OpenRAIL++.
@inproceedings{dementieva2024overview,
title={Overview of the Multilingual Text Detoxification Task at PAN 2024},
author={Dementieva, Daryna and Moskovskiy, Daniil and Babakov, Nikolay and Ayele, Abinew Ali and Rizwan, Naquee and Schneider, Florian and Wang, Xintong and Yimam, Seid Muhie and Ustalov, Dmitry and Stakovskii, Elisei and Smirnova, Alisa and Elnagar, Ashraf and Mukherjee, Animesh and Panchenko, Alexander},
booktitle={Working Notes of CLEF 2024 - Conference and Labs of the Evaluation Forum},
editor={Guglielmo Faggioli and Nicola Ferro and Petra Galu{\v{s}}{\v{c}}{\'a}kov{\'a} and Alba Garc{\'i}a Seco de Herrera},
year={2024},
organization={CEUR-WS.org}
}
@inproceedings{dementieva-etal-2024-toxicity,
title = "Toxicity Classification in {U}krainian",
author = "Dementieva, Daryna and Khylenko, Valeriia and Babakov, Nikolay and Groh, Georg",
booktitle = "Proceedings of the 8th Workshop on Online Abuse and Harms (WOAH 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.woah-1.19/",
doi = "10.18653/v1/2024.woah-1.19",
pages = "244--255"
}
@inproceedings{DBLP:conf/ecir/BevendorffCCDEFFKMMPPRRSSSTUWZ24,
author = {Janek Bevendorff and others},
title = {Overview of {PAN} 2024: Multi-author Writing Style Analysis, Multilingual Text Detoxification, Oppositional Thinking Analysis, and Generative {AI} Authorship Verification - Extended Abstract},
booktitle = {ECIR 2024, Glasgow, UK, March 24-28, 2024, Proceedings, Part {VI}},
series = {Lecture Notes in Computer Science},
volume = {14613},
pages = {3--10},
publisher = {Springer},
year = {2024},
doi = {10.1007/978-3-031-56072-9_1}
}