Skip to content

Commit bf8b547

Browse files
rpm000 and clefourrier authored
add translation literals indic (#1015)
Co-authored-by: Clémentine Fourrier <[email protected]>
1 parent 161d47c commit bf8b547

File tree

1 file changed

+90
-5
lines changed

1 file changed

+90
-5
lines changed

src/lighteval/tasks/templates/utils/translation_literals.py

Lines changed: 90 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,30 @@ def __getattribute__(self, name: str) -> str:
183183
indices=["А", "Б", "В", "Г", "Д", "Е"],
184184
),
185185
Language.BEMBA: TranslationLiterals(language=Language.BEMBA),
186-
Language.BENGALI: TranslationLiterals(language=Language.BENGALI, question_word="প্রশ্ন"),
186+
Language.BENGALI: TranslationLiterals(
187+
language=Language.BENGALI,
188+
question_word="প্রশ্ন",
189+
answer="উত্তর",
190+
confirmation_word="তাই না",
191+
yes="হ্যাঁ",
192+
no="না",
193+
also="সাথে",
194+
cause_word="কারণ",
195+
effect_word="অতএব",
196+
true="সত্য",
197+
false="মিথ্যা",
198+
neither="ন তাই, ন তাই না",
199+
full_stop="।",
200+
comma=",",
201+
question_mark="?",
202+
exclamation_mark="!",
203+
word_space=" ",
204+
sentence_space=" ",
205+
colon=":",
206+
indices=["ক", "খ", "গ", "ঘ", "ঙ", "চ"],
207+
or_word="বা",
208+
and_word="এবং",
209+
),
187210
Language.BHOJPURI: TranslationLiterals(language=Language.BHOJPURI),
188211
Language.BIHARI: TranslationLiterals(language=Language.BIHARI), # Deprecated
189212
Language.BOSNIAN: TranslationLiterals(language=Language.BOSNIAN),
@@ -544,7 +567,30 @@ def __getattribute__(self, name: str) -> str:
544567
semicolon="·",
545568
),
546569
Language.GUARANI: TranslationLiterals(language=Language.GUARANI),
547-
Language.GUJARATI: TranslationLiterals(language=Language.GUJARATI),
570+
Language.GUJARATI: TranslationLiterals(
571+
language=Language.GUJARATI,
572+
question_word="પ્રશ્ન",
573+
answer="જવાબ",
574+
confirmation_word="ખરું ને",
575+
yes="હા",
576+
no="ના",
577+
also="અને",
578+
cause_word="કારણ કે",
579+
effect_word="તેથી",
580+
true="સાચું",
581+
false="ખોટું",
582+
neither="ન તો આ, ન તે",
583+
or_word="અથવા",
584+
and_word="અને",
585+
full_stop="।",
586+
comma=",",
587+
question_mark="?",
588+
exclamation_mark="!",
589+
word_space=" ",
590+
sentence_space=" ",
591+
colon=":",
592+
indices=["અ", "આ", "ઇ", "ઈ", "ઉ", "ઊ"],
593+
),
548594
Language.HAITIAN: TranslationLiterals(
549595
# From https://github.com/EleutherAI/lm-evaluation-harness/blob/0845b588303f1f59af98dd1c5bdbd78a9e75a1e2/lm_eval/tasks/xcopa/utils.py
550596
language=Language.HAITIAN,
@@ -859,7 +905,29 @@ def __getattribute__(self, name: str) -> str:
859905
colon=":",
860906
semicolon=";",
861907
),
862-
Language.PUNJABI: TranslationLiterals(language=Language.PUNJABI),
908+
Language.PUNJABI: TranslationLiterals(
909+
language=Language.PUNJABI,
910+
question_word="ਸਵਾਲ",
911+
answer="ਜਵਾਬ",
912+
confirmation_word="ਹਾਂ ਨਾ",
913+
yes="ਹਾਂ",
914+
no="ਨਹੀਂ",
915+
also="ਨਾਲ ਹੀ",
916+
cause_word="ਕਿਉਂਕਿ",
917+
effect_word="ਇਸ ਲਈ",
918+
true="ਸੱਚ",
919+
false="ਝੂਠ",
920+
neither="ਨਾ ਤਾਂ, ਨਾ",
921+
full_stop="।",
922+
comma=",",
923+
question_mark="?",
924+
exclamation_mark="!",
925+
word_space=" ",
926+
sentence_space=" ",
927+
colon=":",
928+
or_word="ਜਾਂ",
929+
and_word="ਅਤੇ",
930+
),
863931
Language.QUECHUA: TranslationLiterals(
864932
# From https://github.com/EleutherAI/lm-evaluation-harness/blob/0845b588303f1f59af98dd1c5bdbd78a9e75a1e2/lm_eval/tasks/xcopa/utils.py
865933
language=Language.QUECHUA,
@@ -1102,10 +1170,27 @@ def __getattribute__(self, name: str) -> str:
11021170
Language.TAJIK: TranslationLiterals(language=Language.TAJIK),
11031171
Language.TAMASHEQ: TranslationLiterals(language=Language.TAMASHEQ),
11041172
Language.TAMIL: TranslationLiterals(
1105-
# From https://github.com/EleutherAI/lm-evaluation-harness/blob/0845b588303f1f59af98dd1c5bdbd78a9e75a1e2/lm_eval/tasks/xcopa/utils.py
11061173
language=Language.TAMIL,
1107-
cause_word="காரணமாக",
1174+
question_word="கேள்வி",
1175+
answer="பதில்",
1176+
confirmation_word="இல்லையா",
1177+
yes="ஆம்",
1178+
no="இல்லை",
1179+
also="மேலும்",
1180+
cause_word="ஏனெனில்",
11081181
effect_word="எனவே",
1182+
true="உண்மை",
1183+
false="பொய்",
1184+
neither="இல்லை, இல்லை",
1185+
full_stop="।",
1186+
comma=",",
1187+
question_mark="?",
1188+
exclamation_mark="!",
1189+
word_space=" ",
1190+
sentence_space=" ",
1191+
colon=":",
1192+
or_word="அல்லது",
1193+
and_word="மற்றும்",
11091194
),
11101195
Language.TATAR: TranslationLiterals(
11111196
language=Language.TATAR,

0 commit comments

Comments (0)