Skip to content

Commit 824a91e

Browse files
authored
Merge pull request #94 from hmlendea/tajiki
Created a transliterator for `Tajik`
2 parents 817ed00 + 67e676f commit 824a91e

File tree

3 files changed

+86
-0
lines changed

3 files changed

+86
-0
lines changed

TransliterationAPI.UnitTests/Service/Transliterators/CyrillicTransliteratorTests.cs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,44 @@ public void GivenATextInSerbianCyrillicScript_WhenTransliteratingIntoLatin_ThenT
451451
}
452452
}
453453

454+
/// <summary>
/// Verifies that Tajik names written in Cyrillic script are transliterated into the
/// expected Latin spelling when <see cref="Language.Tajik"/> is requested.
/// </summary>
/// <param name="tajikText">The Tajik text, written in Cyrillic script.</param>
/// <param name="expectedTransliteratedText">The Latin text the transliterator is expected to return.</param>
/// <remarks>
/// All inputs must be typed purely in Cyrillic. Latin look-alike letters (e, o, a) are
/// visually indistinguishable from their Cyrillic counterparts, but they would appear in the
/// expected output unchanged, so a case containing them would not actually exercise the
/// transliteration of those letters.
/// </remarks>
[Test]
[TestCase("Балх", "Balx")]
[TestCase("Бобоҷон Ғафуров", "Boboçon Ğafurov")]
[TestCase("Бохтар", "Boxtar")]
[TestCase("Бустон", "Buston")]
[TestCase("Ваҳдат", "Vahdat")]
[TestCase("Гулистон", "Guliston")]
[TestCase("Данғара", "Danğara")]
[TestCase("Дустӣ", "Dustī")]
[TestCase("Душанбе", "Dușanbe")]
[TestCase("Ёвон", "Jovon")]
[TestCase("Зафаробод", "Zafarobod")]
[TestCase("Истаравшан", "Istaravșan")]
[TestCase("Истиқлол", "Istiqlol")]
[TestCase("Исфара", "Isfara")]
[TestCase("Конибодом", "Konibodom")]
[TestCase("Кӯлоб", "Kūlob")]
[TestCase("Левакант", "Levakant")]
[TestCase("Маскав", "Maskav")]
[TestCase("Мирзо Турсунзода", "Mirzo Tursunzoda")]
[TestCase("Норак", "Norak")]
[TestCase("Навкат", "Navkat")]
[TestCase("Панҷакент", "Pançakent")]
[TestCase("Сомониён", "Somonijon")]
[TestCase("Тоҷикистон", "Toçikiston")]
[TestCase("Турсунзода", "Tursunzoda")]
[TestCase("Фархор", "Farxor")]
[TestCase("Хоруғ", "Xoruğ")]
[TestCase("Хуҷанд", "Xuçand")]
[TestCase("Ҳисор", "Hisor")]
[TestCase("Ҳулбук", "Hulbuk")]
[TestCase("Шайдон", "Șajdon")]
[TestCase("Эрон", "Eron")]
public void GivenATextInTajikCyrillicScript_WhenTransliteratingIntoLatin_ThenTheCorrectTextIsReturned(
    string tajikText,
    string expectedTransliteratedText)
    => Assert.That(transliterator.Transliterate(tajikText, Language.Tajik), Is.EqualTo(expectedTransliteratedText));
491+
454492
[Test]
455493
[TestCase("Алчевськ", "Alchevsk")]
456494
[TestCase("Бердянськ", "Berdiansk")]

TransliterationAPI/Service/Entities/Language.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ public sealed class Language : IEquatable<Language>
5151
public static Language SerboCroatian => new Language("sh", "Serbo-Croatian", nameof(CyrillicTransliterator));
5252
public static Language SimplifiedChinese => new Language("zh-hans", "Simplified Chinese", nameof(PinyinTransliterator));
5353
public static Language Sinhala => new Language("si", nameof(Sinhala), nameof(UshuaiaTransliterator));
54+
/// <summary>
/// The Tajik language entry, created with the code "tg" and the name "Tajik".
/// The third argument is the name of <see cref="CyrillicTransliterator"/> — presumably the
/// transliterator used for this language; confirm against the <see cref="Language"/> constructor.
/// </summary>
public static Language Tajik => new Language("tg", nameof(Tajik), nameof(CyrillicTransliterator));
5455
public static Language Tamil => new Language("ta", nameof(Tamil), nameof(UshuaiaTransliterator));
5556
public static Language Telugu => new Language("te", nameof(Telugu), nameof(UshuaiaTransliterator));
5657
public static Language Udmurt => new Language("udm", nameof(Udmurt), nameof(TranslitterationDotComTransliterator));

TransliterationAPI/Service/Transliterators/CyrillicTransliterator.cs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ public class CyrillicTransliterator : ITransliterator
1818
Dictionary<string, string> russianTransliterationTable;
1919
Dictionary<string, string> macedonianTransliterationTable;
2020
Dictionary<string, string> serbianTransliterationTable;
21+
Dictionary<string, string> tajikTransliterationTable;
2122
Dictionary<string, string> ukrainianTransliterationTable;
2223

2324
public CyrillicTransliterator()
@@ -326,6 +327,43 @@ public CyrillicTransliterator()
326327
{ "я", "ja" },
327328
};
328329

330+
tajikTransliterationTable = new Dictionary<string, string>
331+
{
332+
{ "[Ъъ]", "'" },
333+
334+
// Uppercase letters
335+
{ "Ғ", "Ğ" },
336+
{ "Ё", "Jo" },
337+
{ "Ж", "Ƶ" },
338+
{ "Ӣ", "Ī" },
339+
{ "Й", "J" },
340+
{ "Қ", "Q" },
341+
{ "Ӯ", "Ū" },
342+
{ "Х", "X" },
343+
{ "Ҳ", "H" },
344+
{ "Ч", "C" },
345+
{ "Ҷ", "Ç" },
346+
{ "Ш", "Ș" },
347+
{ "Ю", "Ju" },
348+
{ "Я", "Ja" },
349+
350+
// Lowercase letters
351+
{ "ғ", "ğ" },
352+
{ "ё", "jo" },
353+
{ "ж", "ƶ" },
354+
{ "ӣ", "ī" },
355+
{ "й", "j" },
356+
{ "қ", "q" },
357+
{ "ӯ", "ū" },
358+
{ "х", "x" },
359+
{ "ҳ", "h" },
360+
{ "ч", "c" },
361+
{ "ҷ", "ç" },
362+
{ "ш", "ș" },
363+
{ "ю", "ju" },
364+
{ "я", "ja" },
365+
};
366+
329367
ukrainianTransliterationTable = new Dictionary<string, string>
330368
{
331369
{ @"ія\b", "ia" },
@@ -391,6 +429,11 @@ public CyrillicTransliterator()
391429
serbianTransliterationTable.Add(characterTransliteration.Key, characterTransliteration.Value);
392430
}
393431

432+
if (!tajikTransliterationTable.ContainsKey(characterTransliteration.Key))
433+
{
434+
tajikTransliterationTable.Add(characterTransliteration.Key, characterTransliteration.Value);
435+
}
436+
394437
if (!ukrainianTransliterationTable.ContainsKey(characterTransliteration.Key))
395438
{
396439
ukrainianTransliterationTable.Add(characterTransliteration.Key, characterTransliteration.Value);
@@ -440,6 +483,10 @@ public string Transliterate(string text, Language language)
440483
{
441484
transliterationTable = serbianTransliterationTable;
442485
}
486+
else if (language.Equals(Language.Tajik))
487+
{
488+
transliterationTable = tajikTransliterationTable;
489+
}
443490
else if (language.Equals(Language.Ukrainian))
444491
{
445492
transliterationTable = ukrainianTransliterationTable;

0 commit comments

Comments
 (0)