@@ -20,6 +20,7 @@ public class CyrillicTransliterator : ITransliterator
2020 readonly Dictionary < string , string > russianTransliterationTable ;
2121 readonly Dictionary < string , string > macedonianTransliterationTable ;
2222 readonly Dictionary < string , string > serbianTransliterationTable ;
23+ readonly Dictionary < string , string > tatarTransliterationTable ;
2324 readonly Dictionary < string , string > tajikTransliterationTable ;
2425 readonly Dictionary < string , string > ukrainianTransliterationTable ;
2526
@@ -747,6 +748,154 @@ public CyrillicTransliterator()
747748 { "я" , "ja" } ,
748749 } ;
749750
// Tatar Cyrillic-to-Latin mappings, keyed by source text with the Latin output as value.
// Keys containing groups, paired with "$1" in the value, look like regular-expression
// replacements; the code that applies this table is not visible here.
// NOTE(review): confirm that entries are applied as regexes and in declaration order.
751+ tatarTransliterationTable = new ( )
752+ {
753+ // Front vowels: [ÄäEeİiÖöÜüӘәЕеИиӨөҮү]
754+ // Back vowels: [AaIıOoUuАаЫыОоУу]
// Both classes list Latin as well as Cyrillic letters.
// In the keys below, [^ '\" -]* matches any run of characters other than space,
// apostrophe, double quote and hyphen, so a match never crosses those separators.
755+
756+ // Uppercase vowel harmony: the Latin output for Е, Г, К, Ю and Я depends on whether a
// front or a back vowel is matched alongside it. Е is matched only with the vowel directly
// before it; Г, К, Ю and Я are matched with a vowel earlier or later in the same word.
757+ { "([ÄäEeİiÖöÜüӘәЕеИиӨөҮү])Е" , "$1Ye" } ,
758+ { "([ÄäEeİiÖöÜüӘәЕеИиӨөҮү][^ '\" -]*)Г" , "$1G" } ,
759+ { "([ÄäEeİiÖöÜüӘәЕеИиӨөҮү][^ '\" -]*)К" , "$1K" } ,
760+ { "([ÄäEeİiÖöÜüӘәЕеИиӨөҮү][^ '\" -]*)Ю" , "$1Yü" } ,
761+ { "([ÄäEeİiÖöÜüӘәЕеИиӨөҮү][^ '\" -]*)Я" , "$1Yä" } ,
762+ { "([AaIıOoUuАаЫыОоУу])Е" , "$1Yı" } ,
763+ { "([AaIıOoUuАаЫыОоУу][^ '\" -]*)Г" , "$1Ğ" } ,
764+ { "([AaIıOoUuАаЫыОоУу][^ '\" -]*)К" , "$1Q" } ,
765+ { "([AaIıOoUuАаЫыОоУу][^ '\" -]*)Ю" , "$1Yu" } ,
766+ { "([AaIıOoUuАаЫыОоУу][^ '\" -]*)Я" , "$1Ya" } ,
767+ { "Г([^ '\" -]*[ÄäEeİiÖöÜüӘәЕеИиӨөҮү])" , "G$1" } ,
768+ { "Г([^ '\" -]*[AaIıOoUuАаЫыОоУу])" , "Ğ$1" } ,
769+ { "К([^ '\" -]*[ÄäEeİiÖöÜüӘәЕеИиӨөҮү])" , "K$1" } ,
770+ { "К([^ '\" -]*[AaIıOoUuАаЫыОоУу])" , "Q$1" } ,
771+ { "Ю([^ '\" -]*[ÄäEeİiÖöÜüӘәЕеИиӨөҮү])" , "Yü$1" } ,
772+ { "Ю([^ '\" -]*[AaIıOoUuАаЫыОоУу])" , "Yu$1" } ,
773+ { "Я([^ '\" -]*[ÄäEeİiÖöÜüӘәЕеИиӨөҮү])" , "Yä$1" } ,
774+ { "Я([^ '\" -]*[AaIıOoUuАаЫыОоУу])" , "Ya$1" } ,
775+
776+ // Lowercase vowel harmony: same rules as the uppercase group, for е, г, к, ю and я.
777+ { "([ÄäEeİiÖöÜüӘәЕеИиӨөҮү])е" , "$1ye" } ,
778+ { "([ÄäEeİiÖöÜüӘәЕеИиӨөҮү][^ '\" -]*)г" , "$1g" } ,
779+ { "([ÄäEeİiÖöÜüӘәЕеИиӨөҮү][^ '\" -]*)к" , "$1k" } ,
780+ { "([ÄäEeİiÖöÜüӘәЕеИиӨөҮү][^ '\" -]*)ю" , "$1yü" } ,
781+ { "([ÄäEeİiÖöÜüӘәЕеИиӨөҮү][^ '\" -]*)я" , "$1yä" } ,
782+ { "([AaIıOoUuАаЫыОоУу])е" , "$1yı" } ,
783+ { "([AaIıOoUuАаЫыОоУу][^ '\" -]*)г" , "$1ğ" } ,
784+ { "([AaIıOoUuАаЫыОоУу][^ '\" -]*)к" , "$1q" } ,
785+ { "([AaIıOoUuАаЫыОоУу][^ '\" -]*)ю" , "$1yu" } ,
786+ { "([AaIıOoUuАаЫыОоУу][^ '\" -]*)я" , "$1ya" } ,
787+ { "г([^ '\" -]*[ÄäEeİiÖöÜüӘәЕеИиӨөҮү])" , "g$1" } ,
788+ { "г([^ '\" -]*[AaIıOoUuАаЫыОоУу])" , "ğ$1" } ,
789+ { "к([^ '\" -]*[ÄäEeİiÖöÜüӘәЕеИиӨөҮү])" , "k$1" } ,
790+ { "к([^ '\" -]*[AaIıOoUuАаЫыОоУу])" , "q$1" } ,
791+ { "ю([^ '\" -]*[ÄäEeİiÖöÜüӘәЕеИиӨөҮү])" , "yü$1" } ,
792+ { "ю([^ '\" -]*[AaIıOoUuАаЫыОоУу])" , "yu$1" } ,
793+ { "я([^ '\" -]*[ÄäEeİiÖöÜüӘәЕеИиӨөҮү])" , "yä$1" } ,
794+ { "я([^ '\" -]*[AaIıOoUuАаЫыОоУу])" , "ya$1" } ,
795+
796+ //// Uppercase exceptions (disabled: every entry in this group is commented out)
797+ //{ "Аъ", "Ä" },
798+ //{ "Ӓ", "Ä" },
799+ //{ "Оъ", "Ö" },
800+ //{ "Ӧ", "Ö" },
801+ //{ "Уъ", "Ü" },
802+ //{ "Ӱ", "Ü" },
803+ //{ "Жъ", "C" },
804+ //{ "Нъ", "Ñ" },
805+ //{ "Ҥ", "Ñ" },
806+ //{ "Хъ", "H" },
807+
808+ // Uppercase letters (Г, К, Ю and Я have no plain entry in this group)
809+ { "А" , "A" } ,
810+ { "Б" , "B" } ,
811+ { "В" , "W" } , // Or V in Russian words; this table always emits W
812+ { "Д" , "D" } ,
813+ { "Е" , "E" } ,
814+ { "Ж" , "J" } ,
815+ { "З" , "Z" } ,
816+ { "И" , "İ" } ,
817+ { "Й" , "Y" } ,
818+ { "Л" , "L" } ,
819+ { "М" , "M" } ,
820+ { "Н" , "N" } ,
821+ { "О" , "O" } ,
822+ { "П" , "P" } ,
823+ { "Р" , "R" } ,
824+ { "С" , "S" } ,
825+ { "Т" , "T" } ,
826+ { "У" , "U" } ,
827+ { "Ф" , "F" } ,
828+ { "Х" , "X" } ,
829+ { "Ч" , "Ç" } ,
830+ { "Ш" , "Ş" } ,
831+ { "Ы" , "I" } ,
832+ { "Ә" , "Ä" } ,
833+ { "Ө" , "Ö" } ,
834+ { "Ү" , "Ü" } ,
835+ { "Җ" , "C" } ,
836+ { "Ң" , "Ñ" } ,
837+ { "Һ" , "H" } ,
838+
839+ // Uppercase letters - Russian (two-letter outputs are written in title case)
840+ { "Ё" , "Yo" } ,
841+ { "Ц" , "Ts" } ,
842+ { "Щ" , "Şç" } ,
843+
844+ //// Lowercase exceptions (disabled: every entry in this group is commented out)
845+ //{ "аъ", "ä" },
846+ //{ "ӓ", "ä" },
847+ //{ "оъ", "ö" },
848+ //{ "ӧ", "ö" },
849+ //{ "уъ", "ü" },
850+ //{ "ӱ", "ü" },
851+ //{ "жъ", "c" },
852+ //{ "нъ", "ñ" },
853+ //{ "ҥ", "ñ" },
854+ //{ "хъ", "h" },
855+
856+ // Lowercase letters (г, к, ю and я have no plain entry in this group)
857+ { "а" , "a" } ,
858+ { "б" , "b" } ,
859+ { "в" , "w" } , // Or V in Russian words; this table always emits w
860+ { "д" , "d" } ,
861+ { "е" , "e" } ,
862+ { "ж" , "j" } ,
863+ { "з" , "z" } ,
864+ { "и" , "i" } ,
865+ { "й" , "y" } ,
866+ { "л" , "l" } ,
867+ { "м" , "m" } ,
868+ { "н" , "n" } ,
869+ { "о" , "o" } ,
870+ { "п" , "p" } ,
871+ { "р" , "r" } ,
872+ { "с" , "s" } ,
873+ { "т" , "t" } ,
874+ { "у" , "u" } ,
875+ { "ф" , "f" } ,
876+ { "х" , "x" } ,
877+ { "ч" , "ç" } ,
878+ { "ш" , "ş" } ,
879+ { "ы" , "ı" } ,
880+ { "ә" , "ä" } ,
881+ { "ө" , "ö" } ,
882+ { "ү" , "ü" } ,
883+ { "җ" , "c" } ,
884+ { "ң" , "ñ" } ,
885+ { "һ" , "h" } ,
886+
887+ // Lowercase letters - Russian
888+ { "ё" , "yo" } ,
889+ { "ц" , "ts" } ,
890+ { "щ" , "şç" } ,
891+
892+ // Special characters: the hard sign (Ъ/ъ) and soft sign (Ь/ь) map to the empty string
893+ { "Ъ" , "" } ,
894+ { "Ь" , "" } ,
895+ { "ъ" , "" } ,
896+ { "ь" , "" } ,
897+ } ;
898+
750899 ukrainianTransliterationTable = new ( )
751900 {
752901 { @"ія\b" , "ia" } ,
@@ -812,6 +961,11 @@ public CyrillicTransliterator()
812961 serbianTransliterationTable . Add ( characterTransliteration . Key , characterTransliteration . Value ) ;
813962 }
814963
964+ if ( ! tatarTransliterationTable . ContainsKey ( characterTransliteration . Key ) )
965+ {
966+ tatarTransliterationTable . Add ( characterTransliteration . Key , characterTransliteration . Value ) ;
967+ }
968+
815969 if ( ! tajikTransliterationTable . ContainsKey ( characterTransliteration . Key ) )
816970 {
817971 tajikTransliterationTable . Add ( characterTransliteration . Key , characterTransliteration . Value ) ;
@@ -883,6 +1037,11 @@ public string Transliterate(string text, Language language)
8831037 {
8841038 transliterationTable = tajikTransliterationTable ;
8851039 }
1040+ else if ( language . Equals ( Language . Tatar ) ||
1041+ language . Equals ( Language . TatarCyrillic ) )
1042+ {
1043+ transliterationTable = tatarTransliterationTable ;
1044+ }
8861045 else if ( language . Equals ( Language . Ukrainian ) )
8871046 {
8881047 transliterationTable = ukrainianTransliterationTable ;
0 commit comments