File tree: Expand file tree / Collapse file tree. 2 files changed, +22 -10 lines changed.
Original file line number Diff line number Diff line change
11import re
22
3- from korean_romanizer .syllable import Syllable
3+ from korean_romanizer .syllable import (
4+ Syllable ,
5+ unicode_compatible_consonants ,
6+ unicode_initial ,
7+ )
48from korean_romanizer .pronouncer import Pronouncer
59
610'''
96100
97101 None : '' ,
98102}
103+
104+ # Compatibility jamo (e.g. ㄱ, ㄴ) do not appear as part of a full syllable.
105+ # Map them to their onset romanization so single jamo can be transliterated.
106+ compat_onset = {
107+ comp : onset [unicode_initial [i ]]
108+ for i , comp in enumerate (unicode_compatible_consonants )
109+ }
99110
100111class Romanizer (object ):
101112 def __init__ (self , text ):
@@ -111,13 +122,14 @@ def romanize(self):
111122
112123 if not s .medial and not s .final :
113124 # s is NOT a full syllable (e.g. a standalone jamo such as ㅠ or ㄴ)
114- # if onset.get(chr(s.initial)):
115- # _romanized += onset[chr(s.initial)]
116- # elif vowel.get(chr(s.initial)):
117- # _romanized += vowel[chr(s.initial)]
118- # else:
119- # _romanized += char
120- _romanized += char
125+ if char in vowel :
126+ _romanized += vowel [char ]
127+ elif char in onset :
128+ _romanized += onset [char ]
129+ elif char in compat_onset :
130+ _romanized += compat_onset [char ]
131+ else :
132+ _romanized += char
121133 else :
122134 # s is a full syllable
123135 _romanized += onset [s .initial ] + vowel [s .medial ] + coda [s .final ]
Original file line number Diff line number Diff line change @@ -70,8 +70,8 @@ def test_double_consonant_final_without_next_syllable():
7070
7171
7272def test_non_syllables ():
73- assert romanize ("ㅠㄴㅁㄱ" ) == "ㅠㄴㅁㄱ "
74- assert romanize ("ㅠ동" ) == "ㅠdong "
73+ assert romanize ("ㅠㄴㅁㄱ" ) == "yunmg "
74+ assert romanize ("ㅠ동" ) == "yudong "
7575
7676
7777def test_coda_h ():
You can’t perform that action at this time.
0 commit comments