Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions korean_romanizer/romanizer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import re

from korean_romanizer.syllable import Syllable
from korean_romanizer.syllable import (
Syllable,
unicode_compatible_consonants,
unicode_initial,
)
from korean_romanizer.pronouncer import Pronouncer

'''
Expand Down Expand Up @@ -96,6 +100,13 @@

None: '',
}

# Compatibility jamo (e.g. ㄱ, ㄴ) do not appear as part of a full syllable.
# Map them to their onset romanization so single jamo can be transliterated.
# Each compatibility consonant is paired, by list position, with the entry at
# the same index in unicode_initial, whose romanization is read from `onset`.
# NOTE(review): assumes unicode_compatible_consonants and unicode_initial are
# index-aligned and that unicode_initial is at least as long; otherwise this
# raises IndexError or maps to the wrong letter at import time. TODO confirm
# against korean_romanizer.syllable.
compat_onset = {
comp: onset[unicode_initial[i]]
for i, comp in enumerate(unicode_compatible_consonants)
}

class Romanizer(object):
def __init__(self, text):
Expand All @@ -111,13 +122,14 @@ def romanize(self):

if not s.medial and not s.final:
# s is NOT a full syllable (e.g. a standalone jamo such as ㅠ or ㄴ)
# if onset.get(chr(s.initial)):
# _romanized += onset[chr(s.initial)]
# elif vowel.get(chr(s.initial)):
# _romanized += vowel[chr(s.initial)]
# else:
# _romanized += char
_romanized += char
if char in vowel:
_romanized += vowel[char]
elif char in onset:
_romanized += onset[char]
elif char in compat_onset:
_romanized += compat_onset[char]
else:
_romanized += char
else:
# s is a full syllable
_romanized += onset[s.initial] + vowel[s.medial] + coda[s.final]
Expand Down
4 changes: 2 additions & 2 deletions tests/test_romanizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ def test_double_consonant_final_without_next_syllable():


def test_non_syllables():
assert romanize("ㅠㄴㅁㄱ") == "ㅠㄴㅁㄱ"
assert romanize("ㅠ동") == "ㅠdong"
assert romanize("ㅠㄴㅁㄱ") == "yunmg"
assert romanize("ㅠ동") == "yudong"


def test_coda_h():
Expand Down