@@ -45,7 +45,7 @@ def has_foreign_lemma(word):
4545 if not '-' in lemma :
4646 return False
4747
48- cand = lemma .split ('-' )[- 1 ]
48+ cand = lemma .split ('-' , 1 )[- 1 ]
4949 # NOTE: some words have 外国 instead of a foreign spelling. ジル
5050 # (Jill?) is an example. Unclear why this is the case.
5151 # There are other hyphenated lemmas, like 私-代名詞.
@@ -257,6 +257,8 @@ def romaji_tokens(self, words, capitalize=True, title=False):
257257 if nw and nw .feature .pos1 in ('補助記号' , '接尾辞' ): continue
258258 # special case for half-width commas
259259 if nw and nw .surface == ',' : continue
260+ # special case for prefixes
261+ if foreign and roma [- 1 ] == "-" : continue
260262 # 思えば -> omoeba
261263 if nw and nw .feature .pos2 in ('接続助詞' ): continue
262264 # 333 -> 333 ; this should probably be handled in mecab
@@ -348,7 +350,7 @@ def romaji_word(self, word):
348350 elif (self .use_foreign_spelling and
349351 has_foreign_lemma (word )):
350352 # this is a foreign word with known spelling
351- return word .feature .lemma .split ('-' )[- 1 ]
353+ return word .feature .lemma .split ('-' , 1 )[- 1 ]
352354 elif word .feature .kana :
353355 # for known words
354356 kana = jaconv .kata2hira (word .feature .kana )
0 commit comments