Skip to content

Commit bc6bb58

Browse files
committed
initial implementation of to_ipa
1 parent b2963e2 commit bc6bb58

File tree

2 files changed

+139
-0
lines changed

2 files changed

+139
-0
lines changed

arda/pron.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,70 @@ def syllabify(word, debug=False):
108108
result[-3] = result[-3].upper()
109109

110110
return result
111+
112+
113+
# Ordered (spelling, IPA) rewrite table consumed by to_ipa().
#
# to_ipa() applies the FIRST entry whose spelling matches at the current
# position, so the order below is significant: each multi-character spelling
# is listed before the shorter spelling it begins with ("ch" before "c",
# "ng#" before "ng", "io" before "i", ...).
#
# "#" is the word-boundary marker that to_ipa() adds to both ends of the
# lower-cased word.  It lets an entry apply only word-initially ("#ñ") or
# only word-finally ("f#", "ng#"); the bare "#" entry at the very end simply
# drops any boundary marker that no other entry consumed.
rules = [
    # ---- consonants ----
    ("b", "b"),
    ("ch", "χ"),  # weakened in Gondor unless word final or before t
    ("c", "k"),
    ("dh", "ð"),
    ("d", "d"),
    ("f#", "v"),  # word-final f
    ("f", "f"),
    ("gh", "ɣ"),  # black speech / orkish
    ("g", "g"),
    ("ht", "çt"),
    ("hw", "w̥"),
    ("hy", "ç"),
    ("h", "h"),
    ("kh", "χ"),  # not Elvish (and incorrect for Dwarvish)
    ("k", "k"),
    ("lh", "l̥"),
    ("l", "l"),
    ("m", "m"),
    ("ng#", "ŋ"),  # word-final ng
    ("ng", "ŋg"),
    ("#ñ", "ŋ"),  # word-initial ñ (no entry exists for ñ elsewhere)
    ("n", "n"),
    ("ph", "f"),  # ff if derived from pp
    ("p", "p"),
    ("qu", "kw"),
    ("rh", "r̥"),
    ("r", "r"),
    ("sh", "ʃ"),
    ("s", "s"),
    ("th", "θ"),  # not in Dwarvish
    ("ty", "tj"),
    ("t", "t"),
    ("v", "v"),
    ("w", "w"),
    # ---- vowels ----
    ("á", "ɑː"),
    ("a", "ɑ"),
    ("eä", "e-a"),
    ("e", "e"),
    ("ëa", "e-a"),
    ("ë", "e"),
    # NOTE(review): the vowel in this output looks like GREEK SMALL LETTER
    # OMICRON (U+03BF), not the Latin "o" produced by the ("o", "o") entry.
    # The doctests expect the same character, so it is kept as-is -- confirm.
    ("io", "jο"),
    ("i", "i"),
    ("ó", "oː"),
    ("o", "o"),
    ("û", "uː"),
    ("ú", "uː"),
    ("u", "u"),
    # ---- leftover word boundary ----
    ("#", ""),
]
164+
165+
166+
def to_ipa(word):
    """Transcribe *word* to IPA using the module-level ``rules`` table.

    The word is lower-cased and wrapped in ``#`` word-boundary markers, then
    scanned left to right.  At each position the first ``(spelling, ipa)``
    entry in ``rules`` whose spelling matches is applied: its IPA text is
    emitted and the scan advances past the matched spelling.

    Both the word and the rule spellings are normalised to Unicode NFC before
    matching, so an accented letter written as base letter + combining mark
    (e.g. ``"e"`` followed by U+0308) is treated the same as its precomposed
    form (``"ë"``).

    Returns:
        The concatenated IPA output as a ``str``.

    Raises:
        ValueError: if no rule matches at some position; the message shows
            the unmatched remainder of the word (including the trailing
            ``#`` marker).
    """
    # Local import: the module's import block is not touched by this change.
    import unicodedata

    def nfc(text):
        return unicodedata.normalize("NFC", text)

    # Compare both sides in the same normal form; rule order is preserved.
    table = [(nfc(rule_in), rule_out) for rule_in, rule_out in rules]
    text = "#" + nfc(word.lower()) + "#"

    pieces = []
    pos = 0
    while pos < len(text):
        for rule_in, rule_out in table:
            # startswith(prefix, start) avoids re-slicing the remainder.
            if text.startswith(rule_in, pos):
                pieces.append(rule_out)
                pos += len(rule_in)
                break
        else:
            # No rule applied at this position.
            raise ValueError(f"Can't match: {text[pos:]}")
    return "".join(pieces)

pron_test.rst

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,75 @@ Pelargir pe.LAR.gir
5555
silivren si.LIV.ren
5656
andúne an.DÚ.ne
5757
dûn DÛN
58+
59+
60+
>>> from arda.pron import to_ipa
61+
>>> to_ipa('c')
62+
'k'
63+
64+
>>> to_ipa('@')
65+
Traceback (most recent call last):
66+
...
67+
ValueError: Can't match: @#
68+
69+
>>> to_ipa('ch')
70+
'χ'
71+
72+
>>> to_ipa('Ioreth')
73+
'jοreθ'
74+
75+
>>> for word in [
76+
... "Isildur",
77+
... "Aulë",
78+
... "Eärendil",
79+
... "Eressëa",
80+
... "Elrond",
81+
... "Aragorn",
82+
... "Tinúviel",
83+
... "Thingol",
84+
... "Foalókë",
85+
... "Ringló",
86+
... "Angband",
87+
... "Angmar",
88+
... "Glorfindel",
89+
... "Glaurung",
90+
... "Caradhras",
91+
... "Orome",
92+
... "Fëanor",
93+
... "Ancalima",
94+
... "Elentári",
95+
... "Denethor",
96+
... "Periannath",
97+
... "Ecthelion",
98+
... "Pelargir",
99+
... "silivren",
100+
... "andúne",
101+
... "dûn",
102+
... ]:
103+
... print(word, to_ipa(word))
104+
Isildur isildur
105+
Aulë ɑule
106+
Eärendil e-arendil
107+
Eressëa eresse-a
108+
Elrond elrond
109+
Aragorn ɑrɑgorn
110+
Tinúviel tinuːviel
111+
Thingol θiŋgol
112+
Foalókë foɑloːke
113+
Ringló riŋgloː
114+
Angband ɑŋgbɑnd
115+
Angmar ɑŋgmɑr
116+
Glorfindel glorfindel
117+
Glaurung glɑuruŋ
118+
Caradhras kɑrɑðrɑs
119+
Orome orome
120+
Fëanor fe-anor
121+
Ancalima ɑnkɑlimɑ
122+
Elentári elentɑːri
123+
Denethor deneθor
124+
Periannath periɑnnɑθ
125+
Ecthelion ekθeljοn
126+
Pelargir pelɑrgir
127+
silivren silivren
128+
andúne ɑnduːne
129+
dûn duːn

0 commit comments

Comments
 (0)