Skip to content

Commit 651c709

Browse files
authored
Add files via upload
1 parent f6ba76b commit 651c709

File tree

1 file changed

+118
-83
lines changed

1 file changed

+118
-83
lines changed

ARPAsingPlusPhonemizer/ArpasingPlusPhonemizer.cs

Lines changed: 118 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer {
2525
"aam", "am", "axm", "aem", "ahm", "aom", "om", "awm", "aum", "aym", "aim", "ehm", "em", "eym", "eim", "ihm", "iym", "im", "owm", "oum", "oym", "oim", "uhm", "uwm", "um", "oh",
2626
"eu", "oe", "yw", "yx", "wx"
2727
};
28-
private readonly string[] consonants = "b,ch,d,dh,dr,dx,f,g,hh,jh,k,l,m,n,ng,p,q,r,s,sh,t,th,tr,v,w,y,z,zh".Split(',');
28+
private readonly string[] consonants = "b,ch,d,dh,dx,f,g,hh,jh,k,l,m,n,ng,p,q,r,s,sh,t,th,v,w,y,z,zh".Split(',');
2929
private readonly string[] affricates = "ch,jh,j".Split(',');
3030
private readonly string[] tapConsonant = "dx".Split(",");
3131
private readonly string[] semilongConsonants = "ng,n,m,v,z,q,hh".Split(",");
@@ -204,24 +204,24 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
204204
var firstC = 0;
205205

206206
foreach (var entry in missingVphonemes) {
207-
if (HasOto(entry.Key, syllable.tone)) {
207+
if (!HasOto("ax", syllable.tone) || !HasOto("b ax", syllable.tone) || !HasOto("ax b", syllable.tone)) {
208208
isMissingVPhonemes = true;
209209
break;
210210
}
211211
}
212212
foreach (var entry in missingCphonemes) {
213-
if (HasOto(entry.Key, syllable.tone)) {
213+
if (!HasOto("wh", syllable.tone) || !HasOto("zh er", syllable.tone) || !HasOto("ah dx", syllable.tone)) {
214214
isMissingCPhonemes = true;
215215
break;
216216
}
217217
}
218218
foreach (var entry in timitphonemes) {
219-
if (HasOto(entry.Key, syllable.tone)) {
219+
if (!HasOto("gcl", syllable.tone) || !HasOto("f axh", syllable.tone) || !HasOto("ih tcl", syllable.tone)) {
220220
isTimitPhonemes = true;
221221
break;
222222
}
223223
}
224-
224+
225225
// STARTING V
226226
if (syllable.IsStartingV) {
227227
// TRIES - V THEN V
@@ -335,40 +335,38 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
335335
for (var i = firstC; i < cc.Length - 1; i++) {
336336
var ccv = $"{string.Join("", cc)} {v}";
337337
var ccv1 = string.Join("", cc.Skip(i)) + " " + v;
338-
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
339-
basePhoneme = ccv;
340-
lastC = i;
341-
break;
342-
} else {
343-
if (HasOto(ccv1, syllable.vowelTone) || HasOto(ValidateAlias(ccv1), syllable.vowelTone)) {
344-
basePhoneme = ccv1;
338+
if (syllable.CurrentWordCc.Length >= 2) {
339+
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
340+
basePhoneme = ccv;
341+
lastC = i;
345342
break;
346-
} else if (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone)) {
347-
basePhoneme = crv;
348-
break;
349-
} else {
350-
basePhoneme = $"{cc.Last()} {v}";
343+
} else if (HasOto(ccv1, syllable.vowelTone) || HasOto(ValidateAlias(ccv1), syllable.vowelTone)) {
344+
basePhoneme = ccv1;
351345
}
346+
break;
347+
} else if (syllable.CurrentWordCc.Length >= 1 && syllable.PreviousWordCc.Length == 1) {
348+
basePhoneme = crv;
352349
}
353350
}
354351
// try [V C], [V CC], [V -][- C]
355352
for (var i = lastC + 1; i >= 0; i--) {
356353
var vr = $"{prevV} -";
357-
var vcc = $"{prevV} {string.Join("", cc.Take(2))}"; // bug on vcc, sequence of [{vowel} v][v f][f {vowel}] turns in to [{vowel} q/t][- {vowel}] which is odd
354+
var vcc = $"{prevV} {string.Join("", cc.Take(2))}"; // bug on vcc, sequence of [{vowel} v][v f][f {vowel}] turns into [{vowel} q/t][- {vowel}] which is odd
358355
var vc = $"{prevV} {cc[0]}";
359356
if (i == 0 && (HasOto(vr, syllable.tone) || HasOto(ValidateAlias(vr), syllable.tone)) && !HasOto(vc, syllable.tone)) {
360357
phonemes.Add(vr);
361358
phonemes.Add($"- {cc[0]}");
362359
break;
363-
}
364-
if (HasOto(vcc, syllable.tone) || HasOto(ValidateAlias(vcc), syllable.tone)) {
360+
} else if (cc.Length > 2 && HasOto(vcc, syllable.tone) || HasOto(ValidateAlias(vcc), syllable.tone)) {
365361
phonemes.Add(vcc);
366362
firstC = 1;
367363
break;
368-
}
369-
if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone)) {
364+
} else if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone)) {
370365
phonemes.Add(vc);
371366
break;
367+
} else {
368+
// If none of the conditions are met, continue the loop
369+
continue;
372370
}
373371
}
374372
}
@@ -379,10 +377,6 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
379377
if (!HasOto(cc1, syllable.tone)) {
380378
cc1 = ValidateAlias(cc1);
381379
}
382-
// [C1 C2C3]
383-
if (HasOto($"{cc[i]} {string.Join("", cc.Skip(i + 1))}", syllable.tone)) {
384-
cc1 = ($"{cc[i]} {string.Join("", cc.Skip(i + 1))}");
385-
}
386380
// [C1 C2]
387381
if (!HasOto(cc1, syllable.tone)) {
388382
cc1 = $"{cc[i]} {cc[i + 1]}";
@@ -399,22 +393,31 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
399393
if (!HasOto(cc1, syllable.tone)) {
400394
cc1 = ValidateAlias(cc1);
401395
}
402-
// CC V on multiple consonants ex [s tr ao]
403-
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
404-
basePhoneme = ccv;
405-
lastC = i;
406-
break;
407-
} else if ((HasOto(lcv, syllable.vowelTone) || HasOto(ValidateAlias(lcv), syllable.vowelTone)) && HasOto(cc1, syllable.vowelTone) && !cc1.Contains($"{cc[i]} {cc[i + 1]}")) {
396+
// CC V on multiple consonants, e.g. [s tr ao] (only if the word starts with a CC)
397+
if (syllable.CurrentWordCc.Length >= 2 && syllable.PreviousWordCc.Length >= 1) {
398+
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
399+
basePhoneme = ccv;
400+
lastC = i;
401+
break;
402+
} else if ((HasOto(lcv, syllable.vowelTone) || HasOto(ValidateAlias(lcv), syllable.vowelTone))
403+
&& HasOto(cc1, syllable.vowelTone) && !HasOto(ccv, syllable.vowelTone)) {
404+
basePhoneme = lcv;
405+
}
406+
// [C1 C2C3]
407+
if (HasOto($"{cc[i]} {string.Join("", cc.Skip(i + 1))}", syllable.tone)) {
408+
cc1 = $"{cc[i]} {string.Join("", cc.Skip(i + 1))}";
409+
}
410+
} else if (syllable.CurrentWordCc.Length >= 1 && syllable.PreviousWordCc.Length == 1) {
408411
basePhoneme = lcv;
412+
// [C1 C2]
413+
if (!HasOto(cc1, syllable.tone)) {
414+
cc1 = $"{cc[i]} {cc[i + 1]}";
415+
}
409416
}
410417
if (i + 1 < lastC) {
411418
if (!HasOto(cc1, syllable.tone)) {
412419
cc1 = ValidateAlias(cc1);
413420
}
414-
// [C1 C2C3]
415-
if (HasOto($"{cc[i]} {string.Join("", cc.Skip(i + 1))}", syllable.tone)) {
416-
cc1 = ($"{cc[i]} {string.Join("", cc.Skip(i + 1))}");
417-
}
418421
// [C1 C2]
419422
if (!HasOto(cc1, syllable.tone)) {
420423
cc1 = $"{cc[i]} {cc[i + 1]}";
@@ -431,13 +434,26 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
431434
if (!HasOto(cc1, syllable.tone)) {
432435
cc1 = ValidateAlias(cc1);
433436
}
434-
// CC V on multiple consonants ex [s tr ao]
435-
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
436-
basePhoneme = ccv;
437-
lastC = i;
438-
break;
439-
} else if ((HasOto(lcv, syllable.vowelTone) || HasOto(ValidateAlias(lcv), syllable.vowelTone)) && HasOto(cc1, syllable.vowelTone) && !cc1.Contains($"{cc[i]} {cc[i + 1]}")) {
437+
// CC V on multiple consonants, e.g. [s tr ao] (only if the word starts with a CC)
438+
if (syllable.CurrentWordCc.Length >= 2 && syllable.PreviousWordCc.Length >= 1) {
439+
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
440+
basePhoneme = ccv;
441+
lastC = i;
442+
break;
443+
} else if ((HasOto(lcv, syllable.vowelTone) || HasOto(ValidateAlias(lcv), syllable.vowelTone))
444+
&& HasOto(cc1, syllable.vowelTone) && !HasOto(ccv, syllable.vowelTone)) {
445+
basePhoneme = lcv;
446+
}
447+
// [C1 C2C3]
448+
if (HasOto($"{cc[i]} {string.Join("", cc.Skip(i + 1))}", syllable.tone)) {
449+
cc1 = $"{cc[i]} {string.Join("", cc.Skip(i + 1))}";
450+
}
451+
} else if (syllable.CurrentWordCc.Length >= 1 && syllable.PreviousWordCc.Length == 1) {
440452
basePhoneme = lcv;
453+
// [C1 C2]
454+
if (!HasOto(cc1, syllable.tone)) {
455+
cc1 = $"{cc[i]} {cc[i + 1]}";
456+
}
441457
}
442458
if (HasOto(cc1, syllable.tone) && HasOto(cc1, syllable.tone) && !cc1.Contains($"{string.Join("", cc.Skip(i))}")) {
443459
// like [V C1] [C1 C2] [C2 C3] [C3 ..]
@@ -455,6 +471,7 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
455471
TryAddPhoneme(phonemes, syllable.tone, cc1);
456472
}
457473
}
474+
458475
phonemes.Add(basePhoneme);
459476
return phonemes;
460477
}
@@ -938,10 +955,10 @@ protected override string ValidateAlias(string alias) {
938955
return alias.Replace("ao dx", "ah d");
939956
}
940957
if (alias == "ao q") {
941-
return alias.Replace("ao q", "ah t");
958+
return alias.Replace("ao q", "ao t");
942959
}
943960
if (alias == "ao tr") {
944-
return alias.Replace("ao tr", "ah t");
961+
return alias.Replace("ao tr", "ao t");
945962
}
946963
if (alias == "ao y") {
947964
return alias.Replace("ao y", "ow y");
@@ -1649,80 +1666,98 @@ protected override string ValidateAlias(string alias) {
16491666
}
16501667

16511668
// glottal
1652-
foreach (var c1 in new[] { "q" }) {
1653-
foreach (var v1 in vowels) {
1654-
1655-
alias = alias.Replace(c1 + " " + v1, "-" + " " + v1);
1669+
foreach (var v1 in vowels) {
1670+
if (!alias.Contains("cl " + v1) || !alias.Contains("q " + v1)) {
1671+
alias = alias.Replace("q " + v1, "- " + v1);
16561672
}
16571673
}
1658-
foreach (var c1 in new[] { "q" }) {
1659-
foreach (var c2 in consonants) {
1660-
alias = alias.Replace(c2 + " " + c1, $"{c2} -");
1674+
foreach (var c2 in consonants) {
1675+
if (!alias.Contains(c2 + " cl") || !alias.Contains(c2 + " q")) {
1676+
alias = alias.Replace(c2 + " q", $"{c2} -");
16611677
}
16621678
}
1663-
foreach (var c1 in new[] { "q" }) {
1664-
foreach (var c2 in consonants) {
1665-
alias = alias.Replace(c1 + " " + c2, $"- {c2}");
1679+
foreach (var c2 in consonants) {
1680+
if (!alias.Contains("cl " + c2) || !alias.Contains("q " + c2)) {
1681+
alias = alias.Replace("q " + c2, "- " + c2);
16661682
}
16671683
}
1668-
1684+
16691685
// C -'s
16701686
foreach (var c1 in new[] { "d", "dh", "g", "p", "jh", "b", "s", "ch", "t", "r", "n", "l", "ng", "sh", "zh", "th", "z", "f", "k", "s", "hh" }) {
16711687
foreach (var s in new[] { "-" }) {
1672-
switch (c1 + " " + s) {
1673-
case var str when alias.Contains(str):
1674-
if (c1 == "d" || c1 == "dh" || c1 == "g" || c1 == "p") {
1688+
var str = c1 + " " + s;
1689+
if (alias.Contains(str)) {
1690+
switch (c1) {
1691+
case "d" when c1 == "d" || c1 == "dh" || c1 == "g" || c1 == "p":
16751692
alias = alias.Replace(str, "b" + " " + s);
1676-
} else if (c1 == "jh") {
1693+
break;
1694+
case "jh" when c1 == "jh":
16771695
alias = alias.Replace(str, "ch" + " " + s);
1678-
} else if (c1 == "b") {
1696+
break;
1697+
case "b" when c1 == "b":
16791698
alias = alias.Replace(str, "d" + " " + s);
1680-
} else if (c1 == "s") {
1699+
break;
1700+
case "s" when c1 == "s":
16811701
alias = alias.Replace(str, "f" + " " + s);
1682-
} else if (c1 == "ch") {
1702+
break;
1703+
case "ch" when c1 == "ch":
16831704
alias = alias.Replace(str, "jh" + " " + s);
1684-
} else if (c1 == "t") {
1705+
break;
1706+
case "t" when c1 == "t":
16851707
alias = alias.Replace(str, "k" + " " + s);
1686-
} else if (c1 == "r") {
1708+
break;
1709+
case "r" when c1 == "r":
16871710
alias = alias.Replace(str, "er" + " " + s);
1688-
} else if (c1 == "n") {
1711+
break;
1712+
case "n" when c1 == "n":
16891713
alias = alias.Replace(str, "m" + " " + s);
1690-
} else if (c1 == "ng" || c1 == "m") {
1714+
break;
1715+
case "ng" when c1 == "ng" || c1 == "m":
16911716
alias = alias.Replace(str, "n" + " " + s);
1692-
} else if (c1 == "sh" || c1 == "zh" || c1 == "th" || c1 == "z" || c1 == "f") {
1717+
break;
1718+
case "sh" when c1 == "sh" || c1 == "zh" || c1 == "th" || c1 == "z" || c1 == "f":
16931719
alias = alias.Replace(str, "s" + " " + s);
1694-
} else if (c1 == "k") {
1720+
break;
1721+
case "k" when c1 == "k":
16951722
alias = alias.Replace(str, "t" + " " + s);
1696-
} else if (c1 == "s") {
1723+
break;
1724+
case "s" when c1 == "s":
16971725
alias = alias.Replace(str, "z" + " " + s);
1698-
} else if (c1 == "hh") {
1726+
break;
1727+
case "hh" when c1 == "hh":
16991728
alias = alias.Replace(str, null);
1700-
}
1701-
break;
1729+
break;
1730+
}
17021731
}
17031732
}
17041733
}
17051734
// CC's
17061735
foreach (var c1 in new[] { "f", "z", "hh", "k", "p", "d", "dh", "g", "b", "m", "r" }) {
17071736
foreach (var c2 in consonants) {
1708-
switch (c1 + " " + c2) {
1709-
case var str when alias.Contains(str):
1710-
if (ccSpecific) {
1711-
if (c1 == "f" || c1 == "z") {
1737+
var str = c1 + " " + c2;
1738+
if (alias.Contains(str)) {
1739+
if (ccSpecific) {
1740+
switch (c1) {
1741+
case "f" when c1 == "f" || c1 == "z":
17121742
alias = alias.Replace(str, "s" + " " + c2);
1713-
} else if (c1 == "k" || c1 == "p" || c1 == "d") {
1743+
break;
1744+
case "k" when c1 == "k" || c1 == "p" || c1 == "d":
17141745
alias = alias.Replace(str, "t" + " " + c2);
1715-
} else if (c1 == "dh" || c1 == "g" || c1 == "b") {
1746+
break;
1747+
case "dh" when c1 == "dh" || c1 == "g" || c1 == "b":
17161748
alias = alias.Replace(str, "d" + " " + c2);
1717-
} else if (c1 == "m") {
1749+
break;
1750+
case "m" when c1 == "m":
17181751
alias = alias.Replace(str, "n" + " " + c2);
1719-
} else if (c1 == "hh") {
1752+
break;
1753+
case "hh" when c1 == "hh":
17201754
alias = alias.Replace(str, "f" + " " + c2);
1721-
} else if (c1 == "r") {
1755+
break;
1756+
case "r" when c1 == "r":
17221757
alias = alias.Replace(str, "er" + " " + c2);
1723-
}
1758+
break;
17241759
}
1725-
break;
1760+
}
17261761
}
17271762
}
17281763
}

0 commit comments

Comments
 (0)