Skip to content

Commit f6ba76b

Browse files
authored
Add files via upload
1 parent 332f388 commit f6ba76b

File tree

1 file changed

+71
-69
lines changed

1 file changed

+71
-69
lines changed

ARPAsingPlusPhonemizer/ArpasingPlusPhonemizer.cs

Lines changed: 71 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer {
4545
protected override Dictionary<string, string> GetDictionaryPhonemesReplacement() => dictionaryReplacements;
4646

4747
// For banks with missing vowels
48-
private readonly Dictionary<string, string> missingVphonemes = "ax=ah,aa=ah,ae=ah,iy=ih,uh=uw,ix=ih,ux=uh".Split(',')
48+
private readonly Dictionary<string, string> missingVphonemes = "ax=ah,aa=ah,ae=ah,iy=ih,uh=uw,ix=ih,ux=uh,oh=ao,eu=uh,oe=ax,uy=uw,yw=uw,yx=iy,wx=uw".Split(',')
4949
.Select(entry => entry.Split('='))
5050
.Where(parts => parts.Length == 2)
5151
.Where(parts => parts[0] != parts[1])
@@ -68,14 +68,6 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer {
6868
.ToDictionary(parts => parts[0], parts => parts[1]);
6969
private bool isTimitPhonemes = false;
7070

71-
// other ARPAbet
72-
private readonly Dictionary<string, string> otherArpaphonemes = "oh=ao,eu=uh,oe=ax,uy=uw,yw=uw,yx=iy,wx=uw".Split(',')
73-
.Select(entry => entry.Split('='))
74-
.Where(parts => parts.Length == 2)
75-
.Where(parts => parts[0] != parts[1])
76-
.ToDictionary(parts => parts[0], parts => parts[1]);
77-
private bool isOtherArpaPhonemes = false;
78-
7971
private readonly Dictionary<string, string> vvExceptions =
8072
new Dictionary<string, string>() {
8173
{"aw","w"},
@@ -211,17 +203,23 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
211203
var lastC = cc.Length - 1;
212204
var firstC = 0;
213205

214-
if (!HasOto("b ax", syllable.tone) && !HasOto("ax t", syllable.tone) && !HasOto("ax", syllable.tone)) {
215-
isMissingVPhonemes = true;
216-
}
217-
if (!HasOto("bw", syllable.tone)) {
218-
isMissingCPhonemes = true;
206+
foreach (var entry in missingVphonemes) {
207+
if (HasOto(entry.Key, syllable.tone)) {
208+
isMissingVPhonemes = true;
209+
break;
210+
}
219211
}
220-
if (!HasOto("gcl", syllable.tone)) {
221-
isTimitPhonemes = true;
212+
foreach (var entry in missingCphonemes) {
213+
if (HasOto(entry.Key, syllable.tone)) {
214+
isMissingCPhonemes = true;
215+
break;
216+
}
222217
}
223-
if (!HasOto("b oh", syllable.tone) && !HasOto("ue t", syllable.tone) && !HasOto("oh", syllable.tone)) {
224-
isOtherArpaPhonemes = true;
218+
foreach (var entry in timitphonemes) {
219+
if (HasOto(entry.Key, syllable.tone)) {
220+
isTimitPhonemes = true;
221+
break;
222+
}
225223
}
226224

227225
// STARTING V
@@ -350,7 +348,6 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
350348
break;
351349
} else {
352350
basePhoneme = $"{cc.Last()} {v}";
353-
break;
354351
}
355352
}
356353
}
@@ -359,82 +356,95 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
359356
var vr = $"{prevV} -";
360357
var vcc = $"{prevV} {string.Join("", cc.Take(2))}"; // bug on vcc, sequence of [{vowel} v][v f][f {vowel}] turns in to [{vowel} q/t][- {vowel}] which is odd
361358
var vc = $"{prevV} {cc[0]}";
362-
if (i == 0) {
363-
if (HasOto(vr, syllable.tone) || HasOto(ValidateAlias(vr), syllable.tone) && !HasOto(vc, syllable.tone)) {
364-
phonemes.Add(vr);
365-
phonemes.Add($"- {cc[0]}");
366-
}
367-
} else if (HasOto(vcc, syllable.tone) || HasOto(ValidateAlias(vcc), syllable.tone)) {
359+
if (i == 0 && (HasOto(vr, syllable.tone) || HasOto(ValidateAlias(vr), syllable.tone)) && !HasOto(vc, syllable.tone)) {
360+
phonemes.Add(vr);
361+
phonemes.Add($"- {cc[0]}");
362+
break;
363+
}
364+
if (HasOto(vcc, syllable.tone) || HasOto(ValidateAlias(vcc), syllable.tone)) {
368365
phonemes.Add(vcc);
369366
firstC = 1;
370367
break;
371-
} else if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone)) {
368+
}
369+
if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone)) {
372370
phonemes.Add(vc);
373371
break;
374-
} else {
375-
continue;
376372
}
377373
}
378374
}
379375
for (var i = firstC; i < lastC; i++) {
380-
var rccv = $"- {string.Join("", cc)} {v}";
381-
var ccv = string.Join("", cc.Skip(i)) + " " + v;
376+
var ccv = $"{string.Join("", cc.Skip(i))} {v}";
382377
var cc1 = $"{string.Join(" ", cc.Skip(i))}";
383-
var crv = $"{cc.Last()} {v}";
384-
// if (HasOto($"{cc[i]} {string.Join("", cc.Skip(i + 1))}", syllable.tone)) {
378+
var lcv = $"{cc.Last()} {v}";
379+
if (!HasOto(cc1, syllable.tone)) {
380+
cc1 = ValidateAlias(cc1);
381+
}
385382
// [C1 C2C3]
386-
// cc1 = ($"{cc[i]} {string.Join("", cc.Skip(i + 1))}");
387-
if (!HasOto($"{cc[i]} {string.Join("", cc.Skip(i + 1))}", syllable.tone)) {
388-
// [C1 C2]
389-
cc1 = $"{cc[i]} {cc[i + 1]}";
383+
if (HasOto($"{cc[i]} {string.Join("", cc.Skip(i + 1))}", syllable.tone)) {
384+
cc1 = ($"{cc[i]} {string.Join("", cc.Skip(i + 1))}");
390385
}
386+
// [C1 C2]
391387
if (!HasOto(cc1, syllable.tone)) {
392-
// [C1 C2]
393388
cc1 = $"{cc[i]} {cc[i + 1]}";
394389
}
395390
if (!HasOto(cc1, syllable.tone)) {
396391
cc1 = ValidateAlias(cc1);
397392
}
398393
// CC FALLBACKS
399-
if (!HasOto(cc1, syllable.tone) && !HasOto($"{cc[i]} {cc[i + 1]}", syllable.tone)) {
394+
if (!HasOto(cc1, syllable.tone) || !HasOto(ValidateAlias(cc1), syllable.tone) && !HasOto($"{cc[i]} {cc[i + 1]}", syllable.tone)) {
400395
// [C1 -] [- C2]
401396
cc1 = $"- {cc[i + 1]}";
402397
phonemes.Add($"{cc[i]} -");
403398
}
404399
if (!HasOto(cc1, syllable.tone)) {
405400
cc1 = ValidateAlias(cc1);
406401
}
407-
// CC V on multiple consonants ex [s tr ao] (fix causes the sequence [vowel v][v f][f vowel] to be [vowel t][v f][- vowel]) also I now clue how to code [C CC] and [CC V] existing together sorry
408-
// if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
409-
// basePhoneme = ccv;
410-
// lastC = i;
411-
if ((HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone))) {
412-
basePhoneme = crv;
402+
// CC V on multiple consonants ex [s tr ao]
403+
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
404+
basePhoneme = ccv;
405+
lastC = i;
406+
break;
407+
} else if ((HasOto(lcv, syllable.vowelTone) || HasOto(ValidateAlias(lcv), syllable.vowelTone)) && HasOto(cc1, syllable.vowelTone) && !cc1.Contains($"{cc[i]} {cc[i + 1]}")) {
408+
basePhoneme = lcv;
413409
}
414410
if (i + 1 < lastC) {
415-
var cc2 = $"{string.Join("", cc.Skip(i + 1))}";
416-
if (!HasOto(cc2, syllable.tone)) {
417-
// [C1 C2]
418-
cc2 = $"{cc[i]} {cc[i + 1]}";
411+
if (!HasOto(cc1, syllable.tone)) {
412+
cc1 = ValidateAlias(cc1);
413+
}
414+
// [C1 C2C3]
415+
if (HasOto($"{cc[i]} {string.Join("", cc.Skip(i + 1))}", syllable.tone)) {
416+
cc1 = ($"{cc[i]} {string.Join("", cc.Skip(i + 1))}");
419417
}
420-
if (!HasOto(cc2, syllable.tone)) {
421-
cc2 = ValidateAlias(cc2);
418+
// [C1 C2]
419+
if (!HasOto(cc1, syllable.tone)) {
420+
cc1 = $"{cc[i]} {cc[i + 1]}";
421+
}
422+
if (!HasOto(cc1, syllable.tone)) {
423+
cc1 = ValidateAlias(cc1);
422424
}
423425
// CC FALLBACKS
424-
if (!HasOto(cc2, syllable.tone) && !HasOto($"{cc[i]} {cc[i + 1]}", syllable.tone)) {
426+
if (!HasOto(cc1, syllable.tone) || !HasOto(ValidateAlias(cc1), syllable.tone) && !HasOto($"{cc[i]} {cc[i + 1]}", syllable.tone)) {
425427
// [C1 -] [- C2]
426-
cc2 = $"- {cc[i + 1]}";
428+
cc1 = $"- {cc[i + 1]}";
427429
phonemes.Add($"{cc[i]} -");
428430
}
429-
if (!HasOto(cc2, syllable.tone)) {
430-
cc2 = ValidateAlias(cc2);
431+
if (!HasOto(cc1, syllable.tone)) {
432+
cc1 = ValidateAlias(cc1);
433+
}
434+
// CC V on multiple consonants ex [s tr ao]
435+
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
436+
basePhoneme = ccv;
437+
lastC = i;
438+
break;
439+
} else if ((HasOto(lcv, syllable.vowelTone) || HasOto(ValidateAlias(lcv), syllable.vowelTone)) && HasOto(cc1, syllable.vowelTone) && !cc1.Contains($"{cc[i]} {cc[i + 1]}")) {
440+
basePhoneme = lcv;
431441
}
432-
if (HasOto(cc1, syllable.tone) && HasOto(cc2, syllable.tone) && !cc1.Contains($"{string.Join("", cc.Skip(i))}")) {
442+
if (HasOto(cc1, syllable.tone) && HasOto(cc1, syllable.tone) && !cc1.Contains($"{string.Join("", cc.Skip(i))}")) {
433443
// like [V C1] [C1 C2] [C2 C3] [C3 ..]
434444
phonemes.Add(cc1);
435445
} else if (TryAddPhoneme(phonemes, syllable.tone, cc1)) {
436446
// like [V C1] [C1 C2] [C2 ..]
437-
if (cc1.Contains($"{string.Join("", cc.Skip(i + 1))}")) {
447+
if (cc1.Contains($"{string.Join(" ", cc.Skip(i + 1))}")) {
438448
i++;
439449
}
440450
} else {
@@ -443,10 +453,7 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
443453
}
444454
} else {
445455
TryAddPhoneme(phonemes, syllable.tone, cc1);
446-
447-
448456
}
449-
450457
}
451458
phonemes.Add(basePhoneme);
452459
return phonemes;
@@ -498,9 +505,10 @@ protected override List<string> ProcessEnding(Ending ending) {
498505
var vcc4 = $"{v} {string.Join("", cc.Take(2))}";
499506
var vc = $"{v} {cc[0]}";
500507
if (i == 0) {
501-
if (HasOto(vr, ending.tone) || HasOto(ValidateAlias(vr), ending.tone)) {
508+
if (HasOto(vr, ending.tone) || HasOto(ValidateAlias(vr), ending.tone) && !HasOto(vc, ending.tone)) {
502509
phonemes.Add(vr);
503510
}
511+
break;
504512
} else if ((HasOto(vcc, ending.tone) || HasOto(ValidateAlias(vcc), ending.tone)) && lastC == 1) {
505513
phonemes.Add(vcc);
506514
firstC = 1;
@@ -532,12 +540,7 @@ protected override List<string> ProcessEnding(Ending ending) {
532540
firstC = 1;
533541
break;
534542
} else {
535-
// Transform 'v' to vowel-like representation
536-
if (cc[0] == "v") {
537-
phonemes.Add($"{v} -");
538-
} else {
539-
phonemes.Add(vc);
540-
}
543+
phonemes.Add(vc);
541544
break;
542545
}
543546
}
@@ -733,9 +736,8 @@ protected override string ValidateAlias(string alias) {
733736
}
734737

735738
// VALIDATE ALIAS DEPENDING ON METHOD
736-
if (isMissingVPhonemes || isMissingCPhonemes || isTimitPhonemes || isOtherArpaPhonemes) {
737-
foreach (var syllable in missingVphonemes.Concat(missingCphonemes).Concat(timitphonemes
738-
.Concat(otherArpaphonemes))) {
739+
if (isMissingVPhonemes || isMissingCPhonemes || isTimitPhonemes) {
740+
foreach (var syllable in missingVphonemes.Concat(missingCphonemes).Concat(timitphonemes)) {
739741
alias = alias.Replace(syllable.Key, syllable.Value);
740742
}
741743
}

0 commit comments

Comments
 (0)