Skip to content

Commit 4068e2b

Browse files
authored
reworked CCV and VCC
1 parent 2c5841d commit 4068e2b

File tree

1 file changed

+86
-65
lines changed

1 file changed

+86
-65
lines changed

ARPAsingPlusPhonemizer/ArpasingPlusPhonemizer.cs

Lines changed: 86 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,9 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer {
9292
{"awng","ng"},
9393
{"el","l"},
9494
};
95-
95+
// Consonant symbols that, when they are the first consonant of a cluster (cc[0]),
// are excluded from the joined-cluster alias lookups in ProcessSyllable and
// ProcessEnding: [- CCV], [CC V], [- CC] and the ending [V CC] variants.
// NOTE(review): several use sites are written as
// `HasOto(x) || HasOto(ValidateAlias(x)) && !ccvException.Contains(cc[0])`;
// because `&&` binds tighter than `||`, the exclusion there only applies to the
// ValidateAlias operand. Confirm whether the raw alias check was meant to be
// guarded as well.
private readonly string[] ccvException = { "ch", "dh", "dx", "fh", "gh", "hh", "jh", "kh", "ph", "ng", "sh", "th", "vh", "wh", "zh" };
96+
// First consonants (cc[0]) for which ProcessSyllable tries the [VC C] alias,
// built as `{prevV}{cc[0]} {cc[1]}`, before falling back to the plain [V C] alias.
// NOTE(review): the use site is
// `vc_cException.Contains(cc[0]) && HasOto(vc_c, ...) || HasOto(ValidateAlias(vc_c), ...)`;
// `&&` binds tighter than `||`, so this list gates only the unvalidated alias
// check. Confirm that is intended.
private readonly string[] vc_cException = { "r", "l" };
97+
// Romaji vowel symbols. When cc[0] of an ending is one of these, ProcessEnding
// skips its [V C] / [V CC] ending-alias handling (both guarded by
// `!RomajiException.Contains(cc[0])`), so no VC ending phonemes are added there.
private readonly string[] RomajiException = { "a", "e", "i", "o", "u" };
9698

9799
protected override string[] GetSymbols(Note note) {
98100
string[] original = base.GetSymbols(note);
@@ -298,14 +300,14 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
298300
var rccv1 = $"- {string.Join("", cc)}{v}";
299301
var crv = $"{cc.Last()} {v}";
300302
var ccv = $"{string.Join("", cc)} {v}";
301-
if (HasOto(rccv, syllable.vowelTone) || HasOto(ValidateAlias(rccv), syllable.vowelTone)) {
303+
if (HasOto(rccv, syllable.vowelTone) || HasOto(ValidateAlias(rccv), syllable.vowelTone) && !ccvException.Contains(cc[0])) {
302304
basePhoneme = rccv;
303305
lastC = 0;
304-
} else if (HasOto(rccv1, syllable.vowelTone) || HasOto(ValidateAlias(rccv1), syllable.vowelTone)) {
306+
} else if (HasOto(rccv1, syllable.vowelTone) || HasOto(ValidateAlias(rccv1), syllable.vowelTone) && !ccvException.Contains(cc[0])) {
305307
basePhoneme = rccv1;
306308
lastC = 0;
307309
} else {
308-
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
310+
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone) && !ccvException.Contains(cc[0])) {
309311
basePhoneme = ccv;
310312
} else if (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone)) {
311313
basePhoneme = crv;
@@ -314,9 +316,11 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
314316
}
315317
// TRY RCC [- CC]
316318
for (var i = cc.Length; i > 1; i--) {
317-
if (TryAddPhoneme(phonemes, syllable.tone, $"- {string.Join("", cc.Take(i))}", ValidateAlias($"- {string.Join("", cc.Take(i))}"))) {
318-
firstC = i - 1;
319-
break;
319+
if (!ccvException.Contains(cc[0])) {
320+
if (TryAddPhoneme(phonemes, syllable.tone, $"- {string.Join("", cc.Take(i))}", ValidateAlias($"- {string.Join("", cc.Take(i))}"))) {
321+
firstC = i - 1;
322+
break;
323+
}
320324
}
321325
}
322326
// [- C]
@@ -335,11 +339,11 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
335339
for (var i = firstC; i < cc.Length - 1; i++) {
336340
var ccv = $"{string.Join("", cc)} {v}";
337341
var ccv1 = string.Join("", cc.Skip(i)) + " " + v;
338-
if (syllable.CurrentWordCc.Length >= 2) {
342+
if (syllable.CurrentWordCc.Length >= 2 && !ccvException.Contains(cc[0])) {
339343
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
340344
basePhoneme = ccv;
341345
lastC = i;
342-
break;
346+
break;
343347
} else if (HasOto(ccv1, syllable.vowelTone) || HasOto(ValidateAlias(ccv1), syllable.vowelTone)) {
344348
basePhoneme = ccv1;
345349
}
@@ -348,17 +352,31 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
348352
basePhoneme = crv;
349353
}
350354
}
351-
// try [V C], [V CC], [V -][- C]
355+
// try [V C], [V CC], [VC C], [V -][- C]
352356
for (var i = lastC + 1; i >= 0; i--) {
353357
var vr = $"{prevV} -";
354-
var vcc = $"{prevV} {string.Join("", cc.Take(2))}"; // bug on vcc, sequence of [{vowel} v][v f][f {vowel}] turns into [{vowel} q/t][- {vowel}] which is odd
358+
var vc_c = $"{prevV}{string.Join(" ", cc.Take(2))}";
359+
var vcc = $"{prevV} {string.Join("", cc.Take(2))}";
355360
var vc = $"{prevV} {cc[0]}";
361+
// If a [CC V] alias exists for this syllable, use the [V CC] transition below
362+
bool CCV = false;
363+
if (syllable.CurrentWordCc.Length >= 2 && !ccvException.Contains(cc[0])) {
364+
if (HasOto($"{string.Join("", cc)} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{string.Join("", cc)} {v}"), syllable.vowelTone)) {
365+
CCV = true;
366+
}
367+
}
356368
if (i == 0 && (HasOto(vr, syllable.tone) || HasOto(ValidateAlias(vr), syllable.tone)) && !HasOto(vc, syllable.tone)) {
357369
phonemes.Add(vr);
358370
phonemes.Add($"- {cc[0]}");
359371
break;
360-
} else if (syllable.IsStartingCVWithMoreThanOneConsonant && syllable.CurrentWordCc.Length >= 2 && HasOto(vcc, syllable.tone) || HasOto(ValidateAlias(vcc), syllable.tone)) {
361-
phonemes.Add(vcc);
372+
} else if (CCV) {
373+
if (!(ccvException.Contains(cc[0]) && (HasOto(vcc, syllable.tone) || HasOto(ValidateAlias(vcc), syllable.tone)))) {
374+
phonemes.Add(vcc);
375+
firstC = 1;
376+
break;
377+
}
378+
} else if (vc_cException.Contains(cc[0]) && HasOto(vc_c, syllable.tone) || HasOto(ValidateAlias(vc_c), syllable.tone)) {
379+
phonemes.Add(vc_c);
362380
firstC = 1;
363381
break;
364382
} else if (HasOto(vc, syllable.tone) || HasOto(ValidateAlias(vc), syllable.tone)) {
@@ -394,13 +412,12 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
394412
cc1 = ValidateAlias(cc1);
395413
}
396414
// CC V on multiple consonants ex [s tr ao] (only if the word starts with a CC)
397-
if (syllable.CurrentWordCc.Length >= 2) {
415+
if (syllable.CurrentWordCc.Length >= 2 && !ccvException.Contains(cc[0])) {
398416
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
399417
basePhoneme = ccv;
400418
lastC = i;
401419
break;
402-
} else if ((HasOto(lcv, syllable.vowelTone) || HasOto(ValidateAlias(lcv), syllable.vowelTone))
403-
&& HasOto(cc1, syllable.vowelTone) && !HasOto(ccv, syllable.vowelTone)) {
420+
} else if ((HasOto(lcv, syllable.vowelTone) || HasOto(ValidateAlias(lcv), syllable.vowelTone)) && HasOto(cc1, syllable.vowelTone) && !HasOto(ccv, syllable.vowelTone)) {
404421
basePhoneme = lcv;
405422
}
406423
// [C1 C2C3]
@@ -435,13 +452,12 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
435452
cc1 = ValidateAlias(cc1);
436453
}
437454
// CC V on multiple consonants ex [s tr ao] (only if the word starts with a CC)
438-
if (syllable.CurrentWordCc.Length >= 2) {
455+
if (syllable.CurrentWordCc.Length >= 2 && !ccvException.Contains(cc[0])) {
439456
if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
440457
basePhoneme = ccv;
441458
lastC = i;
442459
break;
443-
} else if ((HasOto(lcv, syllable.vowelTone) || HasOto(ValidateAlias(lcv), syllable.vowelTone))
444-
&& HasOto(cc1, syllable.vowelTone) && !HasOto(ccv, syllable.vowelTone)) {
460+
} else if ((HasOto(lcv, syllable.vowelTone) || HasOto(ValidateAlias(lcv), syllable.vowelTone)) && HasOto(cc1, syllable.vowelTone) && !HasOto(ccv, syllable.vowelTone)) {
445461
basePhoneme = lcv;
446462
}
447463
// [C1 C2C3]
@@ -496,21 +512,23 @@ protected override List<string> ProcessEnding(Ending ending) {
496512
} else {
497513
phonemes.Add(vR);
498514
}
499-
} else if (ending.IsEndingVCWithOneConsonant) { // fix endings that ends with [v] turns into romaji vowel if the vb have them
515+
} else if (ending.IsEndingVCWithOneConsonant) { // fix endings that ends with [v] consonant turns into romaji vowel if the vb have them
500516
var vc = $"{v} {cc[0]}";
501517
var vcr = $"{v} {cc[0]}-";
502518
var vcr2 = $"{v}{cc[0]} -";
503519
var vcr3 = $"{v}{cc[0]}-";
504-
if (HasOto(vcr, ending.tone) || HasOto(ValidateAlias(vcr), ending.tone)) {
505-
phonemes.Add(vcr);
506-
} else if (!HasOto(vcr, ending.tone) && !HasOto(ValidateAlias(vcr), ending.tone) && (HasOto(vcr2, ending.tone) || HasOto(ValidateAlias(vcr2), ending.tone))) {
507-
phonemes.Add(vcr2);
508-
} else if (!HasOto(vcr2, ending.tone) && !HasOto(ValidateAlias(vcr2), ending.tone) && (HasOto(vcr3, ending.tone) || HasOto(ValidateAlias(vcr3), ending.tone))) {
509-
phonemes.Add(vcr3);
510-
} else {
511-
phonemes.Add(vc);
512-
if (vc.Contains(cc[0])) {
513-
phonemes.Add($"{cc[0]} -");
520+
if (!RomajiException.Contains(cc[0])) {
521+
if (HasOto(vcr, ending.tone) || HasOto(ValidateAlias(vcr), ending.tone)) {
522+
phonemes.Add(vcr);
523+
} else if (!HasOto(vcr, ending.tone) && !HasOto(ValidateAlias(vcr), ending.tone) && (HasOto(vcr2, ending.tone) || HasOto(ValidateAlias(vcr2), ending.tone))) {
524+
phonemes.Add(vcr2);
525+
} else if (!HasOto(vcr2, ending.tone) && !HasOto(ValidateAlias(vcr2), ending.tone) && (HasOto(vcr3, ending.tone) || HasOto(ValidateAlias(vcr3), ending.tone))) {
526+
phonemes.Add(vcr3);
527+
} else {
528+
phonemes.Add(vc);
529+
if (vc.Contains(cc[0])) {
530+
phonemes.Add($"{cc[0]} -");
531+
}
514532
}
515533
}
516534
} else {
@@ -521,44 +539,47 @@ protected override List<string> ProcessEnding(Ending ending) {
521539
var vcc3 = $"{v}{string.Join(" ", cc.Take(2))}";
522540
var vcc4 = $"{v} {string.Join("", cc.Take(2))}";
523541
var vc = $"{v} {cc[0]}";
524-
if (i == 0) {
525-
if (HasOto(vr, ending.tone) || HasOto(ValidateAlias(vr), ending.tone) && !HasOto(vc, ending.tone)) {
526-
phonemes.Add(vr);
527-
}
528-
break;
529-
} else if ((HasOto(vcc, ending.tone) || HasOto(ValidateAlias(vcc), ending.tone)) && lastC == 1) {
530-
phonemes.Add(vcc);
531-
firstC = 1;
532-
break;
533-
} else if ((HasOto(vcc2, ending.tone) || HasOto(ValidateAlias(vcc2), ending.tone)) && lastC == 1) {
534-
phonemes.Add(vcc2);
535-
firstC = 1;
536-
break;
537-
} else if (HasOto(vcc3, ending.tone) || HasOto(ValidateAlias(vcc3), ending.tone)) {
538-
phonemes.Add(vcc3);
539-
if (vcc3.EndsWith(cc.Last()) && lastC == 1) {
540-
if (affricates.Contains(cc.Last())) {
541-
TryAddPhoneme(phonemes, ending.tone, $"{cc.Last()} -", ValidateAlias($"{cc.Last()} -"), cc.Last(), ValidateAlias(cc.Last()));
542-
} else {
543-
TryAddPhoneme(phonemes, ending.tone, $"{cc.Last()} -", ValidateAlias($"{cc.Last()} -"));
542+
if (!RomajiException.Contains(cc[0])) {
543+
if (i == 0) {
544+
if (HasOto(vr, ending.tone) || HasOto(ValidateAlias(vr), ending.tone) && !HasOto(vc, ending.tone)) {
545+
phonemes.Add(vr);
544546
}
545-
}
546-
firstC = 1;
547-
break;
548-
} else if (HasOto(vcc4, ending.tone) || HasOto(ValidateAlias(vcc4), ending.tone)) {
549-
phonemes.Add(vcc4);
550-
if (vcc4.EndsWith(cc.Last()) && lastC == 1) {
551-
if (affricates.Contains(cc.Last())) {
552-
TryAddPhoneme(phonemes, ending.tone, $"{cc.Last()} -", ValidateAlias($"{cc.Last()} -"), cc.Last(), ValidateAlias(cc.Last()));
553-
} else {
554-
TryAddPhoneme(phonemes, ending.tone, $"{cc.Last()} -", ValidateAlias($"{cc.Last()} -"));
547+
break;
548+
} else if ((HasOto(vcc, ending.tone) || HasOto(ValidateAlias(vcc), ending.tone)) && lastC == 1 && !ccvException.Contains(cc[0])) {
549+
phonemes.Add(vcc);
550+
firstC = 1;
551+
break;
552+
} else if ((HasOto(vcc2, ending.tone) || HasOto(ValidateAlias(vcc2), ending.tone)) && lastC == 1 && !ccvException.Contains(cc[0])) {
553+
554+
phonemes.Add(vcc2);
555+
firstC = 1;
556+
break;
557+
} else if (HasOto(vcc3, ending.tone) || HasOto(ValidateAlias(vcc3), ending.tone) && !ccvException.Contains(cc[0])) {
558+
phonemes.Add(vcc3);
559+
if (vcc3.EndsWith(cc.Last()) && lastC == 1) {
560+
if (consonants.Contains(cc.Last())) {
561+
TryAddPhoneme(phonemes, ending.tone, $"{cc.Last()} -", ValidateAlias($"{cc.Last()} -"), cc.Last(), ValidateAlias(cc.Last()));
562+
} else {
563+
TryAddPhoneme(phonemes, ending.tone, $"{cc.Last()} -", ValidateAlias($"{cc.Last()} -"));
564+
}
555565
}
566+
firstC = 1;
567+
break;
568+
} else if (HasOto(vcc4, ending.tone) || HasOto(ValidateAlias(vcc4), ending.tone) && !ccvException.Contains(cc[0])) {
569+
phonemes.Add(vcc4);
570+
if (vcc4.EndsWith(cc.Last()) && lastC == 1) {
571+
if (consonants.Contains(cc.Last())) {
572+
TryAddPhoneme(phonemes, ending.tone, $"{cc.Last()} -", ValidateAlias($"{cc.Last()} -"), cc.Last(), ValidateAlias(cc.Last()));
573+
} else {
574+
TryAddPhoneme(phonemes, ending.tone, $"{cc.Last()} -", ValidateAlias($"{cc.Last()} -"));
575+
}
576+
}
577+
firstC = 1;
578+
break;
579+
} else {
580+
phonemes.Add(vc);
581+
break;
556582
}
557-
firstC = 1;
558-
break;
559-
} else {
560-
phonemes.Add(vc);
561-
break;
562583
}
563584
}
564585
for (var i = firstC; i < lastC; i++) {

0 commit comments

Comments
 (0)