@@ -45,7 +45,7 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer {
4545 protected override Dictionary < string , string > GetDictionaryPhonemesReplacement ( ) => dictionaryReplacements ;
4646
4747 // For banks with missing vowels
48- private readonly Dictionary < string , string > missingVphonemes = "ax=ah,aa=ah,ae=ah,iy=ih,uh=uw,ix=ih,ux=uh" . Split ( ',' )
48+ private readonly Dictionary < string , string > missingVphonemes = "ax=ah,aa=ah,ae=ah,iy=ih,uh=uw,ix=ih,ux=uh,oh=ao,eu=uh,oe=ax,uy=uw,yw=uw,yx=iy,wx=uw " . Split ( ',' )
4949 . Select ( entry => entry . Split ( '=' ) )
5050 . Where ( parts => parts . Length == 2 )
5151 . Where ( parts => parts [ 0 ] != parts [ 1 ] )
@@ -68,14 +68,6 @@ public class ArpasingPlusPhonemizer : SyllableBasedPhonemizer {
6868 . ToDictionary ( parts => parts [ 0 ] , parts => parts [ 1 ] ) ;
6969 private bool isTimitPhonemes = false ;
7070
71- // other ARPAbet
72- private readonly Dictionary < string , string > otherArpaphonemes = "oh=ao,eu=uh,oe=ax,uy=uw,yw=uw,yx=iy,wx=uw" . Split ( ',' )
73- . Select ( entry => entry . Split ( '=' ) )
74- . Where ( parts => parts . Length == 2 )
75- . Where ( parts => parts [ 0 ] != parts [ 1 ] )
76- . ToDictionary ( parts => parts [ 0 ] , parts => parts [ 1 ] ) ;
77- private bool isOtherArpaPhonemes = false ;
78-
7971 private readonly Dictionary < string , string > vvExceptions =
8072 new Dictionary < string , string > ( ) {
8173 { "aw" , "w" } ,
@@ -211,17 +203,23 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
211203 var lastC = cc . Length - 1 ;
212204 var firstC = 0 ;
213205
214- if ( ! HasOto ( "b ax" , syllable . tone ) && ! HasOto ( "ax t" , syllable . tone ) && ! HasOto ( "ax" , syllable . tone ) ) {
215- isMissingVPhonemes = true ;
216- }
217- if ( ! HasOto ( "bw" , syllable . tone ) ) {
218- isMissingCPhonemes = true ;
206+ foreach ( var entry in missingVphonemes ) {
207+ if ( HasOto ( entry . Key , syllable . tone ) ) {
208+ isMissingVPhonemes = true ;
209+ break ;
210+ }
219211 }
220- if ( ! HasOto ( "gcl" , syllable . tone ) ) {
221- isTimitPhonemes = true ;
212+ foreach ( var entry in missingCphonemes ) {
213+ if ( HasOto ( entry . Key , syllable . tone ) ) {
214+ isMissingCPhonemes = true ;
215+ break ;
216+ }
222217 }
223- if ( ! HasOto ( "b oh" , syllable . tone ) && ! HasOto ( "ue t" , syllable . tone ) && ! HasOto ( "oh" , syllable . tone ) ) {
224- isOtherArpaPhonemes = true ;
218+ foreach ( var entry in timitphonemes ) {
219+ if ( HasOto ( entry . Key , syllable . tone ) ) {
220+ isTimitPhonemes = true ;
221+ break ;
222+ }
225223 }
226224
227225 // STARTING V
@@ -350,7 +348,6 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
350348 break ;
351349 } else {
352350 basePhoneme = $ "{ cc . Last ( ) } { v } ";
353- break ;
354351 }
355352 }
356353 }
@@ -359,82 +356,95 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
359356 var vr = $ "{ prevV } -";
360357 var vcc = $ "{ prevV } { string . Join ( "" , cc . Take ( 2 ) ) } "; // bug on vcc, sequence of [{vowel} v][v f][f {vowel}] turns in to [{vowel} q/t][- {vowel}] which is odd
361358 var vc = $ "{ prevV } { cc [ 0 ] } ";
362- if ( i == 0 ) {
363- if ( HasOto ( vr , syllable . tone ) || HasOto ( ValidateAlias ( vr ) , syllable . tone ) && ! HasOto ( vc , syllable . tone ) ) {
364- phonemes . Add ( vr ) ;
365- phonemes . Add ( $ "- { cc [ 0 ] } " ) ;
366- }
367- } else if ( HasOto ( vcc , syllable . tone ) || HasOto ( ValidateAlias ( vcc ) , syllable . tone ) ) {
359+ if ( i == 0 && ( HasOto ( vr , syllable . tone ) || HasOto ( ValidateAlias ( vr ) , syllable . tone ) ) && ! HasOto ( vc , syllable . tone ) ) {
360+ phonemes . Add ( vr ) ;
361+ phonemes . Add ( $ "- { cc [ 0 ] } " ) ;
362+ break ;
363+ }
364+ if ( HasOto ( vcc , syllable . tone ) || HasOto ( ValidateAlias ( vcc ) , syllable . tone ) ) {
368365 phonemes . Add ( vcc ) ;
369366 firstC = 1 ;
370367 break ;
371- } else if ( HasOto ( vc , syllable . tone ) || HasOto ( ValidateAlias ( vc ) , syllable . tone ) ) {
368+ }
369+ if ( HasOto ( vc , syllable . tone ) || HasOto ( ValidateAlias ( vc ) , syllable . tone ) ) {
372370 phonemes . Add ( vc ) ;
373371 break ;
374- } else {
375- continue ;
376372 }
377373 }
378374 }
379375 for ( var i = firstC ; i < lastC ; i ++ ) {
380- var rccv = $ "- { string . Join ( "" , cc ) } { v } ";
381- var ccv = string . Join ( "" , cc . Skip ( i ) ) + " " + v ;
376+ var ccv = $ "{ string . Join ( "" , cc . Skip ( i ) ) } { v } ";
382377 var cc1 = $ "{ string . Join ( " " , cc . Skip ( i ) ) } ";
383- var crv = $ "{ cc . Last ( ) } { v } ";
384- // if (HasOto($"{cc[i]} {string.Join("", cc.Skip(i + 1))}", syllable.tone)) {
378+ var lcv = $ "{ cc . Last ( ) } { v } ";
379+ if ( ! HasOto ( cc1 , syllable . tone ) ) {
380+ cc1 = ValidateAlias ( cc1 ) ;
381+ }
385382 // [C1 C2C3]
386- // cc1 = ($"{cc[i]} {string.Join("", cc.Skip(i + 1))}");
387- if ( ! HasOto ( $ "{ cc [ i ] } { string . Join ( "" , cc . Skip ( i + 1 ) ) } ", syllable . tone ) ) {
388- // [C1 C2]
389- cc1 = $ "{ cc [ i ] } { cc [ i + 1 ] } ";
383+ if ( HasOto ( $ "{ cc [ i ] } { string . Join ( "" , cc . Skip ( i + 1 ) ) } ", syllable . tone ) ) {
384+ cc1 = ( $ "{ cc [ i ] } { string . Join ( "" , cc . Skip ( i + 1 ) ) } ") ;
390385 }
386+ // [C1 C2]
391387 if ( ! HasOto ( cc1 , syllable . tone ) ) {
392- // [C1 C2]
393388 cc1 = $ "{ cc [ i ] } { cc [ i + 1 ] } ";
394389 }
395390 if ( ! HasOto ( cc1 , syllable . tone ) ) {
396391 cc1 = ValidateAlias ( cc1 ) ;
397392 }
398393 // CC FALLBACKS
399- if ( ! HasOto ( cc1 , syllable . tone ) && ! HasOto ( $ "{ cc [ i ] } { cc [ i + 1 ] } ", syllable . tone ) ) {
394+ if ( ! HasOto ( cc1 , syllable . tone ) || ! HasOto ( ValidateAlias ( cc1 ) , syllable . tone ) && ! HasOto ( $ "{ cc [ i ] } { cc [ i + 1 ] } ", syllable . tone ) ) {
400395 // [C1 -] [- C2]
401396 cc1 = $ "- { cc [ i + 1 ] } ";
402397 phonemes . Add ( $ "{ cc [ i ] } -") ;
403398 }
404399 if ( ! HasOto ( cc1 , syllable . tone ) ) {
405400 cc1 = ValidateAlias ( cc1 ) ;
406401 }
407- // CC V on multiple consonants ex [s tr ao] (fix causes the sequence [vowel v][v f][f vowel] to be [vowel t][v f][- vowel]) also I now clue how to code [C CC] and [CC V] existing together sorry
408- // if (HasOto(ccv, syllable.vowelTone) || HasOto(ValidateAlias(ccv), syllable.vowelTone)) {
409- // basePhoneme = ccv;
410- // lastC = i;
411- if ( ( HasOto ( crv , syllable . vowelTone ) || HasOto ( ValidateAlias ( crv ) , syllable . vowelTone ) ) ) {
412- basePhoneme = crv ;
402+ // CC V on multiple consonants ex [s tr ao]
403+ if ( HasOto ( ccv , syllable . vowelTone ) || HasOto ( ValidateAlias ( ccv ) , syllable . vowelTone ) ) {
404+ basePhoneme = ccv ;
405+ lastC = i ;
406+ break ;
407+ } else if ( ( HasOto ( lcv , syllable . vowelTone ) || HasOto ( ValidateAlias ( lcv ) , syllable . vowelTone ) ) && HasOto ( cc1 , syllable . vowelTone ) && ! cc1 . Contains ( $ "{ cc [ i ] } { cc [ i + 1 ] } ") ) {
408+ basePhoneme = lcv ;
413409 }
414410 if ( i + 1 < lastC ) {
415- var cc2 = $ "{ string . Join ( "" , cc . Skip ( i + 1 ) ) } ";
416- if ( ! HasOto ( cc2 , syllable . tone ) ) {
417- // [C1 C2]
418- cc2 = $ "{ cc [ i ] } { cc [ i + 1 ] } ";
411+ if ( ! HasOto ( cc1 , syllable . tone ) ) {
412+ cc1 = ValidateAlias ( cc1 ) ;
413+ }
414+ // [C1 C2C3]
415+ if ( HasOto ( $ "{ cc [ i ] } { string . Join ( "" , cc . Skip ( i + 1 ) ) } ", syllable . tone ) ) {
416+ cc1 = ( $ "{ cc [ i ] } { string . Join ( "" , cc . Skip ( i + 1 ) ) } ") ;
419417 }
420- if ( ! HasOto ( cc2 , syllable . tone ) ) {
421- cc2 = ValidateAlias ( cc2 ) ;
418+ // [C1 C2]
419+ if ( ! HasOto ( cc1 , syllable . tone ) ) {
420+ cc1 = $ "{ cc [ i ] } { cc [ i + 1 ] } ";
421+ }
422+ if ( ! HasOto ( cc1 , syllable . tone ) ) {
423+ cc1 = ValidateAlias ( cc1 ) ;
422424 }
423425 // CC FALLBACKS
424- if ( ! HasOto ( cc2 , syllable . tone ) && ! HasOto ( $ "{ cc [ i ] } { cc [ i + 1 ] } ", syllable . tone ) ) {
426+ if ( ! HasOto ( cc1 , syllable . tone ) || ! HasOto ( ValidateAlias ( cc1 ) , syllable . tone ) && ! HasOto ( $ "{ cc [ i ] } { cc [ i + 1 ] } ", syllable . tone ) ) {
425427 // [C1 -] [- C2]
426- cc2 = $ "- { cc [ i + 1 ] } ";
428+ cc1 = $ "- { cc [ i + 1 ] } ";
427429 phonemes . Add ( $ "{ cc [ i ] } -") ;
428430 }
429- if ( ! HasOto ( cc2 , syllable . tone ) ) {
430- cc2 = ValidateAlias ( cc2 ) ;
431+ if ( ! HasOto ( cc1 , syllable . tone ) ) {
432+ cc1 = ValidateAlias ( cc1 ) ;
433+ }
434+ // CC V on multiple consonants ex [s tr ao]
435+ if ( HasOto ( ccv , syllable . vowelTone ) || HasOto ( ValidateAlias ( ccv ) , syllable . vowelTone ) ) {
436+ basePhoneme = ccv ;
437+ lastC = i ;
438+ break ;
439+ } else if ( ( HasOto ( lcv , syllable . vowelTone ) || HasOto ( ValidateAlias ( lcv ) , syllable . vowelTone ) ) && HasOto ( cc1 , syllable . vowelTone ) && ! cc1 . Contains ( $ "{ cc [ i ] } { cc [ i + 1 ] } ") ) {
440+ basePhoneme = lcv ;
431441 }
432- if ( HasOto ( cc1 , syllable . tone ) && HasOto ( cc2 , syllable . tone ) && ! cc1 . Contains ( $ "{ string . Join ( "" , cc . Skip ( i ) ) } ") ) {
442+ if ( HasOto ( cc1 , syllable . tone ) && HasOto ( cc1 , syllable . tone ) && ! cc1 . Contains ( $ "{ string . Join ( "" , cc . Skip ( i ) ) } ") ) {
433443 // like [V C1] [C1 C2] [C2 C3] [C3 ..]
434444 phonemes . Add ( cc1 ) ;
435445 } else if ( TryAddPhoneme ( phonemes , syllable . tone , cc1 ) ) {
436446 // like [V C1] [C1 C2] [C2 ..]
437- if ( cc1 . Contains ( $ "{ string . Join ( "" , cc . Skip ( i + 1 ) ) } ") ) {
447+ if ( cc1 . Contains ( $ "{ string . Join ( " " , cc . Skip ( i + 1 ) ) } ") ) {
438448 i ++ ;
439449 }
440450 } else {
@@ -443,10 +453,7 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
443453 }
444454 } else {
445455 TryAddPhoneme ( phonemes , syllable . tone , cc1 ) ;
446-
447-
448456 }
449-
450457 }
451458 phonemes . Add ( basePhoneme ) ;
452459 return phonemes ;
@@ -498,9 +505,10 @@ protected override List<string> ProcessEnding(Ending ending) {
498505 var vcc4 = $ "{ v } { string . Join ( "" , cc . Take ( 2 ) ) } ";
499506 var vc = $ "{ v } { cc [ 0 ] } ";
500507 if ( i == 0 ) {
501- if ( HasOto ( vr , ending . tone ) || HasOto ( ValidateAlias ( vr ) , ending . tone ) ) {
508+ if ( HasOto ( vr , ending . tone ) || HasOto ( ValidateAlias ( vr ) , ending . tone ) && ! HasOto ( vc , ending . tone ) ) {
502509 phonemes . Add ( vr ) ;
503510 }
511+ break ;
504512 } else if ( ( HasOto ( vcc , ending . tone ) || HasOto ( ValidateAlias ( vcc ) , ending . tone ) ) && lastC == 1 ) {
505513 phonemes . Add ( vcc ) ;
506514 firstC = 1 ;
@@ -532,12 +540,7 @@ protected override List<string> ProcessEnding(Ending ending) {
532540 firstC = 1 ;
533541 break ;
534542 } else {
535- // Transform 'v' to vowel-like representation
536- if ( cc [ 0 ] == "v" ) {
537- phonemes . Add ( $ "{ v } -") ;
538- } else {
539- phonemes . Add ( vc ) ;
540- }
543+ phonemes . Add ( vc ) ;
541544 break ;
542545 }
543546 }
@@ -733,9 +736,8 @@ protected override string ValidateAlias(string alias) {
733736 }
734737
735738 // VALIDATE ALIAS DEPENDING ON METHOD
736- if ( isMissingVPhonemes || isMissingCPhonemes || isTimitPhonemes || isOtherArpaPhonemes ) {
737- foreach ( var syllable in missingVphonemes . Concat ( missingCphonemes ) . Concat ( timitphonemes
738- . Concat ( otherArpaphonemes ) ) ) {
739+ if ( isMissingVPhonemes || isMissingCPhonemes || isTimitPhonemes ) {
740+ foreach ( var syllable in missingVphonemes . Concat ( missingCphonemes ) . Concat ( timitphonemes ) ) {
739741 alias = alias . Replace ( syllable . Key , syllable . Value ) ;
740742 }
741743 }
0 commit comments