Final Code

Raj2503 · Raj2503 · commit 8b1620248f5e · 2021-11-09T21:40:52.000+05:30
diff --git a/SpeechSynthesis.py b/SpeechSynthesis.py
@@ -2,12 +2,9 @@
 
 if __name__ == "__main__":
 	# message = input("Message: ")
-	# message = "को"
-	message = "मैं सार्थक हूँ"
+	message = "मैं राज हूं"
 
 	# for i in message:
 	# 	print(i)
 
-
-
-	tts.text_to_speech(message, debug=True, use_pronunciation_dict=True)
+	tts.text_to_speech(message, debug=True, use_pronunciation_dict=True)
diff --git a/Utilities/convertphonemes.py b/Utilities/convertphonemes.py
@@ -1,6 +1,5 @@
-# convertphones.py
 # Takes list of words and finds appropriate phonemes
-# If phoneme not found, an educated guess is made
+# If phoneme not found, an (un)educated guess is made
 
 from Utilities import phonemes, util
 import ast, re
@@ -32,68 +31,9 @@ def phoneme_blocks_to_list(blocks):
 
 
 def phoneme_scan(word):
-	# while re.search("[ट]", word): # Keep replacing letters with phonemes in brackets [] until there are no more letters
-		# Special patterns
-		# while re.search("[a-zA-Z.,;!?]", word):
+	# Takes a word and returns a string of phonemes
 	while re.search("[अ-ॐ.,;!?]", word):
 
-		# word = replace_with_phoneme(word, r'iew', (phonemes.CONS_Y, phonemes.VOWEL_OO,phonemes.CONS_W))
-		
-		# word = replace_with_phoneme(word, r'oo', (phonemes.VOWEL_OO,))
-		# word = replace_with_phoneme(word, r'ou', (phonemes.VOWEL_OO,))
-		# word = replace_with_phoneme(word, r'ea', (phonemes.VOWEL_II,))
-		# word = replace_with_phoneme(word, r'ee', (phonemes.VOWEL_II,))
-
-		# word = replace_with_phoneme(word, r'gg', (phonemes.CONS_J,))
-		# word = replace_with_phoneme(word, r'dd', (phonemes.CONS_D,))
-		# word = replace_with_phoneme(word, r'ph', (phonemes.CONS_F,))
-		# word = replace_with_phoneme(word, r'll', (phonemes.CONS_L,))
-		# word = replace_with_phoneme(word, r'ss', (phonemes.CONS_S,))
-		# word = replace_with_phoneme(word, r'nn', (phonemes.CONS_N,))
-		# word = replace_with_phoneme(word, r'ch', (phonemes.CONS_CH,))
-		# word = replace_with_phoneme(word, r'sh', (phonemes.CONS_SH,))
-		# word = replace_with_phoneme(word, r'th', (phonemes.CONS_TH,))
-		# word = replace_with_phoneme(word, r'ck', (phonemes.CONS_K,))
-
-# 		# Default letters
-# क
-# ख
-# ग
-# घ
-# ङ
-# च
-# छ
-# ज
-# झ
-# ञ
-# ट
-# ठ
-# ड
-# ढ
-# ण
-# त
-# थ
-# द
-# ध
-# न
-# ऩ
-# प
-# फ
-# ब
-# भ
-# म
-# य
-# र
-# ऱ
-# ल
-# ळ
-# ऴ
-# व
-# श
-# ष
-# स
-# ह
-
 		word = replace_with_phoneme(word, "अ", (phonemes.VOWEL_U,))
 		word = replace_with_phoneme(word, "आ", (phonemes.VOWEL_A,))
 		word = replace_with_phoneme(word, "ा", (phonemes.VOWEL_A,))
@@ -164,11 +104,6 @@ def phoneme_scan(word):
 
 		word = replace_with_phoneme(word, "ज़", (phonemes.CONS_Z,))
 
-
-		# word = replace_with_phoneme(word, "q", (phonemes.CONS_K,))
-
-		# word = replace_with_phoneme(word, "w", (phonemes.CONS_W,))
-		# word = replace_with_phoneme(word, "x", (phonemes.CONS_K, phonemes.CONS_S))
 		word = replace_with_phoneme(word, ".", (phonemes.PUNC_PERIOD,))
 		word = replace_with_phoneme(word, ",", (phonemes.PUNC_COMMA,))
 		word = replace_with_phoneme(word, ";", (phonemes.PUNC_COMMA,))
diff --git a/Utilities/convertsounds.py b/Utilities/convertsounds.py
@@ -1,4 +1,3 @@
-# convertsounds.py
 # Matches each phoneme from list with appropriate wav file
 
 from Utilities import phonemes
diff --git a/Utilities/phonemes.py b/Utilities/phonemes.py
@@ -1,53 +1,51 @@
-# phonemes.py
 # List of all sounds used
 
-VOWEL_U 	= 0  # cup, luck        अ
-VOWEL_A 	= 1  # arm, father      आ
-VOWEL_AE 	= 2  # cat, black       ए
-VOWEL_E 	= 3  # met, bed         ऐ
-VOWEL_EE	= 4  # away, cinema
-VOWEL_UU	= 5  # turn, learn      
-VOWEL_I		= 6  # hit, sitting     इ
-VOWEL_II	= 7  # see, heat        ई
-VOWEL_O		= 8  # hot, rock        
-VOWEL_CA	= 9  # call, four       औ
-VOWEL_PU	= 10 # put, could       उ
-VOWEL_OO	= 11 # blue, food       ऊ 
-VOWEL_AI	= 12 # five, eye
-VOWEL_AU	= 13 # now, out
-VOWEL_EI	= 14 # say, eight
-VOWEL_OH	= 15 # go, home         ओ
-VOWEL_OI	= 16 # boy, join
-VOWEL_ER	= 17 # where, air
-VOWEL_IE 	= 18 # near, here
-VOWEL_UE	= 19 # pure, tourist        य
+VOWEL_U 	= 0  #     अ
+VOWEL_A 	= 1  #     आ
+VOWEL_AE 	= 2  #     ए
+VOWEL_E 	= 3  #     ऐ
+VOWEL_EE	= 4  #    away
+VOWEL_UU	= 5  #    turn     
+VOWEL_I		= 6  #     इ
+VOWEL_II	= 7  #     ई
+VOWEL_O		= 8  #     hot  
+VOWEL_CA	= 9  #    औ
+VOWEL_PU	= 10 #    उ
+VOWEL_OO	= 11 #    ऊ 
+VOWEL_AI	= 12 #   five
+VOWEL_AU	= 13 #    out
+VOWEL_EI	= 14 #   eight
+VOWEL_OH	= 15 #    ओ
+VOWEL_OI	= 16 #    join
+VOWEL_ER	= 17 #    where
+VOWEL_IE 	= 18 #    near
+VOWEL_UE	= 19 #     य
 
-CONS_B		= 20 # bad, lab             ब
-CONS_D		= 21 # did, lady            ड  ढ
-CONS_F		= 22 # find, if             फ           
-CONS_G		= 23 # give, flag           ग   घ
-CONS_H		= 24 # how, hello           ह
-CONS_Y		= 25 # yes, yellow          य
-CONS_K		= 26 # cat, black           ख क
-CONS_L		= 27 # leg, litte           ल
+CONS_B		= 20 #     ब
+CONS_D		= 21 #     ड  ढ
+CONS_F		= 22 #     फ           
+CONS_G		= 23 #     ग   घ
+CONS_H		= 24 #     ह
+CONS_Y		= 25 #     य
+CONS_K		= 26 #     ख क
+CONS_L		= 27 #     ल
 
-CONS_M		= 28 # man, lemon           म
-# todo
-CONS_N		= 29 # no, ten              न  ञ  ण
-CONS_NG		= 30 # sing, finger         ं
-CONS_P		= 31 # pet, map             प भ
-CONS_R		= 32 # red, try             र
-CONS_S 		= 33 # sun, miss            स
-CONS_SH		= 34 # she, crash           श ष
-CONS_T		= 35 # tea, getting         ट
-CONS_CH		= 36 # check, church        च
-CONS_TH		= 37 # think, both          थ  ठ ध
-CONS_TH2	= 38 # this, mother         द 
-CONS_V		= 39 # voice, five          व
-CONS_W		= 40 # wet, window          
-CONS_Z		= 41 # zoo, lazy            ज़
-CONS_JJ		= 42 # pleasure, vision     छ झ
-CONS_J 		= 43 # just, large          ज
+CONS_M		= 28 #     म
+CONS_N		= 29 #     न  ञ  ण
+CONS_NG		= 30 #     ं
+CONS_P		= 31 #     प भ
+CONS_R		= 32 #     र
+CONS_S 		= 33 #     स
+CONS_SH		= 34 #     श ष
+CONS_T		= 35 #     ट
+CONS_CH		= 36 #     च
+CONS_TH		= 37 #     थ  ठ ध
+CONS_TH2	= 38 #     द 
+CONS_V		= 39 #     व
+CONS_W		= 40 #     wet       
+CONS_Z		= 41 #     ज़
+CONS_JJ		= 42 #     छ झ
+CONS_J 		= 43 #     ज
 
 PUNC_PERIOD 	= 44 # period (long pause) 0.5
 PUNC_COMMA 		= 44 # comma (short pause) 0.3
diff --git a/Utilities/tts.py b/Utilities/tts.py
@@ -1,7 +1,5 @@
-# tts.py
 # Pulls all three pieces together: convertwords, convertphonemes, convertsounds
 
-DATABASE_NAME = "data/pronunciation.db"
 OUTPUT_FILE = "output.wav"
 
 from Utilities import convertphonemes, convertsounds, util
diff --git a/Utilities/util.py b/Utilities/util.py
@@ -3,17 +3,15 @@
 import csv, re
 
 # Convert raw user input to a list of words
-# TODO: convert numerical values to words
 def tokenize(data):
-  # return re.findall(r"[\w]+|[.,!?;|]",data.lower())
   word = []
   word = data.split(" ")
   word = re.split('; |, |\*|\n| ',data)
   return word
 	
 
 def get_pronunciation(word):
-  with open(PRONUNCIATION_CSV_PATH, 'rt') as f:
+  with open(PRONUNCIATION_CSV_PATH, 'rt',encoding="utf8") as f:
     reader = csv.reader(f, delimiter=',')
     for row in reader:
       if word == row[0]:
diff --git a/data/pronunciation.csv b/data/pronunciation.csv
@@ -1,19 +1,2 @@
-me, 28 7
-i, 12
-like, 27 12 26
-and, 2 29 21
-my, 28 12
-name, 29 14 28
-is, 6 41
-dad, 21 2 21
-am, 2 28
-the, 38 4
-used, 25 11 41 21
-own, 15 40 29
-voice, 39 16 33
-to, 35 11
-create, 26 32 7 14 35
-concatenative, 26 8 29 26 1 35 6 29 3 35 6 39
-speech, 33 31 7 36
-synthesis, 33 6 29 37 6 33 6 33
-demonstration, 21 3 28 8 29 33 35 32 14 34 6 29
+राज, 32 1 43 44
+सार्थक, 33 1 32 37 26 44

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-# convertsounds.py`
`2`	`1`	`# Matches each phoneme from list with appropriate wav file`
`3`	`2`
`4`	`3`	`from Utilities import phonemes`