Skip to content

Commit 4d45d7d

Browse files
committed
🐸 eos_id is now added at the end of the sentence automatically; removed all explicit eos_id additions.
1 parent 8a5f63a commit 4d45d7d

File tree

6 files changed

+14
-20
lines changed

6 files changed

+14
-20
lines changed

‎examples/tacotron2/extract_duration.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,8 @@ def main():
165165

166166
for i, alignment in enumerate(alignment_historys):
167167
real_char_length = (
168-
input_lengths[i].numpy() - 1
169-
) # minus 1 because char have eos tokens.
168+
input_lengths[i].numpy()
169+
)
170170
real_mel_length = real_mel_lengths[i].numpy()
171171
alignment_mel_length = int(np.ceil(real_mel_length / config["tacotron2_params"]["reduction_factor"]))
172172
alignment = alignment[:real_char_length, :alignment_mel_length]

‎examples/tacotron2/tacotron_dataset.py‎

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,6 @@
2323
import tensorflow as tf
2424

2525
from tensorflow_tts.datasets.abstract_dataset import AbstractDataset
26-
from tensorflow_tts.processor.ljspeech import symbols as ljspeech_symbols
27-
from tensorflow_tts.utils.korean import symbols as kss_symbols
28-
from tensorflow_tts.processor.baker import symbols as baker_symbols
2926
from tensorflow_tts.utils import find_files
3027

3128

@@ -103,13 +100,6 @@ def __init__(
103100
suffix = charactor_query[1:]
104101
utt_ids = [os.path.basename(f).replace(suffix, "") for f in charactor_files]
105102

106-
eos_token_dict = {
107-
"ljspeech": len(ljspeech_symbols) - 1,
108-
"kss": len(kss_symbols) - 1,
109-
"baker": len(baker_symbols) - 1
110-
}
111-
self.eos_token_id = eos_token_dict[dataset]
112-
113103
# set global params
114104
self.utt_ids = utt_ids
115105
self.mel_files = mel_files
@@ -125,7 +115,7 @@ def __init__(
125115
self.ga_pad_value = ga_pad_value
126116
self.g = g
127117
self.use_fixed_shapes = use_fixed_shapes
128-
self.max_char_length = np.max(char_lengths) + 1 # +1 for eos
118+
self.max_char_length = np.max(char_lengths)
129119

130120
if np.max(mel_lengths) % self.reduction_factor == 0:
131121
self.max_mel_length = np.max(mel_lengths)
@@ -148,10 +138,6 @@ def generator(self, utt_ids):
148138
mel_length = self.mel_lengths[i]
149139
char_length = self.char_lengths[i]
150140

151-
# add eos token for charactor since charactor is original token.
152-
charactor = np.concatenate([charactor, [self.eos_token_id]], -1)
153-
char_length += 1
154-
155141
# padding mel to make its length is multiple of reduction factor.
156142
real_mel_length = mel_length
157143
remainder = mel_length % self.reduction_factor

‎tensorflow_tts/configs/fastspeech.py‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from tensorflow_tts.processor.ljspeech import LJSPEECH_SYMBOLS as lj_symbols
2020
from tensorflow_tts.processor.kss import KSS_SYMBOLS as kss_symbols
2121
from tensorflow_tts.processor.baker import BAKER_SYMBOLS as bk_symbols
22+
from tensorflow_tts.processor.libritts import LIBRITTS_SYMBOLS as lbri_symbols
2223

2324

2425
SelfAttentionParams = collections.namedtuple(
@@ -91,6 +92,8 @@ def __init__(
9192
self.vocab_size = len(kss_symbols)
9293
elif dataset == "baker":
9394
self.vocab_size = len(bk_symbols)
95+
elif dataset == "libritts":
96+
self.vocab_size = len(lbri_symbols)
9497
else:
9598
raise ValueError("No such dataset: {}".format(dataset))
9699
self.initializer_range = initializer_range

‎tensorflow_tts/configs/tacotron2.py‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from tensorflow_tts.processor.ljspeech import LJSPEECH_SYMBOLS as lj_symbols
1818
from tensorflow_tts.processor.kss import KSS_SYMBOLS as kss_symbols
1919
from tensorflow_tts.processor.baker import BAKER_SYMBOLS as bk_symbols
20+
from tensorflow_tts.processor.libritts import LIBRITTS_SYMBOLS as lbri_symbols
2021

2122

2223
class Tacotron2Config(object):
@@ -61,6 +62,8 @@ def __init__(
6162
self.vocab_size = len(kss_symbols)
6263
elif dataset == 'baker':
6364
self.vocab_size = len(bk_symbols)
65+
elif dataset == "libritts":
66+
self.vocab_size = len(lbri_symbols)
6467
else:
6568
raise ValueError("No such dataset: {}".format(dataset))
6669
self.embedding_hidden_size = embedding_hidden_size

‎tensorflow_tts/processor/base_processor.py‎

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def __getattr__(self, name: str) -> Union[str, int]:
7272

7373
def create_speaker_map(self):
7474
"""
75-
Create speaker map for dataset
75+
Create speaker map for dataset.
7676
"""
7777
sp_id = 0
7878
for i in self.items:
@@ -94,7 +94,8 @@ def create_symbols(self):
9494
def create_items(self):
9595
"""
9696
Method used to create items from training file
97-
items struct => text, wav_file_path, speaker_name
97+
items struct example => text, wav_file_path, speaker_name.
98+
Note that the speaker_name should be last.
9899
"""
99100
with open(
100101
os.path.join(self.data_dir, self.train_f_name), mode="r", encoding="utf-8"

‎test/files/mapper.json‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@
1212
"0": "a",
1313
"1": "b",
1414
"2": "@ph"
15-
}
15+
},
16+
"processor_name": "TestProcessor"
1617
}

0 commit comments

Comments
 (0)