1212from decimal import Decimal
1313from xml .etree import ElementTree
1414from collections import OrderedDict
15+ from returnn .datasets .generating import Vocabulary
1516from returnn .datasets .hdf import HDFDataset
1617from returnn .sprint .cache import open_file_archive , FileArchiveBundle , FileArchive
1718from returnn .util import better_exchook
@@ -29,6 +30,7 @@ class Deps:
2930 lexicon : Lexicon
3031 labels_with_eoc_hdf : HDFDataset
3132 corpus : Dict [str , BlissItem ]
33+ bpe_vocab : Vocabulary
3234
3335
3436def uopen (path : str , * args , ** kwargs ):
@@ -341,7 +343,7 @@ def handle_segment(deps: Deps, segment_name: str):
341343 phones_s = " " .join (cur_word_phones )
342344 print (f"end time { time_idx * deps .phone_alignment_ms_per_frame / 1000. } sec:" , lemma .orth [0 ], "/" , phones_s )
343345 if phones_s not in lemma .phon :
344- print (f"WARNING: phones { phones_s } not in lemma { lemma } ?" )
346+ raise Exception (f"Phones { phones_s } not in lemma { lemma } ?" )
345347
346348 cur_word_phones .clear ()
347349 word_idx += 1
@@ -351,13 +353,14 @@ def handle_segment(deps: Deps, segment_name: str):
351353def main ():
352354 """main"""
353355 arg_parser = argparse .ArgumentParser ()
354- arg_parser .add_argument ("--phone-alignments" , required = True )
356+ arg_parser .add_argument ("--phone-alignments" , required = True , help = "From RASR" )
355357 arg_parser .add_argument ("--phone-alignment-ms-per-frame" , type = float , default = 10.0 )
356- arg_parser .add_argument ("--allophone-file" , required = True )
357- arg_parser .add_argument ("--lexicon" , required = True )
358- arg_parser .add_argument ("--corpus" , required = True )
359- arg_parser .add_argument ("--labels-with-eoc" , required = True )
358+ arg_parser .add_argument ("--allophone-file" , required = True , help = "From RASR" )
359+ arg_parser .add_argument ("--lexicon" , required = True , help = "XML" )
360+ arg_parser .add_argument ("--corpus" , required = True , help = "Bliss XML" )
361+ arg_parser .add_argument ("--labels-with-eoc" , required = True , help = "HDF dataset" )
360362 arg_parser .add_argument ("--segment" , nargs = "*" )
363+ arg_parser .add_argument ("--bpe-vocab" , required = True , help = "BPE vocab dict" )
361364 args = arg_parser .parse_args ()
362365
363366 phone_alignments = open_file_archive (args .phone_alignments )
@@ -373,12 +376,15 @@ def main():
373376 for item in iter_bliss (args .corpus ):
374377 corpus [item .segment_name ] = item
375378
379+ bpe_vocab = Vocabulary (args .bpe_vocab , unknown_label = None )
380+
376381 deps = Deps (
377382 phone_alignments = phone_alignments ,
378383 phone_alignment_ms_per_frame = args .phone_alignment_ms_per_frame ,
379384 lexicon = lexicon ,
380385 labels_with_eoc_hdf = dataset ,
381386 corpus = corpus ,
387+ bpe_vocab = bpe_vocab ,
382388 )
383389
384390 for segment_name in args .segment or corpus :
0 commit comments