diff --git a/common/setups/rasr/hybrid_decoder.py b/common/setups/rasr/hybrid_decoder.py index 4923f0640..74b3eaadc 100644 --- a/common/setups/rasr/hybrid_decoder.py +++ b/common/setups/rasr/hybrid_decoder.py @@ -23,6 +23,7 @@ CombineLmRasrConfig, ) from .util.decode import ( + DevRecognitionParameters, RecognitionParameters, SearchJobArgs, Lattice2CtmArgs, @@ -47,7 +48,7 @@ class HybridDecoder(BaseDecoder): def __init__( self, rasr_binary_path: tk.Path, - rasr_arch: "str" = "linux-x86_64-standard", + rasr_arch: str = "linux-x86_64-standard", compress: bool = False, append: bool = False, unbuffered: bool = False, @@ -155,8 +156,9 @@ def recognition( tf_fwd_input_name: str = "tf-fwd-input", ): """ - run the recognitino, consisting of search, lattice to ctm, and scoring + run the recognition, consisting of search, lattice to ctm, and scoring + :param name: decoding name :param returnn_config: RETURNN config for recognition :param checkpoints: epoch to model checkpoint mapping :param recognition_parameters: keys are the corpus keys so that recog params can be set for specific eval sets. diff --git a/common/setups/rasr/hybrid_system.py b/common/setups/rasr/hybrid_system.py index 8ad96549f..34acb12c8 100644 --- a/common/setups/rasr/hybrid_system.py +++ b/common/setups/rasr/hybrid_system.py @@ -1,4 +1,4 @@ -__all__ = ["HybridArgs", "HybridSystem"] +__all__ = ["HybridSystem"] import copy import itertools @@ -21,17 +21,21 @@ add_tf_flow_to_base_flow, ) from i6_core.util import MultiPath, MultiOutputPath +from i6_core.mm import CreateDummyMixturesJob +from i6_core.returnn import ReturnnComputePriorJobV2 from .nn_system import NnSystem +from .hybrid_decoder import HybridDecoder from .util import ( RasrInitArgs, ReturnnRasrDataInput, - OggZipHdfDataInput, HybridArgs, NnRecogArgs, RasrSteps, NnForcedAlignArgs, + ReturnnTrainingJobArgs, + AllowedReturnnTrainingDataInput, ) # -------------------- Init -------------------- @@ -90,9 +94,15 @@ def __init__( self.cv_corpora = [] self.devtrain_corpora = [] - self.train_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]] - self.cv_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]] - self.devtrain_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]] + self.train_input_data = ( + None + ) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]] + self.cv_input_data = ( + None + ) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]] + self.devtrain_input_data = ( + None + ) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]] self.dev_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]] self.test_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]] @@ -128,9 +138,9 @@ def _add_output_alias_for_train_job( def init_system( self, rasr_init_args: RasrInitArgs, - train_data: Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]], - cv_data: Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]], - devtrain_data: Optional[Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]]] = None, + train_data: Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]], + cv_data: Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]], + devtrain_data: Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]] = None, dev_data: Optional[Dict[str, ReturnnRasrDataInput]] = None, test_data: Optional[Dict[str, ReturnnRasrDataInput]] = None, train_cv_pairing: Optional[List[Tuple[str, ...]]] = None, # List[Tuple[trn_c, cv_c, name, dvtr_c]] @@ -211,21 +221,17 @@ def generate_lattices(self): def returnn_training( self, - name, - returnn_config, - nn_train_args, + name: str, + returnn_config: returnn.ReturnnConfig, + nn_train_args: Union[Dict, ReturnnTrainingJobArgs], train_corpus_key, cv_corpus_key, devtrain_corpus_key=None, - ): - assert isinstance(returnn_config, returnn.ReturnnConfig) - - returnn_config.config["train"] = self.train_input_data[train_corpus_key].get_data_dict() - returnn_config.config["dev"] = self.cv_input_data[cv_corpus_key].get_data_dict() - if devtrain_corpus_key is not None: - returnn_config.config["eval_datasets"] = { - "devtrain": self.devtrain_input_data[devtrain_corpus_key].get_data_dict() - } + ) -> returnn.ReturnnTrainingJob: + if nn_train_args.returnn_root is None: + nn_train_args.returnn_root = self.returnn_root + if nn_train_args.returnn_python_exe is None: + nn_train_args.returnn_python_exe = self.returnn_python_exe train_job = returnn.ReturnnTrainingJob( returnn_config=returnn_config, @@ -346,7 +352,7 @@ def nn_recognition( name: str, returnn_config: returnn.ReturnnConfig, checkpoints: Dict[int, returnn.Checkpoint], - acoustic_mixture_path: tk.Path, # TODO maybe Optional if prior file provided -> automatically construct dummy file + train_job: Union[returnn.ReturnnTrainingJob, returnn.ReturnnRasrTrainingJob], prior_scales: List[float], pronunciation_scales: List[float], lm_scales: List[float], @@ -384,15 +390,31 @@ def nn_recognition( for pron, lm, prior, epoch in itertools.product(pronunciation_scales, lm_scales, prior_scales, epochs): assert epoch in checkpoints.keys() - assert acoustic_mixture_path is not None - - if use_epoch_for_compile: - tf_graph = self.nn_compile_graph(name, returnn_config, epoch=epoch) + acoustic_mixture_path = CreateDummyMixturesJob( + num_mixtures=returnn_config.config["extern_data"]["classes"]["dim"], + num_features=returnn_config.config["extern_data"]["data"]["dim"], + ).out_mixtures + lmgc_scorer = rasr.GMMFeatureScorer(acoustic_mixture_path) + prior_job = ReturnnComputePriorJobV2( + model_checkpoint=checkpoints[epoch], + returnn_config=train_job.returnn_config, + returnn_python_exe=train_job.returnn_python_exe, + returnn_root=train_job.returnn_root, + log_verbosity=train_job.returnn_config.post_config["log_verbosity"], + ) + prior_job.add_alias("extract_nn_prior/" + name) + prior_file = prior_job.out_prior_xml_file + assert prior_file is not None scorer = rasr.PrecomputedHybridFeatureScorer( prior_mixtures=acoustic_mixture_path, priori_scale=prior, + prior_file=prior_file, ) + assert acoustic_mixture_path is not None + + if use_epoch_for_compile: + tf_graph = self.nn_compile_graph(name, returnn_config, epoch=epoch) tf_flow = make_precomputed_hybrid_tf_feature_flow( tf_checkpoint=checkpoints[epoch], @@ -419,6 +441,8 @@ def nn_recognition( parallelize_conversion=parallelize_conversion, rtf=rtf, mem=mem, + lmgc_alias=f"lmgc/{name}/{recognition_corpus_key}-{recog_name}", + lmgc_scorer=lmgc_scorer, **kwargs, ) @@ -429,15 +453,22 @@ def nn_recog( returnn_config: Path, checkpoints: Dict[int, returnn.Checkpoint], step_args: HybridArgs, + train_job: Union[returnn.ReturnnTrainingJob, returnn.ReturnnRasrTrainingJob], ): for recog_name, recog_args in step_args.recognition_args.items(): + recog_args = copy.deepcopy(recog_args) + whitelist = recog_args.pop("training_whitelist", None) + if whitelist: + if train_name not in whitelist: + continue for dev_c in self.dev_corpora: self.nn_recognition( name=f"{train_corpus_key}-{train_name}-{recog_name}", returnn_config=returnn_config, checkpoints=checkpoints, - acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures, + train_job=train_job, recognition_corpus_key=dev_c, + acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures, **recog_args, ) @@ -451,8 +482,9 @@ def nn_recog( name=f"{train_name}-{recog_name}", returnn_config=returnn_config, checkpoints=checkpoints, - acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures, + train_job=train_job, recognition_corpus_key=tst_c, + acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures, **r_args, ) @@ -509,7 +541,7 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs): train_corpus_key=trn_c, cv_corpus_key=cv_c, ) - else: + elif isinstance(self.train_input_data[trn_c], AllowedReturnnTrainingDataInput): returnn_train_job = self.returnn_training( name=name, returnn_config=step_args.returnn_training_configs[name], @@ -518,6 +550,8 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs): cv_corpus_key=cv_c, devtrain_corpus_key=dvtr_c, ) + else: + raise NotImplementedError returnn_recog_config = step_args.returnn_recognition_configs.get( name, step_args.returnn_training_configs[name] @@ -529,6 +563,7 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs): returnn_config=returnn_recog_config, checkpoints=returnn_train_job.out_checkpoints, step_args=step_args, + train_job=returnn_train_job, ) def run_nn_recog_step(self, step_args: NnRecogArgs): diff --git a/common/setups/rasr/nn_system.py b/common/setups/rasr/nn_system.py index 13c3d239d..b3feaf101 100644 --- a/common/setups/rasr/nn_system.py +++ b/common/setups/rasr/nn_system.py @@ -1,36 +1,20 @@ __all__ = ["NnSystem"] import copy -import itertools -import sys from dataclasses import asdict -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Union # -------------------- Sisyphus -------------------- import sisyphus.toolkit as tk import sisyphus.global_settings as gs -from sisyphus.delayed_ops import DelayedFormat - # -------------------- Recipes -------------------- -import i6_core.features as features -import i6_core.rasr as rasr import i6_core.returnn as returnn -from i6_core.util import MultiPath, MultiOutputPath - from .rasr_system import RasrSystem -from .util import ( - RasrInitArgs, - ReturnnRasrDataInput, - OggZipHdfDataInput, - HybridArgs, - NnRecogArgs, - RasrSteps, -) # -------------------- Init --------------------