From e8d6bd0ea3d25fdc260722f933f649497499baa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=2E=20L=C3=BCscher?= Date: Wed, 4 Oct 2023 18:42:40 +0200 Subject: [PATCH 1/6] small changes for hybrid decoder --- common/setups/rasr/hybrid_decoder.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/common/setups/rasr/hybrid_decoder.py b/common/setups/rasr/hybrid_decoder.py index 4923f0640..74b3eaadc 100644 --- a/common/setups/rasr/hybrid_decoder.py +++ b/common/setups/rasr/hybrid_decoder.py @@ -23,6 +23,7 @@ CombineLmRasrConfig, ) from .util.decode import ( + DevRecognitionParameters, RecognitionParameters, SearchJobArgs, Lattice2CtmArgs, @@ -47,7 +48,7 @@ class HybridDecoder(BaseDecoder): def __init__( self, rasr_binary_path: tk.Path, - rasr_arch: "str" = "linux-x86_64-standard", + rasr_arch: str = "linux-x86_64-standard", compress: bool = False, append: bool = False, unbuffered: bool = False, @@ -155,8 +156,9 @@ def recognition( tf_fwd_input_name: str = "tf-fwd-input", ): """ - run the recognitino, consisting of search, lattice to ctm, and scoring + run the recognition, consisting of search, lattice to ctm, and scoring + :param name: decoding name :param returnn_config: RETURNN config for recognition :param checkpoints: epoch to model checkpoint mapping :param recognition_parameters: keys are the corpus keys so that recog params can be set for specific eval sets. From c05c3384716711ff2078463ba976673e716edab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=2E=20L=C3=BCscher?= Date: Wed, 4 Oct 2023 18:51:42 +0200 Subject: [PATCH 2/6] cleanup --- common/setups/rasr/nn_system.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/common/setups/rasr/nn_system.py b/common/setups/rasr/nn_system.py index 13c3d239d..b3feaf101 100644 --- a/common/setups/rasr/nn_system.py +++ b/common/setups/rasr/nn_system.py @@ -1,36 +1,20 @@ __all__ = ["NnSystem"] import copy -import itertools -import sys from dataclasses import asdict -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Union # -------------------- Sisyphus -------------------- import sisyphus.toolkit as tk import sisyphus.global_settings as gs -from sisyphus.delayed_ops import DelayedFormat - # -------------------- Recipes -------------------- -import i6_core.features as features -import i6_core.rasr as rasr import i6_core.returnn as returnn -from i6_core.util import MultiPath, MultiOutputPath - from .rasr_system import RasrSystem -from .util import ( - RasrInitArgs, - ReturnnRasrDataInput, - OggZipHdfDataInput, - HybridArgs, - NnRecogArgs, - RasrSteps, -) # -------------------- Init -------------------- From a461970360562d07414d85ed47278e1d3d9ce06e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=2E=20L=C3=BCscher?= Date: Wed, 4 Oct 2023 19:01:59 +0200 Subject: [PATCH 3/6] update hybrid system --- common/setups/rasr/hybrid_system.py | 118 +++++++++++++++++++++------- 1 file changed, 90 insertions(+), 28 deletions(-) diff --git a/common/setups/rasr/hybrid_system.py b/common/setups/rasr/hybrid_system.py index 8ad96549f..fde48a3e3 100644 --- a/common/setups/rasr/hybrid_system.py +++ b/common/setups/rasr/hybrid_system.py @@ -1,4 +1,4 @@ -__all__ = ["HybridArgs", "HybridSystem"] +__all__ = ["HybridSystem"] import copy import itertools @@ -21,17 +21,21 @@ add_tf_flow_to_base_flow, ) from i6_core.util import MultiPath, MultiOutputPath +from i6_core.mm import CreateDummyMixturesJob +from i6_core.returnn import ReturnnComputePriorJobV2 from .nn_system import NnSystem +from .hybrid_decoder import HybridDecoder from .util import ( RasrInitArgs, ReturnnRasrDataInput, - OggZipHdfDataInput, HybridArgs, NnRecogArgs, RasrSteps, NnForcedAlignArgs, + ReturnnTrainingJobArgs, + AllowedReturnnTrainingDataInput, ) # -------------------- Init -------------------- @@ -39,6 +43,13 @@ Path = tk.setup_path(__package__) # -------------------- System -------------------- +from i6_core.report.report import _Report_Type + + +def hybrid_report_format(report: _Report_Type) -> str: + out = [(recog, str(report[recog])) for recog in report] + out = sorted(out, key=lambda x: float(x[1])) + return "\n".join([f"{pair[0]}: {str(pair[1])}" for pair in out]) class HybridSystem(NnSystem): @@ -90,9 +101,15 @@ def __init__( self.cv_corpora = [] self.devtrain_corpora = [] - self.train_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]] - self.cv_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]] - self.devtrain_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]] + self.train_input_data = ( + None + ) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]] + self.cv_input_data = ( + None + ) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]] + self.devtrain_input_data = ( + None + ) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]] self.dev_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]] self.test_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]] @@ -128,9 +145,9 @@ def _add_output_alias_for_train_job( def init_system( self, rasr_init_args: RasrInitArgs, - train_data: Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]], - cv_data: Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]], - devtrain_data: Optional[Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]]] = None, + train_data: Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]], + cv_data: Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]], + devtrain_data: Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]] = None, dev_data: Optional[Dict[str, ReturnnRasrDataInput]] = None, test_data: Optional[Dict[str, ReturnnRasrDataInput]] = None, train_cv_pairing: Optional[List[Tuple[str, ...]]] = None, # List[Tuple[trn_c, cv_c, name, dvtr_c]] @@ -211,21 +228,17 @@ def generate_lattices(self): def returnn_training( self, - name, - returnn_config, - nn_train_args, + name: str, + returnn_config: returnn.ReturnnConfig, + nn_train_args: Union[Dict, ReturnnTrainingJobArgs], train_corpus_key, cv_corpus_key, devtrain_corpus_key=None, - ): - assert isinstance(returnn_config, returnn.ReturnnConfig) - - returnn_config.config["train"] = self.train_input_data[train_corpus_key].get_data_dict() - returnn_config.config["dev"] = self.cv_input_data[cv_corpus_key].get_data_dict() - if devtrain_corpus_key is not None: - returnn_config.config["eval_datasets"] = { - "devtrain": self.devtrain_input_data[devtrain_corpus_key].get_data_dict() - } + ) -> returnn.ReturnnTrainingJob: + if nn_train_args.returnn_root is None: + nn_train_args.returnn_root = self.returnn_root + if nn_train_args.returnn_python_exe is None: + nn_train_args.returnn_python_exe = self.returnn_python_exe train_job = returnn.ReturnnTrainingJob( returnn_config=returnn_config, @@ -346,7 +359,7 @@ def nn_recognition( name: str, returnn_config: returnn.ReturnnConfig, checkpoints: Dict[int, returnn.Checkpoint], - acoustic_mixture_path: tk.Path, # TODO maybe Optional if prior file provided -> automatically construct dummy file + train_job: Union[returnn.ReturnnTrainingJob, returnn.ReturnnRasrTrainingJob], prior_scales: List[float], pronunciation_scales: List[float], lm_scales: List[float], @@ -362,6 +375,7 @@ def nn_recognition( use_epoch_for_compile=False, forward_output_layer="output", native_ops: Optional[List[str]] = None, + acoustic_mixture_path: Optional[tk.Path] = None, **kwargs, ): with tk.block(f"{name}_recognition"): @@ -384,15 +398,31 @@ def nn_recognition( for pron, lm, prior, epoch in itertools.product(pronunciation_scales, lm_scales, prior_scales, epochs): assert epoch in checkpoints.keys() - assert acoustic_mixture_path is not None - - if use_epoch_for_compile: - tf_graph = self.nn_compile_graph(name, returnn_config, epoch=epoch) + acoustic_mixture_path = CreateDummyMixturesJob( + num_mixtures=returnn_config.config["extern_data"]["classes"]["dim"], + num_features=returnn_config.config["extern_data"]["data"]["dim"], + ).out_mixtures + lmgc_scorer = rasr.GMMFeatureScorer(acoustic_mixture_path) + prior_job = ReturnnComputePriorJobV2( + model_checkpoint=checkpoints[epoch], + returnn_config=train_job.returnn_config, + returnn_python_exe=train_job.returnn_python_exe, + returnn_root=train_job.returnn_root, + log_verbosity=train_job.returnn_config.post_config["log_verbosity"], + ) + prior_job.add_alias("extract_nn_prior/" + name) + prior_file = prior_job.out_prior_xml_file + assert prior_file is not None scorer = rasr.PrecomputedHybridFeatureScorer( prior_mixtures=acoustic_mixture_path, priori_scale=prior, + prior_file=prior_file, ) + assert acoustic_mixture_path is not None + + if use_epoch_for_compile: + tf_graph = self.nn_compile_graph(name, returnn_config, epoch=epoch) tf_flow = make_precomputed_hybrid_tf_feature_flow( tf_checkpoint=checkpoints[epoch], @@ -419,6 +449,8 @@ def nn_recognition( parallelize_conversion=parallelize_conversion, rtf=rtf, mem=mem, + lmgc_alias=f"lmgc/{name}/{recognition_corpus_key}-{recog_name}", + lmgc_scorer=lmgc_scorer, **kwargs, ) @@ -429,15 +461,22 @@ def nn_recog( returnn_config: Path, checkpoints: Dict[int, returnn.Checkpoint], step_args: HybridArgs, + train_job: Union[returnn.ReturnnTrainingJob, returnn.ReturnnRasrTrainingJob], ): for recog_name, recog_args in step_args.recognition_args.items(): + recog_args = copy.deepcopy(recog_args) + whitelist = recog_args.pop("training_whitelist", None) + if whitelist: + if train_name not in whitelist: + continue for dev_c in self.dev_corpora: self.nn_recognition( name=f"{train_corpus_key}-{train_name}-{recog_name}", returnn_config=returnn_config, checkpoints=checkpoints, - acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures, + train_job=train_job, recognition_corpus_key=dev_c, + acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures, **recog_args, ) @@ -451,8 +490,9 @@ def nn_recog( name=f"{train_name}-{recog_name}", returnn_config=returnn_config, checkpoints=checkpoints, - acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures, + train_job=train_job, recognition_corpus_key=tst_c, + acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures, **r_args, ) @@ -509,7 +549,7 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs): train_corpus_key=trn_c, cv_corpus_key=cv_c, ) - else: + elif isinstance(self.train_input_data[trn_c], AllowedReturnnTrainingDataInput): returnn_train_job = self.returnn_training( name=name, returnn_config=step_args.returnn_training_configs[name], @@ -518,6 +558,8 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs): cv_corpus_key=cv_c, devtrain_corpus_key=dvtr_c, ) + else: + raise NotImplementedError returnn_recog_config = step_args.returnn_recognition_configs.get( name, step_args.returnn_training_configs[name] @@ -529,7 +571,27 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs): returnn_config=returnn_recog_config, checkpoints=returnn_train_job.out_checkpoints, step_args=step_args, + train_job=returnn_train_job, ) + from i6_core.report import GenerateReportStringJob, MailJob + + results = {} + for c in self.dev_corpora + self.test_corpora: + for job_name in self.jobs[c]: + if "scorer" not in job_name: + continue + if name not in job_name: + continue + if "scorer" in job_name: + scorer = self.jobs[c][job_name] + if scorer.out_wer: + results[job_name] = scorer.out_wer + + report = GenerateReportStringJob(report_values=results, report_template=hybrid_report_format) + report.add_alias(name + "/report_job") + mail = MailJob(report.out_report, send_contents=True, subject=name) + mail.add_alias(name + "/mail_job") + tk.register_output(name + "/mail", mail.out_status) def run_nn_recog_step(self, step_args: NnRecogArgs): for eval_c in self.dev_corpora + self.test_corpora: From 75a09152a80c4f08dea9541222fb3c2eeee0efe7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=2E=20L=C3=BCscher?= Date: Fri, 15 Dec 2023 15:56:44 +0100 Subject: [PATCH 4/6] remove acoustic mixtures the reason for this is that prior estimating should be always done for performance reasons (WER and RTF) Co-authored-by: Benedikt Hilmes --- common/setups/rasr/hybrid_system.py | 1 - 1 file changed, 1 deletion(-) diff --git a/common/setups/rasr/hybrid_system.py b/common/setups/rasr/hybrid_system.py index fde48a3e3..e40aa6d56 100644 --- a/common/setups/rasr/hybrid_system.py +++ b/common/setups/rasr/hybrid_system.py @@ -375,7 +375,6 @@ def nn_recognition( use_epoch_for_compile=False, forward_output_layer="output", native_ops: Optional[List[str]] = None, - acoustic_mixture_path: Optional[tk.Path] = None, **kwargs, ): with tk.block(f"{name}_recognition"): From cc79f820329f2d8bbc4fb1781967763bbdd8995a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=2E=20L=C3=BCscher?= Date: Tue, 23 Jan 2024 11:20:12 +0100 Subject: [PATCH 5/6] remove report generation --- common/setups/rasr/hybrid_system.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/common/setups/rasr/hybrid_system.py b/common/setups/rasr/hybrid_system.py index e40aa6d56..69eabe581 100644 --- a/common/setups/rasr/hybrid_system.py +++ b/common/setups/rasr/hybrid_system.py @@ -43,12 +43,6 @@ Path = tk.setup_path(__package__) # -------------------- System -------------------- -from i6_core.report.report import _Report_Type - - -def hybrid_report_format(report: _Report_Type) -> str: - out = [(recog, str(report[recog])) for recog in report] - out = sorted(out, key=lambda x: float(x[1])) return "\n".join([f"{pair[0]}: {str(pair[1])}" for pair in out]) @@ -572,25 +566,6 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs): step_args=step_args, train_job=returnn_train_job, ) - from i6_core.report import GenerateReportStringJob, MailJob - - results = {} - for c in self.dev_corpora + self.test_corpora: - for job_name in self.jobs[c]: - if "scorer" not in job_name: - continue - if name not in job_name: - continue - if "scorer" in job_name: - scorer = self.jobs[c][job_name] - if scorer.out_wer: - results[job_name] = scorer.out_wer - - report = GenerateReportStringJob(report_values=results, report_template=hybrid_report_format) - report.add_alias(name + "/report_job") - mail = MailJob(report.out_report, send_contents=True, subject=name) - mail.add_alias(name + "/mail_job") - tk.register_output(name + "/mail", mail.out_status) def run_nn_recog_step(self, step_args: NnRecogArgs): for eval_c in self.dev_corpora + self.test_corpora: From 00a79c5cc23d67ce84551380efffb0262d908128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=2E=20L=C3=BCscher?= Date: Tue, 23 Jan 2024 11:23:37 +0100 Subject: [PATCH 6/6] fix --- common/setups/rasr/hybrid_system.py | 1 - 1 file changed, 1 deletion(-) diff --git a/common/setups/rasr/hybrid_system.py b/common/setups/rasr/hybrid_system.py index 69eabe581..34acb12c8 100644 --- a/common/setups/rasr/hybrid_system.py +++ b/common/setups/rasr/hybrid_system.py @@ -43,7 +43,6 @@ Path = tk.setup_path(__package__) # -------------------- System -------------------- - return "\n".join([f"{pair[0]}: {str(pair[1])}" for pair in out]) class HybridSystem(NnSystem):