Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions common/setups/rasr/hybrid_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
CombineLmRasrConfig,
)
from .util.decode import (
DevRecognitionParameters,
RecognitionParameters,
SearchJobArgs,
Lattice2CtmArgs,
Expand All @@ -47,7 +48,7 @@ class HybridDecoder(BaseDecoder):
def __init__(
self,
rasr_binary_path: tk.Path,
rasr_arch: "str" = "linux-x86_64-standard",
rasr_arch: str = "linux-x86_64-standard",
compress: bool = False,
append: bool = False,
unbuffered: bool = False,
Expand Down Expand Up @@ -155,8 +156,9 @@ def recognition(
tf_fwd_input_name: str = "tf-fwd-input",
):
"""
run the recognitino, consisting of search, lattice to ctm, and scoring
run the recognition, consisting of search, lattice to ctm, and scoring

:param name: decoding name
:param returnn_config: RETURNN config for recognition
:param checkpoints: epoch to model checkpoint mapping
:param recognition_parameters: keys are the corpus keys so that recog params can be set for specific eval sets.
Expand Down
118 changes: 90 additions & 28 deletions common/setups/rasr/hybrid_system.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__all__ = ["HybridArgs", "HybridSystem"]
__all__ = ["HybridSystem"]

import copy
import itertools
Expand All @@ -21,24 +21,35 @@
add_tf_flow_to_base_flow,
)
from i6_core.util import MultiPath, MultiOutputPath
from i6_core.mm import CreateDummyMixturesJob
from i6_core.returnn import ReturnnComputePriorJobV2

from .nn_system import NnSystem
from .hybrid_decoder import HybridDecoder

from .util import (
RasrInitArgs,
ReturnnRasrDataInput,
OggZipHdfDataInput,
HybridArgs,
NnRecogArgs,
RasrSteps,
NnForcedAlignArgs,
ReturnnTrainingJobArgs,
AllowedReturnnTrainingDataInput,
)

# -------------------- Init --------------------

Path = tk.setup_path(__package__)

# -------------------- System --------------------
from i6_core.report.report import _Report_Type


def hybrid_report_format(report: _Report_Type) -> str:
    """
    Render a report as one ``name: value`` line per entry, ordered ascending by numeric value.

    :param report: mapping from entry name to a value whose string form can be parsed as a float
    :return: the formatted entries joined by newlines, smallest value first
    :raises ValueError: if the string form of any value cannot be converted to float
    """
    entries = []
    for recog_name in report:
        entries.append((recog_name, str(report[recog_name])))
    # sort numerically, not lexically, so that e.g. "7" is listed before "12.5"
    entries.sort(key=lambda entry: float(entry[1]))
    return "\n".join(f"{recog_name}: {value}" for recog_name, value in entries)


class HybridSystem(NnSystem):
Expand Down Expand Up @@ -90,9 +101,15 @@ def __init__(
self.cv_corpora = []
self.devtrain_corpora = []

self.train_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]]
self.cv_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]]
self.devtrain_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]]
self.train_input_data = (
None
) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]]
self.cv_input_data = (
None
) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]]
self.devtrain_input_data = (
None
) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]]
self.dev_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]]
self.test_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]]

Expand Down Expand Up @@ -128,9 +145,9 @@ def _add_output_alias_for_train_job(
def init_system(
self,
rasr_init_args: RasrInitArgs,
train_data: Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]],
cv_data: Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]],
devtrain_data: Optional[Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]]] = None,
train_data: Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]],
cv_data: Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]],
devtrain_data: Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]] = None,
dev_data: Optional[Dict[str, ReturnnRasrDataInput]] = None,
test_data: Optional[Dict[str, ReturnnRasrDataInput]] = None,
train_cv_pairing: Optional[List[Tuple[str, ...]]] = None, # List[Tuple[trn_c, cv_c, name, dvtr_c]]
Expand Down Expand Up @@ -211,21 +228,17 @@ def generate_lattices(self):

def returnn_training(
self,
name,
returnn_config,
nn_train_args,
name: str,
returnn_config: returnn.ReturnnConfig,
nn_train_args: Union[Dict, ReturnnTrainingJobArgs],
train_corpus_key,
cv_corpus_key,
devtrain_corpus_key=None,
):
assert isinstance(returnn_config, returnn.ReturnnConfig)

returnn_config.config["train"] = self.train_input_data[train_corpus_key].get_data_dict()
returnn_config.config["dev"] = self.cv_input_data[cv_corpus_key].get_data_dict()
if devtrain_corpus_key is not None:
returnn_config.config["eval_datasets"] = {
"devtrain": self.devtrain_input_data[devtrain_corpus_key].get_data_dict()
}
) -> returnn.ReturnnTrainingJob:
if nn_train_args.returnn_root is None:
nn_train_args.returnn_root = self.returnn_root
if nn_train_args.returnn_python_exe is None:
nn_train_args.returnn_python_exe = self.returnn_python_exe

train_job = returnn.ReturnnTrainingJob(
returnn_config=returnn_config,
Expand Down Expand Up @@ -346,7 +359,7 @@ def nn_recognition(
name: str,
returnn_config: returnn.ReturnnConfig,
checkpoints: Dict[int, returnn.Checkpoint],
acoustic_mixture_path: tk.Path, # TODO maybe Optional if prior file provided -> automatically construct dummy file
train_job: Union[returnn.ReturnnTrainingJob, returnn.ReturnnRasrTrainingJob],
prior_scales: List[float],
pronunciation_scales: List[float],
lm_scales: List[float],
Expand All @@ -362,6 +375,7 @@ def nn_recognition(
use_epoch_for_compile=False,
forward_output_layer="output",
native_ops: Optional[List[str]] = None,
acoustic_mixture_path: Optional[tk.Path] = None,
**kwargs,
):
with tk.block(f"{name}_recognition"):
Expand All @@ -384,15 +398,31 @@ def nn_recognition(

for pron, lm, prior, epoch in itertools.product(pronunciation_scales, lm_scales, prior_scales, epochs):
assert epoch in checkpoints.keys()
assert acoustic_mixture_path is not None

if use_epoch_for_compile:
tf_graph = self.nn_compile_graph(name, returnn_config, epoch=epoch)
acoustic_mixture_path = CreateDummyMixturesJob(
num_mixtures=returnn_config.config["extern_data"]["classes"]["dim"],
num_features=returnn_config.config["extern_data"]["data"]["dim"],
).out_mixtures
lmgc_scorer = rasr.GMMFeatureScorer(acoustic_mixture_path)
prior_job = ReturnnComputePriorJobV2(
model_checkpoint=checkpoints[epoch],
returnn_config=train_job.returnn_config,
returnn_python_exe=train_job.returnn_python_exe,
returnn_root=train_job.returnn_root,
log_verbosity=train_job.returnn_config.post_config["log_verbosity"],
)

prior_job.add_alias("extract_nn_prior/" + name)
prior_file = prior_job.out_prior_xml_file
assert prior_file is not None
scorer = rasr.PrecomputedHybridFeatureScorer(
prior_mixtures=acoustic_mixture_path,
priori_scale=prior,
prior_file=prior_file,
)
assert acoustic_mixture_path is not None

if use_epoch_for_compile:
tf_graph = self.nn_compile_graph(name, returnn_config, epoch=epoch)

tf_flow = make_precomputed_hybrid_tf_feature_flow(
tf_checkpoint=checkpoints[epoch],
Expand All @@ -419,6 +449,8 @@ def nn_recognition(
parallelize_conversion=parallelize_conversion,
rtf=rtf,
mem=mem,
lmgc_alias=f"lmgc/{name}/{recognition_corpus_key}-{recog_name}",
lmgc_scorer=lmgc_scorer,
**kwargs,
)

Expand All @@ -429,15 +461,22 @@ def nn_recog(
returnn_config: Path,
checkpoints: Dict[int, returnn.Checkpoint],
step_args: HybridArgs,
train_job: Union[returnn.ReturnnTrainingJob, returnn.ReturnnRasrTrainingJob],
):
for recog_name, recog_args in step_args.recognition_args.items():
recog_args = copy.deepcopy(recog_args)
whitelist = recog_args.pop("training_whitelist", None)
if whitelist:
if train_name not in whitelist:
continue
for dev_c in self.dev_corpora:
self.nn_recognition(
name=f"{train_corpus_key}-{train_name}-{recog_name}",
returnn_config=returnn_config,
checkpoints=checkpoints,
acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures,
train_job=train_job,
recognition_corpus_key=dev_c,
acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures,
**recog_args,
)

Expand All @@ -451,8 +490,9 @@ def nn_recog(
name=f"{train_name}-{recog_name}",
returnn_config=returnn_config,
checkpoints=checkpoints,
acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures,
train_job=train_job,
recognition_corpus_key=tst_c,
acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures,
**r_args,
)

Expand Down Expand Up @@ -509,7 +549,7 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs):
train_corpus_key=trn_c,
cv_corpus_key=cv_c,
)
else:
elif isinstance(self.train_input_data[trn_c], AllowedReturnnTrainingDataInput):
returnn_train_job = self.returnn_training(
name=name,
returnn_config=step_args.returnn_training_configs[name],
Expand All @@ -518,6 +558,8 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs):
cv_corpus_key=cv_c,
devtrain_corpus_key=dvtr_c,
)
else:
raise NotImplementedError

returnn_recog_config = step_args.returnn_recognition_configs.get(
name, step_args.returnn_training_configs[name]
Expand All @@ -529,7 +571,27 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs):
returnn_config=returnn_recog_config,
checkpoints=returnn_train_job.out_checkpoints,
step_args=step_args,
train_job=returnn_train_job,
)
from i6_core.report import GenerateReportStringJob, MailJob

results = {}
for c in self.dev_corpora + self.test_corpora:
for job_name in self.jobs[c]:
if "scorer" not in job_name:
continue
if name not in job_name:
continue
if "scorer" in job_name:
scorer = self.jobs[c][job_name]
if scorer.out_wer:
results[job_name] = scorer.out_wer

report = GenerateReportStringJob(report_values=results, report_template=hybrid_report_format)
report.add_alias(name + "/report_job")
mail = MailJob(report.out_report, send_contents=True, subject=name)
mail.add_alias(name + "/mail_job")
tk.register_output(name + "/mail", mail.out_status)

def run_nn_recog_step(self, step_args: NnRecogArgs):
for eval_c in self.dev_corpora + self.test_corpora:
Expand Down
18 changes: 1 addition & 17 deletions common/setups/rasr/nn_system.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,20 @@
__all__ = ["NnSystem"]

import copy
import itertools
import sys
from dataclasses import asdict
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Union

# -------------------- Sisyphus --------------------

import sisyphus.toolkit as tk
import sisyphus.global_settings as gs

from sisyphus.delayed_ops import DelayedFormat

# -------------------- Recipes --------------------

import i6_core.features as features
import i6_core.rasr as rasr
import i6_core.returnn as returnn

from i6_core.util import MultiPath, MultiOutputPath

from .rasr_system import RasrSystem

from .util import (
RasrInitArgs,
ReturnnRasrDataInput,
OggZipHdfDataInput,
HybridArgs,
NnRecogArgs,
RasrSteps,
)

# -------------------- Init --------------------

Expand Down