Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions common/setups/rasr/hybrid_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
CombineLmRasrConfig,
)
from .util.decode import (
DevRecognitionParameters,
RecognitionParameters,
SearchJobArgs,
Lattice2CtmArgs,
Expand All @@ -47,7 +48,7 @@ class HybridDecoder(BaseDecoder):
def __init__(
self,
rasr_binary_path: tk.Path,
rasr_arch: "str" = "linux-x86_64-standard",
rasr_arch: str = "linux-x86_64-standard",
compress: bool = False,
append: bool = False,
unbuffered: bool = False,
Expand Down Expand Up @@ -155,8 +156,9 @@ def recognition(
tf_fwd_input_name: str = "tf-fwd-input",
):
"""
run the recognitino, consisting of search, lattice to ctm, and scoring
run the recognition, consisting of search, lattice to ctm, and scoring

:param name: decoding name
:param returnn_config: RETURNN config for recognition
:param checkpoints: epoch to model checkpoint mapping
:param recognition_parameters: keys are the corpus keys so that recog params can be set for specific eval sets.
Expand Down
91 changes: 63 additions & 28 deletions common/setups/rasr/hybrid_system.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__all__ = ["HybridArgs", "HybridSystem"]
__all__ = ["HybridSystem"]

import copy
import itertools
Expand All @@ -21,17 +21,21 @@
add_tf_flow_to_base_flow,
)
from i6_core.util import MultiPath, MultiOutputPath
from i6_core.mm import CreateDummyMixturesJob
from i6_core.returnn import ReturnnComputePriorJobV2

from .nn_system import NnSystem
from .hybrid_decoder import HybridDecoder

from .util import (
RasrInitArgs,
ReturnnRasrDataInput,
OggZipHdfDataInput,
HybridArgs,
NnRecogArgs,
RasrSteps,
NnForcedAlignArgs,
ReturnnTrainingJobArgs,
AllowedReturnnTrainingDataInput,
)

# -------------------- Init --------------------
Expand Down Expand Up @@ -90,9 +94,15 @@ def __init__(
self.cv_corpora = []
self.devtrain_corpora = []

self.train_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]]
self.cv_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]]
self.devtrain_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]]
self.train_input_data = (
None
) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]]
self.cv_input_data = (
None
) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]]
self.devtrain_input_data = (
None
) # type:Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]]
self.dev_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]]
self.test_input_data = None # type:Optional[Dict[str, ReturnnRasrDataInput]]

Expand Down Expand Up @@ -128,9 +138,9 @@ def _add_output_alias_for_train_job(
def init_system(
self,
rasr_init_args: RasrInitArgs,
train_data: Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]],
cv_data: Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]],
devtrain_data: Optional[Dict[str, Union[ReturnnRasrDataInput, OggZipHdfDataInput]]] = None,
train_data: Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]],
cv_data: Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]],
devtrain_data: Optional[Dict[str, Union[ReturnnRasrDataInput, AllowedReturnnTrainingDataInput]]] = None,
dev_data: Optional[Dict[str, ReturnnRasrDataInput]] = None,
test_data: Optional[Dict[str, ReturnnRasrDataInput]] = None,
train_cv_pairing: Optional[List[Tuple[str, ...]]] = None, # List[Tuple[trn_c, cv_c, name, dvtr_c]]
Expand Down Expand Up @@ -211,21 +221,17 @@ def generate_lattices(self):

def returnn_training(
self,
name,
returnn_config,
nn_train_args,
name: str,
returnn_config: returnn.ReturnnConfig,
nn_train_args: Union[Dict, ReturnnTrainingJobArgs],
train_corpus_key,
cv_corpus_key,
devtrain_corpus_key=None,
):
assert isinstance(returnn_config, returnn.ReturnnConfig)

returnn_config.config["train"] = self.train_input_data[train_corpus_key].get_data_dict()
returnn_config.config["dev"] = self.cv_input_data[cv_corpus_key].get_data_dict()
if devtrain_corpus_key is not None:
returnn_config.config["eval_datasets"] = {
"devtrain": self.devtrain_input_data[devtrain_corpus_key].get_data_dict()
}
) -> returnn.ReturnnTrainingJob:
if nn_train_args.returnn_root is None:
nn_train_args.returnn_root = self.returnn_root
if nn_train_args.returnn_python_exe is None:
nn_train_args.returnn_python_exe = self.returnn_python_exe

train_job = returnn.ReturnnTrainingJob(
returnn_config=returnn_config,
Expand Down Expand Up @@ -346,7 +352,7 @@ def nn_recognition(
name: str,
returnn_config: returnn.ReturnnConfig,
checkpoints: Dict[int, returnn.Checkpoint],
acoustic_mixture_path: tk.Path, # TODO maybe Optional if prior file provided -> automatically construct dummy file
train_job: Union[returnn.ReturnnTrainingJob, returnn.ReturnnRasrTrainingJob],
prior_scales: List[float],
pronunciation_scales: List[float],
lm_scales: List[float],
Expand Down Expand Up @@ -384,15 +390,31 @@ def nn_recognition(

for pron, lm, prior, epoch in itertools.product(pronunciation_scales, lm_scales, prior_scales, epochs):
assert epoch in checkpoints.keys()
assert acoustic_mixture_path is not None

if use_epoch_for_compile:
tf_graph = self.nn_compile_graph(name, returnn_config, epoch=epoch)
acoustic_mixture_path = CreateDummyMixturesJob(
num_mixtures=returnn_config.config["extern_data"]["classes"]["dim"],
num_features=returnn_config.config["extern_data"]["data"]["dim"],
).out_mixtures
lmgc_scorer = rasr.GMMFeatureScorer(acoustic_mixture_path)
prior_job = ReturnnComputePriorJobV2(
model_checkpoint=checkpoints[epoch],
returnn_config=train_job.returnn_config,
returnn_python_exe=train_job.returnn_python_exe,
returnn_root=train_job.returnn_root,
log_verbosity=train_job.returnn_config.post_config["log_verbosity"],
)

prior_job.add_alias("extract_nn_prior/" + name)
prior_file = prior_job.out_prior_xml_file
assert prior_file is not None
scorer = rasr.PrecomputedHybridFeatureScorer(
prior_mixtures=acoustic_mixture_path,
priori_scale=prior,
prior_file=prior_file,
)
assert acoustic_mixture_path is not None

if use_epoch_for_compile:
tf_graph = self.nn_compile_graph(name, returnn_config, epoch=epoch)

tf_flow = make_precomputed_hybrid_tf_feature_flow(
tf_checkpoint=checkpoints[epoch],
Expand All @@ -419,6 +441,8 @@ def nn_recognition(
parallelize_conversion=parallelize_conversion,
rtf=rtf,
mem=mem,
lmgc_alias=f"lmgc/{name}/{recognition_corpus_key}-{recog_name}",
lmgc_scorer=lmgc_scorer,
**kwargs,
)

Expand All @@ -429,15 +453,22 @@ def nn_recog(
returnn_config: Path,
checkpoints: Dict[int, returnn.Checkpoint],
step_args: HybridArgs,
train_job: Union[returnn.ReturnnTrainingJob, returnn.ReturnnRasrTrainingJob],
):
for recog_name, recog_args in step_args.recognition_args.items():
recog_args = copy.deepcopy(recog_args)
whitelist = recog_args.pop("training_whitelist", None)
if whitelist:
if train_name not in whitelist:
continue
for dev_c in self.dev_corpora:
self.nn_recognition(
name=f"{train_corpus_key}-{train_name}-{recog_name}",
returnn_config=returnn_config,
checkpoints=checkpoints,
acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures,
train_job=train_job,
recognition_corpus_key=dev_c,
acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures,
**recog_args,
)

Expand All @@ -451,8 +482,9 @@ def nn_recog(
name=f"{train_name}-{recog_name}",
returnn_config=returnn_config,
checkpoints=checkpoints,
acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures,
train_job=train_job,
recognition_corpus_key=tst_c,
acoustic_mixture_path=self.train_input_data[train_corpus_key].acoustic_mixtures,
**r_args,
)

Expand Down Expand Up @@ -509,7 +541,7 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs):
train_corpus_key=trn_c,
cv_corpus_key=cv_c,
)
else:
elif isinstance(self.train_input_data[trn_c], AllowedReturnnTrainingDataInput):
returnn_train_job = self.returnn_training(
name=name,
returnn_config=step_args.returnn_training_configs[name],
Expand All @@ -518,6 +550,8 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs):
cv_corpus_key=cv_c,
devtrain_corpus_key=dvtr_c,
)
else:
raise NotImplementedError

returnn_recog_config = step_args.returnn_recognition_configs.get(
name, step_args.returnn_training_configs[name]
Expand All @@ -529,6 +563,7 @@ def run_nn_step(self, step_name: str, step_args: HybridArgs):
returnn_config=returnn_recog_config,
checkpoints=returnn_train_job.out_checkpoints,
step_args=step_args,
train_job=returnn_train_job,
)

def run_nn_recog_step(self, step_args: NnRecogArgs):
Expand Down
18 changes: 1 addition & 17 deletions common/setups/rasr/nn_system.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,20 @@
__all__ = ["NnSystem"]

import copy
import itertools
import sys
from dataclasses import asdict
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Union

# -------------------- Sisyphus --------------------

import sisyphus.toolkit as tk
import sisyphus.global_settings as gs

from sisyphus.delayed_ops import DelayedFormat

# -------------------- Recipes --------------------

import i6_core.features as features
import i6_core.rasr as rasr
import i6_core.returnn as returnn

from i6_core.util import MultiPath, MultiOutputPath

from .rasr_system import RasrSystem

from .util import (
RasrInitArgs,
ReturnnRasrDataInput,
OggZipHdfDataInput,
HybridArgs,
NnRecogArgs,
RasrSteps,
)

# -------------------- Init --------------------

Expand Down