Checkpoint of pending changes before starting with TED

Marvin84 · Marvin84 · commit 918e20e56026 · 2024-01-22T16:15:02.000+01:00
diff --git a/users/raissi/experiments/librispeech/configs/LFR_factored/baseline/alignment/config_alignment_lfr.py b/users/raissi/experiments/librispeech/configs/LFR_factored/baseline/alignment/config_alignment_lfr.py
@@ -72,7 +72,7 @@ def get_system(key, lr=4e-4, num_epochs=None, am_scale=1.0, tdp_scale=0.1):
     label_info_init_args = {
         "ph_emb_size": 0,
         "st_emb_size": 0,
-        "state_tying": RasrStateTying.monophone,
+        "state_tying": 'monophone-dense',#RasrStateTying.monophone,
         "n_states_per_phone": 1
     }
     init_args_system = {
@@ -174,6 +174,7 @@ def get_system(key, lr=4e-4, num_epochs=None, am_scale=1.0, tdp_scale=0.1):
         returnn_config=s.experiments[key]["returnn_config"],
         log_linear_scales=log_linear_scales
     )
+
     s.experiments[key]["returnn_config"] = bw_augmented_returnn_config
 
     s.returnn_rasr_training_fullsum(
@@ -183,6 +184,8 @@ def get_system(key, lr=4e-4, num_epochs=None, am_scale=1.0, tdp_scale=0.1):
         nn_train_args=train_args,
     )
 
+    s.label_info = dataclasses.replace(s.label_info, state_tying=RasrStateTying.monophone)
+
     return_config_dict_infer = s.get_config_with_legacy_prolog_and_epilog(
         config=s.experiments[key]["returnn_config"].config,
         epilog_additional_str=train_helpers.specaugment.get_legacy_specaugment_epilog_blstm(
@@ -194,10 +197,10 @@ def get_system(key, lr=4e-4, num_epochs=None, am_scale=1.0, tdp_scale=0.1):
 
     s.set_single_prior_returnn_rasr(
         key=key,
-        epoch=450,
+        epoch=400,
         train_corpus_key=s.crp_names["train"],
         dev_corpus_key=s.crp_names["cvtrain"],
-        data_share=0.3,
+        data_share=0.1,
         context_type=PhoneticContext.monophone,
         smoothen=True,
         output_layer_name="center-output"
diff --git a/users/raissi/setups/common/TF_factored_hybrid_system.py b/users/raissi/setups/common/TF_factored_hybrid_system.py
@@ -211,7 +211,7 @@ def get_conformer_network(
             network = net_helpers.augment.augment_net_with_label_pops(
                 network, label_info=self.label_info, frame_rate_reduction_ratio_info=frame_rate_reduction_ratio_info
             )
-            if frame_rate_reduction_ratio_info.factor > 1:
+            if frame_rate_reduction_ratio_info.factor > 1 and frame_rate_reduction_ratio_info.single_state_alignment:
                 network["slice_classes"] = {
                     "class": "slice",
                     "from": network["classes_"]["from"],
diff --git a/users/raissi/setups/common/encoder/conformer/best_setup.py b/users/raissi/setups/common/encoder/conformer/best_setup.py
@@ -2,7 +2,7 @@
 
 
 from enum import Enum
-from typing import Union, Optional
+from typing import Union, Optional, Tuple
 from i6_experiments.users.raissi.setups.common.encoder.conformer.get_network_args import (
     get_encoder_args,
     get_network_args,
@@ -29,7 +29,7 @@ def get_best_model_config(
     num_classes: int,
     num_input_feature: int,
     *,
-    chunking: Optional[str] = None,
+    chunking: [str, Tuple] = None,
     int_loss_at_layer: Optional[int] = None,
     int_loss_scale: Optional[float] = None,
     label_smoothing: Optional[float] = None,
@@ -52,7 +52,10 @@ def get_best_model_config(
 
     assert model_dim % att_dim == 0, "model_dim must be divisible by number of att heads"
 
-    clipping, overlap = [int(v) for v in chunking.split(":")] if chunking is not None else (400, 200)
+    if isinstance(chunking, tuple):
+        [clipping, overlap] = [ele['data'] for ele in chunking]
+    else:
+        clipping, overlap = [int(v) for v in chunking.split(":")] if chunking is not None else (400, 200)
 
     enc_args = get_encoder_args(
         model_dim // att_dim,
diff --git a/users/raissi/setups/common/helpers/train/chunking.py b/users/raissi/setups/common/helpers/train/chunking.py
@@ -5,7 +5,8 @@ def chunking_with_nfactor(
     chunk_str: str, factor: int, data_key: str = "data", class_key: str = "classes"
 ) -> Tuple[Dict[str, int], Dict[str, int]]:
     """
-    It gives back the cunking dictionary for different factors. Factor 1 means no subsampling is done
+    It gives back the chunking dictionary for different factors. Factor 1 means no subsampling is done
     """
+
     chunk, overlap = [int(p.strip()) for p in chunk_str.strip().split(":")]
-    return ({"classes": chunk // factor, "data": chunk}, {"classes": overlap // factor, "data": overlap})
+    return ({class_key: chunk // factor, data_key: chunk}, {class_key: overlap // factor, data_key: overlap})
diff --git a/users/raissi/setups/common/helpers/train/network_params.py b/users/raissi/setups/common/helpers/train/network_params.py
@@ -40,3 +40,7 @@ def __post_init__(self):
 # no chunking for full-sum
 default_blstm_fullsum = GeneralNetworkParams(l2=1e-4, use_multi_task=False, add_mlps=False)
 default_conformer_viterbi = GeneralNetworkParams(chunking="400:200", l2=1e-6, specaug_args=asdict(default_sa_args))
+
+frameshift40_conformer_viterbi = GeneralNetworkParams(
+    l2=1e-6, chunking="400:200", specaug_args=asdict(default_sa_args), frame_rate_reduction_ratio_factor=4
+)
diff --git a/users/raissi/utils/default_tools.py b/users/raissi/utils/default_tools.py
@@ -78,6 +78,7 @@ def get_rasr_binary_path(rasr_path):
 
 # common
 RETURNN_ROOT = tk.Path("/work/tools/users/raissi/returnn_versions/conformer", hash_overwrite="CONFORMER_RETURNN_ROOT")
+RETURNN_ROOT_MORITZ = tk.Path("/work/asr3/raissi/shared_workspaces/gunz/2023-05--thesis-baselines-tf2/i6_core/tools/git/CloneGitRepositoryJob.0TxYoqLkxbuC/output/returnn", hash_overwrite="CONFORMER_RETURNN_Len_FIX")
 RETURNN_ROOT_TORCH = tk.Path("/work/tools/users/raissi/returnn_versions/torch", hash_overwrite="TORCH_RETURNN_ROOT")
 
 SCTK_BINARY_PATH = compile_sctk(branch="v2.4.12")  # use last published version
@@ -104,6 +105,12 @@ def __post_init__(self) -> None:
     rasr_binary_path=U16_RASR_BINARY_PATHS["TF2"],
 )
 
+u16_default_tools_returnn_fix = ToolPaths(
+    returnn_root=RETURNN_ROOT_MORITZ,
+    returnn_python_exe=U16_RETURNN_LAUNCHERS["TF2"],
+    rasr_binary_path=U16_RASR_BINARY_PATHS["TED_COMMON"],
+)
+
 
 u16_default_tools_ted = ToolPaths(
     returnn_root=RETURNN_ROOT,
@@ -112,6 +119,8 @@ def __post_init__(self) -> None:
 )
 
 
+
+
 u22_tools_tf = ToolPaths(
     returnn_root=RETURNN_ROOT_TORCH,
     returnn_python_exe=U22_RETURNN_LAUNCHERS["TF2"],

Original file line number	Diff line number	Diff line change
`@@ -211,7 +211,7 @@ def get_conformer_network(`
`211`	`211`	`network = net_helpers.augment.augment_net_with_label_pops(`
`212`	`212`	`network, label_info=self.label_info, frame_rate_reduction_ratio_info=frame_rate_reduction_ratio_info`
`213`	`213`	`)`
`214`		`- if frame_rate_reduction_ratio_info.factor > 1:`
	`214`	`+ if frame_rate_reduction_ratio_info.factor > 1 and frame_rate_reduction_ratio_info.single_state_alignment:`
`215`	`215`	`network["slice_classes"] = {`
`216`	`216`	`"class": "slice",`
`217`	`217`	`"from": network["classes_"]["from"],`