From 620a042227f3215bd92691abbb4b9b723e1a1978 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 21 Aug 2023 18:33:12 +0200 Subject: [PATCH 01/27] feat: Add option to generate LM image and GC via two separate jobs Closes #430 --- recognition/advanced_tree_search.py | 44 ++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 3996d854e..e0e24bb82 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -10,6 +10,7 @@ Path = setup_path(__package__) +import copy import math import os import shutil @@ -167,6 +168,7 @@ def __init__( lmgc_mem: float = 12.0, lmgc_alias: Optional[str] = None, lmgc_scorer: Optional[rasr.FeatureScorer] = None, + separate_lmi_gc_generation: bool = False, model_combination_config: Optional[rasr.RasrConfig] = None, model_combination_post_config: Optional[rasr.RasrConfig] = None, extra_config: Optional[rasr.RasrConfig] = None, @@ -286,18 +288,40 @@ def create_config( lmgc_mem: float, lmgc_alias: Optional[str], lmgc_scorer: Optional[rasr.FeatureScorer], + separate_lmi_gc_generation: bool, model_combination_config: Optional[rasr.RasrConfig], model_combination_post_config: Optional[rasr.RasrConfig], extra_config: Optional[rasr.RasrConfig], extra_post_config: Optional[rasr.RasrConfig], **kwargs, ): - lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( - crp, lmgc_scorer if lmgc_scorer is not None else feature_scorer, extra_config, extra_post_config - ) - if lmgc_alias is not None: - lm_gc.add_alias(lmgc_alias) - lm_gc.rqmt["mem"] = lmgc_mem + def specialize_lm_config(crp, lm_config): + crp = copy.deepcopy(crp) + crp.language_model = lm_config + return crp + + if separate_lmi_gc_generation: + gc = BuildGlobalCacheJob(crp, extra_config, extra_post_config).out_global_cache + + arpa_lms = AdvancedTreeSearchLmImageAndGlobalCacheJob.find_arpa_lms( + crp.language_model, post_config.lm if post_config is not None else None + ) + lm_images = { + (i + 1): lm.CreateLmImageJob( + specialize_lm_config(crp, lm), extra_config=extra_config, extra_post_config=extra_post_config + ).out_lm + for i, lm in enumerate(arpa_lms) + } + else: + lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( + crp, lmgc_scorer if lmgc_scorer is not None else feature_scorer, extra_config, extra_post_config + ) + if lmgc_alias is not None: + lm_gc.add_alias(lmgc_alias) + lm_gc.rqmt["mem"] = lmgc_mem + + gc = lm_gc.out_global_cache + lm_images = lm_gc.out_lm_images search_parameters = cls.update_search_parameters(search_parameters) @@ -397,14 +421,14 @@ def create_config( ] post_config.flf_lattice_tool.global_cache.read_only = True - post_config.flf_lattice_tool.global_cache.file = lm_gc.out_global_cache + post_config.flf_lattice_tool.global_cache.file = gc arpa_lms = AdvancedTreeSearchLmImageAndGlobalCacheJob.find_arpa_lms( config.flf_lattice_tool.network.recognizer.lm, post_config.flf_lattice_tool.network.recognizer.lm, ) for i, lm_config in enumerate(arpa_lms): - lm_config[1].image = lm_gc.out_lm_images[i + 1] + lm_config[1].image = lm_images[i + 1] # Remaining Flf-network @@ -438,11 +462,11 @@ def create_config( config._update(extra_config) post_config._update(extra_post_config) - return config, post_config, lm_gc + return config, post_config @classmethod def hash(cls, kwargs): - config, post_config, lm_gc = cls.create_config(**kwargs) + config, post_config = cls.create_config(**kwargs) return super().hash( { "config": config, From a68265cebb2f6c4380cde0b29a5666cb0b1891a0 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 21 Aug 2023 18:34:56 +0200 Subject: [PATCH 02/27] chore: Document parameter --- recognition/advanced_tree_search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index e0e24bb82..a047f9805 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -192,6 +192,7 @@ def __init__( :param lmgc_mem: Memory requirement for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_alias: Alias for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_scorer: Dummy scorer for the AdvancedTreeSearchLmImageAndGlobalCacheJob which is required but unused + :param separate_lmi_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash :param model_combination_config: Configuration for model combination :param model_combination_post_config: Post config for model combination :param extra_config: Additional Config for recognition From b74c6542bea68243e68770805e4ff8c6da3b5091 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 21 Aug 2023 18:37:49 +0200 Subject: [PATCH 03/27] fix: Always assign the (possibly to None) lm_gc property --- recognition/advanced_tree_search.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index a047f9805..3b647efb2 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -313,6 +313,7 @@ def specialize_lm_config(crp, lm_config): ).out_lm for i, lm in enumerate(arpa_lms) } + lm_gc = None else: lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( crp, lmgc_scorer if lmgc_scorer is not None else feature_scorer, extra_config, extra_post_config @@ -463,11 +464,11 @@ def specialize_lm_config(crp, lm_config): config._update(extra_config) post_config._update(extra_post_config) - return config, post_config + return config, post_config, lm_gc @classmethod def hash(cls, kwargs): - config, post_config = cls.create_config(**kwargs) + config, post_config, lm_gc = cls.create_config(**kwargs) return super().hash( { "config": config, From 79cbd2adecc4814b95dcdc24bdae7ac58c2dfc2e Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 22 Aug 2023 10:22:53 +0200 Subject: [PATCH 04/27] fix bug, assign jobs to class if possible --- recognition/advanced_tree_search.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 3b647efb2..b921a4c93 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -209,6 +209,8 @@ def __init__( self.config, self.post_config, self.lm_gc_job, + self.gc_job, + self.lm_image_jobs, ) = AdvancedTreeSearchJob.create_config(**kwargs) self.feature_flow = feature_flow self.exe = self.select_exe(crp.flf_tool_exe, "flf-tool") @@ -302,17 +304,21 @@ def specialize_lm_config(crp, lm_config): return crp if separate_lmi_gc_generation: - gc = BuildGlobalCacheJob(crp, extra_config, extra_post_config).out_global_cache + gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) arpa_lms = AdvancedTreeSearchLmImageAndGlobalCacheJob.find_arpa_lms( crp.language_model, post_config.lm if post_config is not None else None ) - lm_images = { + lm_image_jobs = { (i + 1): lm.CreateLmImageJob( - specialize_lm_config(crp, lm), extra_config=extra_config, extra_post_config=extra_post_config - ).out_lm - for i, lm in enumerate(arpa_lms) + specialize_lm_config(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config + ) + for i, lm_config in enumerate(arpa_lms) } + + gc = gc_job.out_global_cache + lm_images = {k: v.out_image for k, v in lm_image_jobs.items()} + lm_gc = None else: lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( @@ -325,6 +331,9 @@ def specialize_lm_config(crp, lm_config): gc = lm_gc.out_global_cache lm_images = lm_gc.out_lm_images + gc_job = None + lm_image_jobs = {} + search_parameters = cls.update_search_parameters(search_parameters) la_opts = { @@ -464,11 +473,11 @@ def specialize_lm_config(crp, lm_config): config._update(extra_config) post_config._update(extra_post_config) - return config, post_config, lm_gc + return config, post_config, lm_gc, gc_job, lm_image_jobs @classmethod def hash(cls, kwargs): - config, post_config, lm_gc = cls.create_config(**kwargs) + config, post_config, *jobs = cls.create_config(**kwargs) return super().hash( { "config": config, From 78f6bedfad249e13550fbdda727def490d9d2bd5 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 14:46:36 +0200 Subject: [PATCH 05/27] refactor find_arpa_lms into standalone function --- lm/__init__.py | 1 + lm/util.py | 24 ++++++++++++++++++++++ recognition/advanced_tree_search.py | 31 +++++------------------------ 3 files changed, 30 insertions(+), 26 deletions(-) create mode 100644 lm/util.py diff --git a/lm/__init__.py b/lm/__init__.py index b5960e96b..944dd5893 100644 --- a/lm/__init__.py +++ b/lm/__init__.py @@ -3,3 +3,4 @@ from .reverse_arpa import * from .vocabulary import * from .srilm import * +from .util import * diff --git a/lm/util.py b/lm/util.py new file mode 100644 index 000000000..b9fb0ebec --- /dev/null +++ b/lm/util.py @@ -0,0 +1,24 @@ +from typing import List, Tuple + +import i6_core.rasr as rasr + + +def _has_image(c: rasr.RasrConfig, pc: rasr.RasrConfig): + res = c._get("image") is not None + res = res or (pc is not None and pc._get("image") is not None) + return res + + +def find_arpa_lms(lm_config: rasr.RasrConfig, lm_post_config=None) -> List[Tuple[rasr.RasrConfig, rasr.RasrConfig]]: + result = [] + + if lm_config.type == "ARPA": + if not _has_image(lm_config, lm_post_config): + result.append((lm_config, lm_post_config)) + elif lm_config.type == "combine": + for i in range(1, lm_config.num_lms + 1): + sub_lm_config = lm_config[f"lm-{i}"] + sub_lm_post_config = lm_post_config[f"lm-{i}"] if lm_post_config is not None else None + result += find_arpa_lms(sub_lm_config, sub_lm_post_config) + + return result diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index b921a4c93..0868cc2c6 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -70,25 +70,6 @@ def run(self): def cleanup_before_run(self, cmd, retry, *args): util.backup_if_exists("lm_and_state_tree.log") - @classmethod - def find_arpa_lms(cls, lm_config, lm_post_config=None): - result = [] - - def has_image(c, pc): - res = c._get("image") is not None - res = res or (pc is not None and pc._get("image") is not None) - return res - - if lm_config.type == "ARPA": - if not has_image(lm_config, lm_post_config): - result.append((lm_config, lm_post_config)) - elif lm_config.type == "combine": - for i in range(1, lm_config.num_lms + 1): - sub_lm_config = lm_config["lm-%d" % i] - sub_lm_post_config = lm_post_config["lm-%d" % i] if lm_post_config is not None else None - result += cls.find_arpa_lms(sub_lm_config, sub_lm_post_config) - return result - @classmethod def create_config(cls, crp, feature_scorer, extra_config, extra_post_config, **kwargs): config, post_config = rasr.build_config_from_mapping( @@ -118,7 +99,7 @@ def create_config(cls, crp, feature_scorer, extra_config, extra_post_config, **k config.flf_lattice_tool.network.recognizer.feature_extraction.file = "dummy.flow" config.flf_lattice_tool.network.recognizer.lm.scale = 1.0 - arpa_lms = cls.find_arpa_lms( + arpa_lms = lm.find_arpa_lms( config.flf_lattice_tool.network.recognizer.lm, post_config.flf_lattice_tool.network.recognizer.lm if post_config is not None else None, ) @@ -306,9 +287,7 @@ def specialize_lm_config(crp, lm_config): if separate_lmi_gc_generation: gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) - arpa_lms = AdvancedTreeSearchLmImageAndGlobalCacheJob.find_arpa_lms( - crp.language_model, post_config.lm if post_config is not None else None - ) + arpa_lms = lm.find_arpa_lms(crp.language_model, post_config.lm if post_config is not None else None) lm_image_jobs = { (i + 1): lm.CreateLmImageJob( specialize_lm_config(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config @@ -434,12 +413,12 @@ def specialize_lm_config(crp, lm_config): post_config.flf_lattice_tool.global_cache.read_only = True post_config.flf_lattice_tool.global_cache.file = gc - arpa_lms = AdvancedTreeSearchLmImageAndGlobalCacheJob.find_arpa_lms( + arpa_lms = lm.find_arpa_lms( config.flf_lattice_tool.network.recognizer.lm, post_config.flf_lattice_tool.network.recognizer.lm, ) - for i, lm_config in enumerate(arpa_lms): - lm_config[1].image = lm_images[i + 1] + for i, (_lm_config, lm_post_config) in enumerate(arpa_lms): + lm_post_config.image = lm_images[i + 1] # Remaining Flf-network From d209e8f793592668903872f51a6426288c3720fe Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 16:43:03 +0200 Subject: [PATCH 06/27] fix bugs from trial runs --- recognition/advanced_tree_search.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 0868cc2c6..ef52a1043 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -281,18 +281,18 @@ def create_config( ): def specialize_lm_config(crp, lm_config): crp = copy.deepcopy(crp) - crp.language_model = lm_config + crp.language_model_config = lm_config return crp if separate_lmi_gc_generation: gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) - arpa_lms = lm.find_arpa_lms(crp.language_model, post_config.lm if post_config is not None else None) + arpa_lms = lm.find_arpa_lms(crp.language_model_config, None) lm_image_jobs = { (i + 1): lm.CreateLmImageJob( specialize_lm_config(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config ) - for i, lm_config in enumerate(arpa_lms) + for i, (lm_config, _lm_post_config) in enumerate(arpa_lms) } gc = gc_job.out_global_cache From 02d70884e044c622523b2815a123aff7e17cf08c Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 16:43:07 +0200 Subject: [PATCH 07/27] Re-enable lm-util See https://github.com/rwth-i6/rasr/commit/d58a228e80976f4b25c1700648622f658830e2dc --- rasr/crp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasr/crp.py b/rasr/crp.py index 68029d903..1a74baa76 100644 --- a/rasr/crp.py +++ b/rasr/crp.py @@ -79,7 +79,7 @@ def set_executables(self, rasr_binary_path, rasr_arch="linux-x86_64-standard"): self.flf_tool_exe = rasr_binary_path.join_right(f"flf-tool.{rasr_arch}") self.kws_tool_exe = None # does not exist self.lattice_processor_exe = rasr_binary_path.join_right(f"lattice-processor.{rasr_arch}") - self.lm_util_exe = None # does not exist + self.lm_util_exe = rasr_binary_path.join_right(f"lm-util.{rasr_arch}") self.nn_trainer_exe = rasr_binary_path.join_right(f"nn-trainer.{rasr_arch}") self.speech_recognizer_exe = rasr_binary_path.join_right(f"speech-recognizer.{rasr_arch}") From 6a236c8b1aa11e91de3ae7913f806b5293c2f227 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 16:48:34 +0200 Subject: [PATCH 08/27] more mem for LM + GC jobs --- lm/lm_image.py | 2 +- recognition/advanced_tree_search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lm/lm_image.py b/lm/lm_image.py index f5de4ac4e..c213a289b 100644 --- a/lm/lm_image.py +++ b/lm/lm_image.py @@ -21,7 +21,7 @@ def __init__( extra_config=None, extra_post_config=None, encoding="utf-8", - mem=2, + mem=4, ): kwargs = locals() del kwargs["self"] diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index ef52a1043..e14ad8d35 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -851,7 +851,7 @@ def __init__(self, crp, extra_config=None, extra_post_config=None): self.out_log_file = self.log_file_output_path("build_global_cache", crp, False) self.out_global_cache = self.output_path("global.cache", cached=True) - self.rqmt = {"time": 1, "cpu": 1, "mem": 2} + self.rqmt = {"time": 1, "cpu": 1, "mem": 4} def tasks(self): yield Task("create_files", mini_task=True) From a61b03f9ac65fede2df1b298c6b75b5aaa95749b Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 16:49:09 +0200 Subject: [PATCH 09/27] make mem configurable --- recognition/advanced_tree_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index e14ad8d35..d6b3a1f2f 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -831,7 +831,7 @@ class BuildGlobalCacheJob(rasr.RasrCommand, Job): Standalone job to create the global-cache for advanced-tree-search """ - def __init__(self, crp, extra_config=None, extra_post_config=None): + def __init__(self, crp, extra_config=None, extra_post_config=None, mem=4): """ :param rasr.CommonRasrParameters crp: common RASR params (required: lexicon, acoustic_model, language_model, recognizer) :param rasr.Configuration extra_config: overlay config that influences the Job's hash @@ -851,7 +851,7 @@ def __init__(self, crp, extra_config=None, extra_post_config=None): self.out_log_file = self.log_file_output_path("build_global_cache", crp, False) self.out_global_cache = self.output_path("global.cache", cached=True) - self.rqmt = {"time": 1, "cpu": 1, "mem": 4} + self.rqmt = {"time": 1, "cpu": 1, "mem": mem} def tasks(self): yield Task("create_files", mini_task=True) From 008c9dc0c1c2b7842d3509092b8f9a5c649de079 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 16:54:24 +0200 Subject: [PATCH 10/27] even more mem --- lm/lm_image.py | 2 +- recognition/advanced_tree_search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lm/lm_image.py b/lm/lm_image.py index c213a289b..1847ae670 100644 --- a/lm/lm_image.py +++ b/lm/lm_image.py @@ -21,7 +21,7 @@ def __init__( extra_config=None, extra_post_config=None, encoding="utf-8", - mem=4, + mem=12, ): kwargs = locals() del kwargs["self"] diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index d6b3a1f2f..1b074867b 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -831,7 +831,7 @@ class BuildGlobalCacheJob(rasr.RasrCommand, Job): Standalone job to create the global-cache for advanced-tree-search """ - def __init__(self, crp, extra_config=None, extra_post_config=None, mem=4): + def __init__(self, crp, extra_config=None, extra_post_config=None, mem=12): """ :param rasr.CommonRasrParameters crp: common RASR params (required: lexicon, acoustic_model, language_model, recognizer) :param rasr.Configuration extra_config: overlay config that influences the Job's hash From 07e11361de04668f1481c0bb707b2c876ee9d58d Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 17:28:18 +0200 Subject: [PATCH 11/27] enable split behavior by default, document hash implications --- recognition/advanced_tree_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 1b074867b..c74efd8d9 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -149,7 +149,7 @@ def __init__( lmgc_mem: float = 12.0, lmgc_alias: Optional[str] = None, lmgc_scorer: Optional[rasr.FeatureScorer] = None, - separate_lmi_gc_generation: bool = False, + separate_lmi_gc_generation: bool = True, model_combination_config: Optional[rasr.RasrConfig] = None, model_combination_post_config: Optional[rasr.RasrConfig] = None, extra_config: Optional[rasr.RasrConfig] = None, @@ -173,7 +173,7 @@ def __init__( :param lmgc_mem: Memory requirement for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_alias: Alias for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_scorer: Dummy scorer for the AdvancedTreeSearchLmImageAndGlobalCacheJob which is required but unused - :param separate_lmi_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash + :param separate_lmi_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so using separate jobs is the default. :param model_combination_config: Configuration for model combination :param model_combination_post_config: Post config for model combination :param extra_config: Additional Config for recognition From cdc791a7e396141960b0aadf1a533d07edb8a751 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Wed, 30 Aug 2023 14:38:56 +0200 Subject: [PATCH 12/27] disable flag by default --- recognition/advanced_tree_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index c74efd8d9..e73ebdb4d 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -149,7 +149,7 @@ def __init__( lmgc_mem: float = 12.0, lmgc_alias: Optional[str] = None, lmgc_scorer: Optional[rasr.FeatureScorer] = None, - separate_lmi_gc_generation: bool = True, + separate_lmi_gc_generation: bool = False, model_combination_config: Optional[rasr.RasrConfig] = None, model_combination_post_config: Optional[rasr.RasrConfig] = None, extra_config: Optional[rasr.RasrConfig] = None, From ab010c5d6ec19a3b413c7a0ba16b5ea02eb983d9 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 11 Sep 2023 17:30:19 +0200 Subject: [PATCH 13/27] Rename flag to be more clear --- recognition/advanced_tree_search.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index e73ebdb4d..fe5b5e3c5 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -149,7 +149,7 @@ def __init__( lmgc_mem: float = 12.0, lmgc_alias: Optional[str] = None, lmgc_scorer: Optional[rasr.FeatureScorer] = None, - separate_lmi_gc_generation: bool = False, + separate_lm_image_gc_generation: bool = False, model_combination_config: Optional[rasr.RasrConfig] = None, model_combination_post_config: Optional[rasr.RasrConfig] = None, extra_config: Optional[rasr.RasrConfig] = None, @@ -173,7 +173,7 @@ def __init__( :param lmgc_mem: Memory requirement for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_alias: Alias for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_scorer: Dummy scorer for the AdvancedTreeSearchLmImageAndGlobalCacheJob which is required but unused - :param separate_lmi_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so using separate jobs is the default. + :param separate_lm_image_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so using separate jobs is the default. :param model_combination_config: Configuration for model combination :param model_combination_post_config: Post config for model combination :param extra_config: Additional Config for recognition @@ -272,7 +272,7 @@ def create_config( lmgc_mem: float, lmgc_alias: Optional[str], lmgc_scorer: Optional[rasr.FeatureScorer], - separate_lmi_gc_generation: bool, + separate_lm_image_gc_generation: bool, model_combination_config: Optional[rasr.RasrConfig], model_combination_post_config: Optional[rasr.RasrConfig], extra_config: Optional[rasr.RasrConfig], @@ -284,7 +284,7 @@ def specialize_lm_config(crp, lm_config): crp.language_model_config = lm_config return crp - if separate_lmi_gc_generation: + if separate_lm_image_gc_generation: gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) arpa_lms = lm.find_arpa_lms(crp.language_model_config, None) From 5a8ec3d3af4869c511a5826a55f7bfbf180c1ce8 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 11 Sep 2023 17:31:08 +0200 Subject: [PATCH 14/27] rename local function --- recognition/advanced_tree_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index fe5b5e3c5..9c5201042 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -279,7 +279,7 @@ def create_config( extra_post_config: Optional[rasr.RasrConfig], **kwargs, ): - def specialize_lm_config(crp, lm_config): + def add_lm_config_to_crp(crp, lm_config): crp = copy.deepcopy(crp) crp.language_model_config = lm_config return crp @@ -290,7 +290,7 @@ def specialize_lm_config(crp, lm_config): arpa_lms = lm.find_arpa_lms(crp.language_model_config, None) lm_image_jobs = { (i + 1): lm.CreateLmImageJob( - specialize_lm_config(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config + add_lm_config_to_crp(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config ) for i, (lm_config, _lm_post_config) in enumerate(arpa_lms) } From ed001c4fccc501a67078553d971e31140970c605 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 6 Nov 2023 13:32:10 +0100 Subject: [PATCH 15/27] fix wording --- recognition/advanced_tree_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 9c5201042..e488dab1f 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -173,7 +173,7 @@ def __init__( :param lmgc_mem: Memory requirement for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_alias: Alias for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_scorer: Dummy scorer for the AdvancedTreeSearchLmImageAndGlobalCacheJob which is required but unused - :param separate_lm_image_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so using separate jobs is the default. + :param separate_lm_image_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so NOT using separate jobs is the default. :param model_combination_config: Configuration for model combination :param model_combination_post_config: Post config for model combination :param extra_config: Additional Config for recognition From 0182ad618391397d9283b3852063e98fd12ddf9d Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Wed, 30 Jul 2025 11:38:37 -0400 Subject: [PATCH 16/27] introduce enum --- recognition/advanced_tree_search.py | 37 ++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 48649fe38..582aebe24 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -11,6 +11,7 @@ Path = setup_path(__package__) import copy +from enum import Enum import math import os import shutil @@ -131,6 +132,11 @@ def hash(cls, kwargs): class AdvancedTreeSearchJob(rasr.RasrCommand, Job): + class LmCacheMethod(Enum): + JOINED = "joined" + SEPARATE = "separate" + NONE = "none" + def __init__( self, crp: rasr.CommonRasrParameters, @@ -149,7 +155,7 @@ def __init__( lmgc_mem: float = 12.0, lmgc_alias: Optional[str] = None, lmgc_scorer: Optional[rasr.FeatureScorer] = None, - separate_lm_image_gc_generation: bool = False, + lm_cache_metod: LmCacheMethod = LmCacheMethod.JOINED, model_combination_config: Optional[rasr.RasrConfig] = None, model_combination_post_config: Optional[rasr.RasrConfig] = None, extra_config: Optional[rasr.RasrConfig] = None, @@ -173,7 +179,10 @@ def __init__( :param lmgc_mem: Memory requirement for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_alias: Alias for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_scorer: Dummy scorer for the AdvancedTreeSearchLmImageAndGlobalCacheJob which is required but unused - :param separate_lm_image_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so NOT using separate jobs is the default. + :param lm_cache_method: Specifies, how the LM image and the global cache should be created: + JOINED (default) -> automatically create lm images and global cache as output of one job + SEPARATE -> automatically create lm images and global cache separately + NONE -> don't create lm images or global cache as part of this job at all :param model_combination_config: Configuration for model combination :param model_combination_post_config: Post config for model combination :param extra_config: Additional Config for recognition @@ -192,7 +201,7 @@ def __init__( self.lm_gc_job, self.gc_job, self.lm_image_jobs, - ) = AdvancedTreeSearchJob.create_config(**kwargs) + ) = self.create_config(**kwargs) self.feature_flow = feature_flow self.exe = self.select_exe(crp.flf_tool_exe, "flf-tool") self.concurrent = crp.concurrent @@ -272,7 +281,7 @@ def create_config( lmgc_mem: float, lmgc_alias: Optional[str], lmgc_scorer: Optional[rasr.FeatureScorer], - separate_lm_image_gc_generation: bool, + lm_cache_method: LmCacheMethod, model_combination_config: Optional[rasr.RasrConfig], model_combination_post_config: Optional[rasr.RasrConfig], extra_config: Optional[rasr.RasrConfig], @@ -284,7 +293,13 @@ def add_lm_config_to_crp(crp, lm_config): crp.language_model_config = lm_config return crp - if separate_lm_image_gc_generation: + if lm_cache_method == cls.LmCacheMethod.NONE: + gc_job = None + lm_gc = None + lm_images = None + gc = None + lm_image_jobs = {} + elif lm_cache_method == cls.LmCacheMethod.SEPARATE: gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) arpa_lms = lm.find_arpa_lms(crp.language_model_config, None) @@ -299,7 +314,7 @@ def add_lm_config_to_crp(crp, lm_config): lm_images = {k: v.out_image for k, v in lm_image_jobs.items()} lm_gc = None - else: + elif lm_cache_method == cls.LmCacheMethod.JOINED: lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( crp, lmgc_scorer if lmgc_scorer is not None else feature_scorer, extra_config, extra_post_config ) @@ -312,6 +327,8 @@ def add_lm_config_to_crp(crp, lm_config): gc_job = None lm_image_jobs = {} + else: + raise TypeError("Argument `lm_cache_method` must be of type `AdvancedTreeSearchJob.LmCacheMethod`") search_parameters = cls.update_search_parameters(search_parameters) @@ -411,14 +428,16 @@ def add_lm_config_to_crp(crp, lm_config): ] post_config.flf_lattice_tool.global_cache.read_only = True - post_config.flf_lattice_tool.global_cache.file = gc + if lm_cache_method != cls.LmCacheMethod.NONE: + post_config.flf_lattice_tool.global_cache.file = gc arpa_lms = lm.find_arpa_lms( config.flf_lattice_tool.network.recognizer.lm, post_config.flf_lattice_tool.network.recognizer.lm, ) - for i, (_lm_config, lm_post_config) in enumerate(arpa_lms): - lm_post_config.image = lm_images[i + 1] + if lm_cache_method != cls.LmCacheMethod.NONE: + for i, (_lm_config, lm_post_config) in enumerate(arpa_lms): + lm_post_config.image = lm_images[i + 1] # Remaining Flf-network From f3199fbdea5714a13915120ebfea43ba7551d530 Mon Sep 17 00:00:00 2001 From: DanEnergetics Date: Wed, 30 Jul 2025 17:42:41 +0200 Subject: [PATCH 17/27] Apply suggestions from code review Co-authored-by: michelwi Co-authored-by: Benedikt Hilmes --- lm/util.py | 2 +- recognition/advanced_tree_search.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lm/util.py b/lm/util.py index b9fb0ebec..bd305c359 100644 --- a/lm/util.py +++ b/lm/util.py @@ -9,7 +9,7 @@ def _has_image(c: rasr.RasrConfig, pc: rasr.RasrConfig): return res -def find_arpa_lms(lm_config: rasr.RasrConfig, lm_post_config=None) -> List[Tuple[rasr.RasrConfig, rasr.RasrConfig]]: +def find_arpa_lms(lm_config: rasr.RasrConfig, lm_post_config: Optional[rasr.RasrConfig] = None) -> List[Tuple[rasr.RasrConfig, rasr.RasrConfig]]: result = [] if lm_config.type == "ARPA": diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 48649fe38..8c781e77b 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -279,7 +279,7 @@ def create_config( extra_post_config: Optional[rasr.RasrConfig], **kwargs, ): - def add_lm_config_to_crp(crp, lm_config): + def add_lm_config_to_crp(crp: rasr.CommonRasrParameters, lm_config: rasr.RasrConfig): crp = copy.deepcopy(crp) crp.language_model_config = lm_config return crp @@ -287,12 +287,12 @@ def add_lm_config_to_crp(crp, lm_config): if separate_lm_image_gc_generation: gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) - arpa_lms = lm.find_arpa_lms(crp.language_model_config, None) + arpa_lms = lm.find_arpa_lms(crp.language_model_config, crp.language_model_post_config) lm_image_jobs = { (i + 1): lm.CreateLmImageJob( add_lm_config_to_crp(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config ) - for i, (lm_config, _lm_post_config) in enumerate(arpa_lms) + for i, (lm_config, _) in enumerate(arpa_lms) } gc = gc_job.out_global_cache From f02c2c93e9c1358abbabe4d40265c15754dbec13 Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Wed, 30 Jul 2025 11:56:38 -0400 Subject: [PATCH 18/27] change util function signature --- lm/util.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lm/util.py b/lm/util.py index bd305c359..660f15eb0 100644 --- a/lm/util.py +++ b/lm/util.py @@ -1,11 +1,11 @@ -from typing import List, Tuple +from typing import List, Optional, Tuple import i6_core.rasr as rasr -def _has_image(c: rasr.RasrConfig, pc: rasr.RasrConfig): - res = c._get("image") is not None - res = res or (pc is not None and pc._get("image") is not None) +def _has_image(config: rasr.RasrConfig, post_config: Optional[rasr.RasrConfig]): + res = config._get("image") is not None + res = res or (post_config is not None and post_config._get("image") is not None) return res From 9b6153865df52eab2577ce2f0b8e7499a4819209 Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Wed, 30 Jul 2025 12:11:19 -0400 Subject: [PATCH 19/27] more reviewer comments --- recognition/advanced_tree_search.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 415db93d0..870a108a4 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -293,6 +293,7 @@ def add_lm_config_to_crp(crp: rasr.CommonRasrParameters, lm_config: rasr.RasrCon crp.language_model_config = lm_config return crp + arpa_lms = lm.find_arpa_lms(crp.language_model_config, crp.language_model_post_config) if lm_cache_method == cls.LmCacheMethod.NONE: gc_job = None lm_gc = None @@ -302,7 +303,6 @@ def add_lm_config_to_crp(crp: rasr.CommonRasrParameters, lm_config: rasr.RasrCon elif lm_cache_method == cls.LmCacheMethod.SEPARATE: gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) - arpa_lms = lm.find_arpa_lms(crp.language_model_config, crp.language_model_post_config) lm_image_jobs = { (i + 1): lm.CreateLmImageJob( add_lm_config_to_crp(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config @@ -430,13 +430,7 @@ def add_lm_config_to_crp(crp: rasr.CommonRasrParameters, lm_config: rasr.RasrCon post_config.flf_lattice_tool.global_cache.read_only = True if lm_cache_method != cls.LmCacheMethod.NONE: post_config.flf_lattice_tool.global_cache.file = gc - - arpa_lms = lm.find_arpa_lms( - config.flf_lattice_tool.network.recognizer.lm, - post_config.flf_lattice_tool.network.recognizer.lm, - ) - if lm_cache_method != cls.LmCacheMethod.NONE: - for i, (_lm_config, lm_post_config) in enumerate(arpa_lms): + for i, (_, lm_post_config) in enumerate(arpa_lms): lm_post_config.image = lm_images[i + 1] # Remaining Flf-network From d2507d04c0cfbde18addd5a3d63c94ef2d75ec0c Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Wed, 30 Jul 2025 12:12:53 -0400 Subject: [PATCH 20/27] ruff formatting --- lm/util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lm/util.py b/lm/util.py index 660f15eb0..658cf5e52 100644 --- a/lm/util.py +++ b/lm/util.py @@ -9,7 +9,9 @@ def _has_image(config: rasr.RasrConfig, post_config: Optional[rasr.RasrConfig]): return res -def find_arpa_lms(lm_config: rasr.RasrConfig, lm_post_config: Optional[rasr.RasrConfig] = None) -> List[Tuple[rasr.RasrConfig, rasr.RasrConfig]]: +def find_arpa_lms( + lm_config: rasr.RasrConfig, lm_post_config: Optional[rasr.RasrConfig] = None +) -> List[Tuple[rasr.RasrConfig, rasr.RasrConfig]]: result = [] if lm_config.type == "ARPA": From a5a6af9320b06a8e8128d0b636727a853a1daa3c Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Wed, 30 Jul 2025 12:18:13 -0400 Subject: [PATCH 21/27] adjust rescoring job --- recognition/advanced_tree_search.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 870a108a4..ef069662e 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -532,6 +532,8 @@ def __init__( self.config, self.post_config, self.lm_gc_job, + self.gc_job, + self.lm_image_jobs, ) = AdvancedTreeSearchWithRescoringJob.create_config(**kwargs) @classmethod @@ -545,7 +547,7 @@ def create_config( rescoring_lookahead_scale, **kwargs, ): - config, post_config, lm_gc_job = super().create_config(**kwargs) + config, *remainder = super().create_config(**kwargs) config.flf_lattice_tool.network.recognizer.links = "rescore" @@ -560,7 +562,7 @@ def create_config( rescore_config.lookahead_scale = rescoring_lookahead_scale rescore_config.lm = rescoring_lm_config - return config, post_config, lm_gc_job + return config, *remainder class BidirectionalAdvancedTreeSearchJob(rasr.RasrCommand, Job): From f74c52fe1d1341779ede73c0b365c10b18b9610b Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Wed, 30 Jul 2025 12:23:35 -0400 Subject: [PATCH 22/27] fix parameter name typo --- recognition/advanced_tree_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index ef069662e..77cb1245f 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -155,7 +155,7 @@ def __init__( lmgc_mem: float = 12.0, lmgc_alias: Optional[str] = None, lmgc_scorer: Optional[rasr.FeatureScorer] = None, - lm_cache_metod: LmCacheMethod = LmCacheMethod.JOINED, + lm_cache_method: LmCacheMethod = LmCacheMethod.JOINED, model_combination_config: Optional[rasr.RasrConfig] = None, model_combination_post_config: Optional[rasr.RasrConfig] = None, extra_config: Optional[rasr.RasrConfig] = None, From 0f82f3fcf286744c8a9eaebab499f2ddbc391294 Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Wed, 30 Jul 2025 12:40:46 -0400 Subject: [PATCH 23/27] fix empty post config --- recognition/advanced_tree_search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 77cb1245f..fcf230ffc 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -293,6 +293,7 @@ def add_lm_config_to_crp(crp: rasr.CommonRasrParameters, lm_config: rasr.RasrCon crp.language_model_config = lm_config return crp + crp.language_model_post_config = crp.language_model_post_config or rasr.RasrConfig() arpa_lms = lm.find_arpa_lms(crp.language_model_config, crp.language_model_post_config) if lm_cache_method == cls.LmCacheMethod.NONE: gc_job = None From ffe3681b522d4813a7fb412b0f66e18e3b9ac1a1 Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Wed, 6 Aug 2025 12:56:13 -0400 Subject: [PATCH 24/27] postpone arpa discovery -> should fix hash test --- recognition/advanced_tree_search.py | 79 +++++++++++++++-------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index fcf230ffc..360c16451 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -293,44 +293,6 @@ def add_lm_config_to_crp(crp: rasr.CommonRasrParameters, lm_config: rasr.RasrCon crp.language_model_config = lm_config return crp - crp.language_model_post_config = crp.language_model_post_config or rasr.RasrConfig() - arpa_lms = lm.find_arpa_lms(crp.language_model_config, crp.language_model_post_config) - if lm_cache_method == cls.LmCacheMethod.NONE: - gc_job = None - lm_gc = None - lm_images = None - gc = None - lm_image_jobs = {} - elif lm_cache_method == cls.LmCacheMethod.SEPARATE: - gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) - - lm_image_jobs = { - (i + 1): lm.CreateLmImageJob( - add_lm_config_to_crp(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config - ) - for i, (lm_config, _) in enumerate(arpa_lms) - } - - gc = gc_job.out_global_cache - lm_images = {k: v.out_image for k, v in lm_image_jobs.items()} - - lm_gc = None - elif lm_cache_method == cls.LmCacheMethod.JOINED: - lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( - crp, lmgc_scorer if lmgc_scorer is not None else feature_scorer, extra_config, extra_post_config - ) - if lmgc_alias is not None: - lm_gc.add_alias(lmgc_alias) - lm_gc.rqmt["mem"] = lmgc_mem - - gc = lm_gc.out_global_cache - lm_images = lm_gc.out_lm_images - - gc_job = None - lm_image_jobs = {} - else: - raise TypeError("Argument `lm_cache_method` must be of type `AdvancedTreeSearchJob.LmCacheMethod`") - search_parameters = cls.update_search_parameters(search_parameters) la_opts = { @@ -428,6 +390,47 @@ def add_lm_config_to_crp(crp: rasr.CommonRasrParameters, lm_config: rasr.RasrCon "cache_high" ] + # Handle caching of ARPA LMs and maybe build global cache + arpa_lms = lm.find_arpa_lms( + config.flf_lattice_tool.network.recognizer.lm, + post_config.flf_lattice_tool.network.recognizer.lm + ) + if lm_cache_method == cls.LmCacheMethod.NONE: + gc_job = None + lm_gc = None + lm_images = None + gc = None + lm_image_jobs = {} + elif lm_cache_method == cls.LmCacheMethod.SEPARATE: + gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) + + lm_image_jobs = { + (i + 1): lm.CreateLmImageJob( + add_lm_config_to_crp(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config + ) + for i, (lm_config, _) in enumerate(arpa_lms) + } + + gc = gc_job.out_global_cache + lm_images = {k: v.out_image for k, v in lm_image_jobs.items()} + + lm_gc = None + elif lm_cache_method == cls.LmCacheMethod.JOINED: + lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( + crp, lmgc_scorer if lmgc_scorer is not None else feature_scorer, extra_config, extra_post_config + ) + if lmgc_alias is not None: + lm_gc.add_alias(lmgc_alias) + lm_gc.rqmt["mem"] = lmgc_mem + + gc = lm_gc.out_global_cache + lm_images = lm_gc.out_lm_images + + gc_job = None + lm_image_jobs = {} + else: + raise TypeError("Argument `lm_cache_method` must be of type `AdvancedTreeSearchJob.LmCacheMethod`") + post_config.flf_lattice_tool.global_cache.read_only = True if lm_cache_method != cls.LmCacheMethod.NONE: post_config.flf_lattice_tool.global_cache.file = gc From ef5e8ced0ac91adfb880b002334207c1ac65da7a Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Wed, 6 Aug 2025 12:57:32 -0400 Subject: [PATCH 25/27] ruff --- recognition/advanced_tree_search.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 360c16451..3126b8cce 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -392,8 +392,7 @@ def add_lm_config_to_crp(crp: rasr.CommonRasrParameters, lm_config: rasr.RasrCon # Handle caching of ARPA LMs and maybe build global cache arpa_lms = lm.find_arpa_lms( - config.flf_lattice_tool.network.recognizer.lm, - post_config.flf_lattice_tool.network.recognizer.lm + config.flf_lattice_tool.network.recognizer.lm, post_config.flf_lattice_tool.network.recognizer.lm ) if lm_cache_method == cls.LmCacheMethod.NONE: gc_job = None From a2292d03e4743dceea078529d3648e6aee2af629 Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Fri, 15 Aug 2025 09:32:46 -0400 Subject: [PATCH 26/27] reviewer comments --- recognition/advanced_tree_search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 3126b8cce..d4f18a87c 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -180,7 +180,8 @@ def __init__( :param lmgc_alias: Alias for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_scorer: Dummy scorer for the AdvancedTreeSearchLmImageAndGlobalCacheJob which is required but unused :param lm_cache_method: Specifies, how the LM image and the global cache should be created: - JOINED (default) -> automatically create lm images and global cache as output of one job + JOINED (default) -> automatically create lm images and global cache as output of one job. Note, that this could + create hash issues if e.g. many search jobs with different TDP configuration are started. SEPARATE -> automatically create lm images and global cache separately NONE -> don't create lm images or global cache as part of this job at all :param model_combination_config: Configuration for model combination From 7d1bcc97a49bad2bf9510fe73f4231f50e9eecee Mon Sep 17 00:00:00 2001 From: Daniel Mann Date: Fri, 15 Aug 2025 09:34:15 -0400 Subject: [PATCH 27/27] more reviewer comments --- recognition/advanced_tree_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index d4f18a87c..52fd6d586 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -182,7 +182,7 @@ def __init__( :param lm_cache_method: Specifies, how the LM image and the global cache should be created: JOINED (default) -> automatically create lm images and global cache as output of one job. Note, that this could create hash issues if e.g. many search jobs with different TDP configuration are started. - SEPARATE -> automatically create lm images and global cache separately + SEPARATE -> automatically create lm images and global cache separately. This is to be preferred most of the time. NONE -> don't create lm images or global cache as part of this job at all :param model_combination_config: Configuration for model combination :param model_combination_post_config: Post config for model combination