From 66002d38d93c2fc4c6807d74de929e8869a652af Mon Sep 17 00:00:00 2001 From: Yao You Date: Mon, 2 Jun 2025 16:32:04 -0500 Subject: [PATCH 1/5] feat: use singleton instead of global - where previously global was used now they are singleton class variables --- CHANGELOG.md | 4 ++++ unstructured_inference/__version__.py | 2 +- unstructured_inference/models/base.py | 25 ++++++++++++++++++++++--- unstructured_inference/models/tables.py | 15 ++++++++++++--- 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b11d8ec8..f4f991bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.4 + +* feat: use singleton instead of `global` to store shared variables + ## 1.0.3 * setting longest_edge=1333 to the table image processor diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index 6520c47e..18934c58 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "1.0.3" # pragma: no cover +__version__ = "1.0.4" # pragma: no cover diff --git a/unstructured_inference/models/base.py b/unstructured_inference/models/base.py index 826bf5a5..20b48014 100644 --- a/unstructured_inference/models/base.py +++ b/unstructured_inference/models/base.py @@ -15,7 +15,28 @@ DEFAULT_MODEL = "yolox" -models: Dict[str, UnstructuredModel] = {} + +class Models(dict): + _instance = None + + @classmethod + def instance(cls): + if cls._instance is None: + cls._instance = cls.__new__(cls) + cls.models: Dict[str, UnstructuredModel] = {} + return cls._instance + + def __contains__(self, key): + return key in self.models + + def __getitem__(self, key: str): + return self.models.__getitem__(key) + + def __setitem__(self, key: str, value: UnstructuredModel): + self.models[key] = value + + +models: Dict[str, UnstructuredModel] = Models.instance() def get_default_model_mappings() -> Tuple[ @@ -46,8 +67,6 @@ def get_model(model_name: Optional[str] = None) -> UnstructuredModel: # TODO(alan): These cases are similar enough that we can probably do them all together with # importlib - global models # noqa - if model_name is None: default_name_from_env = os.environ.get("UNSTRUCTURED_DEFAULT_MODEL_NAME") model_name = default_name_from_env if default_name_from_env is not None else DEFAULT_MODEL diff --git a/unstructured_inference/models/tables.py b/unstructured_inference/models/tables.py index 45c98017..021100a1 100644 --- a/unstructured_inference/models/tables.py +++ b/unstructured_inference/models/tables.py @@ -27,9 +27,17 @@ class UnstructuredTableTransformerModel(UnstructuredModel): """Unstructured model wrapper for table-transformer.""" + _instance = None + def __init__(self): pass + @classmethod + def instance(cls): + if cls._instance is None: + cls._instance = cls.__new__(cls) + return cls._instance + def predict( self, x: PILImage.Image, @@ -72,7 +80,8 @@ def initialize( cached_current_verbosity = logging.get_verbosity() logging.set_verbosity_error() self.model = TableTransformerForObjectDetection.from_pretrained( - model, device_map=self.device + model, + device_map=self.device, ) logging.set_verbosity(cached_current_verbosity) self.model.eval() @@ -135,12 +144,12 @@ def run_prediction( return prediction -tables_agent: UnstructuredTableTransformerModel = UnstructuredTableTransformerModel() +tables_agent: UnstructuredTableTransformerModel = UnstructuredTableTransformerModel.instance() def load_agent(): """Loads the Table agent as a global variable to ensure that we only load it once.""" - global tables_agent # noqa + tables_agent = UnstructuredTableTransformerModel.instance() if not hasattr(tables_agent, "model"): logger.info("Loading the Table agent ...") From 5bea8d43328ec76e7460a6aef3e033e0b6ff39b3 Mon Sep 17 00:00:00 2001 From: Yao You Date: Mon, 2 Jun 2025 16:41:16 -0500 Subject: [PATCH 2/5] remove unnecessary inheritance --- unstructured_inference/models/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/models/base.py b/unstructured_inference/models/base.py index 20b48014..67a8e3c4 100644 --- a/unstructured_inference/models/base.py +++ b/unstructured_inference/models/base.py @@ -16,7 +16,7 @@ DEFAULT_MODEL = "yolox" -class Models(dict): +class Models: _instance = None @classmethod From c36dcc7a39b1e42557cbb6efc5438feb8bfee430 Mon Sep 17 00:00:00 2001 From: Yao You Date: Mon, 2 Jun 2025 17:08:02 -0500 Subject: [PATCH 3/5] add docstring --- unstructured_inference/models/base.py | 2 ++ unstructured_inference/models/tables.py | 1 + 2 files changed, 3 insertions(+) diff --git a/unstructured_inference/models/base.py b/unstructured_inference/models/base.py index 67a8e3c4..b49c45a2 100644 --- a/unstructured_inference/models/base.py +++ b/unstructured_inference/models/base.py @@ -21,6 +21,7 @@ class Models: @classmethod def instance(cls): + """return an instance if one already exists otherwise create an instance""" if cls._instance is None: cls._instance = cls.__new__(cls) cls.models: Dict[str, UnstructuredModel] = {} @@ -66,6 +67,7 @@ def get_model(model_name: Optional[str] = None) -> UnstructuredModel: """Gets the model object by model name.""" # TODO(alan): These cases are similar enough that we can probably do them all together with # importlib + models: Dict[str, UnstructuredModel] = Models.instance() if model_name is None: default_name_from_env = os.environ.get("UNSTRUCTURED_DEFAULT_MODEL_NAME") diff --git a/unstructured_inference/models/tables.py b/unstructured_inference/models/tables.py index 021100a1..80d34daf 100644 --- a/unstructured_inference/models/tables.py +++ b/unstructured_inference/models/tables.py @@ -34,6 +34,7 @@ def __init__(self): @classmethod def instance(cls): + """return an instance if one already exists otherwise create an instance""" if cls._instance is None: cls._instance = cls.__new__(cls) return cls._instance From 98e5b35d70ea15b4a38a4c4449ab621faf1f3b7d Mon Sep 17 00:00:00 2001 From: Yao You Date: Mon, 2 Jun 2025 17:31:44 -0500 Subject: [PATCH 4/5] use pythonic way to implement for linting --- unstructured_inference/models/base.py | 10 ++++------ unstructured_inference/models/tables.py | 3 +-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/unstructured_inference/models/base.py b/unstructured_inference/models/base.py index b49c45a2..58bcc59e 100644 --- a/unstructured_inference/models/base.py +++ b/unstructured_inference/models/base.py @@ -16,14 +16,13 @@ DEFAULT_MODEL = "yolox" -class Models: +class Models(object): _instance = None - @classmethod - def instance(cls): + def __new__(cls): """return an instance if one already exists otherwise create an instance""" if cls._instance is None: - cls._instance = cls.__new__(cls) + cls._instance = super(Models, cls).__new__(cls) cls.models: Dict[str, UnstructuredModel] = {} return cls._instance @@ -37,7 +36,7 @@ def __setitem__(self, key: str, value: UnstructuredModel): self.models[key] = value -models: Dict[str, UnstructuredModel] = Models.instance() +models: Dict[str, UnstructuredModel] = Models() def get_default_model_mappings() -> Tuple[ @@ -67,7 +66,6 @@ def get_model(model_name: Optional[str] = None) -> UnstructuredModel: """Gets the model object by model name.""" # TODO(alan): These cases are similar enough that we can probably do them all together with # importlib - models: Dict[str, UnstructuredModel] = Models.instance() if model_name is None: default_name_from_env = os.environ.get("UNSTRUCTURED_DEFAULT_MODEL_NAME") diff --git a/unstructured_inference/models/tables.py b/unstructured_inference/models/tables.py index 80d34daf..9760dfc4 100644 --- a/unstructured_inference/models/tables.py +++ b/unstructured_inference/models/tables.py @@ -149,8 +149,7 @@ def run_prediction( def load_agent(): - """Loads the Table agent as a global variable to ensure that we only load it once.""" - tables_agent = UnstructuredTableTransformerModel.instance() + """Loads the Table agent.""" if not hasattr(tables_agent, "model"): logger.info("Loading the Table agent ...") From 00aa48049d1af3578926d9650a0c783ddb6fe1db Mon Sep 17 00:00:00 2001 From: Yao You Date: Mon, 2 Jun 2025 17:37:01 -0500 Subject: [PATCH 5/5] linting --- unstructured_inference/models/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/models/base.py b/unstructured_inference/models/base.py index 58bcc59e..37344100 100644 --- a/unstructured_inference/models/base.py +++ b/unstructured_inference/models/base.py @@ -36,7 +36,7 @@ def __setitem__(self, key: str, value: UnstructuredModel): self.models[key] = value -models: Dict[str, UnstructuredModel] = Models() +models: Models = Models() def get_default_model_mappings() -> Tuple[