From db6c546e25e4128bd8f3a12bcc5c7b248d82ed77 Mon Sep 17 00:00:00 2001 From: Antonio Jimeno Yepes Date: Thu, 15 May 2025 16:39:54 -0700 Subject: [PATCH 1/4] Adding preprocessor parameters --- unstructured_inference/models/tables.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unstructured_inference/models/tables.py b/unstructured_inference/models/tables.py index 3308f61d..a33695e5 100644 --- a/unstructured_inference/models/tables.py +++ b/unstructured_inference/models/tables.py @@ -62,6 +62,8 @@ def initialize( """Loads the donut model using the specified parameters""" self.device = device self.feature_extractor = DetrImageProcessor.from_pretrained(model) + # value not set in the configuration and needed for newer models + self.feature_extractor.size['shortest_edge'] = 800 try: logger.info("Loading the table structure model ...") From 07ad59f529f7ec6e35c5c8fde1f18ac56e3937b4 Mon Sep 17 00:00:00 2001 From: Antonio Jimeno Yepes Date: Thu, 15 May 2025 16:42:14 -0700 Subject: [PATCH 2/4] Adding preprocessor parameters --- unstructured_inference/models/tables.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unstructured_inference/models/tables.py b/unstructured_inference/models/tables.py index a33695e5..7402a2d0 100644 --- a/unstructured_inference/models/tables.py +++ b/unstructured_inference/models/tables.py @@ -63,6 +63,7 @@ def initialize( self.device = device self.feature_extractor = DetrImageProcessor.from_pretrained(model) # value not set in the configuration and needed for newer models + # https://huggingface.co/microsoft/table-transformer-structure-recognition-v1.1-all/discussions/1 self.feature_extractor.size['shortest_edge'] = 800 try: From cd9b8f2a11f393d625932ce9f46c75f84be3d899 Mon Sep 17 00:00:00 2001 From: Antonio Jimeno Yepes Date: Thu, 15 May 2025 16:44:48 -0700 Subject: [PATCH 3/4] Version and linting --- CHANGELOG.md | 4 ++++ unstructured_inference/__version__.py | 2 +- unstructured_inference/models/tables.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 484006f5..10c11ac2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.1-dev + +* adding parameter to table image preprocessor related to the image size + ## 1.0.1 * fix: moving the table transformer model to device when loading the model instead of once the model is loaded. diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index abd26d6a..bf3bb2fd 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "1.0.1" # pragma: no cover +__version__ = "1.0.1-dev" # pragma: no cover diff --git a/unstructured_inference/models/tables.py b/unstructured_inference/models/tables.py index 7402a2d0..b9a01226 100644 --- a/unstructured_inference/models/tables.py +++ b/unstructured_inference/models/tables.py @@ -64,7 +64,7 @@ def initialize( self.feature_extractor = DetrImageProcessor.from_pretrained(model) # value not set in the configuration and needed for newer models # https://huggingface.co/microsoft/table-transformer-structure-recognition-v1.1-all/discussions/1 - self.feature_extractor.size['shortest_edge'] = 800 + self.feature_extractor.size["shortest_edge"] = 800 try: logger.info("Loading the table structure model ...") From 2fe6c27ff4f6fed309bd298d9bce71b691e81adf Mon Sep 17 00:00:00 2001 From: Yao You Date: Fri, 16 May 2025 10:12:21 +0200 Subject: [PATCH 4/4] update version number --- CHANGELOG.md | 2 +- unstructured_inference/__version__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10c11ac2..4a36cdd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 1.0.1-dev +## 1.0.2 * adding parameter to table image preprocessor related to the image size diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index bf3bb2fd..86fbd395 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "1.0.1-dev" # pragma: no cover +__version__ = "1.0.2" # pragma: no cover