feat: add config class (#218)

badGarnet · web-flow · commit 5e73202530d1 · 2023-09-21T13:55:38.000Z
- add a dataclass that contains configurations for inference processes
- the parameters can be specified via env variables, which override the
default values; this allows for flexible setup in different
applications/deployments
- currently contains specifications for table and layout related
parameters
- followup needed to identify other parameters that can be added to this
config class
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,7 @@
-## 0.5.32-dev0
+## 0.6.0
+
+* add a config class to handle parameter configurations for inference tasks; parameters in the config class can be set via environment variables
+* update behavior of `pad_image_with_background_color` so that input `pad` is applied to all sides
 
 ## 0.5.31
 
diff --git a/test_unstructured_inference/test_config.py b/test_unstructured_inference/test_config.py
@@ -0,0 +1,11 @@
+def test_default_config():
+    from unstructured_inference.config import inference_config
+
+    assert inference_config.TABLE_IMAGE_CROP_PAD == 12
+
+
+def test_env_override(monkeypatch):
+    monkeypatch.setenv("TABLE_IMAGE_CROP_PAD", 1)
+    from unstructured_inference.config import inference_config
+
+    assert inference_config.TABLE_IMAGE_CROP_PAD == 1
diff --git a/test_unstructured_inference/test_utils.py b/test_unstructured_inference/test_utils.py
@@ -135,7 +135,7 @@ def test_annotate_layout_elements_with_plot_result():
 def test_pad_image_with_background_color(mock_pil_image):
     pad = 10
     height, width = mock_pil_image.size
-    padded = pad_image_with_background_color(mock_pil_image, pad * 2, "black")
+    padded = pad_image_with_background_color(mock_pil_image, pad, "black")
     assert padded.size == (height + 2 * pad, width + 2 * pad)
     np.testing.assert_array_almost_equal(
         np.array(padded.crop((pad, pad, width + pad, height + pad))),
diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py
@@ -1 +1 @@
-__version__ = "0.5.32-dev0"  # pragma: no cover
+__version__ = "0.6.0"  # pragma: no cover
diff --git a/unstructured_inference/config.py b/unstructured_inference/config.py
@@ -0,0 +1,82 @@
+"""
+This module contains variables that are permitted to be tweaked by the system environment. For
+example, model parameters that change the output of an inference call. Constants do NOT belong in
+this module. Constants are values that are usually names for common options (e.g., color names) or
+settings that should not be altered without making a code change (e.g., definition of 1Gb of memory
+in bytes). Constants should go into `./constants.py`
+"""
+import os
+from dataclasses import dataclass
+
+
+@dataclass
+class InferenceConfig:
+    """class for configuring inference parameters"""
+
+    def _get_string(self, var: str, default_value: str = "") -> str:
+        """attempt to get the value of var from the os environment; if not present return the
+        default_value"""
+        return os.environ.get(var, default_value)
+
+    def _get_int(self, var: str, default_value: int) -> int:
+        if value := self._get_string(var):
+            return int(value)
+        return default_value
+
+    def _get_float(self, var: str, default_value: float) -> float:
+        if value := self._get_string(var):
+            return float(value)
+        return default_value
+
+    @property
+    def TABLE_IMAGE_CROP_PAD(self) -> int:
+        """extra image content to add around an identified table region; measured in pixels
+
+        The padding adds image data around an identified table bounding box for downstream table
+        structure detection model use as input
+        """
+        return self._get_int("TABLE_IMAGE_CROP_PAD", 12)
+
+    @property
+    def TABLE_IMAGE_BACKGROUND_PAD(self) -> int:
+        """number of pixels to pad around a table image with a white background color
+
+        The padding adds NO image data around an identified table bounding box; it simply adds white
+        background around the image
+        """
+        return self._get_int("TABLE_IMAGE_BACKGROUND_PAD", 0)
+
+    @property
+    def LAYOUT_SAME_REGION_THRESHOLD(self) -> float:
+        """threshold for two layouts' bounding boxes to be considered as the same region
+
+        When the intersection area over union area of the two is larger than this threshold the two
+        boxes are considered the same region
+        """
+        return self._get_float("LAYOUT_SAME_REGION_THRESHOLD", 0.75)
+
+    @property
+    def LAYOUT_SUBREGION_THRESHOLD(self) -> float:
+        """threshold for one bounding box to be considered as a sub-region of another bounding box
+
+        When the intersection region area divided by self area is larger than this threshold self is
+        considered a subregion of the other
+        """
+        return self._get_float("LAYOUT_SUBREGION_THRESHOLD", 0.75)
+
+    @property
+    def ELEMENTS_H_PADDING_COEF(self) -> float:
+        """When extending the boundaries of a PDF object for the purpose of determining which other
+        elements should be considered in the same text region, we use a relative distance based on
+        some fraction of the block height (typically character height). This is the fraction used
+        for the horizontal extension applied to the left and right sides.
+        """
+        return self._get_float("ELEMENTS_H_PADDING_COEF", 0.4)
+
+    @property
+    def ELEMENTS_V_PADDING_COEF(self) -> float:
+        """Same as ELEMENTS_H_PADDING_COEF but the vertical extension."""
+        return self._get_float("ELEMENTS_V_PADDING_COEF", 0.3)
+
+
+inference_config = InferenceConfig()
diff --git a/unstructured_inference/inference/elements.py b/unstructured_inference/inference/elements.py
@@ -11,18 +11,11 @@
 from PIL import Image
 from scipy.sparse.csgraph import connected_components
 
+from unstructured_inference.config import inference_config
 from unstructured_inference.logger import logger
 from unstructured_inference.math import safe_division
 from unstructured_inference.models import tesseract
 
-# When extending the boundaries of a PDF object for the purpose of determining which other elements
-# should be considered in the same text region, we use a relative distance based on some fraction of
-# the block height (typically character height). This is the fraction used for the horizontal
-# extension applied to the left and right sides.
-H_PADDING_COEF = 0.4
-# Same as above but the vertical extension.
-V_PADDING_COEF = 0.3
-
 
 @dataclass
 class Rectangle:
@@ -156,7 +149,10 @@ def partition_groups_from_regions(regions: Collection[Rectangle]) -> List[List[R
     """Partitions regions into groups of regions based on proximity. Returns list of lists of
     regions, each list corresponding with a group"""
     padded_regions = [
-        r.vpad(r.height * V_PADDING_COEF).hpad(r.height * H_PADDING_COEF) for r in regions
+        r.vpad(r.height * inference_config.ELEMENTS_V_PADDING_COEF).hpad(
+            r.height * inference_config.ELEMENTS_H_PADDING_COEF,
+        )
+        for r in regions
     ]
 
     intersection_mtx = intersections(*padded_regions)
diff --git a/unstructured_inference/inference/layoutelement.py b/unstructured_inference/inference/layoutelement.py
@@ -8,6 +8,7 @@
 from pandas import DataFrame
 from PIL import Image
 
+from unstructured_inference.config import inference_config
 from unstructured_inference.constants import FULL_PAGE_REGION_THRESHOLD, SUBREGION_THRESHOLD_FOR_OCR
 from unstructured_inference.inference.elements import (
     ImageTextRegion,
@@ -79,7 +80,7 @@ def interpret_table_block(text_block: TextRegion, image: Image.Image) -> str:
     tables.load_agent()
     if tables.tables_agent is None:
         raise RuntimeError("Unable to load table extraction agent.")
-    padded_block = text_block.pad(12)
+    padded_block = text_block.pad(inference_config.TABLE_IMAGE_CROP_PAD)
     cropped_image = image.crop((padded_block.x1, padded_block.y1, padded_block.x2, padded_block.y2))
     return tables.tables_agent.predict(cropped_image)
 
@@ -90,8 +91,8 @@ def merge_inferred_layout_with_extracted_layout(
     page_image_size: tuple,
     ocr_layout: Optional[List[TextRegion]] = None,
     supplement_with_ocr_elements: bool = True,
-    same_region_threshold: float = 0.75,
-    subregion_threshold: float = 0.75,
+    same_region_threshold: float = inference_config.LAYOUT_SAME_REGION_THRESHOLD,
+    subregion_threshold: float = inference_config.LAYOUT_SUBREGION_THRESHOLD,
 ) -> List[LayoutElement]:
     """Merge two layouts to produce a single layout."""
     extracted_elements_to_add: List[TextRegion] = []
diff --git a/unstructured_inference/models/tables.py b/unstructured_inference/models/tables.py
@@ -14,6 +14,7 @@
 from PIL import Image
 from transformers import DetrImageProcessor, TableTransformerForObjectDetection
 
+from unstructured_inference.config import inference_config
 from unstructured_inference.logger import logger
 from unstructured_inference.models.table_postprocess import Rect
 from unstructured_inference.models.unstructuredmodel import UnstructuredModel
@@ -113,7 +114,11 @@ def get_tokens(self, x: Image):
 
         return tokens
 
-    def get_structure(self, x: Image, pad_for_structure_detection: int = 50) -> dict:
+    def get_structure(
+        self,
+        x: Image,
+        pad_for_structure_detection: int = inference_config.TABLE_IMAGE_BACKGROUND_PAD,
+    ) -> dict:
         """get the table structure as a dictionary contaning different types of elements as
         key-value pairs; check table-transformer documentation for more information"""
         with torch.no_grad():
@@ -126,7 +131,11 @@ def get_structure(self, x: Image, pad_for_structure_detection: int = 50) -> dict
             outputs_structure["pad_for_structure_detection"] = pad_for_structure_detection
             return outputs_structure
 
-    def run_prediction(self, x: Image, pad_for_structure_detection: int = 50):
+    def run_prediction(
+        self,
+        x: Image,
+        pad_for_structure_detection: int = inference_config.TABLE_IMAGE_BACKGROUND_PAD,
+    ):
         """Predict table structure"""
         outputs_structure = self.get_structure(x, pad_for_structure_detection)
         tokens = self.get_tokens(x=x)
diff --git a/unstructured_inference/utils.py b/unstructured_inference/utils.py
@@ -130,7 +130,7 @@ def pad_image_with_background_color(
     pad: int = 10,
     background_color: str = "white",
 ) -> Image.Image:
-    """pads an input image with the same background color around it by pad//2 on all 4 sides
+    """pads an input image with the same background color around it by pad on all 4 sides
 
     The original image is kept intact and a new image is returned with padding added.
     """
@@ -139,6 +139,6 @@ def pad_image_with_background_color(
         raise ValueError(
             "Can not pad an image with negative space! Please use a positive value for `pad`.",
         )
-    new = Image.new(image.mode, (width + pad, height + pad), background_color)
-    new.paste(image, (pad // 2, pad // 2))
+    new = Image.new(image.mode, (width + pad * 2, height + pad * 2), background_color)
+    new.paste(image, (pad, pad))
     return new

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.5.32-dev0" # pragma: no cover`
	`1`	`+__version__ = "0.6.0" # pragma: no cover`