Skip to content

Commit 5e73202

Browse files
authored
feat: add config class (#218)
- add a dataclass that contains configurations for inference processes - the parameters can be specified via env variables, which override the default values; this allows for flexible setup in different applications/deployments - currently contains specifications for table and layout related parameters - followup needed to identify other parameters that can be added to this config class
1 parent bfdf357 commit 5e73202

File tree

9 files changed

+122
-20
lines changed

9 files changed

+122
-20
lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
## 0.5.32-dev0
1+
## 0.6.0
2+
3+
* add a config class to handle parameter configurations for inference tasks; parameters in the config class can be set via environment variables
4+
* update behavior of `pad_image_with_background_color` so that input `pad` is applied to all sides
25

36
## 0.5.31
47

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
def test_default_config():
    """Without an override the config reports its built-in default crop padding."""
    from unstructured_inference.config import inference_config

    assert inference_config.TABLE_IMAGE_CROP_PAD == 12


def test_env_override(monkeypatch):
    """A value present in the environment takes precedence over the built-in default."""
    # Environment values are strings; passing an int makes pytest warn and coerce implicitly.
    monkeypatch.setenv("TABLE_IMAGE_CROP_PAD", "1")
    from unstructured_inference.config import inference_config

    assert inference_config.TABLE_IMAGE_CROP_PAD == 1

test_unstructured_inference/test_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def test_annotate_layout_elements_with_plot_result():
135135
def test_pad_image_with_background_color(mock_pil_image):
136136
pad = 10
137137
height, width = mock_pil_image.size
138-
padded = pad_image_with_background_color(mock_pil_image, pad * 2, "black")
138+
padded = pad_image_with_background_color(mock_pil_image, pad, "black")
139139
assert padded.size == (height + 2 * pad, width + 2 * pad)
140140
np.testing.assert_array_almost_equal(
141141
np.array(padded.crop((pad, pad, width + pad, height + pad))),
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.32-dev0" # pragma: no cover
1+
__version__ = "0.6.0" # pragma: no cover

unstructured_inference/config.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
"""
2+
This module contains variables that are permitted to be tweaked by the system environment. For
3+
example, model parameters that change the output of an inference call. Constants do NOT belong in
4+
this module. Constants are values that are usually names for common options (e.g., color names) or
5+
settings that should not be altered without making a code change (e.g., definition of 1Gb of memory
6+
in bytes). Constants should go into `./constants.py`
7+
"""
8+
import os
9+
from dataclasses import dataclass
10+
11+
12+
@dataclass
class InferenceConfig:
    """Inference parameters that can be overridden through environment variables.

    Each setting is a read-only property that consults ``os.environ`` every time it is accessed,
    so a change made to the environment after import is reflected on the next read. A variable
    that is unset or set to an empty string yields the built-in default.
    """

    def _get_string(self, var: str, default_value: str = "") -> str:
        """Return the value of environment variable ``var``, or ``default_value`` if it is unset."""
        return os.environ.get(var, default_value)

    def _get_int(self, var: str, default_value: int) -> int:
        """Return ``var`` parsed as an int; use ``default_value`` when it is unset or empty.

        Raises:
            ValueError: if the variable holds text that is not a valid integer.
        """
        if value := self._get_string(var):
            try:
                return int(value)
            except ValueError as err:
                # Name the variable so a misconfigured deployment is easy to diagnose.
                raise ValueError(
                    f"environment variable {var}={value!r} is not a valid integer",
                ) from err
        return default_value

    def _get_float(self, var: str, default_value: float) -> float:
        """Return ``var`` parsed as a float; use ``default_value`` when it is unset or empty.

        Raises:
            ValueError: if the variable holds text that is not a valid float.
        """
        if value := self._get_string(var):
            try:
                return float(value)
            except ValueError as err:
                # Name the variable so a misconfigured deployment is easy to diagnose.
                raise ValueError(
                    f"environment variable {var}={value!r} is not a valid float",
                ) from err
        return default_value

    @property
    def TABLE_IMAGE_CROP_PAD(self) -> int:
        """extra image content to add around an identified table region; measured in pixels

        The padding adds image data around an identified table bounding box for downstream table
        structure detection model use as input
        """
        return self._get_int("TABLE_IMAGE_CROP_PAD", 12)

    @property
    def TABLE_IMAGE_BACKGROUND_PAD(self) -> int:
        """number of pixels to pad around a table image with a white background color

        The padding adds NO image data around an identified table bounding box; it simply adds white
        background around the image
        """
        return self._get_int("TABLE_IMAGE_BACKGROUND_PAD", 0)

    @property
    def LAYOUT_SAME_REGION_THRESHOLD(self) -> float:
        """threshold for two layouts' bounding boxes to be considered as the same region

        When the intersection area over union area of the two is larger than this threshold the two
        boxes are considered the same region
        """
        return self._get_float("LAYOUT_SAME_REGION_THRESHOLD", 0.75)

    @property
    def LAYOUT_SUBREGION_THRESHOLD(self) -> float:
        """threshold for one bounding box to be considered as a sub-region of another bounding box

        When the intersection region area divided by self area is larger than this threshold self is
        considered a subregion of the other
        """
        return self._get_float("LAYOUT_SUBREGION_THRESHOLD", 0.75)

    @property
    def ELEMENTS_H_PADDING_COEF(self) -> float:
        """When extending the boundaries of a PDF object for the purpose of determining which other
        elements should be considered in the same text region, we use a relative distance based on
        some fraction of the block height (typically character height). This is the fraction used
        for the horizontal extension applied to the left and right sides.
        """
        return self._get_float("ELEMENTS_H_PADDING_COEF", 0.4)

    @property
    def ELEMENTS_V_PADDING_COEF(self) -> float:
        """Same as ELEMENTS_H_PADDING_COEF but the vertical extension."""
        return self._get_float("ELEMENTS_V_PADDING_COEF", 0.3)


# Shared module-level instance imported by the rest of the package.
inference_config = InferenceConfig()

unstructured_inference/inference/elements.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,11 @@
1111
from PIL import Image
1212
from scipy.sparse.csgraph import connected_components
1313

14+
from unstructured_inference.config import inference_config
1415
from unstructured_inference.logger import logger
1516
from unstructured_inference.math import safe_division
1617
from unstructured_inference.models import tesseract
1718

18-
# When extending the boundaries of a PDF object for the purpose of determining which other elements
19-
# should be considered in the same text region, we use a relative distance based on some fraction of
20-
# the block height (typically character height). This is the fraction used for the horizontal
21-
# extension applied to the left and right sides.
22-
H_PADDING_COEF = 0.4
23-
# Same as above but the vertical extension.
24-
V_PADDING_COEF = 0.3
25-
2619

2720
@dataclass
2821
class Rectangle:
@@ -156,7 +149,10 @@ def partition_groups_from_regions(regions: Collection[Rectangle]) -> List[List[R
156149
"""Partitions regions into groups of regions based on proximity. Returns list of lists of
157150
regions, each list corresponding with a group"""
158151
padded_regions = [
159-
r.vpad(r.height * V_PADDING_COEF).hpad(r.height * H_PADDING_COEF) for r in regions
152+
r.vpad(r.height * inference_config.ELEMENTS_V_PADDING_COEF).hpad(
153+
r.height * inference_config.ELEMENTS_H_PADDING_COEF,
154+
)
155+
for r in regions
160156
]
161157

162158
intersection_mtx = intersections(*padded_regions)

unstructured_inference/inference/layoutelement.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pandas import DataFrame
99
from PIL import Image
1010

11+
from unstructured_inference.config import inference_config
1112
from unstructured_inference.constants import FULL_PAGE_REGION_THRESHOLD, SUBREGION_THRESHOLD_FOR_OCR
1213
from unstructured_inference.inference.elements import (
1314
ImageTextRegion,
@@ -79,7 +80,7 @@ def interpret_table_block(text_block: TextRegion, image: Image.Image) -> str:
7980
tables.load_agent()
8081
if tables.tables_agent is None:
8182
raise RuntimeError("Unable to load table extraction agent.")
82-
padded_block = text_block.pad(12)
83+
padded_block = text_block.pad(inference_config.TABLE_IMAGE_CROP_PAD)
8384
cropped_image = image.crop((padded_block.x1, padded_block.y1, padded_block.x2, padded_block.y2))
8485
return tables.tables_agent.predict(cropped_image)
8586

@@ -90,8 +91,8 @@ def merge_inferred_layout_with_extracted_layout(
9091
page_image_size: tuple,
9192
ocr_layout: Optional[List[TextRegion]] = None,
9293
supplement_with_ocr_elements: bool = True,
93-
same_region_threshold: float = 0.75,
94-
subregion_threshold: float = 0.75,
94+
same_region_threshold: float = inference_config.LAYOUT_SAME_REGION_THRESHOLD,
95+
subregion_threshold: float = inference_config.LAYOUT_SUBREGION_THRESHOLD,
9596
) -> List[LayoutElement]:
9697
"""Merge two layouts to produce a single layout."""
9798
extracted_elements_to_add: List[TextRegion] = []

unstructured_inference/models/tables.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from PIL import Image
1515
from transformers import DetrImageProcessor, TableTransformerForObjectDetection
1616

17+
from unstructured_inference.config import inference_config
1718
from unstructured_inference.logger import logger
1819
from unstructured_inference.models.table_postprocess import Rect
1920
from unstructured_inference.models.unstructuredmodel import UnstructuredModel
@@ -113,7 +114,11 @@ def get_tokens(self, x: Image):
113114

114115
return tokens
115116

116-
def get_structure(self, x: Image, pad_for_structure_detection: int = 50) -> dict:
117+
def get_structure(
118+
self,
119+
x: Image,
120+
pad_for_structure_detection: int = inference_config.TABLE_IMAGE_BACKGROUND_PAD,
121+
) -> dict:
117122
"""get the table structure as a dictionary contaning different types of elements as
118123
key-value pairs; check table-transformer documentation for more information"""
119124
with torch.no_grad():
@@ -126,7 +131,11 @@ def get_structure(self, x: Image, pad_for_structure_detection: int = 50) -> dict
126131
outputs_structure["pad_for_structure_detection"] = pad_for_structure_detection
127132
return outputs_structure
128133

129-
def run_prediction(self, x: Image, pad_for_structure_detection: int = 50):
134+
def run_prediction(
135+
self,
136+
x: Image,
137+
pad_for_structure_detection: int = inference_config.TABLE_IMAGE_BACKGROUND_PAD,
138+
):
130139
"""Predict table structure"""
131140
outputs_structure = self.get_structure(x, pad_for_structure_detection)
132141
tokens = self.get_tokens(x=x)

unstructured_inference/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def pad_image_with_background_color(
130130
pad: int = 10,
131131
background_color: str = "white",
132132
) -> Image.Image:
133-
"""pads an input image with the same background color around it by pad//2 on all 4 sides
133+
"""pads an input image with the same background color around it by pad on all 4 sides
134134
135135
The original image is kept intact and a new image is returned with padding added.
136136
"""
@@ -139,6 +139,6 @@ def pad_image_with_background_color(
139139
raise ValueError(
140140
"Can not pad an image with negative space! Please use a positive value for `pad`.",
141141
)
142-
new = Image.new(image.mode, (width + pad, height + pad), background_color)
143-
new.paste(image, (pad // 2, pad // 2))
142+
new = Image.new(image.mode, (width + pad * 2, height + pad * 2), background_color)
143+
new.paste(image, (pad, pad))
144144
return new

0 commit comments

Comments
 (0)