Skip to content

Commit 6005abc

Browse files
MthwRobinson, JIAQIA, dlozeve
authored
feat: configure googlevisionapi (#3126)
### Summary Includes changes from #3117. Merged into a feature branch to run the full test suite. Original PR description: The Google Vision API allows for [configuration of the API endpoint](https://cloud.google.com/vision/docs/ocr#regionalization), to select whether the data should be sent to the US or the EU. This PR adds an environment variable (`GOOGLEVISION_API_ENDPOINT`) to configure it. --------- Co-authored-by: JIAQIA <[email protected]> Co-authored-by: Dimitri Lozeve <[email protected]>
1 parent 4a96d54 commit 6005abc

File tree

3 files changed

+19
-4
lines changed

3 files changed

+19
-4
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
### Features
1010

11+
* **Allow configuration of the Google Vision API endpoint** Add the `GOOGLEVISION_API_ENDPOINT` environment variable to select whether the Google Vision API endpoint in the US or the EU is used.
12+
1113
### Fixes
1214

1315
* **Fix V2 S3 Destination Connector authentication** Fixes bugs with S3 Destination Connector where the connection config was neither registered nor properly deserialized.

unstructured/partition/utils/config.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,14 @@ def TESSERACT_OPTIMUM_TEXT_HEIGHT(self) -> int:
9595
"""optimum text height for tesseract OCR"""
9696
return self._get_int("TESSERACT_OPTIMUM_TEXT_HEIGHT", 20)
9797

98+
@property
99+
def GOOGLEVISION_API_ENDPOINT(self) -> str:
100+
"""API endpoint to use for Google Vision"""
101+
return self._get_string("GOOGLEVISION_API_ENDPOINT", "")
102+
98103
@property
99104
def OCR_AGENT(self) -> str:
100-
"""error margin when comparing if a ocr region is within the table element when preparing
101-
table tokens
102-
"""
105+
"""OCR Agent to use"""
103106
return self._get_string("OCR_AGENT", OCR_AGENT_TESSERACT)
104107

105108
@property

unstructured/partition/utils/ocr_models/google_vision_ocr.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
from google.cloud.vision import Image, ImageAnnotatorClient, Paragraph, TextAnnotation
77

8+
from unstructured.logger import logger, trace_logger
9+
from unstructured.partition.utils.config import env_config
810
from unstructured.partition.utils.constants import Source
911
from unstructured.partition.utils.ocr_models.ocr_interface import OCRAgent
1012

@@ -18,7 +20,14 @@ class OCRAgentGoogleVision(OCRAgent):
1820
"""OCR service implementation for Google Vision API."""
1921

2022
def __init__(self) -> None:
21-
self.client = ImageAnnotatorClient()
23+
client_options = {}
24+
api_endpoint = env_config.GOOGLEVISION_API_ENDPOINT
25+
if api_endpoint:
26+
logger.info(f"Using Google Vision OCR with endpoint {api_endpoint}")
27+
client_options["api_endpoint"] = api_endpoint
28+
else:
29+
logger.info("Using Google Vision OCR with default endpoint")
30+
self.client = ImageAnnotatorClient(client_options=client_options)
2231

2332
def is_text_sorted(self) -> bool:
2433
return True
@@ -34,6 +43,7 @@ def get_text_from_image(self, image: PILImage.Image, ocr_languages: str = "eng")
3443
def get_layout_from_image(
3544
self, image: PILImage.Image, ocr_languages: str = "eng"
3645
) -> list[TextRegion]:
46+
trace_logger.detail("Processing entire page OCR with Google Vision API...")
3747
with BytesIO() as buffer:
3848
image.save(buffer, format="PNG")
3949
response = self.client.document_text_detection(image=Image(content=buffer.getvalue()))

0 commit comments

Comments
 (0)