Skip to content

Commit 17a7b02

Browse files
[FEAT] Added vline hline in llmW adapter (#108)
* added vline hline in llmW adapter * changed description for the fields
1 parent c48e21f commit 17a7b02

File tree

4 files changed

+26
-1
lines changed

4 files changed

+26
-1
lines changed

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.50.0"
1+
__version__ = "0.51.0"
22

33

44
def get_sdk_version():

src/unstract/sdk/adapters/x2text/llm_whisperer/src/constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ class WhispererConfig:
7272
ADD_LINE_NOS = "add_line_nos"
7373
OUTPUT_JSON = "output_json"
7474
PAGE_SEPARATOR = "page_seperator"
75+
MARK_VERTICAL_LINES = "mark_vertical_lines"
76+
MARK_HORIZONTAL_LINES = "mark_horizontal_lines"
7577

7678

7779
class WhisperStatus:
@@ -100,3 +102,6 @@ class WhispererDefaults:
100102
ADD_LINE_NOS = True
101103
OUTPUT_JSON = True
102104
PAGE_SEPARATOR = "<<< >>>"
105+
MARK_VERTICAL_LINES = False
106+
MARK_HORIZONTAL_LINES = False
107+

src/unstract/sdk/adapters/x2text/llm_whisperer/src/llm_whisperer.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,14 @@ def _get_whisper_params(self, enable_highlight: bool = False) -> dict[str, Any]:
174174
WhispererConfig.PAGE_SEPARATOR,
175175
WhispererDefaults.PAGE_SEPARATOR,
176176
),
177+
WhispererConfig.MARK_VERTICAL_LINES: self.config.get(
178+
WhispererConfig.MARK_VERTICAL_LINES,
179+
WhispererDefaults.MARK_VERTICAL_LINES,
180+
),
181+
WhispererConfig.MARK_HORIZONTAL_LINES: self.config.get(
182+
WhispererConfig.MARK_HORIZONTAL_LINES,
183+
WhispererDefaults.MARK_HORIZONTAL_LINES,
184+
),
177185
}
178186
if not params[WhispererConfig.FORCE_TEXT_PROCESSING]:
179187
params.update(

src/unstract/sdk/adapters/x2text/llm_whisperer/src/static/json_schema.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,18 @@
114114
"title": "Gaussian Blur Radius",
115115
"default": 0.0,
116116
"description": "The radius of the gaussian blur to use for pre-processing the image during OCR based extraction. Useful to eliminate noise from the image. Default is 0.0 if the value is not explicitly set. Available only in the Enterprise version."
117+
},
118+
"mark_vertical_lines": {
119+
"type": "boolean",
120+
"title": "Mark Vertical Lines",
121+
"default": false,
122+
"description": "Detect vertical lines in the document and replicate the same using text (using \"|\" symbol). Use this for displaying tables with borders."
123+
},
124+
"mark_horizontal_lines": {
125+
"type": "boolean",
126+
"title": "Mark Horizontal Lines",
127+
"default": false,
128+
"description": "Detect horizontal lines in the document and replicate the same using text (using \"-\" symbol). Use this for displaying tables with borders and other horizontal separators found in the document."
117129
}
118130
},
119131
"required": [

0 commit comments

Comments
 (0)