Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ jobs:
- name: Create test env
shell: bash
run: |
cp tests/sample.env tests/.env
sed -i "s|LLMWHISPERER_API_KEY=|LLMWHISPERER_API_KEY=${{ secrets.LLMWHISPERER_API_KEY }}|" tests/.env
cp sample.env .env
sed -i "s|LLMWHISPERER_API_KEY=|LLMWHISPERER_API_KEY=${{ secrets.LLMWHISPERER_API_KEY }}|" .env

- name: Run tox
id: tox
Expand Down
5 changes: 2 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ repos:
exclude_types:
- "markdown"
- id: end-of-file-fixer
exclude: "tests/test_data/.*"
- id: check-yaml
args: [--unsafe]
- id: check-added-large-files
Expand Down Expand Up @@ -65,9 +66,7 @@ repos:
args: [--max-line-length=120]
exclude: |
(?x)^(
.*migrations/.*\.py|
unstract-core/tests/.*|
pkgs/unstract-flags/src/unstract/flags/evaluation_.*\.py|
tests/test_data/.*|
)$
- repo: https://github.com/pycqa/isort
rev: 5.13.2
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

LLMs are powerful, but their output is only as good as the input you provide. LLMWhisperer is a technology that presents data from complex documents (with different designs and formats) to LLMs in a way that they can best understand. LLMWhisperer features include Layout Preserving Mode, auto-switching between native text and OCR modes, and proper representation of radio buttons and checkboxes in PDF forms as raw text, among other features. You can now extract raw text from complex PDF documents or images without having to worry about whether the document is a native text document, a scanned image, or just a picture taken with a smartphone. Extraction of raw text from invoices, purchase orders, bank statements, etc., works easily for structured data extraction with LLMs, powered by LLMWhisperer's Layout Preserving mode.

Refer to the client documentation for more information: [LLMWhisperer Client Documentation](https://docs.unstract.com/llm_whisperer/python_client/llm_whisperer_python_client_intro)
Refer to the client documentation for more information: [LLMWhisperer Client Documentation](https://docs.unstract.com/llmwhisperer/index.html)

## Features

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dependencies = [
"requests>=2",
]
readme = "README.md"
urls = { Homepage = "https://llmwhisperer.unstract.com", Source = "https://github.com/Zipstack/llm-whisperer-python-client" }
urls = { Homepage = "https://unstract.com/llmwhisperer/", Source = "https://github.com/Zipstack/llm-whisperer-python-client" }
license = {text = "AGPL v3"}
authors = [
{name = "Zipstack Inc", email = "[email protected]"},
Expand Down Expand Up @@ -69,7 +69,7 @@ includes = ["src"]
package-dir = "src"

[tool.pytest.ini_options]
env_files = ["tests/.env"]
env_files = [".env"]
addopts = "-s"
log_level = "INFO"
log_cli = true
Expand Down
1 change: 1 addition & 0 deletions tests/sample.env → sample.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
LLMWHISPERER_BASE_URL=https://llmwhisperer-api.unstract.com/v1
LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2
LLMWHISPERER_LOG_LEVEL=DEBUG
LLMWHISPERER_API_KEY=
5 changes: 3 additions & 2 deletions src/unstract/llmwhisperer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
__version__ = "0.22.0"
__version__ = "0.23.0"

from .client import LLMWhispererClient # noqa: F401
from .client_v2 import LLMWhispererClientV2 # noqa: F401


def get_sdk_version():
def get_llmw_py_client_version():
"""Returns the SDK version."""
return __version__
16 changes: 5 additions & 11 deletions src/unstract/llmwhisperer/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,7 @@ class LLMWhispererClient:
client's activities and errors.
"""

formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
log_stream_handler = logging.StreamHandler()
log_stream_handler.setFormatter(formatter)
Expand Down Expand Up @@ -117,9 +115,7 @@ def __init__(
self.api_key = os.getenv("LLMWHISPERER_API_KEY", "")
else:
self.api_key = api_key
self.logger.debug(
"api_key set to %s", LLMWhispererUtils.redact_key(self.api_key)
)
self.logger.debug("api_key set to %s", LLMWhispererUtils.redact_key(self.api_key))

self.api_timeout = api_timeout

Expand Down Expand Up @@ -169,7 +165,7 @@ def whisper(
ocr_provider: str = "advanced",
line_splitter_tolerance: float = 0.4,
horizontal_stretch_factor: float = 1.0,
encoding: str = "utf-8"
encoding: str = "utf-8",
) -> dict:
"""
Sends a request to the LLMWhisperer API to process a document.
Expand Down Expand Up @@ -240,12 +236,10 @@ def whisper(
should_stream = False
if url == "":
if stream is not None:

should_stream = True

def generate():
for chunk in stream:
yield chunk
yield from stream

req = requests.Request(
"POST",
Expand All @@ -269,7 +263,7 @@ def generate():
req = requests.Request("POST", api_url, params=params, headers=self.headers)
prepared = req.prepare()
s = requests.Session()
response = s.send(prepared, timeout=self.api_timeout, stream=should_stream)
response = s.send(prepared, timeout=timeout, stream=should_stream)
response.encoding = encoding
if response.status_code != 200 and response.status_code != 202:
message = json.loads(response.text)
Expand Down
Loading
Loading