Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 0 additions & 75 deletions docker-compose-postgres.yml

This file was deleted.

3 changes: 1 addition & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ services:

ocr-forwarding-api:
build:
context: src/api
context: packages/api
dockerfile: Dockerfile
args:
<<: *build-args-common
Expand All @@ -199,7 +199,6 @@ services:
ports:
- "${OCR_FORWARDING_API_PORT}:${OCR_FORWARDING_API_PORT}"
volumes:
- ./src/api/app:/app
- ${PWD}/${DATA_FOLDER}:/data
networks:
- pyonb_ocr_api
Expand Down
19 changes: 19 additions & 0 deletions packages/api/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
FROM ghcr.io/astral-sh/uv:python3.13-bookworm AS app

SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"]

WORKDIR /app
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

COPY ./pyproject.toml .
COPY ./README.md .
COPY ./src src/

RUN uv venv
RUN --mount=type=cache,target=/root/.cache/uv,sharing=locked uv sync --no-editable --no-dev

# make uvicorn etc available
ENV PATH="/app/.venv/bin:$PATH"

CMD uvicorn pyonb_api.main:app --host 0.0.0.0 --port "$OCR_FORWARDING_API_PORT" --workers 4 --use-colors
25 changes: 25 additions & 0 deletions packages/api/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# `pyonb` forwarding API

The forwarding API is used to send documents to an OCR service for processing. It provides a consistent
interface for using the various OCR tools supported by `pyonb`.

## Usage

You will need to define the `OCR_FORWARDING_API_PORT` environment variable in a `.env` file.

Then, spin up the `ocr-forwarding-api` service, along with the OCR service you wish to use.
For example, if you would like to use `kreuzberg`, run the following from the top-level `pyonb` directory:

```shell
docker-compose --profile kreuzberg up --build --detach
```

You can then use `curl` to send a PDF to the forwarding API:

```shell
curl -v -X POST http://127.0.0.1:8110/kreuzberg-ocr/inference_single \
-F "file_upload=@document.pdf" \
-H "accept: application/json"
```

Note: this assumes you have set `OCR_FORWARDING_API_PORT` to `8110`.
16 changes: 16 additions & 0 deletions packages/api/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[build-system]
build-backend = "hatchling.build"
requires = ["hatchling"]

[project]
dependencies = [
"aiohttp",
"fastapi[standard]",
"requests",
"uvicorn",
]
description = "pyonb forwarding API"
name = "pyonb-api"
readme = "README.md"
requires-python = ">=3.11"
version = "0.1.0"
File renamed without changes.
File renamed without changes.
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ dynamic = [
keywords = [
]
name = "pyonb"
optional-dependencies = {dev = [
optional-dependencies = {api = [
"pyonb-api",
], dev = [
"build",
"marker-pdf[full]==1.6.2",
"mypy",
Expand Down Expand Up @@ -159,12 +161,14 @@ gh.python."3.12" = ["py312"]
gh.python."3.13" = ["py313"]

[tool.uv.sources]
pyonb-api = {workspace = true}
pyonb-docling = {workspace = true}
pyonb-kreuzberg = {workspace = true}
pyonb-marker = {workspace = true}
pyonb-paddleocr = {workspace = true}

[tool.uv.workspace]
members = [
"packages/api",
"packages/ocr/*",
]
13 changes: 0 additions & 13 deletions src/api/Dockerfile

This file was deleted.

36 changes: 0 additions & 36 deletions src/api/app/util.py

This file was deleted.

4 changes: 0 additions & 4 deletions src/api/requirements.txt

This file was deleted.

26 changes: 0 additions & 26 deletions tests/api/test_app.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
"""Test functions in /src/api/app/main.py."""

import os
from pathlib import Path

import pytest
import requests

Expand All @@ -29,26 +26,3 @@ def test_local_start_api_and_healthy(ocr_forwarding_api_port: str) -> None:
def test_check_services(check_container_healthy: bool) -> None:
"""Test OCR API Docker services are up."""
assert check_container_healthy, "Docker services not running. Required for full API testing."


def test_data_folder_contents() -> None:
"""
Test documents data folder correct for testing.

For testing, env variable HOST_DATA_FOLDER should mount /tests/data/single_synthetic_doc
to CONTAINER_DATA_FOLDER, which should be /data inside the container.
There should be a single test PDF file (ms-note-one-page.pdf) inside.
"""
assert "single_synthetic_doc" in str(os.getenv("HOST_DATA_FOLDER")), (
"For testing, HOST_DATA_FOLDER should point at /tests/data/single_synthetic_doc. "
"Check environment variables if this fails. "
f"HOST_DATA_FOLDER: {os.getenv('HOST_DATA_FOLDER')}"
)
assert "data" in str(os.getenv("CONTAINER_DATA_FOLDER")), (
"CONTAINER_DATA_FOLDER should point at /data."
"Check environment variables if this fails. "
f"CONTAINER_DATA_FOLDER: {os.getenv('CONTAINER_DATA_FOLDER')}"
)

test_pdf_file_path = Path(str(os.getenv("HOST_DATA_FOLDER"))) / "ms-note-one-page.pdf"
assert test_pdf_file_path.is_file(), f"File at {test_pdf_file_path} not found."
Loading