Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.0.15

* **Bugfix for file data serialization**

## 0.0.14

* **Add support for batch file data**
Expand Down
10 changes: 5 additions & 5 deletions requirements/cli.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ idna==3.10
# via anyio
importlib-metadata==8.5.0
# via opentelemetry-api
marshmallow==3.23.1
marshmallow==3.23.2
# via dataclasses-json
mypy-extensions==1.0.0
# via typing-inspect
Expand Down Expand Up @@ -84,15 +84,15 @@ packaging==24.2
# opentelemetry-instrumentation
pandas==2.2.3
# via unstructured-ingest
protobuf==5.29.1
protobuf==5.29.2
# via
# googleapis-common-protos
# opentelemetry-proto
pydantic==2.10.3
pydantic==2.10.4
# via
# fastapi
# unstructured-ingest
pydantic-core==2.27.1
pydantic-core==2.27.2
# via pydantic
python-dateutil==2.9.0.post0
# via
Expand Down Expand Up @@ -122,7 +122,7 @@ typing-inspect==0.9.0
# via dataclasses-json
tzdata==2024.2
# via pandas
unstructured-ingest==0.3.10
unstructured-ingest==0.3.11
# via -r ./cli.in
uvicorn==0.34.0
# via -r ./cli.in
Expand Down
2 changes: 1 addition & 1 deletion requirements/constraints.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
unstructured-ingest>=0.3.10
unstructured-ingest>=0.3.1
2 changes: 1 addition & 1 deletion requirements/lint.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pyflakes==3.2.0
# via
# autoflake
# flake8
ruff==0.8.3
ruff==0.8.4
# via -r ./lint.in
tomli==2.2.1
# via
Expand Down
Empty file added test/api/__init__.py
Empty file.
125 changes: 125 additions & 0 deletions test/api/test_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
from pathlib import Path
from typing import Any, Optional

import pytest
from fastapi.testclient import TestClient
from pydantic import BaseModel
from unstructured_ingest.v2.interfaces import BatchFileData, BatchItem, FileData, SourceIdentifiers

from unstructured_platform_plugins.etl_uvicorn.api_generator import (
EtlApiException,
UsageData,
wrap_in_fastapi,
)
from unstructured_platform_plugins.schema.filedata_meta import FileDataMeta


class InvokeResponse(BaseModel):
usage: list[UsageData]
status_code: int
filedata_meta: FileDataMeta
status_code_text: Optional[str] = None
output: Optional[Any] = None

def generic_validation(self):
assert self.status_code == 200
assert not self.status_code_text


mock_file_data = [
FileData(
identifier="mock file data",
connector_type="CON",
source_identifiers=SourceIdentifiers(filename="n", fullpath="n"),
),
BatchFileData(
identifier="mock file data", connector_type="CON", batch_items=[BatchItem(identifier="bid")]
),
]


@pytest.mark.parametrize(
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
)
def test_async_sample_function(file_data):
from test.assets.async_typed_dict_response import async_sample_function as test_fn

client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))

post_body = {"file_data": file_data.model_dump(), "content": {"a": 1, "b": 2}}
resp = client.post("/invoke", json=post_body)
resp_content = resp.json()
invoke_response = InvokeResponse.model_validate(resp_content)
invoke_response.generic_validation()
output = invoke_response.output
assert isinstance(output, dict)
assert output == {"response": {"a_out": 1, "b_out": 2}}


@pytest.mark.parametrize(
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
)
def test_dataclass_response(file_data):
from test.assets.dataclass_response import sample_function_with_path as test_fn

client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
current_path = Path(__file__)

post_body = {"file_data": file_data.model_dump(), "c": 1, "b": "2", "a": str(current_path)}
resp = client.post("/invoke", json=post_body)
resp_content = resp.json()
invoke_response = InvokeResponse.model_validate(resp_content)
invoke_response.generic_validation()
output = invoke_response.output
assert isinstance(output, dict)
assert output == {
"t": "PosixPath",
"exists": True,
"resolved": str(current_path.resolve()),
"b": "2",
"c": 1,
}


@pytest.mark.parametrize(
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
)
def test_empty_input_and_output(file_data):
from test.assets.empty_input_and_response import SampleClass as TestClass

test_class = TestClass()
client = TestClient(wrap_in_fastapi(func=test_class.do_nothing, plugin_id="mock_plugin"))

resp = client.post("/invoke", json={"file_data": file_data.model_dump()})
resp_content = resp.json()
invoke_response = InvokeResponse.model_validate(resp_content)
invoke_response.generic_validation()
output = invoke_response.output
assert not output


@pytest.mark.parametrize(
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
)
def test_filedata_meta(file_data):
from test.assets.filedata_meta import Input
from test.assets.filedata_meta import process_input as test_fn

client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))

post_body = {"file_data": file_data.model_dump(), "i": Input(m=15).model_dump()}
resp = client.post("/invoke", json=post_body)
resp_content = resp.json()
invoke_response = InvokeResponse.model_validate(resp_content)
invoke_response.generic_validation()
filedata_meta = invoke_response.filedata_meta
assert len(filedata_meta.new_records) == 15
assert filedata_meta.terminate_current
assert not invoke_response.output


def test_improper_function():
from test.assets.improper_function import sample_improper_function as test_fn

with pytest.raises(EtlApiException):
TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
4 changes: 3 additions & 1 deletion test/assets/async_typed_dict_response.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Any, TypedDict
from typing import Any

from typing_extensions import TypedDict


class SampleFunctionResponse(TypedDict):
Expand Down
14 changes: 9 additions & 5 deletions test/assets/filedata_meta.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import math
from typing import Optional
from typing import Optional, Union

from pydantic import BaseModel
from unstructured_ingest.v2.interfaces import FileData
from unstructured_ingest.v2.interfaces import BatchFileData, FileData, SourceIdentifiers

from unstructured_platform_plugins.schema import FileDataMeta, NewRecord

Expand All @@ -15,17 +15,21 @@ class Output(BaseModel):
n: float


def process_input(i: Input, file_data: FileData, filedata_meta: FileDataMeta) -> Optional[Output]:
def process_input(
i: Input, file_data: Union[FileData, BatchFileData], filedata_meta: FileDataMeta
) -> Optional[Output]:
if i.m > 10:
filedata_meta.terminate_current = True
new_content = [
NewRecord(
file_data=FileData(
identifier=str(i.m + x), connector_type=file_data.connector_type
identifier=str(i.m + x),
connector_type=file_data.connector_type,
source_identifiers=SourceIdentifiers(filename=f"{x}.txt", fullpath=f"{x}.txt"),
),
contents=Output(n=float(i.m + x)),
)
for x in range(5)
for x in range(i.m)
]
filedata_meta.new_records.extend(new_content)
return None
Expand Down
64 changes: 32 additions & 32 deletions test/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,22 +449,16 @@ def fn(a: FileData) -> list[FileData]:
"identifier": {"type": "string"},
"connector_type": {"type": "string"},
"source_identifiers": {
"anyOf": [
{
"type": "object",
"properties": {
"filename": {"type": "string"},
"fullpath": {"type": "string"},
"rel_path": {
"anyOf": [{"type": "string"}, {"type": "null"}],
"default": None,
},
},
"required": ["filename", "fullpath"],
"type": "object",
"properties": {
"filename": {"type": "string"},
"fullpath": {"type": "string"},
"rel_path": {
"anyOf": [{"type": "string"}, {"type": "null"}],
"default": None,
},
{"type": "null"},
],
"default": None,
},
"required": ["filename", "fullpath"],
},
"metadata": {
"type": "object",
Expand Down Expand Up @@ -533,7 +527,13 @@ def fn(a: FileData) -> list[FileData]:
"default": None,
},
},
"required": ["identifier", "connector_type", "metadata", "additional_metadata"],
"required": [
"identifier",
"connector_type",
"source_identifiers",
"metadata",
"additional_metadata",
],
}
},
}
Expand All @@ -551,22 +551,16 @@ def fn(a: FileData) -> list[FileData]:
"identifier": {"type": "string"},
"connector_type": {"type": "string"},
"source_identifiers": {
"anyOf": [
{
"type": "object",
"properties": {
"filename": {"type": "string"},
"fullpath": {"type": "string"},
"rel_path": {
"anyOf": [{"type": "string"}, {"type": "null"}],
"default": None,
},
},
"required": ["filename", "fullpath"],
"type": "object",
"properties": {
"filename": {"type": "string"},
"fullpath": {"type": "string"},
"rel_path": {
"anyOf": [{"type": "string"}, {"type": "null"}],
"default": None,
},
{"type": "null"},
],
"default": None,
},
"required": ["filename", "fullpath"],
},
"metadata": {
"type": "object",
Expand Down Expand Up @@ -629,7 +623,13 @@ def fn(a: FileData) -> list[FileData]:
},
"display_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": None},
},
"required": ["identifier", "connector_type", "metadata", "additional_metadata"],
"required": [
"identifier",
"connector_type",
"source_identifiers",
"metadata",
"additional_metadata",
],
},
}

Expand Down
3 changes: 2 additions & 1 deletion test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest
from pydantic import BaseModel
from unstructured_ingest.v2.interfaces import FileData
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
from uvicorn.importer import import_from_string

from unstructured_platform_plugins.etl_uvicorn import utils
Expand Down Expand Up @@ -110,6 +110,7 @@ def fn(a: A, b: B, c: MyEnum, d: list, e: FileData) -> None:
identifier="custom_file_data",
connector_type="mock_connector",
additional_metadata={"additional": "metadata"},
source_identifiers=SourceIdentifiers(filename="file.txt", fullpath="file.txt"),
)
inputs = {
"a": {"b": 4, "c": 5.6},
Expand Down
2 changes: 1 addition & 1 deletion unstructured_platform_plugins/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.14" # pragma: no cover
__version__ = "0.0.15" # pragma: no cover
25 changes: 18 additions & 7 deletions unstructured_platform_plugins/etl_uvicorn/api_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from pydantic import BaseModel, Field, create_model
from starlette.responses import RedirectResponse
from unstructured_ingest.v2.interfaces import FileData
from uvicorn.config import LOG_LEVELS
from uvicorn.importer import import_from_string

Expand All @@ -30,6 +29,11 @@
schema_to_base_model,
)


class EtlApiException(Exception):
pass


logger = logging.getLogger("uvicorn.error")


Expand Down Expand Up @@ -100,7 +104,19 @@ def wrap_in_fastapi(
func: Callable,
plugin_id: str,
precheck_func: Optional[Callable] = None,
):
) -> FastAPI:
try:
return _wrap_in_fastapi(func=func, plugin_id=plugin_id, precheck_func=precheck_func)
except Exception as e:
logger.error(f"failed to wrap function in FastAPI: {e}", exc_info=True)
raise EtlApiException(e) from e


def _wrap_in_fastapi(
func: Callable,
plugin_id: str,
precheck_func: Optional[Callable] = None,
) -> FastAPI:
if precheck_func is not None:
check_precheck_func(precheck_func=precheck_func)

Expand Down Expand Up @@ -184,11 +200,6 @@ async def run_job(request: input_schema_model) -> ResponseType:
log_func_and_body(func=func, body=request.json())
# Create dictionary from pydantic model while preserving underlying types
request_dict = {f: getattr(request, f) for f in request.model_fields}
# Map FileData back to original dataclass if present
if "file_data" in request_dict:
request_dict["file_data"] = FileData.from_dict(
request_dict["file_data"].model_dump()
)
map_inputs(func=func, raw_inputs=request_dict)
if logger.level == LOG_LEVELS.get("trace", logging.NOTSET):
logger.log(level=logger.level, msg=f"passing inputs to function: {request_dict}")
Expand Down
Loading
Loading