Skip to content

Commit a424e5b

Browse files
SDK & adapters merge (#73)
* SDK & adapters merge - First cut * Refactor unstract.sdk.core.* back to unstract.sdk.* * Pick up latest changes * Remove LICENSE * Refactoring adapters.py to adapter.py to resolve name conflict * Remove dependency on adapters * Update lock file * Fix regex pattern * pin llama-index-core version --------- Signed-off-by: Gayathri <[email protected]>
1 parent 623807c commit a424e5b

File tree

179 files changed

+7009
-390
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

179 files changed

+7009
-390
lines changed

pdm.lock

Lines changed: 461 additions & 331 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,48 @@ dependencies = [
1111
"jsonschema~=4.18.2",
1212
"python-magic~=0.4.27",
1313
"python-dotenv==1.0.0",
14-
# LLM Triad
15-
"unstract-adapters~=0.22.0",
14+
# Adapter changes
1615
"llama-index==0.10.38",
16+
# Temporary hack to get out of llama-index issues
17+
# To be removed once llama-index version is upgraded
18+
"llama-index-core==0.10.56",
1719
"tiktoken~=0.4.0",
1820
"transformers==4.37.0",
19-
# Error handling, remove after moving it to adapters
20-
"openai~=1.21.2"
21+
"llama-index-embeddings-google==0.1.5",
22+
"llama-index-embeddings-azure-openai==0.1.6",
23+
# Disabling Hugging Face & FastEmbed to
24+
# keep the image size in check
25+
# "llama-index-embeddings-huggingface==0.2.0",
26+
# Disabling FastEmbed due to its high processing power requirements
27+
# "llama-index-embeddings-fastembed==0.1.4",
28+
"llama-index-embeddings-openai==0.1.11",
29+
"llama-index-embeddings-azure-openai==0.1.6",
30+
"llama-index-embeddings-ollama==0.1.2",
31+
"llama-index-vector-stores-postgres==0.1.3",
32+
# Including Supabase conflicts with postgres on pg-vector.
33+
# Hence, commenting it out at the moment
34+
# "llama-index-vector-stores-supabase==0.1.3",
35+
"llama-index-vector-stores-milvus==0.1.18",
36+
"llama-index-vector-stores-weaviate==0.1.4",
37+
"llama-index-vector-stores-pinecone==0.1.4",
38+
"llama-index-vector-stores-qdrant==0.2.8",
39+
"llama-index-llms-openai==0.1.26",
40+
"llama-index-llms-palm==0.1.5",
41+
"llama-index-llms-mistralai==0.1.10",
42+
"llama-index-llms-anyscale==0.1.3",
43+
"llama-index-llms-anthropic==0.1.11",
44+
"llama-index-llms-azure-openai==0.1.5",
45+
"llama-index-llms-vertex==0.1.8",
46+
"llama-index-llms-replicate==0.1.3",
47+
"llama-index-llms-ollama==0.1.3",
48+
# OCR
49+
"filetype~=1.2.0",
50+
# Others
51+
# For singleton classes
52+
"singleton-decorator~=1.0.0",
53+
# For Llama Parse X2Text
54+
"llama-parse==0.4.1",
55+
"httpx>=0.25.2",
2156
]
2257
readme = "README.md"
2358
urls = { Homepage = "https://unstract.com", "Release notes" = "https://github.com/Zipstack/unstract-sdk/releases", Source = "https://github.com/Zipstack/unstract-sdk" }

src/unstract/sdk/LICENSE

Lines changed: 0 additions & 21 deletions
This file was deleted.

src/unstract/sdk/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
__version__ = "0.38.1"
1+
__version__ = "0.39.0"
2+
23

34

45
def get_sdk_version():

src/unstract/sdk/adapters.py renamed to src/unstract/sdk/adapter.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,7 @@ def get_adapter_configuration(
6767

6868
elif response.status_code == 404:
6969
self.tool.stream_log(
70-
f"adapter not found for: for adapter instance"
71-
f"{adapter_instance_id}",
70+
f"adapter not found for: for adapter instance" f"{adapter_instance_id}",
7271
level=LogLevel.ERROR,
7372
)
7473
return None
@@ -127,7 +126,6 @@ def get_adapter_config(
127126
] = tool_adapter.get_adapter_configuration(adapter_instance_id)
128127
if not adapter_metadata:
129128
tool.stream_error_and_exit(
130-
f"Adapter not found for "
131-
f"adapter instance: {adapter_instance_id}"
129+
f"Adapter not found for " f"adapter instance: {adapter_instance_id}"
132130
)
133131
return adapter_metadata
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import logging
2+
from logging import NullHandler
3+
from typing import Any
4+
5+
logging.getLogger(__name__).addHandler(NullHandler())
6+
7+
AdapterDict = dict[str, dict[str, Any]]
8+
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import logging
2+
from typing import Any
3+
4+
from singleton_decorator import singleton
5+
6+
from unstract.sdk.adapters import AdapterDict
7+
from unstract.sdk.adapters.base import Adapter
8+
from unstract.sdk.adapters.constants import Common
9+
from unstract.sdk.adapters.embedding import adapters as embedding_adapters
10+
from unstract.sdk.adapters.llm import adapters as llm_adapters
11+
from unstract.sdk.adapters.ocr import adapters as ocr_adapters
12+
from unstract.sdk.adapters.vectordb import adapters as vectordb_adapters
13+
from unstract.sdk.adapters.x2text import adapters as x2text_adapters
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
# Declaring this class as a Singleton to avoid initialising
19+
# adapters list every time
20+
@singleton
class Adapterkit:
    """Single lookup point for all adapters registered with the SDK.

    Combines the embedding, LLM, vector DB, X2Text and OCR adapter
    registries into one mapping keyed by adapter ID. The class is a
    singleton so the combined mapping is not rebuilt on every use.
    """

    def __init__(self) -> None:
        # Dict union: if two registries share an ID, the right-most one wins.
        self._adapters: AdapterDict = (
            embedding_adapters
            | llm_adapters
            | vectordb_adapters
            | x2text_adapters
            | ocr_adapters
        )

    @property
    def adapters(self) -> AdapterDict:
        """The combined adapter registry, keyed by adapter ID."""
        return self._adapters

    def get_adapter_class_by_adapter_id(self, adapter_id: str) -> type[Adapter]:
        """Look up the adapter class registered under an ID.

        Args:
            adapter_id (str): Identifies the adapter to look up

        Raises:
            RuntimeError: If no adapter is registered under the ID

        Returns:
            type[Adapter]: The adapter class itself (not an instance)
        """
        if adapter_id not in self._adapters:
            raise RuntimeError(f"Couldn't obtain adapter for {adapter_id}")
        registry_entry = self._adapters[adapter_id]
        adapter_class: type[Adapter] = registry_entry[Common.METADATA][
            Common.ADAPTER
        ]
        return adapter_class

    def get_adapter_by_id(
        self, adapter_id: str, *args: Any, **kwargs: Any
    ) -> Adapter:
        """Instantiates and returns an adapter.

        Args:
            adapter_id (str): Identifies adapter to create
            *args: Positional arguments forwarded to the adapter constructor
            **kwargs: Keyword arguments forwarded to the adapter constructor

        Raises:
            RuntimeError: If the ID is invalid/adapter is missing

        Returns:
            Adapter: Concrete impl of the `Adapter` base
        """
        adapter_class: type[Adapter] = self.get_adapter_class_by_adapter_id(
            adapter_id
        )
        return adapter_class(*args, **kwargs)

    def get_adapters_list(self) -> list[dict[str, Any]]:
        """Describe every registered adapter.

        Returns:
            list[dict[str, Any]]: One dict per adapter with the keys
                `id`, `name`, `class_name`, `description`, `icon`,
                `adapter_type` and `json_schema`
        """
        adapters = []
        # Only the registry entries are needed; the key is not used here.
        for adapter_registry_metadata in self._adapters.values():
            m: type[Adapter] = adapter_registry_metadata[Common.METADATA][
                Common.ADAPTER
            ]
            _id = m.get_id()
            name = m.get_name()
            adapter_type = m.get_adapter_type().name
            json_schema = m.get_json_schema()
            desc = m.get_description()
            icon = m.get_icon()
            adapters.append(
                {
                    "id": _id,
                    "name": name,
                    "class_name": m.__name__,
                    "description": desc,
                    "icon": icon,
                    "adapter_type": adapter_type,
                    "json_schema": json_schema,
                }
            )
        return adapters

src/unstract/sdk/adapters/base.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import logging
2+
from abc import ABC, abstractmethod
3+
4+
from unstract.sdk.adapters.enums import AdapterTypes
5+
6+
logger = logging.getLogger(__name__)
7+
8+
9+
class Adapter(ABC):
    """Abstract base class for adapters.

    Subclasses must override every static descriptor below as well as
    `test_connection`. The descriptor bodies here only supply
    empty-string placeholders.
    """

    def __init__(self, name: str):
        self.name = name

    @staticmethod
    @abstractmethod
    def get_id() -> str:
        """ID of the adapter; the base placeholder is an empty string."""
        return ""

    @staticmethod
    @abstractmethod
    def get_name() -> str:
        """Name of the adapter; the base placeholder is an empty string."""
        return ""

    @staticmethod
    @abstractmethod
    def get_description() -> str:
        """Description of the adapter; the base placeholder is empty."""
        return ""

    @staticmethod
    @abstractmethod
    def get_icon() -> str:
        """Icon of the adapter; the base placeholder is an empty string."""
        return ""

    @staticmethod
    @abstractmethod
    def get_json_schema() -> str:
        """JSON schema of the adapter; the base placeholder is empty."""
        return ""

    @staticmethod
    @abstractmethod
    def get_adapter_type() -> AdapterTypes:
        """Type of the adapter, expressed as an `AdapterTypes` value."""
        # NOTE(review): the placeholder below is a str, which does not match
        # the `AdapterTypes` annotation - confirm whether a dedicated enum
        # member should be returned here instead.
        return ""

    @abstractmethod
    def test_connection(self) -> bool:
        """Override to test connection for an adapter.

        Returns:
            bool: Flag indicating if the credentials are valid or not
        """
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
class Common:
    """String constants shared by the adapter modules."""

    # Keys used to index an adapter registry entry
    METADATA = "metadata"
    MODULE = "module"
    ADAPTER = "adapter"

    SRC_FOLDER = "src"

    # Adapter attribute names
    ADAPTER_METADATA = "adapter_metadata"
    ICON = "icon"
    ADAPTER_ID = "adapter_id"
    ADAPTER_TYPE = "adapter_type"

    # Fallback error text
    DEFAULT_ERR_MESSAGE = "Something went wrong"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from unstract.sdk.adapters import AdapterDict
2+
from unstract.sdk.adapters.embedding.register import EmbeddingRegistry
3+
4+
adapters: AdapterDict = {}
5+
EmbeddingRegistry.register_adapters(adapters)

0 commit comments

Comments
 (0)