Skip to content

Commit 6f06a6c

Browse files
fix: Raise from exc related fixes, bumped to 0.25.1 (#47)
* Raise from exc related fixes, bumped to 0.25.1 * Index related error handling improvements
1 parent c6f6485 commit 6f06a6c

File tree

7 files changed

+48
-42
lines changed

7 files changed

+48
-42
lines changed

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.25.0"
1+
__version__ = "0.25.1"
22

33

44
def get_sdk_version():

src/unstract/sdk/embedding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def get_embedding(self, adapter_instance_id: str) -> BaseEmbedding:
4545
self.tool.stream_log(
4646
log=f"Error getting embedding: {e}", level=LogLevel.ERROR
4747
)
48-
raise ToolEmbeddingError(f"Error getting embedding instance: {e}")
48+
raise ToolEmbeddingError(f"Error getting embedding instance: {e}") from e
4949

5050
def get_embedding_length(self, embedding: BaseEmbedding) -> int:
5151
embedding_list = embedding._get_text_embedding(self.__TEST_SNIPPET)

src/unstract/sdk/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,9 @@ class ToolVectorDBError(SdkError):
2929
DEFAULT_MESSAGE = "Error ocurred related to vector DB"
3030

3131

32+
class X2TextError(SdkError):
33+
DEFAULT_MESSAGE = "Error ocurred related to text extractor"
34+
35+
3236
class RateLimitError(SdkError):
3337
DEFAULT_MESSAGE = "Running into rate limit errors, please try again later"

src/unstract/sdk/index.py

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def index_file(
196196
f"Error deleting nodes for {doc_id}: {e}",
197197
level=LogLevel.ERROR,
198198
)
199-
raise SdkError(f"Error deleting nodes for {doc_id}: {e}")
199+
raise SdkError(f"Error deleting nodes for {doc_id}: {e}") from e
200200
doc_id_found = False
201201

202202
if doc_id_found:
@@ -246,29 +246,33 @@ def index_file(
246246
document.id_ = doc_id
247247
documents.append(document)
248248
self.tool.stream_log(f"Number of documents: {len(documents)}")
249-
if chunk_size == 0:
250-
parser = SimpleNodeParser.from_defaults(
251-
chunk_size=len(documents[0].text) + 10, chunk_overlap=0
252-
)
253-
nodes = parser.get_nodes_from_documents(documents, show_progress=True)
254-
node = nodes[0]
255-
node.embedding = embedding_li.get_query_embedding(" ")
256-
vector_db_li.add(nodes=[node])
257-
self.tool.stream_log("Added node to vector db")
258-
else:
259-
storage_context = StorageContext.from_defaults(vector_store=vector_db_li)
260-
parser = SimpleNodeParser.from_defaults(
261-
chunk_size=chunk_size, chunk_overlap=chunk_overlap
262-
)
263249

264-
# Set callback_manager to collect Usage stats
265-
callback_manager = UNCallbackManager.set_callback_manager(
266-
platform_api_key=self.tool.get_env_or_die(ToolEnv.PLATFORM_API_KEY),
267-
embedding=embedding_li,
268-
)
250+
try:
251+
if chunk_size == 0:
252+
parser = SimpleNodeParser.from_defaults(
253+
chunk_size=len(documents[0].text) + 10, chunk_overlap=0
254+
)
255+
nodes = parser.get_nodes_from_documents(documents, show_progress=True)
256+
node = nodes[0]
257+
node.embedding = embedding_li.get_query_embedding(" ")
258+
vector_db_li.add(nodes=[node])
259+
self.tool.stream_log("Added node to vector db")
260+
else:
261+
storage_context = StorageContext.from_defaults(
262+
vector_store=vector_db_li
263+
)
264+
parser = SimpleNodeParser.from_defaults(
265+
chunk_size=chunk_size, chunk_overlap=chunk_overlap
266+
)
267+
268+
# Set callback_manager to collect Usage stats
269+
callback_manager = UNCallbackManager.set_callback_manager(
270+
platform_api_key=self.tool.get_env_or_die(ToolEnv.PLATFORM_API_KEY),
271+
embedding=embedding_li,
272+
)
273+
274+
self.tool.stream_log("Adding nodes to vector db...")
269275

270-
self.tool.stream_log("Adding nodes to vector db...")
271-
try:
272276
VectorStoreIndex.from_documents(
273277
documents,
274278
storage_context=storage_context,
@@ -277,13 +281,13 @@ def index_file(
277281
node_parser=parser,
278282
callback_manager=callback_manager,
279283
)
280-
except Exception as e:
281-
self.tool.stream_log(
282-
f"Error adding nodes to vector db: {e}",
283-
level=LogLevel.ERROR,
284-
)
285-
raise IndexingError(str(e)) from e
286-
self.tool.stream_log("Added nodes to vector db")
284+
except Exception as e:
285+
self.tool.stream_log(
286+
f"Error adding nodes to vector db: {e}",
287+
level=LogLevel.ERROR,
288+
)
289+
raise IndexingError(str(e)) from e
290+
self.tool.stream_log("Added nodes to vector db")
287291

288292
self.tool.stream_log("File has been indexed successfully")
289293
return doc_id

src/unstract/sdk/llm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def get_llm(self, adapter_instance_id: str) -> LLM:
103103
self.tool.stream_log(
104104
log=f"Unable to get llm instance: {e}", level=LogLevel.ERROR
105105
)
106-
raise ToolLLMError(f"Error getting llm instance: {e}")
106+
raise ToolLLMError(f"Error getting llm instance: {e}") from e
107107

108108
def get_max_tokens(self, reserved_for_output: int = 0) -> int:
109109
"""Returns the maximum number of tokens that can be used for the LLM.

src/unstract/sdk/vector_db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,4 +77,4 @@ def get_vector_db(
7777
log=f"Unable to get vector_db {adapter_instance_id}: {e}",
7878
level=LogLevel.ERROR,
7979
)
80-
raise ToolVectorDBError(f"Error getting vectorDB instance: {e}")
80+
raise ToolVectorDBError(f"Error getting vectorDB instance: {e}") from e

src/unstract/sdk/x2txt.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from unstract.sdk.adapters import ToolAdapter
99
from unstract.sdk.constants import LogLevel
10-
from unstract.sdk.exceptions import SdkError
10+
from unstract.sdk.exceptions import X2TextError
1111
from unstract.sdk.tool.base import BaseTool
1212

1313

@@ -28,17 +28,15 @@ def get_x2text(self, adapter_instance_id: str) -> X2TextAdapter:
2828
][Common.ADAPTER]
2929
x2text_metadata = x2text_config.get(Common.ADAPTER_METADATA)
3030
# Add x2text service host, port and platform_service_key
31-
x2text_metadata[
31+
x2text_metadata[X2TextConstants.X2TEXT_HOST] = self.tool.get_env_or_die(
3232
X2TextConstants.X2TEXT_HOST
33-
] = self.tool.get_env_or_die(X2TextConstants.X2TEXT_HOST)
34-
x2text_metadata[
33+
)
34+
x2text_metadata[X2TextConstants.X2TEXT_PORT] = self.tool.get_env_or_die(
3535
X2TextConstants.X2TEXT_PORT
36-
] = self.tool.get_env_or_die(X2TextConstants.X2TEXT_PORT)
36+
)
3737
x2text_metadata[
3838
X2TextConstants.PLATFORM_SERVICE_API_KEY
39-
] = self.tool.get_env_or_die(
40-
X2TextConstants.PLATFORM_SERVICE_API_KEY
41-
)
39+
] = self.tool.get_env_or_die(X2TextConstants.PLATFORM_SERVICE_API_KEY)
4240

4341
x2text_adapter_class = x2text_adapter(x2text_metadata)
4442

@@ -49,4 +47,4 @@ def get_x2text(self, adapter_instance_id: str) -> X2TextAdapter:
4947
log=f"Unable to get x2text adapter {adapter_instance_id}: {e}",
5048
level=LogLevel.ERROR,
5149
)
52-
raise SdkError(f"Error getting vectorDB instance: {e}")
50+
raise X2TextError(f"Error getting text extractor: {e}") from e

0 commit comments

Comments
 (0)