33from llama_index import Document , StorageContext , VectorStoreIndex
44from llama_index .node_parser import SimpleNodeParser
55from llama_index .vector_stores import VectorStoreQuery , VectorStoreQueryResult
6+ from unstract .adapters .exceptions import AdapterError
67from unstract .adapters .x2text .x2text_adapter import X2TextAdapter
78
89from unstract .sdk .constants import LogLevel , ToolEnv
910from unstract .sdk .embedding import ToolEmbedding
10- from unstract .sdk .exceptions import SdkException
11+ from unstract .sdk .exceptions import SdkError
1112from unstract .sdk .tool .base import BaseTool
1213from unstract .sdk .utils import ToolUtils
1314from unstract .sdk .utils .service_context import ServiceContext
@@ -30,7 +31,7 @@ def get_text_from_index(
3031 self .tool .stream_log (
3132 f"Error loading { embedding_type } " , level = LogLevel .ERROR
3233 )
33- raise SdkException (f"Error loading { embedding_type } " )
34+ raise SdkError (f"Error loading { embedding_type } " )
3435 embedding_dimension = embedd_helper .get_embedding_length (embedding_li )
3536
3637 vdb_helper = ToolVectorDB (
@@ -45,7 +46,7 @@ def get_text_from_index(
4546 self .tool .stream_log (
4647 f"Error loading { vector_db } " , level = LogLevel .ERROR
4748 )
48- raise SdkException (f"Error loading { vector_db } " )
49+ raise SdkError (f"Error loading { vector_db } " )
4950
5051 try :
5152 self .tool .stream_log (f">>> Querying { vector_db } ..." )
@@ -59,7 +60,7 @@ def get_text_from_index(
5960 self .tool .stream_log (
6061 f"Error querying { vector_db } : { e } " , level = LogLevel .ERROR
6162 )
62- raise SdkException (f"Error querying { vector_db } : { e } " )
63+ raise SdkError (f"Error querying { vector_db } : { e } " )
6364
6465 n : VectorStoreQueryResult = vector_db_li .query (query = q )
6566 if len (n .nodes ) > 0 :
@@ -134,13 +135,18 @@ def index_file(
134135
135136 self .tool .stream_log ("Extracting text from input file" )
136137 full_text = []
137- x2text = X2Text (tool = self .tool )
138- x2text_adapter_inst : X2TextAdapter = x2text .get_x2text (
139- adapter_instance_id = x2text_adapter
140- )
141- extracted_text = x2text_adapter_inst .process (
142- input_file_path = file_path , output_file_path = output_file_path
143- )
138+ extracted_text = ""
139+ try :
140+ x2text = X2Text (tool = self .tool )
141+ x2text_adapter_inst : X2TextAdapter = x2text .get_x2text (
142+ adapter_instance_id = x2text_adapter
143+ )
144+ extracted_text = x2text_adapter_inst .process (
145+ input_file_path = file_path , output_file_path = output_file_path
146+ )
147+ except AdapterError as e :
148+ # Wrapping AdapterErrors with SdkError
149+ raise SdkError (str (e )) from e
144150 full_text .append (
145151 {
146152 "section" : "full" ,
@@ -173,7 +179,7 @@ def index_file(
173179 self .tool .stream_log (
174180 f"Error loading { embedding_type } " , level = LogLevel .ERROR
175181 )
176- raise SdkException (f"Error loading { embedding_type } " )
182+ raise SdkError (f"Error loading { embedding_type } " )
177183
178184 embedding_dimension = embedd_helper .get_embedding_length (embedding_li )
179185 vector_db_li = vdb_helper .get_vector_db (
@@ -184,7 +190,7 @@ def index_file(
184190 self .tool .stream_log (
185191 f"Error loading { vector_db } " , level = LogLevel .ERROR
186192 )
187- raise SdkException (f"Error loading { vector_db } " )
193+ raise SdkError (f"Error loading { vector_db } " )
188194
189195 q = VectorStoreQuery (
190196 query_embedding = embedding_li .get_query_embedding (" " ),
@@ -214,7 +220,7 @@ def index_file(
214220 f"Error deleting nodes for { doc_id } : { e } " ,
215221 level = LogLevel .ERROR ,
216222 )
217- raise SdkException (f"Error deleting nodes for { doc_id } : { e } " )
223+ raise SdkError (f"Error deleting nodes for { doc_id } : { e } " )
218224 doc_id_not_found = True
219225
220226 if doc_id_not_found :
@@ -271,7 +277,7 @@ def index_file(
271277 f"Error adding nodes to vector db: { e } " ,
272278 level = LogLevel .ERROR ,
273279 )
274- raise SdkException (f"Error adding nodes to vector db: { e } " )
280+ raise SdkError (f"Error adding nodes to vector db: { e } " )
275281 self .tool .stream_log ("Added nodes to vector db" )
276282
277283 self .tool .stream_log ("Done indexing file" )
0 commit comments