Skip to content

Commit 932d557

Browse files
authored
Merge pull request #8 from Zipstack/fix-index-key-generation-with-x2text
fix: Index key generation with x2text
2 parents 069322c + 5a31174 commit 932d557

File tree

4 files changed

+10
-5
lines changed

4 files changed

+10
-5
lines changed

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
  1     - __version__ = "0.11.0"
      1 + __version__ = "0.11.1"
  2   2
  3   3
  4   4   def get_sdk_version():

src/unstract/sdk/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,13 @@ class ToolSettingsKey:
139 139   LLM_ADAPTER_ID (str): The key for the LLM adapter ID.
140 140   EMBEDDING_ADAPTER_ID (str): The key for the embedding adapter ID.
141 141   VECTOR_DB_ADAPTER_ID (str): The key for the vector DB adapter ID.
    142 + X2TEXT_ADAPTER_ID (str): The key for the X2Text adapter ID.
142 143   """
143 144
144 145   LLM_ADAPTER_ID = "llmAdapterId"
145 146   EMBEDDING_ADAPTER_ID = "embeddingAdapterId"
146 147   VECTOR_DB_ADAPTER_ID = "vectorDbAdapterId"
    148 + X2TEXT_ADAPTER_ID = "x2TextAdapterId"
147 149
148 150
149 151   class FileReaderSettings:

src/unstract/sdk/index.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,10 @@ def index_file(
112 112   self.tool.stream_log("Extracting text from input file")
113 113   full_text = []
114 114   x2text = X2Text(tool=self.tool)
115     - x2text_adapter: X2TextAdapter = x2text.get_x2text(
    115 + x2text_adapter_inst: X2TextAdapter = x2text.get_x2text(
116 116   adapter_instance_id=x2text_adapter
117 117   )
118     - extracted_text = x2text_adapter.process(input_file_path=file_path)
    118 + extracted_text = x2text_adapter_inst.process(input_file_path=file_path)
119 119   full_text.append(
120 120   {
121 121   "section": "full",
@@ -128,6 +128,7 @@ def index_file(
128 128   file_hash=file_hash,
129 129   vector_db=vector_db,
130 130   embedding=embedding_type,
    131 + x2text=x2text_adapter,
131 132   chunk_size=chunk_size,
132 133   chunk_overlap=chunk_overlap,
133 134   )
@@ -257,6 +258,7 @@ def generate_file_id(
257 258   file_hash: str,
258 259   vector_db: str,
259 260   embedding: str,
    261 + x2text: str,
260 262   chunk_size: str,
261 263   chunk_overlap: str,
262 264   ) -> str:
@@ -267,13 +269,14 @@ def generate_file_id(
267 269   file_hash (str): Hash of the file contents
268 270   vector_db (str): UUID of the vector DB adapter
269 271   embedding (str): UUID of the embedding adapter
    272 + x2text (str): UUID of the X2Text adapter
270 273   chunk_size (str): Chunk size for indexing
271 274   chunk_overlap (str): Chunk overlap for indexing
272 275
273 276   Returns:
274 277   str: Key representing unique ID for a file
275 278   """
276 279   return (
277     - f"{tool_id}|{vector_db}|{embedding}|"
    280 + f"{tool_id}|{vector_db}|{embedding}|{x2text}|"
278 281   f"{chunk_size}|{chunk_overlap}|{file_hash}"
279 282   )

src/unstract/sdk/utils/tool_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def json_to_str(json_to_dump: dict[str, Any]) -> str:
 80  80   return compact_json
 81  81
 82  82   @staticmethod
 83     - def get_file_mime_type(self, input_file: Path) -> str:
     83 + def get_file_mime_type(input_file: Path) -> str:
 84  84   """Gets the file MIME type for an input file. Uses libmagic to perform
 85  85   the same.
 86  86

0 commit comments

Comments
 (0)