@@ -31,7 +31,9 @@ def __init__(self, tool: BaseTool):
3131 # TODO: Inherit from StreamMixin and avoid using BaseTool
3232 self .tool = tool
3333
34- def get_text_from_index (self , embedding_type : str , vector_db : str , doc_id : str ):
34+ def get_text_from_index (
35+ self , embedding_type : str , vector_db : str , doc_id : str
36+ ) -> Optional [str ]:
3537 embedd_helper = ToolEmbedding (tool = self .tool )
3638 embedding_li = embedd_helper .get_embedding (adapter_instance_id = embedding_type )
3739 embedding_dimension = embedd_helper .get_embedding_length (embedding_li )
@@ -326,8 +328,10 @@ def generate_file_id(
326328 "vector_db_config" : ToolAdapter .get_adapter_config (self .tool , vector_db ),
327329 "embedding_config" : ToolAdapter .get_adapter_config (self .tool , embedding ),
328330 "x2text_config" : ToolAdapter .get_adapter_config (self .tool , x2text ),
329- "chunk_size" : chunk_size ,
330- "chunk_overlap" : chunk_overlap ,
331+ # Cast to strings before hashing: the final hash is persisted, so
332+ # the string form is required to remain backward compatible
333+ "chunk_size" : str (chunk_size ),
334+ "chunk_overlap" : str (chunk_overlap ),
331335 }
332336 # JSON keys are sorted to ensure that the same key gets hashed even in
333337 # cases where the fields are reordered.
0 commit comments