11from typing import Optional
2- import os
32
43from llama_index import Document , StorageContext , VectorStoreIndex
54from llama_index .node_parser import SimpleNodeParser
@@ -106,7 +105,29 @@ def index_file(
106105 reindex : bool = False ,
107106 file_hash : Optional [str ] = None ,
108107 output_file_path : Optional [str ] = None ,
109- ):
108+ ) -> str :
109+ """Indexes an individual file using the passed arguments.
110+
111+ Args:
112+ tool_id (str): UUID of the tool (workflow_id in case it's called
113+ from workflow)
114+ embedding_type (str): UUID of the embedding service configured
115+ vector_db (str): UUID of the vector DB configured
116+ x2text_adapter (str): UUID of the x2text adapter configured.
117+ This is to extract text from documents.
118+ file_path (str): Path to the file that needs to be indexed.
119+ chunk_size (int): Chunk size to be used for indexing
120+ chunk_overlap (int): Overlap in chunks to be used for indexing
121+ reindex (bool, optional): Flag to denote if document should be
122+ re-indexed if it's already indexed. Defaults to False.
123+ file_hash (Optional[str], optional): SHA256 hash of the file.
124+ Defaults to None. If None, the hash is generated.
125+ output_file_path (Optional[str], optional): File path to write
126+ the extracted contents into. Defaults to None.
127+
128+ Returns:
129+ str: A unique ID for the file and indexing arguments combination
130+ """
110131 # Make file content hash if not available
111132 if not file_hash :
112133 file_hash = ToolUtils .get_hash_from_file (file_path = file_path )
@@ -117,7 +138,9 @@ def index_file(
117138 x2text_adapter_inst : X2TextAdapter = x2text .get_x2text (
118139 adapter_instance_id = x2text_adapter
119140 )
120- extracted_text = x2text_adapter_inst .process (input_file_path = file_path , output_file_path = output_file_path )
141+ extracted_text = x2text_adapter_inst .process (
142+ input_file_path = file_path , output_file_path = output_file_path
143+ )
121144 full_text .append (
122145 {
123146 "section" : "full" ,
0 commit comments