@@ -198,10 +198,11 @@ def _exclude_metadata(self, documents: List[DocNode]) -> List[DocNode]:
198198 def load_file (input_file : Path , metadata_genf : Callable [[str ], Dict ], file_extractor : Dict [str , Callable ],
199199 encoding : str = "utf-8" , pathm : PurePath = Path , fs : Optional [AbstractFileSystem ] = None ,
200200 metadata : Optional [Dict ] = None ) -> List [DocNode ]:
201- metadata : dict = metadata or {}
201+ # metadata priority: user > reader > metadata_genf
202+ user_metadata : Dict = metadata or {}
203+ metadata_generated : Dict = metadata_genf (str (input_file )) if metadata_genf is not None else {}
202204 documents : List [DocNode ] = []
203205
204- if metadata_genf is not None : metadata .update (metadata_genf (str (input_file )))
205206 file_reader_patterns = list (file_extractor .keys ())
206207
207208 for pattern in file_reader_patterns :
@@ -213,8 +214,11 @@ def load_file(input_file: Path, metadata_genf: Callable[[str], Dict], file_extra
213214 kwargs = {'fs' : fs } if fs and not is_default_fs (fs ) else {}
214215 docs = reader (input_file , ** kwargs )
215216 if isinstance (docs , DocNode ): docs = [docs ]
216- for doc in docs : doc ._global_metadata = metadata
217-
217+ for doc in docs :
218+ metadata = metadata_generated .copy ()
219+ metadata .update (doc ._global_metadata or {})
220+ metadata .update (user_metadata )
221+ doc ._global_metadata = metadata
218222 if config ['rag_filename_as_id' ]:
219223 for i , doc in enumerate (docs ):
220224 doc ._uid = f"{ input_file !s} _index_{ i } "
0 commit comments