@@ -170,6 +170,7 @@ def create_source(
170170 chunk_index : int | None = None ,
171171 chunk_total : int | None = None ,
172172 chunk_content : str | None = None ,
173+ file_url_flag : bool = False ,
173174 ) -> SourceMessage :
174175 """Create SourceMessage from file content part."""
175176 if isinstance (message , dict ):
@@ -178,6 +179,7 @@ def create_source(
178179 "type" : "file" ,
179180 "doc_path" : file_info .get ("filename" ) or file_info .get ("file_id" , "" ),
180181 "content" : chunk_content if chunk_content else file_info .get ("file_data" , "" ),
182+ "file_info" : file_info if file_url_flag else {},
181183 }
182184 # Add chunk ordering information if provided
183185 if chunk_index is not None :
@@ -202,10 +204,7 @@ def rebuild_from_source(
202204 # Rebuild from source fields
203205 return {
204206 "type" : "file" ,
205- "file" : {
206- "filename" : source .doc_path or "" ,
207- "file_data" : source .content or "" ,
208- },
207+ "file" : source .file_info ,
209208 }
210209
211210 def _parse_file (self , file_info : dict [str , Any ]) -> str :
@@ -278,7 +277,7 @@ def parse_fast(
278277 file_data = file_info .get ("file_data" , "" )
279278 file_id = file_info .get ("file_id" , "" )
280279 filename = file_info .get ("filename" , "" )
281-
280+ file_url_flag = False
282281 # Build content string based on available information
283282 content_parts = []
284283
@@ -297,6 +296,7 @@ def parse_fast(
297296 content_parts .append (f"[File Data (base64/encoded): { len (file_data )} chars]" )
298297 # Check if it looks like a URL
299298 elif file_data .startswith (("http://" , "https://" , "file://" )):
299+ file_url_flag = True
300300 content_parts .append (f"[File URL: { file_data } ]" )
301301 else :
302302 # TODO: split into multiple memory items
@@ -348,6 +348,7 @@ def parse_fast(
348348 chunk_index = chunk_idx ,
349349 chunk_total = total_chunks ,
350350 chunk_content = chunk_text ,
351+ file_url_flag = file_url_flag ,
351352 )
352353
353354 memory_item = TextualMemoryItem (
@@ -384,6 +385,7 @@ def parse_fast(
384385 chunk_index = None ,
385386 chunk_total = 0 ,
386387 chunk_content = content ,
388+ file_url_flag = file_url_flag ,
387389 )
388390 memory_item = TextualMemoryItem (
389391 memory = content ,
0 commit comments