@@ -47,10 +47,6 @@ def __init__(self, work_dir: str, **kwargs):
4747
4848 # 存储集合映射 {db_id: collection}
4949 self .collections : dict [str , Any ] = {}
50-
51- # 元数据锁
52- self ._metadata_lock = asyncio .Lock ()
53-
5450 logger .info ("ChromaKB initialized" )
5551
5652 @property
@@ -290,6 +286,7 @@ async def update_content(self, db_id: str, file_ids: list[str], params: dict | N
290286
291287 try :
292288 # 更新状态为处理中
289+ self .files_meta [file_id ]["processing_params" ] = params .copy ()
293290 self .files_meta [file_id ]["status" ] = "processing"
294291 self ._save_metadata ()
295292
@@ -361,24 +358,6 @@ async def update_content(self, db_id: str, file_ids: list[str], params: dict | N
361358
362359 return processed_items_info
363360
364- async def delete_file_chunks_only (self , db_id : str , file_id : str ) -> None :
365- """仅删除文件的chunks数据,保留元数据(用于更新操作)"""
366- collection = await self ._get_chroma_collection (db_id )
367-
368- if collection :
369- try :
370- # 查找所有相关的chunks
371- results = collection .get (where = {"full_doc_id" : file_id }, include = ["metadatas" ])
372-
373- # 删除所有相关chunks
374- if results and results .get ("ids" ):
375- collection .delete (ids = results ["ids" ])
376- logger .info (f"Deleted { len (results ['ids' ])} chunks for file { file_id } " )
377-
378- except Exception as e :
379- logger .error (f"Error deleting file { file_id } from ChromaDB: { e } " )
380- # 注意:这里不删除 files_meta[file_id],保留元数据用于后续操作
381-
382361 async def aquery (self , query_text : str , db_id : str , ** kwargs ) -> list [dict ]:
383362 """异步查询知识库"""
384363 collection = await self ._get_chroma_collection (db_id )
@@ -473,16 +452,32 @@ async def aquery(self, query_text: str, db_id: str, **kwargs) -> list[dict]:
473452 logger .error (f"ChromaDB query error: { e } , { traceback .format_exc ()} " )
474453 return []
475454
async def delete_file_chunks_only(self, db_id: str, file_id: str) -> None:
    """Delete only the chunks of a file from ChromaDB, keeping its metadata.

    Intended for update operations: the ``files_meta[file_id]`` record is
    deliberately left untouched so it can be used by follow-up steps.

    Args:
        db_id: Identifier used to look up the collection via
            ``_get_chroma_collection``.
        file_id: Value matched against the ``full_doc_id`` metadata field
            of the stored chunks.

    Returns:
        None. Errors raised while querying or deleting are logged and
        swallowed (best-effort deletion).
    """
    collection = await self._get_chroma_collection(db_id)
    if not collection:
        return

    try:
        # Only the ids are needed to delete; an empty ``include`` avoids
        # loading the metadata of every chunk.
        matches = collection.get(where={"full_doc_id": file_id}, include=[])
        chunk_ids = matches.get("ids") if matches else None

        # Remove every chunk that belongs to this file
        if chunk_ids:
            collection.delete(ids=chunk_ids)
            logger.info(f"Deleted {len(chunk_ids)} chunks for file {file_id}")
    except Exception as e:
        logger.error(f"Error deleting file {file_id} from ChromaDB: {e}")
471+
async def delete_file(self, db_id: str, file_id: str) -> None:
    """Delete a file completely: its chunks in ChromaDB and its metadata record.

    Args:
        db_id: Identifier forwarded to ``delete_file_chunks_only``.
        file_id: Key of the file in ``self.files_meta``; also forwarded to
            ``delete_file_chunks_only``.
    """
    # Remove the chunk data from ChromaDB first
    await self.delete_file_chunks_only(db_id, file_id)

    # Drop the file record; if it was never there, there is nothing to persist
    try:
        del self.files_meta[file_id]
    except KeyError:
        return
    self._save_metadata()
486481
487482 async def get_file_basic_info (self , db_id : str , file_id : str ) -> dict :
488483 """获取文件基本信息(仅元数据)"""
0 commit comments