 from django.db.models import QuerySet
 
 from application.flow.i_step_node import NodeResult
-from application.flow.step_node.search_dataset_node.i_search_dataset_node import ISearchDatasetStepNode
+from application.flow.step_node.search_knowledge_node.i_search_knowledge_node import ISearchKnowledgeStepNode
 from common.config.embedding_config import VectorStore
 from common.db.search import native_search
 from common.utils.common import get_file_content
@@ -44,7 +44,7 @@ def reset_title(title):
     return f"#### {title}\n"
 
 
-class BaseSearchDatasetNode(ISearchDatasetStepNode):
+class BaseSearchKnowledgeNode(ISearchKnowledgeStepNode):
     def save_context(self, details, workflow_manage):
         result = details.get('paragraph_list', [])
         dataset_setting = self.node_params_serializer.data.get('dataset_setting')
@@ -60,24 +60,25 @@ def save_context(self, details, workflow_manage):
                                      result])[0:dataset_setting.get('max_paragraph_char_number', 5000)]
         self.context['directly_return'] = directly_return
 
-    def execute(self, dataset_id_list, dataset_setting, question,
+    def execute(self, knowledge_id_list, knowledge_setting, question,
                 exclude_paragraph_id_list=None,
                 **kwargs) -> NodeResult:
         self.context['question'] = question
-        if len(dataset_id_list) == 0:
+        if len(knowledge_id_list) == 0:
             return get_none_result(question)
-        model_id = get_embedding_id(dataset_id_list)
+        model_id = get_embedding_id(knowledge_id_list)
         workspace_id = self.workflow_manage.get_body().get('workspace_id')
         embedding_model = get_model_instance_by_model_workspace_id(model_id, workspace_id)
         embedding_value = embedding_model.embed_query(question)
         vector = VectorStore.get_embedding_vector()
         exclude_document_id_list = [str(document.id) for document in
                                     QuerySet(Document).filter(
-                                        dataset_id__in=dataset_id_list,
+                                        knowledge_id__in=knowledge_id_list,
                                         is_active=False)]
-        embedding_list = vector.query(question, embedding_value, dataset_id_list, exclude_document_id_list,
-                                      exclude_paragraph_id_list, True, dataset_setting.get('top_n'),
-                                      dataset_setting.get('similarity'), SearchMode(dataset_setting.get('search_mode')))
+        embedding_list = vector.query(question, embedding_value, knowledge_id_list, exclude_document_id_list,
+                                      exclude_paragraph_id_list, True, knowledge_setting.get('top_n'),
+                                      knowledge_setting.get('similarity'),
+                                      SearchMode(knowledge_setting.get('search_mode')))
         # manually close the database connection
         connection.close()
         if embedding_list is None:
@@ -89,7 +90,7 @@ def execute(self, dataset_id_list, dataset_setting, question,
                            'is_hit_handling_method_list': [row for row in result if row.get('is_hit_handling_method')],
                            'data': '\n'.join(
                                [f"{reset_title(paragraph.get('title', ''))}{paragraph.get('content')}" for paragraph in
-                                result])[0:dataset_setting.get('max_paragraph_char_number', 5000)],
+                                result])[0:knowledge_setting.get('max_paragraph_char_number', 5000)],
                            'directly_return': '\n'.join(
                                [paragraph.get('content') for paragraph in
                                 result if
@@ -112,7 +113,7 @@ def reset_paragraph(paragraph: Dict, embedding_list: List):
         'update_time': paragraph.get('update_time').strftime("%Y-%m-%d %H:%M:%S"),
         'create_time': paragraph.get('create_time').strftime("%Y-%m-%d %H:%M:%S"),
         'id': str(paragraph.get('id')),
-        'dataset_id': str(paragraph.get('dataset_id')),
+        'knowledge_id': str(paragraph.get('knowledge_id')),
         'document_id': str(paragraph.get('document_id'))
     }
 
@@ -124,7 +125,7 @@ def list_paragraph(embedding_list: List, vector):
     paragraph_list = native_search(QuerySet(Paragraph).filter(id__in=paragraph_id_list),
                                    get_file_content(
                                        os.path.join(PROJECT_DIR, "apps", "application", 'sql',
-                                                    'list_dataset_paragraph_by_paragraph_id.sql')),
+                                                    'list_knowledge_paragraph_by_paragraph_id.sql')),
                                    with_table_name=True)
     # if there is dirty data in the vector store, delete it directly
     if len(paragraph_list) != len(paragraph_id_list):
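
For reviewers, a minimal usage sketch of the renamed entry point; this is not part of the patch. It assumes `node` is an already-initialized BaseSearchKnowledgeNode wired into a workflow (so `workflow_manage` and `node_params_serializer` exist), and the `knowledge_setting` keys are the ones the node reads in the hunks above; every concrete value is a placeholder.

# Hypothetical call site for the renamed execute(); constructing the node itself is out of scope here.
knowledge_setting = {
    'top_n': 3,                         # number of hits requested from vector.query
    'similarity': 0.6,                  # similarity threshold passed to vector.query
    'search_mode': 'embedding',         # must match a SearchMode member
    'max_paragraph_char_number': 5000,  # cap applied when joining paragraph content
}

result = node.execute(
    knowledge_id_list=['<knowledge-base-uuid>'],  # placeholder ids of the knowledge bases to search
    knowledge_setting=knowledge_setting,
    question='How do I reset my password?',
    exclude_paragraph_id_list=[],
)
# The dict built in the @@ -89 hunk exposes 'paragraph_list', 'is_hit_handling_method_list',
# 'data' and 'directly_return' on the returned NodeResult.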