@@ -23,9 +23,14 @@ def merge_reranker_list(reranker_list, result=None):
2323 merge_reranker_list (document , result )
2424 elif isinstance (document , dict ):
2525 content = document .get ('title' , '' ) + document .get ('content' , '' )
26- result .append (str (document ) if len (content ) == 0 else content )
26+ title = document .get ("title" )
27+ dataset_name = document .get ("dataset_name" )
28+ document_name = document .get ('document_name' )
29+ result .append (
30+ Document (page_content = str (document ) if len (content ) == 0 else content ,
31+ metadata = {'title' : title , 'dataset_name' : dataset_name , 'document_name' : document_name }))
2732 else :
28- result .append (str (document ))
33+ result .append (Document ( page_content = str (document ), metadata = {} ))
2934 return result
3035
3136
@@ -43,6 +48,21 @@ def filter_result(document_list: List[Document], max_paragraph_char_number, top_
4348 return result
4449
4550
def reset_result_list(result_list: List[Document], document_list: List[Document]) -> List[Document]:
    """Re-attach source metadata to a list of result documents.

    Each entry of ``result_list`` is paired with the first not-yet-used entry
    of ``document_list`` whose ``page_content`` is equal. A matched pair yields
    a new ``Document`` carrying the source document's content and metadata plus
    the ``relevance_score`` read from the result's metadata. A result with no
    remaining match is passed through unchanged.

    Args:
        result_list: Documents whose ``metadata`` may contain a
            ``relevance_score`` entry.
        document_list: Documents holding the metadata to restore. The list is
            not modified.

    Returns:
        A new list with one entry per element of ``result_list``, in the same
        order.
    """
    # Index the source documents by content once instead of rescanning the
    # whole list for every result. Buckets are filled in reverse order so that
    # ``pop()`` returns the earliest unused duplicate in O(1).
    # NOTE(review): this requires page_content to be hashable; it appears to
    # always be a str here - confirm against callers.
    pending = {}
    for document in reversed(document_list):
        pending.setdefault(document.page_content, []).append(document)

    merged = []
    for result in result_list:
        bucket = pending.get(result.page_content)
        if not bucket:
            # No (remaining) source document with this content: keep as-is.
            merged.append(result)
            continue
        # Consume the match so duplicates are paired one-to-one.
        source = bucket.pop()
        merged.append(Document(page_content=source.page_content,
                               metadata={**source.metadata,
                                         'relevance_score': result.metadata.get('relevance_score')}))
    return merged
64+
65+
4666class BaseRerankerNode (IRerankerNode ):
4767 def save_context (self , details , workflow_manage ):
4868 self .context ['document_list' ] = details .get ('document_list' , [])
@@ -55,16 +75,18 @@ def execute(self, question, reranker_setting, reranker_list, reranker_model_id,
5575 ** kwargs ) -> NodeResult :
5676 documents = merge_reranker_list (reranker_list )
5777 top_n = reranker_setting .get ('top_n' , 3 )
58- self .context ['document_list' ] = documents
78+ self .context ['document_list' ] = [{'page_content' : document .page_content , 'metadata' : document .metadata } for
79+ document in documents ]
5980 self .context ['question' ] = question
6081 reranker_model = get_model_instance_by_model_user_id (reranker_model_id ,
6182 self .flow_params_serializer .data .get ('user_id' ),
6283 top_n = top_n )
6384 result = reranker_model .compress_documents (
64- [ Document ( page_content = document ) for document in documents if document is not None and len ( document ) > 0 ] ,
85+ documents ,
6586 question )
6687 similarity = reranker_setting .get ('similarity' , 0.6 )
6788 max_paragraph_char_number = reranker_setting .get ('max_paragraph_char_number' , 5000 )
89+ result = reset_result_list (result , documents )
6890 r = filter_result (result , max_paragraph_char_number , top_n , similarity )
6991 return NodeResult ({'result_list' : r , 'result' : '' .join ([item .get ('page_content' ) for item in r ])}, {})
7092
0 commit comments