Skip to content

Commit b52c972

Browse files
authored
feat: Dialogue displays knowledge sources (#3501)
1 parent bfdbc74 commit b52c972

File tree

14 files changed: +135 additions, -39 deletions

apps/application/chat_pipeline/I_base_chat_pipeline.py

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,9 @@
1818
class ParagraphPipelineModel:
1919

2020
def __init__(self, _id: str, document_id: str, knowledge_id: str, content: str, title: str, status: str,
21-
is_active: bool, comprehensive_score: float, similarity: float, dataset_name: str, document_name: str,
22-
hit_handling_method: str, directly_return_similarity: float, meta: dict = None):
21+
is_active: bool, comprehensive_score: float, similarity: float, knowledge_name: str,
22+
document_name: str,
23+
hit_handling_method: str, directly_return_similarity: float, knowledge_type, meta: dict = None):
2324
self.id = _id
2425
self.document_id = document_id
2526
self.knowledge_id = knowledge_id
@@ -29,11 +30,12 @@ def __init__(self, _id: str, document_id: str, knowledge_id: str, content: str,
2930
self.is_active = is_active
3031
self.comprehensive_score = comprehensive_score
3132
self.similarity = similarity
32-
self.dataset_name = dataset_name
33+
self.knowledge_name = knowledge_name
3334
self.document_name = document_name
3435
self.hit_handling_method = hit_handling_method
3536
self.directly_return_similarity = directly_return_similarity
3637
self.meta = meta
38+
self.knowledge_type = knowledge_type
3739

3840
def to_dict(self):
3941
return {
@@ -46,8 +48,9 @@ def to_dict(self):
4648
'is_active': self.is_active,
4749
'comprehensive_score': self.comprehensive_score,
4850
'similarity': self.similarity,
49-
'dataset_name': self.dataset_name,
51+
'knowledge_name': self.knowledge_name,
5052
'document_name': self.document_name,
53+
'knowledge_type': self.knowledge_type,
5154
'meta': self.meta,
5255
}
5356

@@ -57,7 +60,8 @@ def __init__(self):
5760
self.paragraph = {}
5861
self.comprehensive_score = None
5962
self.document_name = None
60-
self.dataset_name = None
63+
self.knowledge_name = None
64+
self.knowledge_type = None
6165
self.hit_handling_method = None
6266
self.directly_return_similarity = 0.9
6367
self.meta = {}
@@ -76,8 +80,12 @@ def add_paragraph(self, paragraph):
7680
self.paragraph = paragraph
7781
return self
7882

79-
def add_dataset_name(self, dataset_name):
80-
self.dataset_name = dataset_name
83+
def add_knowledge_name(self, knowledge_name):
84+
self.knowledge_name = knowledge_name
85+
return self
86+
87+
def add_knowledge_type(self, knowledge_type):
88+
self.knowledge_type = knowledge_type
8189
return self
8290

8391
def add_document_name(self, document_name):
@@ -110,8 +118,9 @@ def build(self):
110118
self.paragraph.get('content'), self.paragraph.get('title'),
111119
self.paragraph.get('status'),
112120
self.paragraph.get('is_active'),
113-
self.comprehensive_score, self.similarity, self.dataset_name,
121+
self.comprehensive_score, self.similarity, self.knowledge_name,
114122
self.document_name, self.hit_handling_method, self.directly_return_similarity,
123+
self.knowledge_type,
115124
self.meta)
116125

117126

apps/application/chat_pipeline/step/chat_step/impl/base_chat_step.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from langchain.chat_models.base import BaseChatModel
1919
from langchain.schema import BaseMessage
2020
from langchain.schema.messages import HumanMessage, AIMessage
21-
from langchain_core.messages import AIMessageChunk
21+
from langchain_core.messages import AIMessageChunk, SystemMessage
2222
from rest_framework import status
2323

2424
from application.chat_pipeline.I_base_chat_pipeline import ParagraphPipelineModel
@@ -196,7 +196,8 @@ def get_details(self, manage, **kwargs):
196196

197197
@staticmethod
198198
def reset_message_list(message_list: List[BaseMessage], answer_text):
199-
result = [{'role': 'user' if isinstance(message, HumanMessage) else 'ai', 'content': message.content} for
199+
result = [{'role': 'user' if isinstance(message, HumanMessage) else (
200+
'system' if isinstance(message, SystemMessage) else 'ai'), 'content': message.content} for
200201
message
201202
in
202203
message_list]

apps/application/chat_pipeline/step/search_dataset_step/impl/base_search_dataset_step.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ def reset_paragraph(paragraph: Dict, embedding_list: List) -> ParagraphPipelineM
7979
.add_paragraph(paragraph)
8080
.add_similarity(find_embedding.get('similarity'))
8181
.add_comprehensive_score(find_embedding.get('comprehensive_score'))
82-
.add_dataset_name(paragraph.get('dataset_name'))
82+
.add_knowledge_name(paragraph.get('knowledge_name'))
83+
.add_knowledge_type(paragraph.get('knowledge_type'))
8384
.add_document_name(paragraph.get('document_name'))
8485
.add_hit_handling_method(paragraph.get('hit_handling_method'))
8586
.add_directly_return_similarity(paragraph.get('directly_return_similarity'))

apps/application/flow/step_node/reranker_node/i_reranker_node.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ class RerankerStepNodeSerializer(serializers.Serializer):
3232
question_reference_address = serializers.ListField(required=True)
3333
reranker_model_id = serializers.UUIDField(required=True)
3434
reranker_reference_list = serializers.ListField(required=True, child=serializers.ListField(required=True))
35+
show_knowledge = serializers.BooleanField(required=True,
36+
label=_("The results are displayed in the knowledge sources"))
3537

3638
def is_valid(self, *, raise_exception=False):
3739
super().is_valid(raise_exception=True)
@@ -55,6 +57,6 @@ def _run(self):
5557

5658
reranker_list=reranker_list)
5759

58-
def execute(self, question, reranker_setting, reranker_list, reranker_model_id,
60+
def execute(self, question, reranker_setting, reranker_list, reranker_model_id,show_knowledge,
5961
**kwargs) -> NodeResult:
6062
pass

apps/application/flow/step_node/reranker_node/impl/base_reranker_node.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,9 @@ def merge_reranker_list(reranker_list, result=None):
2424
elif isinstance(document, dict):
2525
content = document.get('title', '') + document.get('content', '')
2626
title = document.get("title")
27-
dataset_name = document.get("dataset_name")
28-
document_name = document.get('document_name')
2927
result.append(
3028
Document(page_content=str(document) if len(content) == 0 else content,
31-
metadata={'title': title, 'dataset_name': dataset_name, 'document_name': document_name}))
29+
metadata={'title': title, **document}))
3230
else:
3331
result.append(Document(page_content=str(document), metadata={}))
3432
return result
@@ -71,17 +69,18 @@ def save_context(self, details, workflow_manage):
7169
self.context['result_list'] = details.get('result_list')
7270
self.context['result'] = details.get('result')
7371

74-
def execute(self, question, reranker_setting, reranker_list, reranker_model_id,
72+
def execute(self, question, reranker_setting, reranker_list, reranker_model_id, show_knowledge,
7573
**kwargs) -> NodeResult:
74+
self.context['show_knowledge'] = show_knowledge
7675
documents = merge_reranker_list(reranker_list)
7776
top_n = reranker_setting.get('top_n', 3)
7877
self.context['document_list'] = [{'page_content': document.page_content, 'metadata': document.metadata} for
7978
document in documents]
8079
self.context['question'] = question
8180
workspace_id = self.workflow_manage.get_body().get('workspace_id')
8281
reranker_model = get_model_instance_by_model_workspace_id(reranker_model_id,
83-
workspace_id,
84-
top_n=top_n)
82+
workspace_id,
83+
top_n=top_n)
8584
result = reranker_model.compress_documents(
8685
documents,
8786
question)
@@ -93,6 +92,7 @@ def execute(self, question, reranker_setting, reranker_list, reranker_model_id,
9392

9493
def get_details(self, index: int, **kwargs):
9594
return {
95+
'show_knowledge': self.context.get('show_knowledge'),
9696
'name': self.node.properties.get('stepName'),
9797
"index": index,
9898
'document_list': self.context.get('document_list'),

apps/application/flow/step_node/search_knowledge_node/i_search_knowledge_node.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ class SearchDatasetStepNodeSerializer(serializers.Serializer):
4141

4242
question_reference_address = serializers.ListField(required=True)
4343

44+
show_knowledge = serializers.BooleanField(required=True,
45+
label=_("The results are displayed in the knowledge sources"))
46+
4447
def is_valid(self, *, raise_exception=False):
4548
super().is_valid(raise_exception=True)
4649

@@ -73,7 +76,7 @@ def _run(self):
7376
return self.execute(**self.node_params_serializer.data, question=str(question),
7477
exclude_paragraph_id_list=exclude_paragraph_id_list)
7578

76-
def execute(self, dataset_id_list, dataset_setting, question,
79+
def execute(self, dataset_id_list, dataset_setting, question, show_knowledge,
7780
exclude_paragraph_id_list=None,
7881
**kwargs) -> NodeResult:
7982
pass

apps/application/flow/step_node/search_knowledge_node/impl/base_search_knowledge_node.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,11 @@ def save_context(self, details, workflow_manage):
6262
result])[0:dataset_setting.get('max_paragraph_char_number', 5000)]
6363
self.context['directly_return'] = directly_return
6464

65-
def execute(self, knowledge_id_list, knowledge_setting, question,
65+
def execute(self, knowledge_id_list, knowledge_setting, question, show_knowledge,
6666
exclude_paragraph_id_list=None,
6767
**kwargs) -> NodeResult:
6868
self.context['question'] = question
69+
self.context['show_knowledge'] = show_knowledge
6970
get_knowledge_list_of_authorized = DatabaseModelManage.get_model('get_knowledge_list_of_authorized')
7071
chat_user_type = self.workflow_manage.get_body().get('chat_user_type')
7172
if get_knowledge_list_of_authorized is not None and RoleConstants.CHAT_USER.value.name == chat_user_type:
@@ -145,6 +146,7 @@ def list_paragraph(embedding_list: List, vector):
145146
def get_details(self, index: int, **kwargs):
146147
return {
147148
'name': self.node.properties.get('stepName'),
149+
'show_knowledge': self.context.get('show_knowledge'),
148150
'question': self.context.get('question'),
149151
"index": index,
150152
'run_time': self.context.get('run_time'),

apps/application/serializers/application_chat_record.py

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,15 @@ def one(self, debug):
7575
chat_record = self.get_chat_record()
7676
if chat_record is None:
7777
raise AppApiException(500, gettext("Conversation does not exist"))
78-
return ApplicationChatRecordQuerySerializers.reset_chat_record(chat_record)
78+
application_access_token = QuerySet(ApplicationAccessToken).filter(
79+
application_id=self.data.get('application_id')).first()
80+
show_source = False
81+
show_exec = False
82+
if application_access_token is not None:
83+
show_exec = application_access_token.show_exec
84+
show_source = application_access_token.show_source
85+
return ApplicationChatRecordQuerySerializers.reset_chat_record(
86+
chat_record, show_source, show_exec)
7987

8088

8189
class ApplicationChatRecordQuerySerializers(serializers.Serializer):
@@ -103,21 +111,34 @@ def list(self, with_valid=True):
103111
QuerySet(ChatRecord).filter(chat_id=self.data.get('chat_id')).order_by(order_by)]
104112

105113
@staticmethod
106-
def reset_chat_record(chat_record):
114+
def reset_chat_record(chat_record, show_source, show_exec):
107115
knowledge_list = []
108116
paragraph_list = []
109-
110117
if 'search_step' in chat_record.details and chat_record.details.get('search_step').get(
111118
'paragraph_list') is not None:
112119
paragraph_list = chat_record.details.get('search_step').get(
113120
'paragraph_list')
114-
knowledge_list = [{'id': dataset_id, 'name': name} for dataset_id, name in reduce(lambda x, y: {**x, **y},
115-
[{row.get(
116-
'knowledge_id'): row.get(
117-
"knowledge_name")} for
118-
row in
119-
paragraph_list],
120-
{}).items()]
121+
122+
for item in chat_record.details.values():
123+
if item.get('type') == 'search-knowledge-node' and item.get('show_knowledge', False):
124+
paragraph_list = paragraph_list + item.get(
125+
'paragraph_list')
126+
127+
if item.get('type') == 'reranker-node' and item.get('show_knowledge', False):
128+
paragraph_list = paragraph_list + [rl.get('metadata') for rl in item.get('result_list') if
129+
'document_id' in rl.get('metadata') and 'knowledge_id' in rl.get(
130+
'metadata')]
131+
paragraph_list = list({p.get('id'): p for p in paragraph_list}.values())
132+
knowledge_list = knowledge_list + [{'id': knowledge_id, **knowledge} for knowledge_id, knowledge in
133+
reduce(lambda x, y: {**x, **y},
134+
[{row.get(
135+
'knowledge_id'): {'knowledge_name': row.get(
136+
"knowledge_name"),
137+
'knowledge_type': row.get('knowledge_type')}} for
138+
row in
139+
paragraph_list],
140+
{}).items()]
141+
121142
if len(chat_record.improve_paragraph_id_list) > 0:
122143
paragraph_model_list = QuerySet(Paragraph).filter(id__in=chat_record.improve_paragraph_id_list)
123144
if len(paragraph_model_list) < len(chat_record.improve_paragraph_id_list):
@@ -126,24 +147,33 @@ def reset_chat_record(chat_record):
126147
filter(lambda p_id: paragraph_model_id_list.__contains__(p_id),
127148
chat_record.improve_paragraph_id_list))
128149
chat_record.save()
129-
150+
show_source_dict = {'knowledge_list': knowledge_list,
151+
'paragraph_list': paragraph_list, }
152+
show_exec_dict = {'execution_details': [chat_record.details[key] for key in chat_record.details]}
130153
return {
131154
**ChatRecordSerializerModel(chat_record).data,
132155
'padding_problem_text': chat_record.details.get('problem_padding').get(
133156
'padding_problem_text') if 'problem_padding' in chat_record.details else None,
134-
'knowledge_list': knowledge_list,
135-
'paragraph_list': paragraph_list,
136-
'execution_details': [chat_record.details[key] for key in chat_record.details]
157+
**(show_source_dict if show_source else {}),
158+
**(show_exec_dict if show_exec else {})
137159
}
138160

139161
def page(self, current_page: int, page_size: int, with_valid=True):
140162
if with_valid:
141163
self.is_valid(raise_exception=True)
142164
order_by = '-create_time' if self.data.get('order_asc') is None or self.data.get(
143165
'order_asc') else 'create_time'
166+
application_access_token = QuerySet(ApplicationAccessToken).filter(
167+
application_id=self.data.get('application_id')).first()
168+
show_source = False
169+
show_exec = False
170+
if application_access_token is not None:
171+
show_exec = application_access_token.show_exec
172+
show_source = application_access_token.show_source
144173
page = page_search(current_page, page_size,
145174
QuerySet(ChatRecord).filter(chat_id=self.data.get('chat_id')).order_by(order_by),
146-
post_records_handler=lambda chat_record: self.reset_chat_record(chat_record))
175+
post_records_handler=lambda chat_record: self.reset_chat_record(chat_record, show_source,
176+
show_exec))
147177
return page
148178

149179

apps/application/serializers/common.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@
1414
from django.utils.translation import gettext_lazy as _
1515

1616
from application.chat_pipeline.step.chat_step.i_chat_step import PostResponseHandler
17-
from application.models import Application, ChatRecord, Chat, ApplicationVersion, ChatUserType
17+
from application.models import Application, ChatRecord, Chat, ApplicationVersion, ChatUserType, ApplicationTypeChoices, \
18+
ApplicationKnowledgeMapping
1819
from common.constants.cache_version import Cache_Version
1920
from common.database_model_manage.database_model_manage import DatabaseModelManage
2021
from common.exception.app_exception import ChatException
22+
from knowledge.models import Document
2123
from models_provider.models import Model
2224
from models_provider.tools import get_model_credential
2325

@@ -72,6 +74,19 @@ def get_application(self):
7274
'-create_time')[0:1].first()
7375
if not application:
7476
raise ChatException(500, _("The application has not been published. Please use it after publishing."))
77+
if application.type == ApplicationTypeChoices.SIMPLE.value:
78+
# IDs of the knowledge bases mapped to this application
79+
knowledge_id_list = [str(row.knowledge_id) for row in
80+
QuerySet(ApplicationKnowledgeMapping).filter(
81+
application_id=self.application_id)]
82+
83+
# Documents to exclude: inactive documents in those knowledge bases
84+
exclude_document_id_list = [str(document.id) for document in
85+
QuerySet(Document).filter(
86+
knowledge_id__in=knowledge_id_list,
87+
is_active=False)]
88+
self.knowledge_id_list = knowledge_id_list
89+
self.exclude_document_id_list = exclude_document_id_list
7590
self.application = application
7691
return application
7792

apps/application/sql/list_knowledge_paragraph_by_paragraph_id.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
SELECT
22
paragraph.*,
33
knowledge."name" AS "knowledge_name",
4+
knowledge."type" AS "knowledge_type",
45
"document"."name" AS "document_name",
56
"document"."meta" AS "meta",
67
"document"."hit_handling_method" AS "hit_handling_method",

0 commit comments

Comments
 (0)