Skip to content

Commit 7bcb770

Browse files
Merge branch 'main' of https://github.com/maxkb-dev/maxkb
2 parents 45a60cd + b5b09dc commit 7bcb770

File tree

56 files changed

+756
-308
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+756
-308
lines changed

.github/dependabot.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
version: 2
2+
updates:
3+
- package-ecosystem: "pip"
4+
directory: "/"
5+
schedule:
6+
interval: "weekly"
7+
timezone: "Asia/Shanghai"
8+
day: "friday"
9+
target-branch: "v2"

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
MaxKB (Max Knowledge Brain) is a powerful and easy-to-use AI assistant that integrates Retrieval-Augmented Generation (RAG) pipelines, supports robust workflows, and provides advanced MCP tool-use capabilities. MaxKB is widely applied in scenarios such as intelligent customer service, corporate internal knowledge bases, academic research, and education.
1414

1515
- **RAG Pipeline**: Supports direct uploading of documents / automatic crawling of online documents, with features for automatic text splitting and vectorization. This effectively reduces hallucinations in large models, providing a superior smart Q&A interaction experience.
16-
- **Flexible Orchestration**: Equipped with a powerful workflow engine, function library and MCP tool-use, enabling the orchestration of AI processes to meet the needs of complex business scenarios.
16+
- **Agentic Workflow**: Equipped with a powerful workflow engine, function library and MCP tool-use, enabling the orchestration of AI processes to meet the needs of complex business scenarios.
1717
- **Seamless Integration**: Facilitates zero-coding rapid integration into third-party business systems, quickly equipping existing systems with intelligent Q&A capabilities to enhance user satisfaction.
1818
- **Model-Agnostic**: Supports various large models, including private models (such as DeepSeek, Llama, Qwen, etc.) and public models (like OpenAI, Claude, Gemini, etc.).
1919
- **Multimodal**: Native support for text, image, audio and video as both input and output.

apps/application/flow/step_node/start_node/impl/base_start_node.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def save_context(self, details, workflow_manage):
4040
self.context['document'] = details.get('document_list')
4141
self.context['image'] = details.get('image_list')
4242
self.context['audio'] = details.get('audio_list')
43+
self.context['other'] = details.get('other_list')
4344
self.status = details.get('status')
4445
self.err_message = details.get('err_message')
4546
for key, value in workflow_variable.items():
@@ -59,7 +60,8 @@ def execute(self, question, **kwargs) -> NodeResult:
5960
'question': question,
6061
'image': self.workflow_manage.image_list,
6162
'document': self.workflow_manage.document_list,
62-
'audio': self.workflow_manage.audio_list
63+
'audio': self.workflow_manage.audio_list,
64+
'other': self.workflow_manage.other_list,
6365
}
6466
return NodeResult(node_variable, workflow_variable)
6567

@@ -83,5 +85,6 @@ def get_details(self, index: int, **kwargs):
8385
'image_list': self.context.get('image'),
8486
'document_list': self.context.get('document'),
8587
'audio_list': self.context.get('audio'),
88+
'other_list': self.context.get('other'),
8689
'global_fields': global_fields
8790
}

apps/application/flow/workflow_manage.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ def __init__(self, flow: Flow, params, work_flow_post_handler: WorkFlowPostHandl
238238
base_to_response: BaseToResponse = SystemToResponse(), form_data=None, image_list=None,
239239
document_list=None,
240240
audio_list=None,
241+
other_list=None,
241242
start_node_id=None,
242243
start_node_data=None, chat_record=None, child_node=None):
243244
if form_data is None:
@@ -248,12 +249,15 @@ def __init__(self, flow: Flow, params, work_flow_post_handler: WorkFlowPostHandl
248249
document_list = []
249250
if audio_list is None:
250251
audio_list = []
252+
if other_list is None:
253+
other_list = []
251254
self.start_node_id = start_node_id
252255
self.start_node = None
253256
self.form_data = form_data
254257
self.image_list = image_list
255258
self.document_list = document_list
256259
self.audio_list = audio_list
260+
self.other_list = other_list
257261
self.params = params
258262
self.flow = flow
259263
self.context = {}

apps/application/models/application.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from django.contrib.postgres.fields import ArrayField
1212
from django.db import models
1313
from langchain.schema import HumanMessage, AIMessage
14-
14+
from django.utils.translation import gettext as _
1515
from common.encoder.encoder import SystemEncoder
1616
from common.mixins.app_model_mixin import AppModelMixin
1717
from dataset.models.data_set import DataSet
@@ -167,7 +167,11 @@ def get_human_message(self):
167167
return HumanMessage(content=self.problem_text)
168168

169169
def get_ai_message(self):
170-
return AIMessage(content=self.answer_text)
170+
answer_text = self.answer_text
171+
if answer_text is None or len(str(answer_text).strip()) == 0:
172+
answer_text = _(
173+
'Sorry, no relevant content was found. Please re-describe your problem or provide more information. ')
174+
return AIMessage(content=answer_text)
171175

172176
def get_node_details_runtime_node_id(self, runtime_node_id):
173177
return self.details.get(runtime_node_id, None)

apps/application/serializers/chat_message_serializers.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -213,12 +213,21 @@ def get_message(instance):
213213
return instance.get('messages')[-1].get('content')
214214

215215
@staticmethod
216-
def generate_chat(chat_id, application_id, message, client_id):
216+
def generate_chat(chat_id, application_id, message, client_id, asker=None):
217217
if chat_id is None:
218218
chat_id = str(uuid.uuid1())
219219
chat = QuerySet(Chat).filter(id=chat_id).first()
220220
if chat is None:
221-
Chat(id=chat_id, application_id=application_id, abstract=message[0:1024], client_id=client_id).save()
221+
asker_dict = {'user_name': '游客'}
222+
if asker is not None:
223+
if isinstance(asker, str):
224+
asker_dict = {
225+
'user_name': asker
226+
}
227+
elif isinstance(asker, dict):
228+
asker_dict = asker
229+
Chat(id=chat_id, application_id=application_id, abstract=message[0:1024], client_id=client_id,
230+
asker=asker_dict).save()
222231
return chat_id
223232

224233
def chat(self, instance: Dict, with_valid=True):
@@ -232,7 +241,8 @@ def chat(self, instance: Dict, with_valid=True):
232241
application_id = self.data.get('application_id')
233242
client_id = self.data.get('client_id')
234243
client_type = self.data.get('client_type')
235-
chat_id = self.generate_chat(chat_id, application_id, message, client_id)
244+
chat_id = self.generate_chat(chat_id, application_id, message, client_id,
245+
asker=instance.get('form_data', {}).get("asker"))
236246
return ChatMessageSerializer(
237247
data={
238248
'chat_id': chat_id, 'message': message,
@@ -245,6 +255,7 @@ def chat(self, instance: Dict, with_valid=True):
245255
'image_list': instance.get('image_list', []),
246256
'document_list': instance.get('document_list', []),
247257
'audio_list': instance.get('audio_list', []),
258+
'other_list': instance.get('other_list', []),
248259
}
249260
).chat(base_to_response=OpenaiToResponse())
250261

@@ -274,6 +285,7 @@ class ChatMessageSerializer(serializers.Serializer):
274285
image_list = serializers.ListField(required=False, error_messages=ErrMessage.list(_("picture")))
275286
document_list = serializers.ListField(required=False, error_messages=ErrMessage.list(_("document")))
276287
audio_list = serializers.ListField(required=False, error_messages=ErrMessage.list(_("Audio")))
288+
other_list = serializers.ListField(required=False, error_messages=ErrMessage.list(_("Other")))
277289
child_node = serializers.DictField(required=False, allow_null=True,
278290
error_messages=ErrMessage.dict(_("Child Nodes")))
279291

@@ -372,6 +384,7 @@ def chat_work_flow(self, chat_info: ChatInfo, base_to_response):
372384
image_list = self.data.get('image_list')
373385
document_list = self.data.get('document_list')
374386
audio_list = self.data.get('audio_list')
387+
other_list = self.data.get('other_list')
375388
user_id = chat_info.application.user_id
376389
chat_record_id = self.data.get('chat_record_id')
377390
chat_record = None
@@ -388,7 +401,7 @@ def chat_work_flow(self, chat_info: ChatInfo, base_to_response):
388401
'client_id': client_id,
389402
'client_type': client_type,
390403
'user_id': user_id}, WorkFlowPostHandler(chat_info, client_id, client_type),
391-
base_to_response, form_data, image_list, document_list, audio_list,
404+
base_to_response, form_data, image_list, document_list, audio_list, other_list,
392405
self.data.get('runtime_node_id'),
393406
self.data.get('node_data'), chat_record, self.data.get('child_node'))
394407
r = work_flow_manage.run()

apps/application/views/chat_views.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ def post(self, request: Request, chat_id: str):
144144
'document_list') if 'document_list' in request.data else [],
145145
'audio_list': request.data.get(
146146
'audio_list') if 'audio_list' in request.data else [],
147+
'other_list': request.data.get(
148+
'other_list') if 'other_list' in request.data else [],
147149
'client_type': request.auth.client_type,
148150
'node_id': request.data.get('node_id', None),
149151
'runtime_node_id': request.data.get('runtime_node_id', None),

apps/common/handle/impl/doc_split_handle.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,7 @@ def get_image_id(image_id):
112112

113113
title_font_list = [
114114
[36, 100],
115-
[26, 36],
116-
[24, 26],
117-
[22, 24],
118-
[18, 22],
119-
[16, 18]
115+
[30, 36]
120116
]
121117

122118

@@ -130,7 +126,7 @@ def get_title_level(paragraph: Paragraph):
130126
if len(paragraph.runs) == 1:
131127
font_size = paragraph.runs[0].font.size
132128
pt = font_size.pt
133-
if pt >= 16:
129+
if pt >= 30:
134130
for _value, index in zip(title_font_list, range(len(title_font_list))):
135131
if pt >= _value[0] and pt < _value[1]:
136132
return index + 1

apps/locales/zh_CN/LC_MESSAGES/django.po

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4536,7 +4536,7 @@ msgstr "修改知识库信息"
45364536
#: community/apps/dataset/views/document.py:463
45374537
#: community/apps/dataset/views/document.py:464
45384538
msgid "Get the knowledge base paginated list"
4539-
msgstr "获取知识库分页列表"
4539+
msgstr "获取知识库文档分页列表"
45404540

45414541
#: community/apps/dataset/views/document.py:31
45424542
#: community/apps/dataset/views/document.py:32

apps/locales/zh_Hant/LC_MESSAGES/django.po

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4545,7 +4545,7 @@ msgstr "修改知識庫信息"
45454545
#: community/apps/dataset/views/document.py:463
45464546
#: community/apps/dataset/views/document.py:464
45474547
msgid "Get the knowledge base paginated list"
4548-
msgstr "獲取知識庫分頁列表"
4548+
msgstr "獲取知識庫文檔分頁列表"
45494549

45504550
#: community/apps/dataset/views/document.py:31
45514551
#: community/apps/dataset/views/document.py:32

0 commit comments

Comments
 (0)