Skip to content

Commit 3d3fba6

Browse files
committed
🐛 Fix: other interfaces were blocked (left pending) while the knowledge base was being automatically summarized.
[Specification Details] Unify the management of prompts into prompt_template_utils.
1 parent 259c710 commit 3d3fba6

File tree

2 files changed

+65
-39
lines changed

2 files changed

+65
-39
lines changed

backend/utils/document_vector_utils.py

Lines changed: 11 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -9,41 +9,26 @@
99
4. Cluster summarization
1010
"""
1111
import logging
12-
import os
1312
import random
1413
from typing import Dict, List, Optional, Tuple
1514

1615
import numpy as np
17-
import yaml
1816
from jinja2 import Template, StrictUndefined
1917
from nexent.vector_database.base import VectorDatabaseCore
2018
from sklearn.cluster import KMeans
2119
from sklearn.metrics import silhouette_score
2220
from sklearn.metrics.pairwise import cosine_similarity
2321

2422
from consts.const import LANGUAGE
23+
from utils.prompt_template_utils import (
24+
get_document_summary_prompt_template,
25+
get_cluster_summary_reduce_prompt_template,
26+
get_cluster_summary_agent_prompt_template
27+
)
2528

2629
logger = logging.getLogger("document_vector_utils")
2730

2831

29-
def _get_prompt_absolute_path(relative_path: str) -> str:
30-
"""
31-
Get absolute path for prompt files.
32-
33-
Args:
34-
relative_path: Relative path like 'backend/prompts/xxx.yaml'
35-
36-
Returns:
37-
Absolute path to the prompt file
38-
"""
39-
# Get the directory of this file and construct absolute path
40-
current_dir = os.path.dirname(os.path.abspath(__file__))
41-
# Go up one level from utils to backend, then use the template path
42-
backend_dir = os.path.dirname(current_dir)
43-
absolute_path = os.path.join(backend_dir, relative_path.replace('backend/', ''))
44-
return absolute_path
45-
46-
4732
def get_documents_from_es(index_name: str, vdb_core: VectorDatabaseCore, sample_doc_count: int = 200) -> Dict[str, Dict]:
4833
"""
4934
Get document samples from Elasticsearch, aggregated by path_or_url
@@ -567,14 +552,8 @@ def summarize_document(document_content: str, filename: str, language: str = LAN
567552
Document summary text
568553
"""
569554
try:
570-
# Select prompt file based on language
571-
if language == LANGUAGE["ZH"]:
572-
prompt_path = _get_prompt_absolute_path('backend/prompts/document_summary_agent_zh.yaml')
573-
else:
574-
prompt_path = _get_prompt_absolute_path('backend/prompts/document_summary_agent.yaml')
575-
576-
with open(prompt_path, 'r', encoding='utf-8') as f:
577-
prompts = yaml.safe_load(f)
555+
# Get prompt template from prompt_template_utils
556+
prompts = get_document_summary_prompt_template(language)
578557

579558
system_prompt = prompts.get('system_prompt', '')
580559
user_prompt_template = prompts.get('user_prompt', '')
@@ -645,14 +624,8 @@ def summarize_cluster(document_summaries: List[str], language: str = LANGUAGE["Z
645624
Cluster summary text
646625
"""
647626
try:
648-
# Select prompt file based on language
649-
if language == LANGUAGE["ZH"]:
650-
prompt_path = _get_prompt_absolute_path('backend/prompts/cluster_summary_reduce_zh.yaml')
651-
else:
652-
prompt_path = _get_prompt_absolute_path('backend/prompts/cluster_summary_reduce.yaml')
653-
654-
with open(prompt_path, 'r', encoding='utf-8') as f:
655-
prompts = yaml.safe_load(f)
627+
# Get prompt template from prompt_template_utils
628+
prompts = get_cluster_summary_reduce_prompt_template(language)
656629

657630
system_prompt = prompts.get('system_prompt', '')
658631
user_prompt_template = prompts.get('user_prompt', '')
@@ -957,9 +930,8 @@ def summarize_cluster_legacy(cluster_content: str, language: str = LANGUAGE["ZH"
957930
Cluster summary text
958931
"""
959932
try:
960-
prompt_path = _get_prompt_absolute_path('backend/prompts/cluster_summary_agent.yaml')
961-
with open(prompt_path, 'r', encoding='utf-8') as f:
962-
prompts = yaml.safe_load(f)
933+
# Get prompt template from prompt_template_utils
934+
prompts = get_cluster_summary_agent_prompt_template(language)
963935

964936
system_prompt = prompts.get('system_prompt', '')
965937
user_prompt_template = prompts.get('user_prompt', '')

backend/utils/prompt_template_utils.py

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,9 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw
2121
- 'analyze_file': File analysis template
2222
- 'generate_title': Title generation template
2323
- 'file_processing_messages': File processing messages template
24+
- 'document_summary': Document summary template (Map stage)
25+
- 'cluster_summary_reduce': Cluster summary reduce template (Reduce stage)
26+
- 'cluster_summary_agent': Cluster summary agent template (legacy)
2427
language: Language code ('zh' or 'en')
2528
**kwargs: Additional parameters, for agent type need to pass is_manager parameter
2629
@@ -61,6 +64,18 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw
6164
'file_processing_messages': {
6265
LANGUAGE["ZH"]: 'backend/prompts/utils/file_processing_messages.yaml',
6366
LANGUAGE["EN"]: 'backend/prompts/utils/file_processing_messages_en.yaml'
67+
},
68+
'document_summary': {
69+
LANGUAGE["ZH"]: 'backend/prompts/document_summary_agent_zh.yaml',
70+
LANGUAGE["EN"]: 'backend/prompts/document_summary_agent.yaml'
71+
},
72+
'cluster_summary_reduce': {
73+
LANGUAGE["ZH"]: 'backend/prompts/cluster_summary_reduce_zh.yaml',
74+
LANGUAGE["EN"]: 'backend/prompts/cluster_summary_reduce.yaml'
75+
},
76+
'cluster_summary_agent': {
77+
LANGUAGE["ZH"]: 'backend/prompts/cluster_summary_agent.yaml',
78+
LANGUAGE["EN"]: 'backend/prompts/cluster_summary_agent.yaml'
6479
}
6580
}
6681

@@ -164,3 +179,42 @@ def get_file_processing_messages_template(language: str = 'zh') -> Dict[str, Any
164179
dict: Loaded file processing messages configuration
165180
"""
166181
return get_prompt_template('file_processing_messages', language)
182+
183+
184+
def get_document_summary_prompt_template(language: str = LANGUAGE["ZH"]) -> Dict[str, Any]:
185+
"""
186+
Get document summary prompt template (Map stage)
187+
188+
Args:
189+
language: Language code ('zh' or 'en')
190+
191+
Returns:
192+
dict: Loaded document summary prompt template configuration
193+
"""
194+
return get_prompt_template('document_summary', language)
195+
196+
197+
def get_cluster_summary_reduce_prompt_template(language: str = LANGUAGE["ZH"]) -> Dict[str, Any]:
198+
"""
199+
Get cluster summary reduce prompt template (Reduce stage)
200+
201+
Args:
202+
language: Language code ('zh' or 'en')
203+
204+
Returns:
205+
dict: Loaded cluster summary reduce prompt template configuration
206+
"""
207+
return get_prompt_template('cluster_summary_reduce', language)
208+
209+
210+
def get_cluster_summary_agent_prompt_template(language: str = LANGUAGE["ZH"]) -> Dict[str, Any]:
211+
"""
212+
Get cluster summary agent prompt template (legacy)
213+
214+
Args:
215+
language: Language code ('zh' or 'en')
216+
217+
Returns:
218+
dict: Loaded cluster summary agent prompt template configuration
219+
"""
220+
return get_prompt_template('cluster_summary_agent', language)

0 commit comments

Comments
 (0)