Skip to content

Commit 938b036

Browse files
authored
feat: knowledgebase summary index (#31600)
2 parents 5ccd578 + e1cb37e commit 938b036

File tree

5 files changed: +69 additions, -14 deletions

api/core/rag/index_processor/processor/paragraph_index_processor.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from core.file import File, FileTransferMethod, FileType, file_manager
1313
from core.llm_generator.prompts import DEFAULT_GENERATOR_SUMMARY_PROMPT
1414
from core.model_manager import ModelInstance
15+
from core.model_runtime.entities.llm_entities import LLMUsage
1516
from core.model_runtime.entities.message_entities import (
1617
ImagePromptMessageContent,
1718
PromptMessageContentUnionTypes,
@@ -295,11 +296,11 @@ def process(preview: PreviewDetail) -> None:
295296
if flask_app:
296297
# Ensure Flask app context in worker thread
297298
with flask_app.app_context():
298-
summary = self.generate_summary(tenant_id, preview.content, summary_index_setting)
299+
summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting)
299300
preview.summary = summary
300301
else:
301302
# Fallback: try without app context (may fail)
302-
summary = self.generate_summary(tenant_id, preview.content, summary_index_setting)
303+
summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting)
303304
preview.summary = summary
304305

305306
# Generate summaries concurrently using ThreadPoolExecutor
@@ -356,7 +357,7 @@ def generate_summary(
356357
text: str,
357358
summary_index_setting: dict | None = None,
358359
segment_id: str | None = None,
359-
) -> str:
360+
) -> tuple[str, LLMUsage]:
360361
"""
361362
Generate summary for the given text using ModelInstance.invoke_llm and the default or custom summary prompt,
362363
and supports vision models by including images from the segment attachments or text content.
@@ -366,6 +367,9 @@ def generate_summary(
366367
text: Text content to summarize
367368
summary_index_setting: Summary index configuration
368369
segment_id: Optional segment ID to fetch attachments from SegmentAttachmentBinding table
370+
371+
Returns:
372+
Tuple of (summary_content, llm_usage) where llm_usage is LLMUsage object
369373
"""
370374
if not summary_index_setting or not summary_index_setting.get("enable"):
371375
raise ValueError("summary_index_setting is required and must be enabled to generate summary.")
@@ -432,7 +436,19 @@ def generate_summary(
432436

433437
result = model_instance.invoke_llm(prompt_messages=prompt_messages, model_parameters={}, stream=False)
434438

435-
return getattr(result.message, "content", "")
439+
summary_content = getattr(result.message, "content", "")
440+
usage = result.usage
441+
442+
# Deduct quota for summary generation (same as workflow nodes)
443+
from core.workflow.nodes.llm import llm_utils
444+
445+
try:
446+
llm_utils.deduct_llm_quota(tenant_id=tenant_id, model_instance=model_instance, usage=usage)
447+
except Exception as e:
448+
# Log but don't fail summary generation if quota deduction fails
449+
logger.warning("Failed to deduct quota for summary generation: %s", str(e))
450+
451+
return summary_content, usage
436452

437453
@staticmethod
438454
def _extract_images_from_text(tenant_id: str, text: str) -> list[File]:

api/core/rag/index_processor/processor/parent_child_index_processor.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -382,15 +382,15 @@ def process(preview: PreviewDetail) -> None:
382382
if flask_app:
383383
# Ensure Flask app context in worker thread
384384
with flask_app.app_context():
385-
summary = ParagraphIndexProcessor.generate_summary(
385+
summary, _ = ParagraphIndexProcessor.generate_summary(
386386
tenant_id=tenant_id,
387387
text=preview.content,
388388
summary_index_setting=summary_index_setting,
389389
)
390390
preview.summary = summary
391391
else:
392392
# Fallback: try without app context (may fail)
393-
summary = ParagraphIndexProcessor.generate_summary(
393+
summary, _ = ParagraphIndexProcessor.generate_summary(
394394
tenant_id=tenant_id,
395395
text=preview.content,
396396
summary_index_setting=summary_index_setting,

api/core/workflow/nodes/knowledge_index/knowledge_index_node.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -364,7 +364,7 @@ def generate_summary_for_chunk(preview_item: dict) -> None:
364364
# Set Flask application context in worker thread
365365
if flask_app:
366366
with flask_app.app_context():
367-
summary = ParagraphIndexProcessor.generate_summary(
367+
summary, _ = ParagraphIndexProcessor.generate_summary(
368368
tenant_id=dataset.tenant_id,
369369
text=preview_item["content"],
370370
summary_index_setting=summary_index_setting,
@@ -373,7 +373,7 @@ def generate_summary_for_chunk(preview_item: dict) -> None:
373373
preview_item["summary"] = summary
374374
else:
375375
# Fallback: try without app context (may fail)
376-
summary = ParagraphIndexProcessor.generate_summary(
376+
summary, _ = ParagraphIndexProcessor.generate_summary(
377377
tenant_id=dataset.tenant_id,
378378
text=preview_item["content"],
379379
summary_index_setting=summary_index_setting,

api/models/dataset.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1597,6 +1597,7 @@ class DocumentSegmentSummary(Base):
15971597
summary_content: Mapped[str] = mapped_column(LongText, nullable=True)
15981598
summary_index_node_id: Mapped[str] = mapped_column(String(255), nullable=True)
15991599
summary_index_node_hash: Mapped[str] = mapped_column(String(255), nullable=True)
1600+
tokens: Mapped[int | None] = mapped_column(sa.Integer, nullable=True)
16001601
status: Mapped[str] = mapped_column(String(32), nullable=False, server_default=sa.text("'generating'"))
16011602
error: Mapped[str] = mapped_column(LongText, nullable=True)
16021603
enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))

api/services/summary_index_service.py

Lines changed: 44 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,9 @@
55
import uuid
66
from datetime import UTC, datetime
77

8+
from core.model_manager import ModelManager
9+
from core.model_runtime.entities.llm_entities import LLMUsage
10+
from core.model_runtime.entities.model_entities import ModelType
811
from core.rag.datasource.vdb.vector_factory import Vector
912
from core.rag.index_processor.constant.doc_type import DocType
1013
from core.rag.models.document import Document
@@ -24,7 +27,7 @@ def generate_summary_for_segment(
2427
segment: DocumentSegment,
2528
dataset: Dataset,
2629
summary_index_setting: dict,
27-
) -> str:
30+
) -> tuple[str, LLMUsage]:
2831
"""
2932
Generate summary for a single segment.
3033
@@ -34,7 +37,7 @@ def generate_summary_for_segment(
3437
summary_index_setting: Summary index configuration
3538
3639
Returns:
37-
Generated summary text
40+
Tuple of (summary_content, llm_usage) where llm_usage is LLMUsage object
3841
3942
Raises:
4043
ValueError: If summary_index_setting is invalid or generation fails
@@ -43,7 +46,7 @@ def generate_summary_for_segment(
4346
# Use lazy import to avoid circular import
4447
from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor
4548

46-
summary_content = ParagraphIndexProcessor.generate_summary(
49+
summary_content, usage = ParagraphIndexProcessor.generate_summary(
4750
tenant_id=dataset.tenant_id,
4851
text=segment.content,
4952
summary_index_setting=summary_index_setting,
@@ -53,7 +56,7 @@ def generate_summary_for_segment(
5356
if not summary_content:
5457
raise ValueError("Generated summary is empty")
5558

56-
return summary_content
59+
return summary_content, usage
5760

5861
@staticmethod
5962
def create_summary_record(
@@ -153,6 +156,22 @@ def vectorize_summary(
153156
str(e),
154157
)
155158

159+
# Calculate embedding tokens for summary (for logging and statistics)
160+
embedding_tokens = 0
161+
try:
162+
model_manager = ModelManager()
163+
embedding_model = model_manager.get_model_instance(
164+
tenant_id=dataset.tenant_id,
165+
provider=dataset.embedding_model_provider,
166+
model_type=ModelType.TEXT_EMBEDDING,
167+
model=dataset.embedding_model,
168+
)
169+
if embedding_model:
170+
tokens_list = embedding_model.get_text_embedding_num_tokens([summary_record.summary_content])
171+
embedding_tokens = tokens_list[0] if tokens_list else 0
172+
except Exception as e:
173+
logger.warning("Failed to calculate embedding tokens for summary: %s", str(e))
174+
156175
# Create document with summary content and metadata
157176
summary_document = Document(
158177
page_content=summary_record.summary_content,
@@ -179,9 +198,18 @@ def vectorize_summary(
179198
# we still want to re-vectorize (upsert will overwrite)
180199
vector.add_texts([summary_document], duplicate_check=False)
181200

201+
# Log embedding token usage
202+
if embedding_tokens > 0:
203+
logger.info(
204+
"Summary embedding for segment %s used %s tokens",
205+
segment.id,
206+
embedding_tokens,
207+
)
208+
182209
# Success - update summary record with index node info
183210
summary_record.summary_index_node_id = summary_index_node_id
184211
summary_record.summary_index_node_hash = summary_hash
212+
summary_record.tokens = embedding_tokens # Save embedding tokens
185213
summary_record.status = "completed"
186214
# Explicitly update updated_at to ensure it's refreshed even if other fields haven't changed
187215
summary_record.updated_at = datetime.now(UTC).replace(tzinfo=None)
@@ -364,14 +392,24 @@ def generate_and_vectorize_summary(
364392
db.session.add(summary_record)
365393
db.session.flush()
366394

367-
# Generate summary
368-
summary_content = SummaryIndexService.generate_summary_for_segment(
395+
# Generate summary (returns summary_content and llm_usage)
396+
summary_content, llm_usage = SummaryIndexService.generate_summary_for_segment(
369397
segment, dataset, summary_index_setting
370398
)
371399

372400
# Update summary content
373401
summary_record.summary_content = summary_content
374402

403+
# Log LLM usage for summary generation
404+
if llm_usage and llm_usage.total_tokens > 0:
405+
logger.info(
406+
"Summary generation for segment %s used %s tokens (prompt: %s, completion: %s)",
407+
segment.id,
408+
llm_usage.total_tokens,
409+
llm_usage.prompt_tokens,
410+
llm_usage.completion_tokens,
411+
)
412+
375413
# Vectorize summary (will delete old vector if exists before creating new one)
376414
SummaryIndexService.vectorize_summary(summary_record, segment, dataset)
377415

0 commit comments

Comments
 (0)