Skip to content

Commit aad136d

Browse files
committed
feat: Chunks stored
1 parent 1d60741 commit aad136d

File tree

4 files changed

+29
-5
lines changed

4 files changed

+29
-5
lines changed

apps/application/flow/step_node/knowledge_write_node/impl/base_knowledge_write_node.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from django.utils.translation import gettext_lazy as _
1717
from application.flow.i_step_node import NodeResult
1818
from application.flow.step_node.knowledge_write_node.i_knowledge_write_node import IKnowledgeWriteNode
19+
from common.chunk import text_to_chunk
1920
from common.utils.common import bulk_create_in_batches
2021
from knowledge.models import Document, KnowledgeType, Paragraph, File, FileSourceType, Problem, ProblemParagraphMapping
2122
from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage
@@ -67,14 +68,14 @@ def link_file(source_file_id, document_id):
6768
# 保存文件内容和元数据
6869
new_file.save(file_content)
6970

70-
7171
def get_paragraph_problem_model(knowledge_id: str, document_id: str, instance: Dict):
7272
paragraph = Paragraph(
7373
id=uuid.uuid7(),
7474
document_id=document_id,
7575
content=instance.get("content"),
7676
knowledge_id=knowledge_id,
77-
title=instance.get("title") if 'title' in instance else ''
77+
title=instance.get("title") if 'title' in instance else '',
78+
chunks = instance.get('chunks') if 'chunks' in instance else text_to_chunk(instance.get("content")),
7879
)
7980

8081
problem_paragraph_object_list = [ProblemParagraphObject(
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Generated by Django 5.2.8 on 2025-11-24 07:09
2+
3+
import django.contrib.postgres.fields
4+
from django.db import migrations, models
5+
6+
7+
class Migration(migrations.Migration):
    """Add the ``chunks`` field to the ``paragraph`` model of the ``knowledge`` app."""

    # Must run after the migration that precedes it in the knowledge app.
    dependencies = [('knowledge', '0005_knowledgeaction')]

    operations = [
        migrations.AddField(
            model_name='paragraph',
            name='chunks',
            # PostgreSQL array of strings; ``default=list`` gives each row its
            # own empty list, and ``size=None`` leaves the length unbounded.
            field=django.contrib.postgres.fields.ArrayField(
                base_field=models.CharField(),
                default=list,
                size=None,
                verbose_name='块',
            ),
        ),
    ]

apps/knowledge/models/knowledge.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from enum import Enum
44

55
import uuid_utils.compat as uuid
6+
from django.contrib.postgres.fields import ArrayField
67
from django.contrib.postgres.search import SearchVectorField
78
from django.db import models
89
from django.db.models import QuerySet
@@ -242,6 +243,7 @@ class Paragraph(AppModelMixin):
242243
hit_num = models.IntegerField(verbose_name="命中次数", default=0)
243244
is_active = models.BooleanField(default=True, db_index=True)
244245
position = models.IntegerField(verbose_name="段落顺序", default=0, db_index=True)
246+
chunks = ArrayField(verbose_name="块", base_field=models.CharField(), default=list)
245247

246248
class Meta:
247249
db_table = "paragraph"

apps/knowledge/vector/base_vector.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
def chunk_data(data: Dict):
    """Expand one record into one record per text chunk.

    A record whose ``source_type`` equals ``SourceType.PARAGRAPH`` (compared
    as strings) is split into chunks. The chunks stored under
    ``data['chunks']`` are used when that value is truthy; otherwise they are
    computed from ``data['text']`` with ``text_to_chunk``. Any other record
    is returned unchanged, wrapped in a single-element list.

    :param data: record to expand; only the ``source_type``, ``text`` and
        ``chunks`` keys are read here.
    :return: for paragraph records, a list of shallow copies of ``data`` whose
        ``text`` is replaced by one chunk each; otherwise ``[data]``.
    """
    if str(data.get('source_type')) != str(SourceType.PARAGRAPH.value):
        return [data]
    # A missing, None or empty ``chunks`` value falls back to chunking the
    # text on the fly; ``or`` avoids looking the key up twice.
    chunk_list = data.get('chunks') or text_to_chunk(data.get('text'))
    return [{**data, 'text': chunk} for chunk in chunk_list]
2929

@@ -63,7 +63,8 @@ def save_pre_handler(self):
6363
BaseVectorStore.vector_exists = True
6464
return True
6565

66-
def save(self, text, source_type: SourceType, knowledge_id: str, document_id: str, paragraph_id: str, source_id: str,
66+
def save(self, text, source_type: SourceType, knowledge_id: str, document_id: str, paragraph_id: str,
67+
source_id: str,
6768
is_active: bool,
6869
embedding: Embeddings):
6970
"""
@@ -104,7 +105,8 @@ def batch_save(self, data_list: List[Dict], embedding: Embeddings, is_the_task_i
104105
break
105106

106107
@abstractmethod
def _save(
        self,
        text,
        source_type: SourceType,
        knowledge_id: str,
        document_id: str,
        paragraph_id: str,
        source_id: str,
        is_active: bool,
        embedding: Embeddings,
):
    """Abstract hook with no behavior of its own; subclasses must override it.

    NOTE(review): presumably the storage-specific counterpart of ``save``,
    which takes the same parameters; confirm against the concrete stores.
    """

0 commit comments

Comments
 (0)