Skip to content

Commit bd865ce

Browse files
committed
feat: add DocumentSplitPatternAPI for segment ID retrieval with workspace and knowledge ID parameters
1 parent 6f5645e commit bd865ce

File tree

4 files changed

+63
-1
lines changed

4 files changed

+63
-1
lines changed

apps/knowledge/api/document.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,28 @@ def get_parameters():
315315
required=False,
316316
),
317317
]
318+
319+
320+
class DocumentSplitPatternAPI(APIMixin):
    """OpenAPI schema description for the document split-pattern endpoint."""

    @staticmethod
    def get_parameters():
        """Return the two required string path parameters of the endpoint.

        Both parameters share the same type, location and required flag, so
        only their name and description are listed explicitly.
        """
        # (name, description) pairs; the descriptions are Chinese for
        # "workspace id" and "knowledge base id" and are emitted verbatim.
        path_fields = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
        )
        return [
            OpenApiParameter(
                name=field_name,
                description=field_description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for field_name, field_description in path_fields
        ]

    @staticmethod
    def get_response():
        """Return the serializer class used to document the response body."""
        return DefaultResultSerializer

apps/knowledge/serializers/document.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,26 @@ def file_to_paragraph(self, file, pattern_list: List, with_filter: bool, limit:
624624
return result
625625
return [result]
626626

627+
class SplitPattern(serializers.Serializer):
    """Serializer that exposes the preset list of split patterns.

    Declares the ``workspace_id`` and ``knowledge_id`` fields; ``list`` itself
    is static and does not read them.
    """
    workspace_id = serializers.CharField(required=True, label=_('workspace id'))
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))

    @staticmethod
    def list():
        """Return the preset patterns as a list of ``key``/``value`` dicts.

        ``key`` is the display name (a literal marker or a translated label)
        and ``value`` is the regular-expression source string for it. The
        order is: heading markers ``#`` to ``######``, the ``-`` bullet, then
        the punctuation and whitespace separators.
        """
        # The single-hash heading uses its own, simpler pattern.
        top_heading = {'key': "#", 'value': '(?<=^)# .*|(?<=\\n)# .*'}

        # Two to six hashes all follow one template that differs only in the
        # run of '#' characters.
        sub_headings = [
            {
                'key': marks,
                'value': f'(?<=\\n)(?<!#){marks} (?!#).*|(?<=^)(?<!#){marks} (?!#).*',
            }
            for marks in ('#' * depth for depth in range(2, 7))
        ]

        separators = [
            {'key': '-', 'value': '(?<! )- .*'},
            {'key': _('space'), 'value': '(?<! ) (?! )'},
            {'key': _('semicolon'), 'value': '(?<!;);(?!;)'},
            {'key': _('comma'), 'value': '(?<!,),(?!,)'},
            {'key': _('period'), 'value': '(?<!。)。(?!。)'},
            {'key': _('enter'), 'value': '(?<!\\n)\\n(?!\\n)'},
            {'key': _('blank line'), 'value': '(?<!\\n)\\n\\n(?!\\n)'},
        ]

        return [top_heading, *sub_headings, *separators]
646+
627647
class Batch(serializers.Serializer):
628648
workspace_id = serializers.UUIDField(required=True, label=_('workspace id'))
629649
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))

apps/knowledge/urls.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>', views.KnowledgeView.Operate.as_view()),
1111
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document', views.DocumentView.as_view()),
1212
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split', views.DocumentView.Split.as_view()),
13+
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split_pattern', views.DocumentView.SplitPattern.as_view()),
1314
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch', views.DocumentView.Batch.as_view()),
1415
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/web', views.WebDocumentView.as_view()),
1516
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/qa', views.QaDocumentView.as_view()),

apps/knowledge/views/document.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI, DocumentCreateAPI, \
1212
DocumentReadAPI, DocumentEditAPI, DocumentDeleteAPI, TableDocumentCreateAPI, QaDocumentCreateAPI, \
1313
WebDocumentCreateAPI, CancelTaskAPI, BatchCancelTaskAPI, SyncWebAPI, RefreshAPI, BatchEditHitHandlingAPI, \
14-
DocumentTreeReadAPI
14+
DocumentTreeReadAPI, DocumentSplitPatternAPI
1515
from knowledge.serializers.document import DocumentSerializers
1616

1717

@@ -140,6 +140,22 @@ def post(self, request: Request, workspace_id: str, knowledge_id: str):
140140
'knowledge_id': knowledge_id,
141141
}).parse(split_data))
142142

143+
class SplitPattern(APIView):
    """GET endpoint returning the list produced by the SplitPattern serializer."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Get a list of segment IDs'),
        description=_('Get a list of segment IDs'),
        operation_id=_('Get a list of segment IDs'),
        parameters=DocumentSplitPatternAPI.get_parameters(),
        responses=DocumentSplitPatternAPI.get_response(),
        tags=[_('Knowledge Base/Documentation')]
    )
    def get(self, request: Request, workspace_id: str, knowledge_id: str):
        """Wrap ``DocumentSerializers.SplitPattern(...).list()`` in a success result.

        :param request: incoming request (not read by this handler).
        :param workspace_id: workspace identifier taken from the URL path.
        :param knowledge_id: knowledge identifier taken from the URL path.
        """
        # NOTE(review): the serializer receives the path values as data, but
        # is_valid() is not called before list() - confirm whether validating
        # them is intended.
        pattern_serializer = DocumentSerializers.SplitPattern(
            data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id}
        )
        return result.success(pattern_serializer.list())
158+
143159
class BatchEditHitHandling(APIView):
144160
authentication_classes = [TokenAuth]
145161

0 commit comments

Comments
 (0)