Skip to content

Commit 4995721

Browse files
committed
feat: implement Problem API for CRUD operations and batch processing
1 parent fd7fd36 commit 4995721

File tree

5 files changed

+309
-4
lines changed

5 files changed

+309
-4
lines changed

apps/knowledge/api/problem.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from drf_spectacular.types import OpenApiTypes
2+
from drf_spectacular.utils import OpenApiParameter
3+
4+
from common.mixins.api_mixin import APIMixin
5+
from common.result import DefaultResultSerializer
6+
from knowledge.serializers.problem import ProblemBatchSerializer, \
7+
ProblemBatchDeleteSerializer, BatchAssociation
8+
9+
10+
class ProblemReadAPI(APIMixin):
    """OpenAPI metadata shared by the problem endpoints.

    Supplies the path parameters and the response serializer that the
    views pass to ``extend_schema``.
    """

    @staticmethod
    def get_parameters():
        """Return the required string path parameters of the endpoint."""
        # (name, description) pairs; the descriptions are runtime strings
        # meaning "workspace id" and "knowledge base id".
        path_parameters = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
        )
        return [
            OpenApiParameter(
                name=parameter_name,
                description=parameter_description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for parameter_name, parameter_description in path_parameters
        ]

    @staticmethod
    def get_response():
        """Return the serializer class used to document the response."""
        return DefaultResultSerializer
33+
34+
35+
class ProblemBatchCreateAPI(ProblemReadAPI):
    """OpenAPI metadata for batch problem creation.

    Inherits parameters and response from ``ProblemReadAPI`` and adds the
    request body schema.
    """

    @staticmethod
    def get_request():
        """Return the serializer class documenting the request body."""
        return ProblemBatchSerializer
39+
40+
41+
class BatchAssociationAPI(ProblemReadAPI):
    """OpenAPI metadata for associating problems with paragraphs in batch.

    Inherits parameters and response from ``ProblemReadAPI`` and adds the
    request body schema.
    """

    @staticmethod
    def get_request():
        """Return the serializer class documenting the request body."""
        return BatchAssociation
45+
46+
47+
class BatchDeleteAPI(ProblemReadAPI):
    """OpenAPI metadata for batch problem deletion.

    Inherits parameters and response from ``ProblemReadAPI`` and adds the
    request body schema.
    """

    @staticmethod
    def get_request():
        """Return the serializer class documenting the request body."""
        return ProblemBatchDeleteSerializer

apps/knowledge/serializers/problem.py

Lines changed: 165 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
import os
2-
from typing import Dict
2+
from functools import reduce
3+
from typing import Dict, List
34

5+
import uuid_utils.compat as uuid
46
from django.db import transaction
57
from django.db.models import QuerySet
68
from django.utils.translation import gettext_lazy as _
79
from rest_framework import serializers
810

9-
from common.db.search import native_search
11+
from common.db.search import native_search, native_page_search
1012
from common.utils.common import get_file_content
11-
from knowledge.models import Problem, ProblemParagraphMapping, Paragraph, Knowledge
13+
from knowledge.models import Problem, ProblemParagraphMapping, Paragraph, Knowledge, SourceType
1214
from knowledge.serializers.common import get_embedding_model_id_by_knowledge_id
13-
from knowledge.task.embedding import delete_embedding_by_source_ids, update_problem_embedding
15+
from knowledge.task.embedding import delete_embedding_by_source_ids, update_problem_embedding, embedding_by_data_list
1416
from maxkb.const import PROJECT_DIR
1517

1618

@@ -25,7 +27,114 @@ class ProblemInstanceSerializer(serializers.Serializer):
2527
content = serializers.CharField(required=True, max_length=256, label=_('content'))
2628

2729

30+
class ProblemMappingSerializer(serializers.Serializer):
    """Two required UUID fields: a paragraph id and a document id."""

    paragraph_id = serializers.UUIDField(label=_('paragraph id'), required=True)
    document_id = serializers.UUIDField(label=_('document id'), required=True)
33+
34+
35+
class ProblemBatchSerializer(serializers.Serializer):
    """Request body holding a required list of problem texts.

    Each entry is a required string of at most 256 characters.
    """

    problem_list = serializers.ListField(
        child=serializers.CharField(label=_('problem'), max_length=256, required=True),
        label=_('problem list'),
        required=True,
    )
38+
39+
40+
class ProblemBatchDeleteSerializer(serializers.Serializer):
    """Request body holding a required list of problem UUIDs."""

    problem_id_list = serializers.ListField(
        child=serializers.UUIDField(label=_('problem id'), required=True),
        label=_('problem id list'),
        required=True,
    )
43+
44+
45+
class AssociationParagraph(serializers.Serializer):
    """One association target: a required paragraph id and document id."""

    paragraph_id = serializers.UUIDField(label=_('paragraph id'), required=True)
    document_id = serializers.UUIDField(label=_('document id'), required=True)
48+
49+
50+
class BatchAssociation(serializers.Serializer):
    """Request body for associating several problems with several paragraphs."""

    # Required list of problem UUIDs.
    problem_id_list = serializers.ListField(
        child=serializers.UUIDField(label=_('problem id'), required=True),
        label=_('problem id list'),
        required=True,
    )
    # Paragraph/document id pairs, validated by AssociationParagraph.
    paragraph_list = AssociationParagraph(many=True)
54+
55+
56+
def is_exits(exits_problem_paragraph_mapping_list, new_paragraph_mapping):
    """Tell whether an equivalent problem/paragraph mapping already exists.

    Two mappings are equivalent when their paragraph id, problem id and
    owning knowledge id are equal. Ids are compared by their string form on
    BOTH sides, so a ``UUID`` value matches its string representation
    regardless of which side carries which type.

    (The function name is kept as-is for existing callers.)

    :param exits_problem_paragraph_mapping_list: iterable of stored mappings
    :param new_paragraph_mapping: candidate mapping to look for
    :return: True if at least one stored mapping is equivalent
    """

    def scope_id(mapping):
        # Other queries in this module scope mappings by ``knowledge_id``;
        # fall back to the older ``dataset_id`` attribute when that is the
        # only one the object carries.
        value = getattr(mapping, 'knowledge_id', None)
        if value is None:
            value = mapping.dataset_id
        return str(value)

    # Normalise the candidate once instead of once per stored mapping.
    new_paragraph_id = str(new_paragraph_mapping.paragraph_id)
    new_problem_id = str(new_paragraph_mapping.problem_id)
    new_scope_id = scope_id(new_paragraph_mapping)

    return any(
        str(existing.paragraph_id) == new_paragraph_id
        and str(existing.problem_id) == new_problem_id
        and scope_id(existing) == new_scope_id
        for existing in exits_problem_paragraph_mapping_list
    )
63+
64+
65+
def to_problem_paragraph_mapping(problem, document_id: str, paragraph_id: str, dataset_id: str):
    """Build an unsaved mapping between ``problem`` and a paragraph.

    :param problem: problem object; only its ``id`` is read
    :param document_id: id stored on the mapping as ``document_id``
    :param paragraph_id: id stored on the mapping as ``paragraph_id``
    :param dataset_id: id of the owning knowledge base (parameter name kept
        for backward compatibility)
    :return: tuple ``(ProblemParagraphMapping, problem)``; the mapping is
        only constructed here, not saved
    """
    # Other queries in this module filter ProblemParagraphMapping by
    # ``knowledge_id``, so the owning knowledge base is passed under that name.
    # NOTE(review): confirm against the model definition.
    mapping = ProblemParagraphMapping(
        id=uuid.uuid1(),
        document_id=document_id,
        paragraph_id=paragraph_id,
        knowledge_id=dataset_id,
        problem_id=str(problem.id),
    )
    return mapping, problem
71+
72+
2873
class ProblemSerializers(serializers.Serializer):
74+
class BatchOperate(serializers.Serializer):
    """Batch operations on the problems of a single knowledge base."""

    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))

    def delete(self, problem_id_list: List, with_valid=True):
        """Delete problems, their paragraph mappings and the mappings' embeddings.

        Only rows belonging to this serializer's ``knowledge_id`` are touched,
        so ids of problems from another knowledge base are ignored.

        :param problem_id_list: ids of the problems to delete
        :param with_valid: validate the serializer data first
        :return: True
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        knowledge_id = self.data.get('knowledge_id')
        mapping_query_set = QuerySet(ProblemParagraphMapping).filter(
            knowledge_id=knowledge_id,
            problem_id__in=problem_id_list)
        # Mapping ids are the embedding source ids; read them before the rows go away.
        source_ids = [row.id for row in mapping_query_set]
        # Remove mappings and problems together so a failure cannot leave
        # problems without mappings or the other way round.
        with transaction.atomic():
            mapping_query_set.delete()
            QuerySet(Problem).filter(knowledge_id=knowledge_id, id__in=problem_id_list).delete()
        delete_embedding_by_source_ids(source_ids)
        return True

    def association(self, instance: Dict, with_valid=True):
        """Associate every given problem with every given paragraph.

        Pairs for which ``is_exits`` finds an existing mapping are skipped.
        New mappings are bulk-created and then handed to
        ``embedding_by_data_list`` together with the problem content.

        :param instance: dict with ``problem_id_list`` and ``paragraph_list``
            (items holding ``paragraph_id`` and ``document_id``)
        :param with_valid: validate the serializer data and ``instance`` first
        :return: None
        """
        if with_valid:
            self.is_valid(raise_exception=True)
            BatchAssociation(data=instance).is_valid(raise_exception=True)
        knowledge_id = self.data.get('knowledge_id')
        paragraph_list = instance.get('paragraph_list')
        problem_id_list = instance.get('problem_id_list')
        # Restrict to problems of this knowledge base so foreign problem ids
        # cannot be linked into it.
        problem_list = QuerySet(Problem).filter(knowledge_id=knowledge_id, id__in=problem_id_list)

        exits_problem_paragraph_mapping = QuerySet(ProblemParagraphMapping).filter(
            problem_id__in=problem_id_list,
            paragraph_id__in=[paragraph.get('paragraph_id') for paragraph in paragraph_list])

        # Cartesian product problem x paragraph, built flat (no reduce/concat).
        candidate_list = [
            to_problem_paragraph_mapping(
                problem,
                paragraph.get('document_id'),
                paragraph.get('paragraph_id'),
                knowledge_id,
            )
            for problem in problem_list
            for paragraph in paragraph_list
        ]
        problem_paragraph_mapping_list = [
            (mapping, problem) for mapping, problem in candidate_list
            if not is_exits(exits_problem_paragraph_mapping, mapping)
        ]
        if not problem_paragraph_mapping_list:
            # Nothing new to store, so there is nothing to embed either.
            return

        QuerySet(ProblemParagraphMapping).bulk_create(
            [mapping for mapping, problem in problem_paragraph_mapping_list])

        data_list = [
            {
                'text': problem.content,
                'is_active': True,
                'source_type': SourceType.PROBLEM,
                'source_id': str(mapping.id),
                'document_id': str(mapping.document_id),
                'paragraph_id': str(mapping.paragraph_id),
                'knowledge_id': knowledge_id,
            } for mapping, problem in problem_paragraph_mapping_list
        ]
        model_id = get_embedding_model_id_by_knowledge_id(knowledge_id)
        embedding_by_data_list(data_list, model_id=model_id)
137+
29138
class Operate(serializers.Serializer):
30139
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
31140
problem_id = serializers.UUIDField(required=True, label=_('problem id'))
@@ -75,3 +184,55 @@ def edit(self, instance: Dict, with_valid=True):
75184
problem.save()
76185
model_id = get_embedding_model_id_by_knowledge_id(knowledge_id)
77186
update_problem_embedding(problem_id, content, model_id)
187+
188+
class Create(serializers.Serializer):
    """Create problems inside a knowledge base."""

    workspace_id = serializers.CharField(required=True, label=_('workspace id'))
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))

    def batch(self, problem_list, with_valid=True):
        """Create the problems of ``problem_list`` that do not exist yet.

        Duplicate texts in the request are collapsed, and texts already
        stored for this knowledge base are skipped. The remaining problems
        are created in the order they first appear in the request.

        :param problem_list: list of problem texts
        :param with_valid: validate the serializer data and ``problem_list`` first
        :return: serialized data of the newly created problems
        """
        if with_valid:
            self.is_valid(raise_exception=True)
            ProblemBatchSerializer(data={'problem_list': problem_list}).is_valid(raise_exception=True)
        # dict.fromkeys de-duplicates while keeping request order; set() would
        # make the creation/response order arbitrary.
        problem_list = list(dict.fromkeys(problem_list))
        knowledge_id = self.data.get('knowledge_id')
        exists_problem_content_set = {
            problem.content for problem in QuerySet(Problem).filter(
                knowledge_id=knowledge_id, content__in=problem_list)
        }
        problem_instance_list = [
            Problem(id=uuid.uuid7(), knowledge_id=knowledge_id, content=problem_content)
            for problem_content in problem_list
            if problem_content not in exists_problem_content_set
        ]

        if problem_instance_list:
            QuerySet(Problem).bulk_create(problem_instance_list)
        return [ProblemSerializer(problem_instance).data for problem_instance in problem_instance_list]
215+
216+
class Query(serializers.Serializer):
    """Look up the problems of a knowledge base, optionally filtered by content."""

    workspace_id = serializers.CharField(label=_('workspace id'), required=True)
    knowledge_id = serializers.UUIDField(label=_('knowledge id'), required=True)
    content = serializers.CharField(label=_('content'), required=False)

    def get_query_set(self):
        """Build the Problem queryset: knowledge scope, optional content filter, newest first."""
        problems = QuerySet(model=Problem).filter(knowledge_id=self.data.get('knowledge_id'))
        if 'content' in self.data:
            # Case-insensitive substring match on the problem text.
            problems = problems.filter(content__icontains=self.data.get('content'))
        return problems.order_by("-create_time")

    def list(self):
        """Run the list_problem.sql query over the full queryset."""
        query_set = self.get_query_set()
        sql_path = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_problem.sql')
        return native_search(query_set, select_string=get_file_content(sql_path))

    def page(self, current_page, page_size):
        """Run the list_problem.sql query for one page of the queryset."""
        query_set = self.get_query_set()
        sql_path = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_problem.sql')
        return native_page_search(current_page, page_size, query_set,
                                  select_string=get_file_content(sql_path))

apps/knowledge/urls.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
path( 'workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<int:current_page>/<int:page_size>', views.ParagraphView.Page.as_view()),
3131
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem/<str:problem_id>/association', views.ParagraphView.Association.as_view()),
3232
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem/<str:problem_id>/unassociation', views.ParagraphView.UnAssociation.as_view()),
33+
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/problem', views.ProblemView.as_view()),
34+
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/problem/batch_delete', views.ProblemView.BatchDelete.as_view()),
35+
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/problem/batch_association', views.ProblemView.BatchAssociation.as_view()),
3336
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<int:current_page>/<int:page_sige>', views.DocumentView.Page.as_view()),
3437
path('workspace/<str:workspace_id>/knowledge/<int:current_page>/<int:page_size>', views.KnowledgeView.Page.as_view()),
3538
]

apps/knowledge/views/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .document import *
22
from .knowledge import *
33
from .paragraph import *
4+
from .problem import *

apps/knowledge/views/problem.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
from django.utils.translation import gettext_lazy as _
2+
from drf_spectacular.utils import extend_schema
3+
from rest_framework.views import APIView
4+
from rest_framework.views import Request
5+
6+
from common.auth import TokenAuth
7+
from common.auth.authentication import has_permissions
8+
from common.constants.permission_constants import PermissionConstants
9+
from common.result import result
10+
from common.utils.common import query_params_to_single_dict
11+
from knowledge.api.problem import ProblemReadAPI, ProblemBatchCreateAPI, BatchAssociationAPI, BatchDeleteAPI
12+
from knowledge.serializers.problem import ProblemSerializers
13+
14+
15+
class ProblemView(APIView):
    """Problem endpoints of a knowledge base: list, batch create, batch associate, batch delete."""

    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['GET'],
        summary=_('Question list'),
        description=_('Question list'),
        operation_id=_('Question list'),
        parameters=ProblemReadAPI.get_parameters(),
        responses=ProblemReadAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph/Question')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str):
        """List the problems of the knowledge base, filtered by the query parameters."""
        q = ProblemSerializers.Query(
            data={
                **query_params_to_single_dict(request.query_params),
                # Path values are placed last so query parameters cannot override them.
                'workspace_id': workspace_id,
                'knowledge_id': knowledge_id
            }
        )
        q.is_valid(raise_exception=True)
        return result.success(q.list())

    @extend_schema(
        methods=['POST'],
        summary=_('Create question'),
        description=_('Create question'),
        operation_id=_('Create question'),
        parameters=ProblemBatchCreateAPI.get_parameters(),
        responses=ProblemBatchCreateAPI.get_response(),
        request=ProblemBatchCreateAPI.get_request(),
        tags=[_('Knowledge Base/Documentation/Paragraph/Question')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
    def post(self, request: Request, workspace_id: str, knowledge_id: str):
        """Create problems in batch from the request body."""
        # ``batch`` takes the problem list as a required argument; it is not
        # read from the serializer data, so the body must be passed here.
        return result.success(ProblemSerializers.Create(
            data={'workspace_id': workspace_id, 'knowledge_id': knowledge_id}
        ).batch(request.data))

    class BatchAssociation(APIView):
        """Associate several problems with several paragraphs in one request."""

        authentication_classes = [TokenAuth]

        @extend_schema(
            summary=_('Batch associated paragraphs'),
            description=_('Batch associated paragraphs'),
            operation_id=_('Batch associated paragraphs'),
            request=BatchAssociationAPI.get_request(),
            parameters=BatchAssociationAPI.get_parameters(),
            responses=BatchAssociationAPI.get_response(),
            tags=[_('Knowledge Base/Documentation/Paragraph/Question')]
        )
        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
        def put(self, request: Request, workspace_id: str, knowledge_id: str):
            """Forward the request body to ``BatchOperate.association``."""
            return result.success(ProblemSerializers.BatchOperate(
                data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id}
            ).association(request.data))

    class BatchDelete(APIView):
        """Delete several problems in one request."""

        authentication_classes = [TokenAuth]

        @extend_schema(
            methods=['PUT'],
            summary=_('Batch deletion issues'),
            description=_('Batch deletion issues'),
            operation_id=_('Batch deletion issues'),
            request=BatchDeleteAPI.get_request(),
            parameters=BatchDeleteAPI.get_parameters(),
            responses=BatchDeleteAPI.get_response(),
            tags=[_('Knowledge Base/Documentation/Paragraph/Question')]
        )
        @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
        def put(self, request: Request, workspace_id: str, knowledge_id: str):
            """Forward the request body to ``BatchOperate.delete`` as the problem id list."""
            return result.success(ProblemSerializers.BatchOperate(
                data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id}
            ).delete(request.data))

0 commit comments

Comments
 (0)