11import os
2- from typing import Dict
2+ from functools import reduce
3+ from typing import Dict , List
34
5+ import uuid_utils .compat as uuid
46from django .db import transaction
57from django .db .models import QuerySet
68from django .utils .translation import gettext_lazy as _
79from rest_framework import serializers
810
9- from common .db .search import native_search
11+ from common .db .search import native_search , native_page_search
1012from common .utils .common import get_file_content
11- from knowledge .models import Problem , ProblemParagraphMapping , Paragraph , Knowledge
13+ from knowledge .models import Problem , ProblemParagraphMapping , Paragraph , Knowledge , SourceType
1214from knowledge .serializers .common import get_embedding_model_id_by_knowledge_id
13- from knowledge .task .embedding import delete_embedding_by_source_ids , update_problem_embedding
15+ from knowledge .task .embedding import delete_embedding_by_source_ids , update_problem_embedding , embedding_by_data_list
1416from maxkb .const import PROJECT_DIR
1517
1618
@@ -25,7 +27,114 @@ class ProblemInstanceSerializer(serializers.Serializer):
2527 content = serializers .CharField (required = True , max_length = 256 , label = _ ('content' ))
2628
2729
class ProblemMappingSerializer(serializers.Serializer):
    # Payload schema: a paragraph id together with a document id.
    # Both fields are mandatory and must parse as UUIDs.
    paragraph_id = serializers.UUIDField(
        label=_('paragraph id'),
        required=True,
    )
    document_id = serializers.UUIDField(
        label=_('document id'),
        required=True,
    )
33+
34+
class ProblemBatchSerializer(serializers.Serializer):
    # Payload schema: a mandatory list of problem texts, each a string of
    # at most 256 characters.
    problem_list = serializers.ListField(
        child=serializers.CharField(
            label=_('problem'),
            max_length=256,
            required=True,
        ),
        label=_('problem list'),
        required=True,
    )
38+
39+
class ProblemBatchDeleteSerializer(serializers.Serializer):
    # Payload schema: a mandatory list of problem ids, each a UUID.
    problem_id_list = serializers.ListField(
        child=serializers.UUIDField(
            label=_('problem id'),
            required=True,
        ),
        label=_('problem id list'),
        required=True,
    )
43+
44+
class AssociationParagraph(serializers.Serializer):
    # One association target: the paragraph id and the id of its document.
    # Both fields are mandatory UUIDs.
    paragraph_id = serializers.UUIDField(
        label=_('paragraph id'),
        required=True,
    )
    document_id = serializers.UUIDField(
        label=_('document id'),
        required=True,
    )
48+
49+
class BatchAssociation(serializers.Serializer):
    # Payload schema for a batch association request: a mandatory list of
    # problem UUIDs plus a list of AssociationParagraph entries.
    problem_id_list = serializers.ListField(
        child=serializers.UUIDField(
            label=_('problem id'),
            required=True,
        ),
        label=_('problem id list'),
        required=True,
    )
    paragraph_list = AssociationParagraph(many=True)
54+
55+
def is_exits(exits_problem_paragraph_mapping_list, new_paragraph_mapping):
    """Return True when an equivalent mapping is already in the collection.

    Two mappings are considered equivalent when their ``paragraph_id``,
    ``problem_id`` and ``knowledge_id`` all match. Every id is passed through
    ``str()`` on BOTH sides, so a ``UUID`` object and its string form compare
    equal (previously only the existing side was stringified, which made a
    UUID-typed id on the new mapping never match).

    :param exits_problem_paragraph_mapping_list: iterable of already existing
        mappings (objects exposing the three id attributes).
    :param new_paragraph_mapping: the candidate mapping to look for.
    :return: ``True`` if an equivalent mapping exists, otherwise ``False``.
    """
    wanted = (
        str(new_paragraph_mapping.paragraph_id),
        str(new_paragraph_mapping.problem_id),
        str(new_paragraph_mapping.knowledge_id),
    )
    # any() stops at the first hit instead of materialising every match.
    return any(
        (str(existing.paragraph_id), str(existing.problem_id), str(existing.knowledge_id)) == wanted
        for existing in exits_problem_paragraph_mapping_list
    )


def to_problem_paragraph_mapping(problem, document_id: str, paragraph_id: str, dataset_id: str):
    """Build an unsaved ``ProblemParagraphMapping`` linking a problem to a paragraph.

    The ``dataset_id`` parameter keeps its historical name so existing callers
    (positional or keyword) keep working; its value is stored in the mapping's
    ``knowledge_id`` field, which is the field the rest of this module filters
    ``ProblemParagraphMapping`` on.

    :param problem: object exposing ``id``; returned unchanged alongside the mapping.
    :param document_id: id of the document that owns the paragraph.
    :param paragraph_id: id of the paragraph to associate.
    :param dataset_id: id of the knowledge base the mapping belongs to.
    :return: a ``(mapping, problem)`` tuple; the mapping is not saved.
    """
    mapping = ProblemParagraphMapping(
        id=uuid.uuid1(),
        document_id=document_id,
        paragraph_id=paragraph_id,
        knowledge_id=dataset_id,
        problem_id=str(problem.id),
    )
    return mapping, problem
71+
72+
2873class ProblemSerializers (serializers .Serializer ):
class BatchOperate(serializers.Serializer):
    # Batch operations on problems, scoped to the knowledge base identified
    # by ``knowledge_id``.
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))

    def delete(self, problem_id_list: List, with_valid=True):
        """Delete the given problems, their paragraph mappings and embeddings.

        :param problem_id_list: ids of the problems to delete.
        :param with_valid: validate this serializer first when True.
        :return: always ``True``.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        knowledge_id = self.data.get('knowledge_id')
        problem_paragraph_mapping_list = QuerySet(ProblemParagraphMapping).filter(
            knowledge_id=knowledge_id,
            problem_id__in=problem_id_list)
        # Collect the mapping ids BEFORE deleting the rows; they are the
        # source ids handed to the embedding cleanup below.
        source_ids = [row.id for row in problem_paragraph_mapping_list]
        problem_paragraph_mapping_list.delete()
        # Scope the problem deletion to this knowledge base as well, so ids
        # belonging to another knowledge base are never removed here.
        QuerySet(Problem).filter(knowledge_id=knowledge_id, id__in=problem_id_list).delete()
        delete_embedding_by_source_ids(source_ids)
        return True

    def association(self, instance: Dict, with_valid=True):
        """Associate every listed problem with every listed paragraph.

        Mappings that already exist (per ``is_exits``) are skipped; the new
        ones are bulk-created and then passed to ``embedding_by_data_list``.

        :param instance: dict with ``problem_id_list`` and ``paragraph_list``
            (items carrying ``paragraph_id`` and ``document_id``).
        :param with_valid: validate this serializer and ``instance`` first when True.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
            BatchAssociation(data=instance).is_valid(raise_exception=True)
        knowledge_id = self.data.get('knowledge_id')
        paragraph_list = instance.get('paragraph_list')
        problem_id_list = instance.get('problem_id_list')
        # Only problems of this knowledge base may be associated.
        problem_list = QuerySet(Problem).filter(knowledge_id=knowledge_id, id__in=problem_id_list)

        # Materialise once: the list is scanned for every candidate mapping.
        exits_problem_paragraph_mapping = list(QuerySet(
            ProblemParagraphMapping
        ).filter(problem_id__in=problem_id_list,
                 paragraph_id__in=[p.get('paragraph_id') for p in paragraph_list]))

        # Cartesian product problem x paragraph, minus mappings that already
        # exist. A plain nested loop replaces the quadratic reduce() flatten.
        problem_paragraph_mapping_list = []
        for problem in problem_list:
            for paragraph in paragraph_list:
                problem_paragraph_mapping, mapped_problem = to_problem_paragraph_mapping(
                    problem, paragraph.get('document_id'),
                    paragraph.get('paragraph_id'),
                    knowledge_id
                )
                if is_exits(exits_problem_paragraph_mapping, problem_paragraph_mapping):
                    continue
                problem_paragraph_mapping_list.append((problem_paragraph_mapping, mapped_problem))

        QuerySet(ProblemParagraphMapping).bulk_create([
            problem_paragraph_mapping for problem_paragraph_mapping, _problem in problem_paragraph_mapping_list
        ])

        data_list = [
            {
                'text': problem.content,
                'is_active': True,
                'source_type': SourceType.PROBLEM,
                'source_id': str(problem_paragraph_mapping.id),
                'document_id': str(problem_paragraph_mapping.document_id),
                'paragraph_id': str(problem_paragraph_mapping.paragraph_id),
                'knowledge_id': knowledge_id,
            } for problem_paragraph_mapping, problem in problem_paragraph_mapping_list
        ]
        model_id = get_embedding_model_id_by_knowledge_id(knowledge_id)
        embedding_by_data_list(data_list, model_id=model_id)
137+
29138 class Operate (serializers .Serializer ):
30139 knowledge_id = serializers .UUIDField (required = True , label = _ ('knowledge id' ))
31140 problem_id = serializers .UUIDField (required = True , label = _ ('problem id' ))
@@ -75,3 +184,55 @@ def edit(self, instance: Dict, with_valid=True):
75184 problem .save ()
76185 model_id = get_embedding_model_id_by_knowledge_id (knowledge_id )
77186 update_problem_embedding (problem_id , content , model_id )
187+
class Create(serializers.Serializer):
    # Creates problems inside the knowledge base identified by ``knowledge_id``.
    workspace_id = serializers.CharField(required=True, label=_('workspace id'))
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))

    def batch(self, problem_list, with_valid=True):
        """Create the problems of ``problem_list`` that do not exist yet.

        Duplicate entries in the request are collapsed, and contents already
        stored for this knowledge base are skipped.

        :param problem_list: list of problem content strings.
        :param with_valid: validate this serializer and the list first when True.
        :return: serialized data of the newly created problems, in request order.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
            ProblemBatchSerializer(data={'problem_list': problem_list}).is_valid(raise_exception=True)
        # dict.fromkeys de-duplicates while keeping the request order, so the
        # creation order and the returned list are deterministic (set() is not).
        problem_list = list(dict.fromkeys(problem_list))
        knowledge_id = self.data.get('knowledge_id')
        # Set membership instead of scanning a list for every candidate.
        exists_problem_content_set = {
            problem.content for problem in QuerySet(
                Problem
            ).filter(knowledge_id=knowledge_id, content__in=problem_list)
        }
        problem_instance_list = [
            Problem(id=uuid.uuid7(), knowledge_id=knowledge_id, content=problem_content)
            for problem_content in problem_list
            if problem_content not in exists_problem_content_set
        ]

        if problem_instance_list:
            QuerySet(Problem).bulk_create(problem_instance_list)
        return [ProblemSerializer(problem_instance).data for problem_instance in problem_instance_list]
215+
class Query(serializers.Serializer):
    # Lists the problems of one knowledge base, optionally narrowed by a
    # case-insensitive substring of their content.
    workspace_id = serializers.CharField(label=_('workspace id'), required=True)
    knowledge_id = serializers.UUIDField(label=_('knowledge id'), required=True)
    content = serializers.CharField(label=_('content'), required=False)

    def get_query_set(self):
        """Build the Problem queryset for this query, newest first."""
        problems = QuerySet(model=Problem).filter(knowledge_id=self.data.get('knowledge_id'))
        # The content filter applies only when the field was supplied.
        if 'content' in self.data:
            problems = problems.filter(content__icontains=self.data.get('content'))
        return problems.order_by("-create_time")

    def list(self):
        """Return every matching problem via the ``list_problem.sql`` select."""
        sql_path = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_problem.sql')
        problems = self.get_query_set()
        select_sql = get_file_content(sql_path)
        return native_search(problems, select_string=select_sql)

    def page(self, current_page, page_size):
        """Return one page of matching problems via the ``list_problem.sql`` select."""
        sql_path = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_problem.sql')
        problems = self.get_query_set()
        select_sql = get_file_content(sql_path)
        return native_page_search(current_page, page_size, problems, select_string=select_sql)
0 commit comments