|
8 | 8 | from django.utils.translation import gettext_lazy as _ |
9 | 9 | from rest_framework import serializers |
10 | 10 |
|
| 11 | +from common.db.search import page_search |
11 | 12 | from common.exception.app_exception import AppApiException |
12 | 13 | from common.utils.common import post |
13 | 14 | from knowledge.models import Paragraph, Problem, Document, ProblemParagraphMapping, SourceType |
14 | 15 | from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage, \ |
15 | | - get_embedding_model_id_by_knowledge_id, update_document_char_length |
| 16 | + get_embedding_model_id_by_knowledge_id, update_document_char_length, BatchSerializer |
16 | 17 | from knowledge.serializers.problem import ProblemInstanceSerializer, ProblemSerializer, ProblemSerializers |
17 | 18 | from knowledge.task.embedding import embedding_by_paragraph, enable_embedding_by_paragraph, \ |
18 | 19 | disable_embedding_by_paragraph, \ |
19 | | - delete_embedding_by_paragraph, embedding_by_problem as embedding_by_problem_task |
| 20 | + delete_embedding_by_paragraph, embedding_by_problem as embedding_by_problem_task, delete_embedding_by_paragraph_ids, \ |
| 21 | + embedding_by_problem, delete_embedding_by_source |
20 | 22 |
|
21 | 23 |
|
22 | 24 | class ParagraphSerializer(serializers.ModelSerializer): |
@@ -115,6 +117,7 @@ def save(self, instance: Dict, with_valid=True, with_embedding=True, embedding_b |
115 | 117 | ).one(with_valid=True) |
116 | 118 |
|
117 | 119 | class Operate(serializers.Serializer): |
| 120 | + workspace_id = serializers.CharField(required=True, label=_('workspace id')) |
118 | 121 | # 段落id |
119 | 122 | paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id')) |
120 | 123 | # 知识库id |
@@ -282,6 +285,100 @@ def or_get(exists_problem_list, content, knowledge_id): |
282 | 285 | else: |
283 | 286 | return Problem(id=uuid.uuid7(), content=content, knowledge_id=knowledge_id) |
284 | 287 |
|
class Query(serializers.Serializer):
    """Look up the paragraphs of one document, optionally narrowed by text.

    ``knowledge_id`` and ``document_id`` are mandatory; ``title`` and
    ``content`` are optional case-insensitive substring filters.
    """
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))
    title = serializers.CharField(required=False, label=_('section title'))
    content = serializers.CharField(required=False)

    def get_query_set(self):
        """Build the filtered, ordered ``Paragraph`` queryset for this query.

        Returns:
            A queryset restricted to the given knowledge base and document,
            further filtered by ``title`` / ``content`` when those keys are
            present, ordered by newest ``create_time`` first and then ``id``.
        """
        query_set = QuerySet(model=Paragraph).filter(
            knowledge_id=self.data.get('knowledge_id'),
            document_id=self.data.get('document_id'))
        if 'title' in self.data:
            query_set = query_set.filter(title__icontains=self.data.get('title'))
        if 'content' in self.data:
            query_set = query_set.filter(content__icontains=self.data.get('content'))
        # order_by() returns a new QuerySet instead of mutating in place, so
        # its result must be the value handed back; otherwise the ordering is
        # silently dropped and paginated results are not stable.
        return query_set.order_by('-create_time', 'id')

    def list(self):
        """Return every matching paragraph serialized with ``ParagraphSerializer``."""
        return [ParagraphSerializer(row).data for row in self.get_query_set()]

    def page(self, current_page, page_size):
        """Return one page of matching paragraphs via ``page_search``.

        Args:
            current_page: Page index forwarded to ``page_search``.
            page_size: Page size forwarded to ``page_search``.

        Returns:
            Whatever ``page_search`` produces; each row is converted with
            ``ParagraphSerializer(row).data``.
        """
        return page_search(current_page, page_size, self.get_query_set(),
                           lambda row: ParagraphSerializer(row).data)
| 312 | + |
class Association(serializers.Serializer):
    """Create or remove the link between one problem and one paragraph."""
    workspace_id = serializers.CharField(required=True, label=_('workspace id'))
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    problem_id = serializers.UUIDField(required=True, label=_('problem id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))
    paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))

    def is_valid(self, *, raise_exception=True):
        """Validate the fields and check that both referenced rows exist.

        Args:
            raise_exception: Accepted for signature compatibility; field
                validation always runs with ``raise_exception=True``.

        Returns:
            ``True`` when validation passes.

        Raises:
            AppApiException: If no ``Paragraph`` or no ``Problem`` with the
                given id exists in the given knowledge base.
        """
        super().is_valid(raise_exception=True)
        knowledge_id = self.data.get('knowledge_id')
        paragraph_id = self.data.get('paragraph_id')
        problem_id = self.data.get("problem_id")
        if not QuerySet(Paragraph).filter(knowledge_id=knowledge_id, id=paragraph_id).exists():
            raise AppApiException(500, _('Paragraph does not exist'))
        if not QuerySet(Problem).filter(knowledge_id=knowledge_id, id=problem_id).exists():
            raise AppApiException(500, _('Problem does not exist'))
        # Serializer.is_valid() is expected to return a boolean; without this
        # the override returned None, which is falsy for any caller checking it.
        return True

    def association(self, with_valid=True, with_embedding=True):
        """Persist a problem-paragraph mapping and optionally embed the problem.

        Args:
            with_valid: Run ``is_valid`` first when true.
            with_embedding: When true, pass the problem text to
                ``embedding_by_problem`` together with the knowledge base's
                embedding model id.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        knowledge_id = self.data.get('knowledge_id')
        document_id = self.data.get('document_id')
        paragraph_id = self.data.get('paragraph_id')
        problem = QuerySet(Problem).filter(id=self.data.get("problem_id")).first()
        problem_paragraph_mapping = ProblemParagraphMapping(id=uuid.uuid7(),
                                                            document_id=document_id,
                                                            paragraph_id=paragraph_id,
                                                            knowledge_id=knowledge_id,
                                                            problem_id=problem.id)
        problem_paragraph_mapping.save()
        if with_embedding:
            model_id = get_embedding_model_id_by_knowledge_id(knowledge_id)
            # The mapping id is used as the embedding's source id; it is the
            # same id un_association() later passes to delete_embedding_by_source.
            embedding_by_problem({
                'text': problem.content,
                'is_active': True,
                'source_type': SourceType.PROBLEM,
                'source_id': problem_paragraph_mapping.id,
                'document_id': document_id,
                'paragraph_id': paragraph_id,
                'knowledge_id': knowledge_id,
            }, model_id)

    def un_association(self, with_valid=True):
        """Delete the problem-paragraph mapping and its embedding.

        Args:
            with_valid: Run ``is_valid`` first when true.

        Returns:
            ``True`` once the mapping and its embedding have been removed.

        Raises:
            AppApiException: If the problem and paragraph are not linked.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        problem_paragraph_mapping = QuerySet(ProblemParagraphMapping).filter(
            paragraph_id=self.data.get('paragraph_id'),
            knowledge_id=self.data.get('knowledge_id'),
            problem_id=self.data.get('problem_id')).first()
        # Both rows can exist without being linked; first() then yields None
        # and would otherwise surface as an AttributeError.
        if problem_paragraph_mapping is None:
            raise AppApiException(500, _('Problem and paragraph are not associated'))
        # Capture the id before delete(); it is still needed for the embedding cleanup.
        problem_paragraph_mapping_id = problem_paragraph_mapping.id
        problem_paragraph_mapping.delete()
        delete_embedding_by_source(problem_paragraph_mapping_id)
        return True
| 364 | + |
class Batch(serializers.Serializer):
    """Bulk operations on the paragraphs of a single document."""
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))

    @transaction.atomic
    def batch_delete(self, instance: Dict, with_valid=True):
        """Delete the paragraphs listed under ``instance["id_list"]``.

        Args:
            instance: Request payload; its ``id_list`` entry holds the
                paragraph ids to remove.
            with_valid: When true, validate the payload with
                ``BatchSerializer`` and then validate this serializer.

        Returns:
            ``True`` after all cleanup steps have run.
        """
        if with_valid:
            payload_check = BatchSerializer(data=instance)
            payload_check.is_valid(model=Paragraph, raise_exception=True)
            self.is_valid(raise_exception=True)
        target_ids = instance.get("id_list")
        # NOTE(review): the delete filters on id only, not on knowledge_id /
        # document_id - confirm callers cannot pass ids from other documents.
        QuerySet(Paragraph).filter(id__in=target_ids).delete()
        delete_problems_and_mappings(target_ids)
        document_id = self.data.get('document_id')
        update_document_char_length(document_id)
        # Remove the matching entries from the vector store
        delete_embedding_by_paragraph_ids(target_ids)
        return True
| 381 | + |
285 | 382 |
|
286 | 383 | def delete_problems_and_mappings(paragraph_ids): |
287 | 384 | problem_paragraph_mappings = ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids) |
|
0 commit comments