|
8 | 8 | """ |
9 | 9 | import os |
10 | 10 | import re |
11 | | -import uuid_utils.compat as uuid |
12 | 11 | import zipfile |
13 | 12 | from typing import List |
14 | 13 |
|
| 14 | +import uuid_utils.compat as uuid |
15 | 15 | from django.db.models import QuerySet |
16 | 16 | from django.utils.translation import gettext_lazy as _ |
17 | 17 | from rest_framework import serializers |
|
27 | 27 | from models_provider.tools import get_model |
28 | 28 |
|
29 | 29 |
|
30 | | -def zip_dir(zip_path, output=None): |
31 | | - output = output or os.path.basename(zip_path) + '.zip' |
32 | | - zip = zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) |
33 | | - for root, dirs, files in os.walk(zip_path): |
34 | | - relative_root = '' if root == zip_path else root.replace(zip_path, '') + os.sep |
35 | | - for filename in files: |
36 | | - zip.write(os.path.join(root, filename), relative_root + filename) |
37 | | - zip.close() |
38 | | - |
39 | | - |
40 | | -def is_valid_uuid(s): |
41 | | - try: |
42 | | - uuid.UUID(s) |
43 | | - return True |
44 | | - except ValueError: |
45 | | - return False |
46 | | - |
47 | | - |
48 | | -def write_image(zip_path: str, image_list: List[str]): |
49 | | - for image in image_list: |
50 | | - search = re.search("\(.*\)", image) |
51 | | - if search: |
52 | | - text = search.group() |
53 | | - if text.startswith('(/api/file/'): |
54 | | - r = text.replace('(/api/file/', '').replace(')', '') |
55 | | - r = r.strip().split(" ")[0] |
56 | | - if not is_valid_uuid(r): |
57 | | - break |
58 | | - file = QuerySet(File).filter(id=r).first() |
59 | | - if file is None: |
60 | | - break |
61 | | - zip_inner_path = os.path.join('api', 'file', r) |
62 | | - file_path = os.path.join(zip_path, zip_inner_path) |
63 | | - if not os.path.exists(os.path.dirname(file_path)): |
64 | | - os.makedirs(os.path.dirname(file_path)) |
65 | | - with open(os.path.join(zip_path, file_path), 'wb') as f: |
66 | | - f.write(file.get_bytes()) |
67 | | - # else: |
68 | | - # r = text.replace('(/api/image/', '').replace(')', '') |
69 | | - # r = r.strip().split(" ")[0] |
70 | | - # if not is_valid_uuid(r): |
71 | | - # break |
72 | | - # image_model = QuerySet(Image).filter(id=r).first() |
73 | | - # if image_model is None: |
74 | | - # break |
75 | | - # zip_inner_path = os.path.join('api', 'image', r) |
76 | | - # file_path = os.path.join(zip_path, zip_inner_path) |
77 | | - # if not os.path.exists(os.path.dirname(file_path)): |
78 | | - # os.makedirs(os.path.dirname(file_path)) |
79 | | - # with open(file_path, 'wb') as f: |
80 | | - # f.write(image_model.image) |
81 | | - |
82 | | - |
83 | | -def update_document_char_length(document_id: str): |
84 | | - update_execute(get_file_content( |
85 | | - os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'update_document_char_length.sql')), |
86 | | - (document_id, document_id)) |
87 | | - |
88 | | - |
89 | | -def list_paragraph(paragraph_list: List[str]): |
90 | | - if paragraph_list is None or len(paragraph_list) == 0: |
91 | | - return [] |
92 | | - return native_search(QuerySet(Paragraph).filter(id__in=paragraph_list), get_file_content( |
93 | | - os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_paragraph.sql'))) |
94 | | - |
95 | | - |
96 | 30 | class MetaSerializer(serializers.Serializer): |
97 | 31 | class WebMeta(serializers.Serializer): |
98 | 32 | source_url = serializers.CharField(required=True, label=_('source url')) |
@@ -133,17 +67,11 @@ def __init__(self, knowledge_id: str, document_id: str, paragraph_id: str, probl |
133 | 67 | self.problem_content = problem_content |
134 | 68 |
|
135 | 69 |
|
136 | | -def or_get(exists_problem_list, content, knowledge_id, document_id, paragraph_id, problem_content_dict): |
137 | | - if content in problem_content_dict: |
138 | | - return problem_content_dict.get(content)[0], document_id, paragraph_id |
139 | | - exists = [row for row in exists_problem_list if row.content == content] |
140 | | - if len(exists) > 0: |
141 | | - problem_content_dict[content] = exists[0], False |
142 | | - return exists[0], document_id, paragraph_id |
143 | | - else: |
144 | | - problem = Problem(id=uuid.uuid7(), content=content, knowledge_id=knowledge_id) |
145 | | - problem_content_dict[content] = problem, True |
146 | | - return problem, document_id, paragraph_id |
| 70 | +class GenerateRelatedSerializer(serializers.Serializer): |
| 71 | + model_id = serializers.UUIDField(required=True, label=_('Model id')) |
| 72 | + prompt = serializers.CharField(required=True, label=_('Prompt word')) |
| 73 | + state_list = serializers.ListField(required=False, child=serializers.CharField(required=True), |
| 74 | + label=_("state list")) |
147 | 75 |
|
148 | 76 |
|
149 | 77 | class ProblemParagraphManage: |
@@ -216,8 +144,80 @@ def get_embedding_model_id_by_knowledge_id_list(knowledge_id_list: List): |
216 | 144 | return str(knowledge_list[0].embedding_model_id) |
217 | 145 |
|
218 | 146 |
|
219 | | -class GenerateRelatedSerializer(serializers.Serializer): |
220 | | - model_id = serializers.UUIDField(required=True, label=_('Model id')) |
221 | | - prompt = serializers.CharField(required=True, label=_('Prompt word')) |
222 | | - state_list = serializers.ListField(required=False, child=serializers.CharField(required=True), |
223 | | - label=_("state list")) |
| 147 | +def zip_dir(zip_path, output=None): |
| 148 | + output = output or os.path.basename(zip_path) + '.zip' |
| 149 | + zip = zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) |
| 150 | + for root, dirs, files in os.walk(zip_path): |
| 151 | + relative_root = '' if root == zip_path else root.replace(zip_path, '') + os.sep |
| 152 | + for filename in files: |
| 153 | + zip.write(os.path.join(root, filename), relative_root + filename) |
| 154 | + zip.close() |
| 155 | + |
| 156 | + |
| 157 | +def is_valid_uuid(s): |
| 158 | + try: |
| 159 | + uuid.UUID(s) |
| 160 | + return True |
| 161 | + except ValueError: |
| 162 | + return False |
| 163 | + |
| 164 | + |
| 165 | +def write_image(zip_path: str, image_list: List[str]): |
| 166 | + for image in image_list: |
| 167 | + search = re.search("\(.*\)", image) |
| 168 | + if search: |
| 169 | + text = search.group() |
| 170 | + if text.startswith('(/api/file/'): |
| 171 | + r = text.replace('(/api/file/', '').replace(')', '') |
| 172 | + r = r.strip().split(" ")[0] |
| 173 | + if not is_valid_uuid(r): |
| 174 | + break |
| 175 | + file = QuerySet(File).filter(id=r).first() |
| 176 | + if file is None: |
| 177 | + break |
| 178 | + zip_inner_path = os.path.join('api', 'file', r) |
| 179 | + file_path = os.path.join(zip_path, zip_inner_path) |
| 180 | + if not os.path.exists(os.path.dirname(file_path)): |
| 181 | + os.makedirs(os.path.dirname(file_path)) |
| 182 | + with open(os.path.join(zip_path, file_path), 'wb') as f: |
| 183 | + f.write(file.get_bytes()) |
| 184 | + # else: |
| 185 | + # r = text.replace('(/api/image/', '').replace(')', '') |
| 186 | + # r = r.strip().split(" ")[0] |
| 187 | + # if not is_valid_uuid(r): |
| 188 | + # break |
| 189 | + # image_model = QuerySet(Image).filter(id=r).first() |
| 190 | + # if image_model is None: |
| 191 | + # break |
| 192 | + # zip_inner_path = os.path.join('api', 'image', r) |
| 193 | + # file_path = os.path.join(zip_path, zip_inner_path) |
| 194 | + # if not os.path.exists(os.path.dirname(file_path)): |
| 195 | + # os.makedirs(os.path.dirname(file_path)) |
| 196 | + # with open(file_path, 'wb') as f: |
| 197 | + # f.write(image_model.image) |
| 198 | + |
| 199 | + |
| 200 | +def update_document_char_length(document_id: str): |
| 201 | + update_execute(get_file_content( |
| 202 | + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'update_document_char_length.sql')), |
| 203 | + (document_id, document_id)) |
| 204 | + |
| 205 | + |
| 206 | +def list_paragraph(paragraph_list: List[str]): |
| 207 | + if paragraph_list is None or len(paragraph_list) == 0: |
| 208 | + return [] |
| 209 | + return native_search(QuerySet(Paragraph).filter(id__in=paragraph_list), get_file_content( |
| 210 | + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_paragraph.sql'))) |
| 211 | + |
| 212 | + |
| 213 | +def or_get(exists_problem_list, content, knowledge_id, document_id, paragraph_id, problem_content_dict): |
| 214 | + if content in problem_content_dict: |
| 215 | + return problem_content_dict.get(content)[0], document_id, paragraph_id |
| 216 | + exists = [row for row in exists_problem_list if row.content == content] |
| 217 | + if len(exists) > 0: |
| 218 | + problem_content_dict[content] = exists[0], False |
| 219 | + return exists[0], document_id, paragraph_id |
| 220 | + else: |
| 221 | + problem = Problem(id=uuid.uuid7(), content=content, knowledge_id=knowledge_id) |
| 222 | + problem_content_dict[content] = problem, True |
| 223 | + return problem, document_id, paragraph_id |
0 commit comments