|
1 | 1 | # coding=utf-8 |
| 2 | +import ast |
2 | 3 | import io |
3 | | -import mimetypes |
4 | 4 |
|
5 | | -from django.core.files.uploadedfile import InMemoryUploadedFile |
| 5 | +import uuid_utils.compat as uuid |
6 | 6 | from django.db.models import QuerySet |
7 | 7 |
|
8 | 8 | from application.flow.i_step_node import NodeResult |
9 | 9 | from application.flow.step_node.document_extract_node.i_document_extract_node import IDocumentExtractNode |
10 | 10 | from knowledge.models import File, FileSourceType |
11 | 11 | from knowledge.serializers.document import split_handles, parse_table_handle_list, FileBufferHandle |
12 | | -from oss.serializers.file import FileSerializer |
13 | | - |
14 | | - |
15 | | -def bytes_to_uploaded_file(file_bytes, file_name="file.txt"): |
16 | | - content_type, _ = mimetypes.guess_type(file_name) |
17 | | - if content_type is None: |
18 | | - # If the type could not be guessed, fall back to the default binary file type |
19 | | - content_type = "application/octet-stream" |
20 | | - # Create an in-memory byte stream object |
21 | | - file_stream = io.BytesIO(file_bytes) |
22 | | - |
23 | | - # Get the file size |
24 | | - file_size = len(file_bytes) |
25 | | - |
26 | | - # Create the InMemoryUploadedFile object |
27 | | - uploaded_file = InMemoryUploadedFile( |
28 | | - file=file_stream, |
29 | | - field_name=None, |
30 | | - name=file_name, |
31 | | - content_type=content_type, |
32 | | - size=file_size, |
33 | | - charset=None, |
34 | | - ) |
35 | | - return uploaded_file |
36 | | - |
37 | 12 |
|
38 | 13 | splitter = '\n`-----------------------------------`\n' |
39 | 14 |
|
@@ -69,17 +44,42 @@ def save_image(image_list): |
69 | 44 | 'file_id': str(image.id) |
70 | 45 | } |
71 | 46 | file_bytes = image.meta.pop('content') |
72 | | - f = bytes_to_uploaded_file(file_bytes, image.file_name) |
73 | | - FileSerializer(data={ |
74 | | - 'file': f, |
75 | | - 'meta': meta, |
76 | | - 'source_id': meta['application_id'] if meta['application_id'] else meta['knowledge_id'], |
77 | | - 'source_type': FileSourceType.APPLICATION.value if meta[ |
78 | | - 'application_id'] else FileSourceType.KNOWLEDGE.value |
79 | | - }).upload() |
| 47 | + new_file = File( |
| 48 | + id=uuid.uuid7(), |
| 49 | + file_name=image.file_name, |
| 50 | + file_size=len(file_bytes), |
| 51 | + source_type=FileSourceType.APPLICATION.value if meta[ |
| 52 | + 'application_id'] else FileSourceType.KNOWLEDGE.value, |
| 53 | + source_id=meta['application_id'] if meta['application_id'] else meta['knowledge_id'], |
| 54 | + meta=meta |
| 55 | + ) |
| 56 | + new_file.save(file_bytes) |
80 | 57 |
|
81 | 58 | document_list = [] |
82 | 59 | for doc in document: |
| 60 | + if 'file_bytes' in doc: |
| 61 | + file_bytes = doc['file_bytes'] |
| 62 | + # 如果是字符串,转换为字节 |
| 63 | + if isinstance(file_bytes, str): |
| 64 | + file_bytes = ast.literal_eval(file_bytes) |
| 65 | + doc['file_id'] = doc.get('file_id') or uuid.uuid7() |
| 66 | + meta = { |
| 67 | + 'debug': False if (application_id or knowledge_id) else True, |
| 68 | + 'chat_id': chat_id, |
| 69 | + 'application_id': str(application_id) if application_id else None, |
| 70 | + 'knowledge_id': str(knowledge_id) if knowledge_id else None, |
| 71 | + 'file_id': str(doc['file_id']) |
| 72 | + } |
| 73 | + new_file = File( |
| 74 | + id=doc['file_id'], |
| 75 | + file_name=doc['name'], |
| 76 | + file_size=len(file_bytes), |
| 77 | + source_type=FileSourceType.APPLICATION.value if meta[ |
| 78 | + 'application_id'] else FileSourceType.KNOWLEDGE.value, |
| 79 | + source_id=meta['application_id'] if meta['application_id'] else meta['knowledge_id'], |
| 80 | + meta={} |
| 81 | + ) |
| 82 | + new_file.save(file_bytes) |
83 | 83 | file = QuerySet(File).filter(id=doc['file_id']).first() |
84 | 84 | buffer = io.BytesIO(file.get_bytes()) |
85 | 85 | buffer.name = doc['name'] # this is the important line |
|
0 commit comments