11# coding=utf-8
22import io
3+ import mimetypes
34
5+ from django .core .files .uploadedfile import InMemoryUploadedFile
46from django .db .models import QuerySet
57
68from application .flow .i_step_node import NodeResult
79from application .flow .step_node .document_extract_node .i_document_extract_node import IDocumentExtractNode
10+ from application .models import Chat
811from dataset .models import File
912from dataset .serializers .document_serializers import split_handles , parse_table_handle_list , FileBufferHandle
13+ from dataset .serializers .file_serializers import FileSerializer
14+
15+
def bytes_to_uploaded_file(file_bytes, file_name="file.txt"):
    """Wrap raw bytes in a Django ``InMemoryUploadedFile``.

    The MIME type is guessed from ``file_name``; when it cannot be guessed,
    the generic ``application/octet-stream`` type is used instead.

    Args:
        file_bytes: Raw content of the file.
        file_name: Name given to the upload; also used to guess the MIME type.

    Returns:
        An ``InMemoryUploadedFile`` backed by an in-memory byte stream, with
        ``size`` set to ``len(file_bytes)`` and no field name or charset.
    """
    guessed_type = mimetypes.guess_type(file_name)[0]
    # Fall back to the generic binary type when the name is not recognised.
    mime_type = "application/octet-stream" if guessed_type is None else guessed_type

    # The stream is built before the length is taken, matching the order in
    # which the original touched ``file_bytes``.
    return InMemoryUploadedFile(
        file=io.BytesIO(file_bytes),
        field_name=None,
        name=file_name,
        content_type=mime_type,
        size=len(file_bytes),
        charset=None,
    )
1037
1138
1239class BaseDocumentExtractNode (IDocumentExtractNode ):
13- def execute (self , document , ** kwargs ):
40+ def execute (self , document , chat_id , ** kwargs ):
1441 get_buffer = FileBufferHandle ().get_buffer
1542
1643 self .context ['document_list' ] = document
@@ -19,6 +46,20 @@ def execute(self, document, **kwargs):
1946 if document is None or not isinstance (document , list ):
2047 return NodeResult ({'content' : content }, {})
2148
49+ application = self .workflow_manage .work_flow_post_handler .chat_info .application
50+
51+ # doc文件中的图片保存
52+ def save_image (image_list ):
53+ for image in image_list :
54+ meta = {
55+ 'debug' : False if application .id else True ,
56+ 'chat_id' : chat_id ,
57+ 'application_id' : str (application .id ) if application .id else None ,
58+ 'file_id' : str (image .id )
59+ }
60+ file = bytes_to_uploaded_file (image .image , image .image_name )
61+ FileSerializer (data = {'file' : file , 'meta' : meta }).upload ()
62+
2263 for doc in document :
2364 file = QuerySet (File ).filter (id = doc ['file_id' ]).first ()
2465 buffer = io .BytesIO (file .get_byte ().tobytes ())
@@ -28,7 +69,7 @@ def execute(self, document, **kwargs):
2869 if split_handle .support (buffer , get_buffer ):
2970 # 回到文件头
3071 buffer .seek (0 )
31- file_content = split_handle .get_content (buffer )
72+ file_content = split_handle .get_content (buffer , save_image )
3273 content .append ('## ' + doc ['name' ] + '\n ' + file_content )
3374 break
3475
0 commit comments