Skip to content

Commit 43bef21

Browse files
committed
refactor: reorganize file handling imports into a structured directory
1 parent 2a5cd4c commit 43bef21

File tree

13 files changed

+28
-29
lines changed

13 files changed

+28
-29
lines changed

apps/common/handle/impl/qa/zip_parse_qa_handle.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,20 +9,19 @@
99
import io
1010
import os
1111
import re
12-
import uuid_utils.compat as uuid
1312
import zipfile
1413
from typing import List
1514
from urllib.parse import urljoin
1615

17-
from django.db.models import QuerySet
16+
import uuid_utils.compat as uuid
17+
from django.utils.translation import gettext_lazy as _
1818

1919
from common.handle.base_parse_qa_handle import BaseParseQAHandle
2020
from common.handle.impl.qa.csv_parse_qa_handle import CsvParseQAHandle
2121
from common.handle.impl.qa.xls_parse_qa_handle import XlsParseQAHandle
2222
from common.handle.impl.qa.xlsx_parse_qa_handle import XlsxParseQAHandle
2323
from common.utils.common import parse_md_image
2424
from knowledge.models import File
25-
from django.utils.translation import gettext_lazy as _
2625

2726

2827
class FileBufferHandle:

apps/common/handle/impl/table/csv_parse_table_handle.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ def support(self, file, get_buffer):
1515
return True
1616
return False
1717

18-
def handle(self, file, get_buffer,save_image):
18+
def handle(self, file, get_buffer, save_image):
1919
buffer = get_buffer(file)
2020
try:
2121
content = buffer.decode(detect(buffer)['encoding'])
@@ -41,4 +41,4 @@ def get_content(self, file, save_image):
4141
return buffer.decode(detect(buffer)['encoding'])
4242
except BaseException as e:
4343
max_kb.error(f'csv split handle error: {e}')
44-
return f'error: {e}'
44+
return f'error: {e}'

apps/common/handle/impl/table/xlsx_parse_table_handle.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,6 @@ def handle(self, file, get_buffer, save_image):
7878
return [{'name': file.name, 'paragraphs': []}]
7979
return result
8080

81-
8281
def get_content(self, file, save_image):
8382
try:
8483
# 加载 Excel 文件

apps/common/handle/impl/text/__init__.py

Whitespace-only changes.
File renamed without changes.

apps/common/handle/impl/doc_split_handle.py renamed to apps/common/handle/impl/text/doc_split_handle.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,10 @@
1010
import os
1111
import re
1212
import traceback
13-
import uuid_utils.compat as uuid
1413
from functools import reduce
1514
from typing import List
1615

16+
import uuid_utils.compat as uuid
1717
from docx import Document, ImagePart
1818
from docx.oxml import ns
1919
from docx.table import Table
@@ -22,7 +22,6 @@
2222
from common.handle.base_split_handle import BaseSplitHandle
2323
from common.utils.split_model import SplitModel
2424
from knowledge.models import File
25-
from django.utils.translation import gettext_lazy as _
2625

2726
default_pattern_list = [re.compile('(?<=^)# .*|(?<=\\n)# .*'),
2827
re.compile('(?<=\\n)(?<!#)## (?!#).*|(?<=^)(?<!#)## (?!#).*'),

apps/common/handle/impl/html_split_handle.py renamed to apps/common/handle/impl/text/html_split_handle.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,4 +70,4 @@ def get_content(self, file, save_image):
7070
return html2text(content)
7171
except BaseException as e:
7272
traceback.print_exception(e)
73-
return f'{e}'
73+
return f'{e}'

apps/common/handle/impl/pdf_split_handle.py renamed to apps/common/handle/impl/text/pdf_split_handle.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,11 @@
1515
from typing import List
1616

1717
import fitz
18+
from django.utils.translation import gettext_lazy as _
1819
from langchain_community.document_loaders import PyPDFLoader
1920

2021
from common.handle.base_split_handle import BaseSplitHandle
2122
from common.utils.split_model import SplitModel
22-
from django.utils.translation import gettext_lazy as _
2323

2424
default_pattern_list = [re.compile('(?<=^)# .*|(?<=\\n)# .*'),
2525
re.compile('(?<=\\n)(?<!#)## (?!#).*|(?<=^)(?<!#)## (?!#).*'),
@@ -42,6 +42,7 @@ def check_links_in_pdf(doc):
4242
return True
4343
return False
4444

45+
4546
class PdfSplitHandle(BaseSplitHandle):
4647
def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image):
4748
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
@@ -181,7 +182,8 @@ def handle_toc(doc, limit):
181182
for text in split_text:
182183
chapters.append({"title": real_chapter_title, "content": text})
183184
else:
184-
chapters.append({"title": real_chapter_title, "content": chapter_text if chapter_text else real_chapter_title})
185+
chapters.append(
186+
{"title": real_chapter_title, "content": chapter_text if chapter_text else real_chapter_title})
185187
# 保存章节内容和章节标题
186188
return chapters
187189

@@ -336,4 +338,4 @@ def get_content(self, file, save_image):
336338
return self.handle_pdf_content(file, pdf_document)
337339
except BaseException as e:
338340
traceback.print_exception(e)
339-
return f'{e}'
341+
return f'{e}'

apps/common/handle/impl/text_split_handle.py renamed to apps/common/handle/impl/text/text_split_handle.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_bu
5454
def get_content(self, file, save_image):
5555
buffer = file.read()
5656
try:
57-
return buffer.decode(detect(buffer)['encoding'])
57+
return buffer.decode(detect(buffer)['encoding'])
5858
except BaseException as e:
5959
traceback.print_exception(e)
60-
return f'{e}'
60+
return f'{e}'
File renamed without changes.

0 commit comments

Comments
 (0)