Skip to content

Commit 6d5a9c7

Browse files
committed
refactor: integrate core functionality into CHSIConverter class
1 parent ac092d6 commit 6d5a9c7

File tree

5 files changed: +272 -350 lines changed

5 files changed: +272 -350 lines changed

add_float_picture.py

Lines changed: 0 additions & 103 deletions
This file was deleted.

app.py

Lines changed: 5 additions & 108 deletions
Original file line number | Diff line number | Diff line change
@@ -1,122 +1,19 @@
1-
from flask import Flask, request, render_template, send_from_directory, make_response
2-
from werkzeug.utils import secure_filename
1+
from flask import Flask, render_template
2+
from utils import CHSIConverter
3 3
import os
4-
from add_float_picture import add_float_picture
5-
from extract_img import extract_image_from_pdf
6-
from extract_info import extract_info_from_pdf
7-
from docx import Document
8-
from docx.shared import Inches, Pt
9-
from docx.enum.table import WD_ALIGN_VERTICAL
10-
from docx.oxml import parse_xml
11-
import uuid
12-
import shutil
13 4

14 5
app = Flask(__name__)
15 6

16-
def convert_to_docx(path):
17-
try:
18-
extracted_info = extract_info_from_pdf(path)
19-
doc = Document("static/template.docx")
20-
21-
paragraph = doc.add_paragraph()
22-
doc.element.body.insert(1, paragraph._element)
23-
paragraph.alignment = 1
24-
paragraph.add_run('Update date:' + extracted_info['Update Date'])
25-
26-
del extracted_info['Update Date']
27-
28-
table = doc.add_table(rows=1, cols=2)
29-
table.autofit = False
30-
31-
for cell in table.columns[0].cells:
32-
cell.width = Inches(0.5)
33-
for cell in table.columns[1].cells:
34-
cell.width = Inches(5.0)
35-
36-
border_xml = '<w:tcBorders xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">' \
37-
'<w:top w:val="nil"/>' \
38-
'<w:left w:val="nil"/>' \
39-
'<w:bottom w:val="nil"/>' \
40-
'<w:right w:val="nil"/>' \
41-
'</w:tcBorders>'
42-
43-
for key, value in extracted_info.items():
44-
cells = table.add_row().cells
45-
for cell in cells:
46-
cell._element.get_or_add_tcPr().append(parse_xml(border_xml))
47-
cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
48-
49-
is_last = key == list(extracted_info.keys())[-1]
50-
cells[0].text = key + ("" if is_last else "\n")
51-
cells[1].text = value + ("" if is_last else "\n")
52-
53-
cropped_image_1 = extract_image_from_pdf(path, 1, 1898, 583, 2230, 1026)
54-
add_float_picture(doc.add_paragraph(), cropped_image_1, width=Inches(1.2), pos_x=Pt(430), pos_y=Pt(140))
55-
56-
cropped_image_2 = extract_image_from_pdf(path, 1, 300, 2690, 630, 2985)
57-
add_float_picture(doc.add_paragraph(), cropped_image_2, width=Inches(1.2), pos_x=Pt(78), pos_y=Pt(643))
58-
59-
output_path = path.replace(".pdf", ".docx")
60-
doc.save(output_path)
61-
62-
return output_path
63-
64-
except Exception as e:
65-
return make_response(f"<script>alert('Error during DOCX conversion: {e}'); window.location.href = document.referrer;</script>")
66-
67 7
@app.route('/')
68 8
def home():
69 9
return render_template('index.html')
70 10

71 11
@app.route('/convert', methods=['POST'])
72-
def convert_file():
73-
if 'file' not in request.files:
74-
return make_response("<script>alert('缺少文件部分'); window.location.href = document.referrer;</script>")
75-
76-
file = request.files['file']
77-
if file.filename == '':
78-
return make_response("<script>alert('没有选中的文件'); window.location.href = document.referrer;</script>")
79-
80-
if not file.filename.lower().endswith('.pdf'):
81-
return make_response("<script>alert('只接受 PDF 文件'); window.location.href = document.referrer;</script>")
82-
83-
if not file.filename.startswith('教育部学籍在线验证报告_'):
84-
return make_response("<script>alert('请不要传入无关文件'); window.location.href = document.referrer;</script>")
85-
86-
try:
87-
filename = secure_filename(file.filename)
88-
filepath = os.path.join(os.getcwd(), 'upload', filename)
89-
file.save(filepath)
90-
91-
output_path = convert_to_docx(filepath)
92-
93-
directory = os.path.dirname(output_path)
94-
filename = os.path.basename(output_path)
95-
output_filename = str(uuid.uuid4()) + '.docx'
96-
97-
response = make_response(send_from_directory(directory, filename, as_attachment=True))
98-
response.headers["Content-Disposition"] = f"attachment; filename={output_filename}"
99-
100-
# 隐私处理
101-
upload_folder = os.path.join(os.getcwd(), 'upload')
102-
for filename in os.listdir(upload_folder):
103-
if filename != '.gitkeep':
104-
file_path = os.path.join(upload_folder, filename)
105-
try:
106-
if os.path.isfile(file_path) or os.path.islink(file_path):
107-
os.unlink(file_path)
108-
elif os.path.isdir(file_path):
109-
shutil.rmtree(file_path)
110-
except Exception as e:
111-
print(f"Failed to delete {file_path}. Reason: {e}")
112-
113-
return response
114-
except Exception as e:
115-
return make_response(f"<script>alert('处理文件时发生错误: {e}'); window.location.href = document.referrer;</script>")
12+
def handle_convert():
13+
return CHSIConverter.convert_file()
116 14

117 15
if __name__ == '__main__':
118-
# debug_mode = os.getenv('FLASK_DEBUG', 'false').lower() == 'true'
119 16
port = int(os.getenv('FLASK_PORT', 5001))
120 17
app.run(debug=True, port=port, host='0.0.0.0')
121 18
else:
122-
application=app
19+
application = app

extract_img.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

extract_info.py

Lines changed: 0 additions & 111 deletions
This file was deleted.

0 commit comments

Comments (0)