-
Notifications
You must be signed in to change notification settings - Fork 171
Expand file tree
/
Copy pathPaddleOCR_img2md.py
More file actions
82 lines (63 loc) · 2.75 KB
/
PaddleOCR_img2md.py
File metadata and controls
82 lines (63 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import time
from paddleocr import PaddleOCR, PPStructureV3
from tqdm import tqdm
def process_folder(folder_path, output_dir):
    """Run the PPStructureV3 pipeline on every image in a folder.

    Files directly inside ``folder_path`` whose extension (compared
    case-insensitively) is one of .jpg, .jpeg, .png, .bmp, .tiff or .gif are
    processed one at a time in sorted name order. For each result the
    pipeline returns, ``save_to_json`` and ``save_to_markdown`` are called
    with ``<output_dir>/<image base name>`` as the save path. A failure on
    one image is recorded in the statistics and does not stop the batch.

    Args:
        folder_path: Directory containing the input images.
        output_dir: Directory for the outputs; created if it does not exist.

    Returns:
        A list with one dict per image, in processing order, with the keys
        ``"image"`` (file name), ``"processing_time"`` (seconds) and
        ``"status"`` (``"success"`` or ``"failed: <error message>"``).
        The list is empty when the folder contains no matching images.

    Raises:
        OSError: If ``output_dir`` cannot be created or ``folder_path``
            cannot be listed.
    """
    os.makedirs(output_dir, exist_ok=True)

    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif')
    # os.listdir() returns entries in arbitrary order; sort so that runs and
    # the resulting statistics are reproducible.
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.splitext(f)[1].lower() in image_extensions
    )

    processing_stats = []

    if image_files:
        # Build the pipeline only when there is work to do.
        pipeline = PPStructureV3()

        for img_file in tqdm(image_files, desc="Processing images"):
            img_path = os.path.join(folder_path, img_file)
            # perf_counter() is monotonic, so the measured duration cannot be
            # skewed by system clock adjustments.
            start_time = time.perf_counter()

            try:
                result = pipeline.predict(
                    img_path,
                    use_doc_orientation_classify=False,
                    use_doc_unwarping=False,
                    use_textline_orientation=False,
                )
                # On success the clock stops here: writing the output files
                # below is not counted in the processing time.
                processing_time = time.perf_counter() - start_time

                base_name = os.path.splitext(img_file)[0]
                oupt_file = os.path.join(output_dir, base_name)
                for res in result:
                    res.save_to_json(oupt_file)
                    res.save_to_markdown(oupt_file, pretty=False)
                print(f"Result save to {oupt_file}")
                status = "success"
            except Exception as e:
                # Best-effort batch: record the failure and continue with
                # the next image instead of aborting the whole run.
                processing_time = time.perf_counter() - start_time
                status = f"failed: {e}"

            processing_stats.append({
                "image": img_file,
                "processing_time": processing_time,
                "status": status,
            })

    _print_processing_report(processing_stats)
    return processing_stats


def _print_processing_report(processing_stats):
    """Print the per-image results followed by the aggregate summary."""
    print("\nProcessing Statistics:")
    print("=" * 50)
    for stat in processing_stats:
        print(f"Image: {stat['image']}")
        print(f"Status: {stat['status']}")
        print(f"Processing Time: {stat['processing_time']:.2f} seconds")
        print("-" * 50)

    total_time = sum(stat['processing_time'] for stat in processing_stats)
    success_count = sum(
        1 for stat in processing_stats if stat['status'] == 'success'
    )
    failed_count = len(processing_stats) - success_count

    print("\nSummary:")
    print("=" * 50)
    print(f"Total Images Processed: {len(processing_stats)}")
    print(f"Successfully Processed: {success_count}")
    print(f"Failed to Process: {failed_count}")
    print(f"Total Processing Time: {total_time:.2f} seconds")
    if processing_stats:
        average_time = total_time / len(processing_stats)
        print(f"Average Processing Time: {average_time:.2f} seconds per image")
    else:
        # No images were found: there is no average to report, and dividing
        # by zero here used to crash the run.
        print("Average Processing Time: n/a (no images processed)")
if __name__ == "__main__":
    # Script entry point: process the images under ./images and write the
    # results to ./outputs, keeping the per-image statistics in `stats`.
    input_img_folder, output_md_folder = "./images", "./outputs"
    stats = process_folder(
        folder_path=input_img_folder,
        output_dir=output_md_folder,
    )