-
Notifications
You must be signed in to change notification settings - Fork 320
Expand file tree
/
Copy pathimg_process.py
More file actions
104 lines (94 loc) · 3.99 KB
/
img_process.py
File metadata and controls
104 lines (94 loc) · 3.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import torch
import math
from torchvision import transforms as T
from torchvision.transforms.functional import InterpolationMode
def find_closest_aspect_ratio(width, height, min_num=1, max_num=6, image_size=448):
    """
    Choose a split grid [cols, rows] whose aspect ratio best matches the
    input image's aspect ratio (compared in log space).

    The number of grid cells is estimated from how many `image_size`-squared
    tiles the image area covers, clamped to `max_num`. Cell counts one below
    and one above the estimate are also considered, so a neighboring count
    with a better-fitting factorization can win. On an exact tie in aspect
    error, the first candidate in enumeration order is kept.

    Args:
        width: Original image width in pixels.
        height: Original image height in pixels.
        min_num: Minimum number of grid cells; only 1 is supported.
        max_num: Maximum number of grid cells.
        image_size: Side length (pixels) of one square tile.

    Returns:
        [cols, rows] of the best grid; [1, 1] when the image fits one tile.

    Raises:
        ValueError: If min_num is not 1.
    """
    # Explicit validation instead of `assert`, which is stripped under -O.
    if min_num != 1:
        raise ValueError("min_num must be 1")
    log_ratio = math.log(width / height)
    # Approximate number of image_size x image_size tiles covering the area.
    ratio = width * height / (image_size * image_size)
    multiple = min(math.ceil(ratio), max_num)
    if multiple <= 1:
        return [1, 1]
    # Candidate cell counts around the estimate, never exceeding max_num.
    candidate_split_grids_nums = [
        i for i in (multiple - 1, multiple, multiple + 1) if i <= max_num
    ]
    # Every cols x rows factorization of each candidate cell count.
    candidate_grids = []
    for split_grids_nums in candidate_split_grids_nums:
        for m in range(1, split_grids_nums + 1):
            if split_grids_nums % m == 0:
                candidate_grids.append([m, split_grids_nums // m])
    # Pick the grid with the smallest log-space aspect-ratio error.
    best_grid = [1, 1]
    min_error = float("inf")
    for grid in candidate_grids:
        error = abs(log_ratio - math.log(grid[0] / grid[1]))
        if error < min_error:
            best_grid = grid
            min_error = error
    return best_grid
def dynamic_preprocess(image, min_num=1, max_num=6, image_size=448, use_thumbnail=True):
    """
    Split an image into square `image_size` x `image_size` tiles.

    The target grid is chosen by `find_closest_aspect_ratio`; the image is
    resized to exactly cover that grid and cropped into tiles in row-major
    order. When the split yields more than one tile and `use_thumbnail` is
    set, a full-image thumbnail is appended as one extra tile.
    """
    width, height = image.size
    grid_cols, grid_rows = find_closest_aspect_ratio(
        width, height, min_num, max_num, image_size
    )
    # Resize so the image maps exactly onto the grid of square tiles.
    resized = image.resize((image_size * grid_cols, image_size * grid_rows))
    tiles = []
    for row in range(grid_rows):
        for col in range(grid_cols):
            left = col * image_size
            top = row * image_size
            tiles.append(resized.crop((left, top, left + image_size, top + image_size)))
    assert len(tiles) == grid_cols * grid_rows
    if use_thumbnail and len(tiles) != 1:
        tiles.append(image.resize((image_size, image_size)))
    return tiles
def get_image_patch(orign_width, orign_height, min_num=1, max_num=6, image_size=448, use_thumbnail=True):
    """
    Return the number of patches `dynamic_preprocess` would produce for an
    image of the given size: one per grid cell, plus one extra thumbnail
    patch when the grid has more than a single cell and `use_thumbnail` is
    enabled.
    """
    cols, rows = find_closest_aspect_ratio(
        orign_width, orign_height, min_num, max_num, image_size
    )
    num_patches = cols * rows
    if use_thumbnail and num_patches != 1:
        num_patches += 1
    return num_patches
def load_image(image_file, input_size=448, max_num=6):
    """
    Convert a PIL image into a batch of normalized tile tensors.

    The image is forced to RGB, tiled by `dynamic_preprocess` (which appends
    a thumbnail tile when it splits the image), and each tile is resized
    with bicubic interpolation, converted to a tensor, and normalized with
    ImageNet mean/std.

    Returns:
        A stacked tensor of shape (num_tiles, 3, input_size, input_size).
    """
    imagenet_mean = (0.485, 0.456, 0.406)
    imagenet_std = (0.229, 0.224, 0.225)
    rgb_image = image_file.convert("RGB")
    to_tensor = T.Compose(
        [
            # Defensive re-conversion in case a tile is not RGB.
            T.Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
            T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
            T.ToTensor(),
            T.Normalize(mean=imagenet_mean, std=imagenet_std),
        ]
    )
    tiles = dynamic_preprocess(rgb_image, image_size=input_size, use_thumbnail=True, max_num=max_num)
    return torch.stack([to_tensor(tile) for tile in tiles])