11import torch
2- import torch .nn .functional as F
3- from PIL import Image
2+ import math
43from torchvision import transforms as T
54from torchvision .transforms .functional import InterpolationMode
65
76
def find_closest_aspect_ratio(width, height, min_num=1, max_num=6, image_size=448):
    """
    Pick the tile grid whose aspect ratio best matches a width x height image.

    The ideal number of tiles is ``ceil(width * height / image_size ** 2)``,
    capped at ``max_num``. Every factorisation ``[a, b]`` (``a * b == count``)
    of that count and of its two neighbours (``count - 1`` and ``count + 1``,
    skipping any above ``max_num``) is a candidate. The candidate minimising
    ``abs(log(width / height) - log(a / b))`` is returned; on a tie the
    candidate generated first wins (smaller tile count, then smaller ``a``).

    Args:
        width: Image width; must be positive.
        height: Image height; must be positive.
        min_num: Minimum number of tiles. Only ``1`` is supported.
        max_num: Maximum number of tiles in the returned grid.
        image_size: Side length of one square tile; must be positive.

    Returns:
        A two-element list ``[a, b]``: tiles along the width and tiles along
        the height. ``[1, 1]`` when the image area fits in a single tile or
        ``max_num`` allows no more than one tile.

    Raises:
        ValueError: If ``min_num`` is not 1, or if ``width``, ``height`` or
            ``image_size`` is not positive.
    """
    # Explicit checks instead of ``assert``: asserts vanish under ``python -O``.
    if min_num != 1:
        raise ValueError(f"min_num must be 1, got {min_num}")
    if width <= 0 or height <= 0:
        raise ValueError(f"width and height must be positive, got {width}x{height}")
    if image_size <= 0:
        raise ValueError(f"image_size must be positive, got {image_size}")

    area_in_tiles = width * height / (image_size * image_size)
    tile_count = min(math.ceil(area_in_tiles), max_num)
    if tile_count <= 1:
        return [1, 1]

    # Compare ratios in log space so that e.g. 2:1 and 1:2 are equally far from 1:1.
    log_ratio = math.log(width / height)

    candidate_grids = []
    for count in (tile_count - 1, tile_count, tile_count + 1):
        if count > max_num:
            continue
        # Each divisor of ``count`` yields one grid shape with exactly ``count`` tiles.
        candidate_grids.extend(
            [divisor, count // divisor]
            for divisor in range(1, count + 1)
            if count % divisor == 0
        )

    # ``min`` returns the first minimal element, matching a strict ``<`` scan.
    return min(
        candidate_grids,
        key=lambda grid: abs(log_ratio - math.log(grid[0] / grid[1])),
    )
40+
41+
42+ def dynamic_preprocess (image , min_num = 1 , max_num = 6 , image_size = 448 , use_thumbnail = True ):
3043 """
3144 Preprocess the image dynamically by finding the closest aspect ratio,
3245 resizing the image, and splitting it into smaller blocks.
3346 Optionally add a thumbnail version of the image.
3447 """
35- orig_width , orig_height = image .size
36- aspect_ratio = orig_width / orig_height
37-
38- # Calculate the existing image aspect ratio
39- target_ratios = set (
40- (i , j )
41- for n in range (min_num , max_num + 1 )
42- for i in range (1 , n + 1 )
43- for j in range (1 , n + 1 )
44- if i * j <= max_num and i * j >= min_num
45- )
46- target_ratios = sorted (target_ratios , key = lambda x : x [0 ] * x [1 ])
47-
48- # Find the closest aspect ratio to the target
49- target_aspect_ratio = find_closest_aspect_ratio (aspect_ratio , target_ratios , orig_width , orig_height , image_size )
50-
51- # Calculate the target width and height
48+ original_width , original_height = image .size
49+ target_aspect_ratio = find_closest_aspect_ratio (original_width , original_height , min_num , max_num , image_size )
5250 target_width = image_size * target_aspect_ratio [0 ]
5351 target_height = image_size * target_aspect_ratio [1 ]
5452 blocks = target_aspect_ratio [0 ] * target_aspect_ratio [1 ]
55-
56- # Resize the image to the target dimensions
53+ # resize the image
5754 resized_img = image .resize ((target_width , target_height ))
5855 processed_images = []
5956 for i in range (blocks ):
@@ -63,40 +60,22 @@ def dynamic_preprocess(image, min_num=1, max_num=6, image_size=448, use_thumbnai
6360 ((i % (target_width // image_size )) + 1 ) * image_size ,
6461 ((i // (target_width // image_size )) + 1 ) * image_size ,
6562 )
66- # Split the image into blocks
63+ # split the image
6764 split_img = resized_img .crop (box )
6865 processed_images .append (split_img )
69-
7066 assert len (processed_images ) == blocks
71-
72- # Optionally add a thumbnail version of the image
7367 if use_thumbnail and len (processed_images ) != 1 :
7468 thumbnail_img = image .resize ((image_size , image_size ))
7569 processed_images .append (thumbnail_img )
76-
7770 return processed_images
7871
7972
80- def get_image_patch (orign_width , orign_height , min_num = 1 , max_num = 6 , image_size = 448 , use_thumbnail = False ):
73+ def get_image_patch (orign_width , orign_height , min_num = 1 , max_num = 6 , image_size = 448 , use_thumbnail = True ):
8174 """
8275 Calculate the number of image patches based on the closest aspect ratio
8376 and the given width and height of the original image.
8477 """
85- aspect_ratio = orign_width / orign_height
86-
87- # calculate the existing image aspect ratio
88- target_ratios = set (
89- (i , j )
90- for n in range (min_num , max_num + 1 )
91- for i in range (1 , n + 1 )
92- for j in range (1 , n + 1 )
93- if i * j <= max_num and i * j >= min_num
94- )
95- target_ratios = sorted (target_ratios , key = lambda x : x [0 ] * x [1 ])
96-
97- # find the closest aspect ratio to the target
98- target_aspect_ratio = find_closest_aspect_ratio (aspect_ratio , target_ratios , orign_width , orign_height , image_size )
99-
78+ target_aspect_ratio = find_closest_aspect_ratio (orign_width , orign_height , min_num , max_num , image_size )
10079 blocks = target_aspect_ratio [0 ] * target_aspect_ratio [1 ]
10180 if use_thumbnail and blocks != 1 :
10281 blocks += 1
0 commit comments