@@ -63,14 +63,8 @@ def is_candidate(self):
6363 logger .info ('Failed on image bytesize check, size is %s, %s' , len (self .raw_data ),
6464 self .url )
6565 return False
66- try :
67- img = Image .open (io .BytesIO (self .raw_data ))
68- colors = img .getcolors (maxcolors = 2 )
69- if colors is not None and len (colors ) == 1 :
70- logger .info ('Maybe a solid color image(%s), colors=%s' , self .url , len (colors ))
71- return False
72- except Exception as e :
73- logger .warning ('Failed on image colors check, %s, url=%s' , e , self .url )
66+ if self .is_predominantly_white_color ():
67+ return False
7468 self ._is_candidate = True
7569 self .width , self .height = width , height
7670 return True
@@ -123,6 +117,24 @@ def raw_data(self):
123117 def raw_data (self , value ):
124118 self ._raw_data = value
125119
def is_predominantly_white_color(self, predominance=.99, white_distance=10):
    """Report whether the image in ``self.raw_data`` is almost entirely white.

    The image is decoded, converted to RGB and reduced to a colour histogram.
    A colour counts as "white" when every channel lies within
    ``white_distance`` of 255. The pixel counts of *all* such colours are
    added together, so a white image whose pixels are spread over several
    near-white shades (for example because of compression noise) is still
    recognised.

    :param predominance: fraction of the pixels (0..1) that white pixels
        must exceed for the image to be reported as white.
    :param white_distance: largest allowed per-channel distance from 255.
    :return: ``True`` when white pixels exceed ``predominance``; ``False``
        otherwise, including when the image cannot be decoded or inspected.
    """
    try:
        with Image.open(io.BytesIO(self.raw_data)) as source:
            rgb = source.convert('RGB')
            pixel_total = rgb.width * rgb.height
            if not pixel_total:
                return False
            # A histogram can never hold more entries than there are pixels,
            # so this limit guarantees getcolors() does not give up.
            colors = rgb.getcolors(maxcolors=pixel_total)
        if not colors:
            return False

        floor = 255 - white_distance
        whites = [
            (count, color)
            for count, color in colors
            if all(floor <= channel <= 255 for channel in color)
        ]
        if not whites:
            return False

        total_count = sum(count for count, _ in colors)
        white_count = sum(count for count, _ in whites)
        # Compare by multiplication so the result does not depend on
        # integer versus true division.
        if white_count > predominance * total_count:
            # Log the most frequent white shade as the representative colour.
            _, main_color = max(whites, key=lambda entry: entry[0])
            logger.info('Maybe a solid color image(%s), dominant_pct=%f, RGB=%s',
                        self.url, white_count / float(total_count), main_color)
            return True
    except Exception as e:
        # Best effort: an undecodable image is simply not reported as white.
        logger.warning('Failed on image colors check, %s, url=%s', e, self.url)
    return False
137+
126138 # 'image/svg+xml;charset=utf-8' -> svg
127139 def guess_suffix (self ):
128140 if not self .content_type :
0 commit comments