1- import base64
1+ import ddddocr
2+ import cv2
3+ import numpy as np
4+ from PIL import Image
5+ import log
26
3- from app .utils import RequestUtils
7+ from app .utils import RequestUtils , StringUtils
48
59
610class OcrHelper :
711
8- _ocr_b64_url = "https://nastool.cn/captcha/base64"
@staticmethod
def around_white(img):
    """
    Paint the outer border of a 2-D image white (255), in place.

    The first six and the last five rows and columns are overwritten
    (indices ``<= 5`` and ``>= size - 5`` along each axis). An image that
    is small enough therefore becomes entirely white.

    :param img: 2-D numpy array; ``img.shape`` must unpack into two values
    :return: the same array object, modified in place
    """
    rows, cols = img.shape
    # Slice assignment replaces the per-pixel ``ndarray.itemset`` calls:
    # ``itemset`` was removed in NumPy 2.0, and slicing avoids a Python-level
    # loop over every pixel.
    img[:6, :] = 255
    img[:, :6] = 255
    # max(..., 0) keeps a negative start from being read as "count from the
    # end" on images shorter than five pixels along that axis.
    img[max(rows - 5, 0):, :] = 255
    img[:, max(cols - 5, 0):] = 255
    return img
23+
@staticmethod
def noise_unsome_piexl(img):
    """
    Remove isolated pixels by comparing each one with its four neighbours.

    Every interior pixel (the outermost row and column on each side are
    skipped) is compared with the pixels directly above, below, left and
    right of it. If none of the four matches, the pixel is set to 255.
    The image is scanned and modified in place, so pixels visited later
    see the values written for pixels visited earlier.

    NOTE(review): "matches" means "is zero / is non-zero in the same way",
    not "has the same value". This keeps what the previous
    ``a.all() == b.all()`` comparison did on scalar pixels. Confirm whether
    exact colour equality was intended before tightening it.

    :param img: 2-D numpy array; ``img.shape`` must unpack into two values
    :return: the same array object, modified in place
    """
    rows, cols = img.shape
    for r in range(1, rows - 1):
        for c in range(1, cols - 1):
            center_is_set = bool(img[r, c])
            neighbours = (
                img[r, c + 1],
                img[r, c - 1],
                img[r - 1, c],
                img[r + 1, c],
            )
            if not any(bool(n) == center_is_set for n in neighbours):
                # Index assignment instead of ``ndarray.itemset``, which was
                # removed in NumPy 2.0.
                img[r, c] = 255
    return img
51+
def image_pre_process(self, image):
    """
    Pre-process an image before recognition.

    Steps, in order: convert BGR to greyscale, apply a fixed binary
    threshold (127 -> 255), run OpenCV non-local-means denoising, apply
    ``OcrHelper.noise_unsome_piexl``, then ``OcrHelper.around_white``.

    :param image: image array accepted by ``cv2.cvtColor`` with
        ``COLOR_BGR2GRAY``
    :return: the processed image array
    """
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    binarised = cv2.threshold(grey, 127, 255, cv2.THRESH_BINARY)[1]
    smoothed = cv2.fastNlMeansDenoising(
        binarised,
        h=30,
        templateWindowSize=11,
        searchWindowSize=21,
    )
    despeckled = OcrHelper.noise_unsome_piexl(smoothed)
    return OcrHelper.around_white(despeckled)
63+
def recognize_captcha(self, image_content):
    """
    Recognise the text shown in a captcha image.

    The bytes are decoded with OpenCV, passed through
    ``image_pre_process``, converted to an RGB PIL image and handed to
    ddddocr. Two known misreadings are then substituted and the result is
    upper-cased.

    :param image_content: encoded image bytes
    :return: the upper-cased recognised text. If any step raises, the
        error is logged and whatever ``res`` held at that point is
        returned (``""`` unless OCR had already produced a value).
    """
    res = ""
    try:
        ocr = ddddocr.DdddOcr(show_ad=False)
        buffer = np.asarray(bytearray(image_content), dtype="uint8")
        image = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
        if image is None:
            # imdecode signals undecodable data by returning None; fail
            # here with a clear message instead of an opaque cv2 error later.
            raise ValueError("captcha image could not be decoded")
        op_image = self.image_pre_process(image)
        # image_pre_process yields a single-channel image, and
        # COLOR_BGR2RGB requires 3 or 4 channels, so the conversion code
        # is chosen from the array's rank.
        conversion = cv2.COLOR_GRAY2RGB if op_image.ndim == 2 else cv2.COLOR_BGR2RGB
        pil_image = Image.fromarray(cv2.cvtColor(op_image, conversion))
        res = ocr.classification(pil_image)
        res = StringUtils.replace_strings(res, {'之': '2', '>': '7'})
        return res.upper()
    except Exception as e:
        # Best effort: recognition failures are logged, never propagated.
        log.error(f"{str(e)} :{res} ")
    return res
981
1082 def get_captcha_text (self , image_url = None , image_b64 = None , cookie = None , ua = None ):
1183 """
@@ -22,12 +94,5 @@ def get_captcha_text(self, image_url=None, image_b64=None, cookie=None, ua=None)
2294 image_bin = ret .content
2395 if not image_bin :
2496 return ""
25- image_b64 = base64 .b64encode (image_bin ).decode ()
26- if not image_b64 :
27- return ""
28- ret = RequestUtils (content_type = "application/json" ).post_res (
29- url = self ._ocr_b64_url ,
30- json = {"base64_img" : image_b64 })
31- if ret :
32- return ret .json ().get ("result" )
97+ return self .recognize_captcha (image_bin )
3398 return ""
0 commit comments