Skip to content

Commit 5c4a966

Browse files
committed
Merge remote-tracking branch 'origin/dev'
2 parents ae3fcaa + 194a9a3 commit 5c4a966

File tree

7 files changed

+249
-75
lines changed

7 files changed

+249
-75
lines changed

README.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,58 @@
3131

3232
馒头模拟登录需要添加 cookie,通过 cookie 访问接口,不排除账户被禁用的可能
3333

34+
### 插件
35+
36+
- 自定义识别词
37+
38+
​ tmdb id 获取:在 [tmdb](https://www.themoviedb.org/?language=zh-CN) 网站搜索关键词,打开对应影片页面,复制 url 中的数字 id。例如 https://www.themoviedb.org/movie/693134-dune-part-two?language=zh-CN 的 tmdb id 为 693134
39+
40+
41+
- 通用识别词维护:
42+
43+
​ 编辑 [通用识别词](https://pad.xcreal.cc/p/通用识别词) 添加关键词
44+
45+
​ 格式如下:
46+
47+
​ 屏蔽:被替换词
48+
49+
​ 替换:被替换词@@替换词
50+
51+
​ 替换+集偏移:被替换词@@替换词@@前定位词@@后定位词@@集偏移
52+
53+
​ 集偏移:前定位词@@后定位词@@集偏移
54+
55+
- 电影识别词维护:
56+
57+
​ 编辑 [电影识别词](https://pad.xcreal.cc/p/电影识别词) 添加关键词
58+
59+
​ 格式如下:
60+
61+
​ 屏蔽:tmdb id@@被替换词
62+
63+
​ 替换:tmdb id@@被替换词@@替换词
64+
65+
​ 替换+集偏移:tmdb id@@被替换词@@替换词@@前定位词@@后定位词@@集偏移
66+
67+
​ 集偏移:tmdb id@@前定位词@@后定位词@@集偏移
68+
69+
- 电视识别词维护:
70+
71+
​ 编辑 [电视识别词](https://pad.xcreal.cc/p/电视识别词) 添加关键词
72+
73+
格式同电影识别词
74+
75+
- 动漫识别词维护:
76+
77+
​ 编辑 [动漫识别词](https://pad.xcreal.cc/p/动漫识别词) 添加关键词
78+
79+
格式同电影识别词
80+
81+
82+
83+
**如果有好用的识别词,请共同维护**
84+
85+
3486
### 开启公开站点
3587

3688
在 config.yaml 的 laboratory 添加 ```show_more_sites: true```

app/helper/ocr_helper.py

Lines changed: 76 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,83 @@
1-
import base64
1+
import ddddocr
2+
import cv2
3+
import numpy as np
4+
from PIL import Image
5+
import log
26

3-
from app.utils import RequestUtils
7+
from app.utils import RequestUtils, StringUtils
48

59

610
class OcrHelper:
711

8-
_ocr_b64_url = "https://nastool.cn/captcha/base64"
12+
@staticmethod
13+
def around_white(img):
14+
"""
15+
四周置白色
16+
"""
17+
w, h = img.shape
18+
for _w in range(w):
19+
for _h in range(h):
20+
if (_w <= 5) or (_h <= 5) or (_w >= w-5) or (_h >= h-5):
21+
img.itemset((_w, _h), 255)
22+
return img
23+
24+
@staticmethod
25+
def noise_unsome_piexl(img):
26+
'''
27+
邻域非同色降噪
28+
查找像素点上下左右相邻点的颜色,如果是非白色的非像素点颜色,则填充为白色
29+
'''
30+
w, h = img.shape
31+
for _w in range(w):
32+
for _h in range(h):
33+
if _h != 0 and _w != 0 and _w < w - 1 and _h < h - 1:# 剔除顶点、底点
34+
center_color = img[_w, _h] # 当前坐标颜色
35+
top_color = img[_w, _h + 1]
36+
bottom_color = img[_w, _h - 1]
37+
left_color = img[_w - 1, _h]
38+
right_color = img[_w + 1, _h]
39+
cnt = 0
40+
if top_color.all() == center_color.all():
41+
cnt += 1
42+
if bottom_color.all() == center_color.all():
43+
cnt += 1
44+
if left_color.all() == center_color.all():
45+
cnt += 1
46+
if right_color.all() == center_color.all():
47+
cnt += 1
48+
if cnt < 1:
49+
img.itemset((_w, _h), 255)
50+
return img
51+
52+
def image_pre_process(self, image):
    """
    Prepare a decoded image for OCR.

    Pipeline: BGR -> grayscale, binary threshold at 127, non-local-means
    denoising, removal of pixels without a matching neighbour, then a
    white frame around the edges.

    :param image: image array accepted by ``cv2.cvtColor`` with
                  ``COLOR_BGR2GRAY``.
    :return: the processed single-channel image array.
    """
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (retval, image); only the image is needed.
    binary = cv2.threshold(grey, 127, 255, cv2.THRESH_BINARY)[1]
    smoothed = cv2.fastNlMeansDenoising(
        binary, h=30, templateWindowSize=11, searchWindowSize=21
    )
    return OcrHelper.around_white(OcrHelper.noise_unsome_piexl(smoothed))
63+
64+
def recognize_captcha(self, image_content):
    """
    Recognise the text of a captcha image.

    The raw bytes are decoded with OpenCV, cleaned by
    ``image_pre_process``, converted to a PIL image and passed to ddddocr.
    Two known misreadings are then substituted ('之' -> '2', '>' -> '7')
    and the result is upper-cased.

    :param image_content: encoded image bytes (anything ``bytearray()``
                          accepts).
    :return: the recognised text in upper case. If any step raises, the
             error is logged and whatever was recognised so far is
             returned (an empty string when OCR never ran).
    """
    res = ""
    try:
        ocr = ddddocr.DdddOcr(show_ad=False)
        image = np.asarray(bytearray(image_content), dtype="uint8")
        image = cv2.imdecode(image, cv2.IMREAD_COLOR)
        op_image = self.image_pre_process(image)
        # image_pre_process returns a single-channel image. COLOR_BGR2RGB
        # only accepts 3/4-channel input and raises on it, which used to
        # push every call into the except branch. Pick the conversion
        # that matches the array's channel layout.
        if op_image.ndim == 2:
            rgb_image = cv2.cvtColor(op_image, cv2.COLOR_GRAY2RGB)
        else:
            rgb_image = cv2.cvtColor(op_image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb_image)
        res = ocr.classification(pil_image)
        res = StringUtils.replace_strings(res, {'之': '2', '>': '7'})
        return res.upper()
    except Exception as e:
        # Best effort: captcha recognition must not crash the caller.
        log.error(f"{str(e)}{res}")
        return res
981

1082
def get_captcha_text(self, image_url=None, image_b64=None, cookie=None, ua=None):
1183
"""
@@ -22,12 +94,5 @@ def get_captcha_text(self, image_url=None, image_b64=None, cookie=None, ua=None)
2294
image_bin = ret.content
2395
if not image_bin:
2496
return ""
25-
image_b64 = base64.b64encode(image_bin).decode()
26-
if not image_b64:
27-
return ""
28-
ret = RequestUtils(content_type="application/json").post_res(
29-
url=self._ocr_b64_url,
30-
json={"base64_img": image_b64})
31-
if ret:
32-
return ret.json().get("result")
97+
return self.recognize_captcha(image_bin)
3398
return ""

0 commit comments

Comments
 (0)