Skip to content

Commit a2d2999

Browse files
committed
v2.5.1: 更新文档,增加option配置和log自定义说明; 优化download api,有返回值对外界更友好; 重构异常处理,异常类别更清晰; 扩大请求重试的范围,加入json格式检查; (#197) [skip ci]
1 parent 14050d4 commit a2d2999

File tree

16 files changed

+417
-196
lines changed

16 files changed

+417
-196
lines changed

assets/docs/sources/option_file_syntax.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ JmOption.default().to_file('./option.yml') # 创建默认option,导出为optio
1515
## 2. option常规配置项
1616

1717
```yml
18+
# 开启jmcomic的日志输出,默认为true
19+
# 对日志有需求的可进一步参考文档
20+
log: true
21+
1822
# 配置客户端相关
1923
client:
2024
# impl: 客户端实现类,不配置默认会使用JmModuleConfig.DEFAULT_CLIENT_IMPL
@@ -32,6 +36,10 @@ client:
3236
- 18comic.vip
3337
- 18comic.org
3438

39+
# retry_times: 请求失败重试次数,默认为5
40+
retry_times: 5
41+
42+
# postman: 请求配置
3543
postman:
3644
meta_data:
3745
# proxies: 代理配置,默认是 system,表示使用系统代理。
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# 日志自定义 - 如果你不想看到那么多的日志
2+
3+
本文档缘起于 GitHub Discussions: [discussions/195](https://github.com/hect0x7/JMComic-Crawler-Python/discussions/195)
4+
5+
下面是这个问题的解决方法:
6+
7+
## 1. 日志完全开启/关闭
8+
9+
使用代码:
10+
11+
```python
12+
from jmcomic import disable_jm_log
13+
disable_jm_log()
14+
```
15+
16+
使用配置:
17+
18+
```yml
19+
log: false
20+
```
21+
22+
## 2. 日志过滤,只保留特定topic
23+
24+
使用插件配置
25+
26+
```yml
27+
log: true
28+
29+
plugins:
30+
after_init:
31+
- plugin: log_topic_filter # 日志topic过滤插件
32+
kwargs:
33+
whitelist: [ # 只保留api和html,这两个是Client发请求时会打的日志topic
34+
'api',
35+
'html',
36+
]
37+
```
38+
39+
## 3. 屏蔽插件的日志
40+
41+
给插件配置加上一个`log`配置项即可
42+
43+
```yml
44+
plugins:
45+
after_init:
46+
- plugin: client_proxy # 提高移动端的请求效率的插件
47+
log: false # 插件自身不打印日志
48+
kwargs:
49+
proxy_client_key: cl_proxy_future
50+
whitelist: [ api, ]
51+
```
52+
53+
## 4. 完全自定义 jmcomic 日志
54+
55+
你可以自定义jmcomic的模块日志打印函数,参考文档:[模块自定义](./4_module_custom.md#自定义log)

assets/docs/sources/tutorial/9_custom_download_dir_name.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ JmModuleConfig.PFIELD_ADVICE['myname'] = lambda photo: f'【{photo.id}】{photo.
102102

103103
### 文件夹名=第x话+标题
104104

105-
```python
105+
```yml
106106
# 直接使用内置字段 indextitle 即可
107107
dir_rule:
108108
rule: Bd_Pindextitle

src/jmcomic/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- option <--- downloader
44

5-
__version__ = '2.5.0'
5+
__version__ = '2.5.1'
66

77
from .api import *
88
from .jm_plugin import *
@@ -11,8 +11,8 @@
1111
gb = dict(filter(lambda pair: isinstance(pair[1], type), globals().items()))
1212

1313

14-
def register_jmcomic_component(gb: dict, method, valid_interface: type):
15-
for v in gb.values():
14+
def register_jmcomic_component(variables: Dict[str, Any], method, valid_interface: type):
15+
for v in variables.values():
1616
if v != valid_interface and issubclass(v, valid_interface):
1717
method(v)
1818

src/jmcomic/api.py

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ def download_batch(download_api,
55
jm_id_iter: Union[Iterable, Generator],
66
option=None,
77
downloader=None,
8-
):
8+
) -> Set[Tuple[JmAlbumDetail, JmDownloader]]:
99
"""
1010
批量下载 album / photo
1111
@@ -21,42 +21,73 @@ def download_batch(download_api,
2121
if option is None:
2222
option = JmModuleConfig.option_class().default()
2323

24-
return multi_thread_launcher(
24+
result = set()
25+
26+
def callback(*ret):
27+
result.add(ret)
28+
29+
multi_thread_launcher(
2530
iter_objs=set(
2631
JmcomicText.parse_to_jm_id(jmid)
2732
for jmid in jm_id_iter
2833
),
29-
apply_each_obj_func=lambda aid: download_api(aid, option, downloader),
34+
apply_each_obj_func=lambda aid: download_api(aid,
35+
option,
36+
downloader,
37+
callback=callback,
38+
),
39+
wait_finish=True
3040
)
3141

42+
return result
43+
3244

33-
def download_album(jm_album_id, option=None, downloader=None):
45+
def download_album(jm_album_id,
46+
option=None,
47+
downloader=None,
48+
callback=None,
49+
):
3450
"""
3551
下载一个本子(album),包含其所有的章节(photo)
3652
37-
当jm_album_id不是str或int时,相当于调用 download_batch(download_album, jm_album_id, option, downloader)
53+
当jm_album_id不是str或int时,视为批量下载,相当于调用 download_batch(download_album, jm_album_id, option, downloader)
3854
3955
:param jm_album_id: 本子的禁漫车号
4056
:param option: 下载选项
4157
:param downloader: 下载器类
58+
:param callback: 返回值回调函数,可以拿到 album 和 downloader
59+
:return: 对应的本子实体类,下载器(如果是上述的批量情况,返回值为download_batch的返回值)
4260
"""
4361

4462
if not isinstance(jm_album_id, (str, int)):
4563
return download_batch(download_album, jm_album_id, option, downloader)
4664

4765
with new_downloader(option, downloader) as dler:
48-
dler.download_album(jm_album_id)
66+
album = dler.download_album(jm_album_id)
4967

68+
if callback is not None:
69+
callback(album, dler)
5070

51-
def download_photo(jm_photo_id, option=None, downloader=None):
71+
return album, dler
72+
73+
74+
def download_photo(jm_photo_id,
75+
option=None,
76+
downloader=None,
77+
callback=None):
5278
"""
5379
下载一个章节(photo),参数同 download_album
5480
"""
5581
if not isinstance(jm_photo_id, (str, int)):
5682
return download_batch(download_photo, jm_photo_id, option)
5783

5884
with new_downloader(option, downloader) as dler:
59-
dler.download_photo(jm_photo_id)
85+
photo = dler.download_photo(jm_photo_id)
86+
87+
if callback is not None:
88+
callback(photo, dler)
89+
90+
return photo, dler
6091

6192

6293
def new_downloader(option=None, downloader=None) -> JmDownloader:

src/jmcomic/jm_client_impl.py

Lines changed: 76 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -45,22 +45,22 @@ def of_api_url(self, api_path, domain):
4545

4646
def get_jm_image(self, img_url) -> JmImageResp:
4747

48-
def judge(resp):
48+
def callback(resp):
4949
"""
5050
使用此方法包装 self.get,使得图片数据为空时判定为请求失败,走重试逻辑
5151
"""
5252
resp = JmImageResp(resp)
5353
resp.require_success()
5454
return resp
5555

56-
return self.get(img_url, judge=judge, headers=JmModuleConfig.new_html_headers())
56+
return self.get(img_url, callback=callback, headers=JmModuleConfig.new_html_headers())
5757

5858
def request_with_retry(self,
5959
request,
6060
url,
6161
domain_index=0,
6262
retry_count=0,
63-
judge=lambda resp: resp,
63+
callback=None,
6464
**kwargs,
6565
):
6666
"""
@@ -74,7 +74,7 @@ def request_with_retry(self,
7474
:param url: 图片url / path (/album/xxx)
7575
:param domain_index: 域名下标
7676
:param retry_count: 重试次数
77-
:param judge: 判定响应是否成功
77+
:param callback: 回调,可以接收resp返回新的resp,也可以抛出异常强制重试
7878
:param kwargs: 请求方法的kwargs
7979
"""
8080
if domain_index >= len(self.domain_list):
@@ -104,19 +104,32 @@ def request_with_retry(self,
104104

105105
try:
106106
resp = request(url, **kwargs)
107-
return judge(resp)
108-
except KeyboardInterrupt as e:
109-
raise e
107+
108+
# 回调,可以接收resp返回新的resp,也可以抛出异常强制重试
109+
if callback is not None:
110+
resp = callback(resp)
111+
112+
# 依然是回调,在最后返回之前,还可以判断resp是否重试
113+
resp = self.raise_if_resp_should_retry(resp)
114+
115+
return resp
110116
except Exception as e:
111117
if self.retry_times == 0:
112118
raise e
113119

114120
self.before_retry(e, kwargs, retry_count, url)
115121

116122
if retry_count < self.retry_times:
117-
return self.request_with_retry(request, url, domain_index, retry_count + 1, judge, **kwargs)
123+
return self.request_with_retry(request, url, domain_index, retry_count + 1, callback, **kwargs)
118124
else:
119-
return self.request_with_retry(request, url, domain_index + 1, 0, judge, **kwargs)
125+
return self.request_with_retry(request, url, domain_index + 1, 0, callback, **kwargs)
126+
127+
# noinspection PyMethodMayBeStatic
128+
def raise_if_resp_should_retry(self, resp):
129+
"""
130+
依然是回调,在最后返回之前,还可以判断resp是否重试
131+
"""
132+
return resp
120133

121134
def update_request_with_specify_domain(self, kwargs: dict, domain: str):
122135
"""
@@ -269,12 +282,12 @@ def get_photo_detail(self,
269282

270283
return photo
271284

272-
def fetch_detail_entity(self, apid, prefix):
285+
def fetch_detail_entity(self, jmid, prefix):
273286
# 参数校验
274-
apid = JmcomicText.parse_to_jm_id(apid)
287+
jmid = JmcomicText.parse_to_jm_id(jmid)
275288

276289
# 请求
277-
resp = self.get_jm_html(f"/{prefix}/{apid}")
290+
resp = self.get_jm_html(f"/{prefix}/{jmid}")
278291

279292
# 用 JmcomicText 解析 html,返回实体类
280293
if prefix == 'album':
@@ -474,10 +487,10 @@ def album_comment(self,
474487
return ret
475488

476489
@classmethod
477-
def require_resp_success_else_raise(cls, resp, orig_req_url: str):
490+
def require_resp_success_else_raise(cls, resp, url: str):
478491
"""
479492
:param resp: 响应对象
480-
:param orig_req_url: /photo/12412312
493+
:param url: /photo/12412312
481494
"""
482495
resp_url: str = resp.url
483496

@@ -490,11 +503,11 @@ def require_resp_success_else_raise(cls, resp, orig_req_url: str):
490503

491504
# 3. 检查错误类型
492505
def match_case(error_path):
493-
return resp_url.endswith(error_path) and not orig_req_url.endswith(error_path)
506+
return resp_url.endswith(error_path) and not url.endswith(error_path)
494507

495508
# 3.1 album_missing
496509
if match_case('/error/album_missing'):
497-
ExceptionTool.raise_missing(resp, orig_req_url)
510+
ExceptionTool.raise_missing(resp, JmcomicText.parse_to_jm_id(url))
498511

499512
# 3.2 user_missing
500513
if match_case('/error/user_missing'):
@@ -639,17 +652,17 @@ def get_scramble_id(self, photo_id, album_id=None):
639652

640653
return scramble_id
641654

642-
def fetch_detail_entity(self, apid, clazz):
655+
def fetch_detail_entity(self, jmid, clazz):
643656
"""
644657
请求实体类
645658
"""
646-
apid = JmcomicText.parse_to_jm_id(apid)
659+
jmid = JmcomicText.parse_to_jm_id(jmid)
647660
url = self.API_ALBUM if issubclass(clazz, JmAlbumDetail) else self.API_CHAPTER
648-
resp = self.req_api(
661+
resp = self.req_api(self.append_params_to_url(
649662
url,
650-
params={
651-
'id': apid,
652-
},
663+
{
664+
'id': jmid
665+
})
653666
)
654667

655668
return JmApiAdaptTool.parse_entity(resp.res_data, clazz)
@@ -886,18 +899,57 @@ def decide_headers_and_ts(self, kwargs, url):
886899
return ts
887900

888901
@classmethod
889-
def require_resp_success(cls, resp: JmApiResp, orig_req_url: str):
902+
def require_resp_success(cls, resp: JmApiResp, url: Optional[str] = None):
903+
"""
904+
905+
:param resp: 响应对象
906+
:param url: 请求路径,例如 /setting
907+
"""
890908
resp.require_success()
891909

892910
# 1. 检查是否 album_missing
893911
# json: {'code': 200, 'data': []}
894912
data = resp.model().data
895913
if isinstance(data, list) and len(data) == 0:
896-
ExceptionTool.raise_missing(resp, orig_req_url)
914+
ExceptionTool.raise_missing(resp, JmcomicText.parse_to_jm_id(url))
897915

898916
# 2. 是否是特殊的内容
899917
# 暂无
900918

919+
def raise_if_resp_should_retry(self, resp):
920+
"""
921+
该方法会判断resp返回值是否是json格式,
922+
如果不是,大概率是禁漫内部异常,需要进行重试
923+
924+
由于完整的json格式校验会有性能开销,所以只做简单的检查,
925+
只校验第一个有效字符是不是 '{',如果不是,就认为异常数据,需要重试
926+
927+
:param resp: 响应对象
928+
:return: resp
929+
"""
930+
if isinstance(resp, JmResp):
931+
# 不对包装过的resp对象做校验,包装者自行校验
932+
# 例如图片请求
933+
return resp
934+
935+
url = resp.request.url
936+
937+
if self.API_SCRAMBLE in url:
938+
# /chapter_view_template 这个接口不是返回json数据,不做检查
939+
return resp
940+
941+
text = resp.text
942+
for char in text:
943+
if char not in (' ', '\n', '\t'):
944+
# 找到第一个有效字符
945+
ExceptionTool.require_true(
946+
char == '{',
947+
f'请求不是json格式,强制重试!响应文本: [{resp.text}]'
948+
)
949+
return resp
950+
951+
ExceptionTool.raises_resp(f'响应无数据!request_url=[{url}]', resp)
952+
901953
def after_init(self):
902954
# 保证拥有cookies,因为移动端要求必须携带cookies,否则会直接跳转同一本子【禁漫娘】
903955
if JmModuleConfig.flag_api_client_require_cookies:

0 commit comments

Comments
 (0)