Skip to content

Commit 2e29d53

Browse files
authored
v1.1.0: 优化debug日志,使用线程池调整下载调度。 (#5)
1 parent 5a370e3 commit 2e29d53

File tree

18 files changed

+399
-64
lines changed

18 files changed

+399
-64
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
name: 跑测试
2+
3+
on:
4+
push:
5+
branches: [ "dev", "master" ]
6+
paths:
7+
- 'src/**/*.py' # 源码
8+
- 'tests/**/*.py' # 测试代码
9+
10+
jobs:
11+
test: # This code is based on https://github.com/gaogaotiantian/viztracer/blob/master/.github/workflows/python-package.yml
12+
strategy:
13+
matrix:
14+
python-version: [ 3.7, 3.8, 3.9, "3.10", "3.11" ]
15+
os: [ ubuntu-latest, macos-latest ]
16+
runs-on: ${{ matrix.os }}
17+
timeout-minutes: 5
18+
19+
steps:
20+
- name: Checkout repository
21+
uses: actions/checkout@v3
22+
23+
- name: Set up Python ${{ matrix.python-version }}
24+
uses: actions/setup-python@v4
25+
with:
26+
python-version: ${{ matrix.python-version }}
27+
28+
- name: Install dependencies
29+
if: matrix.os != 'windows-latest'
30+
run: |
31+
python -m pip install --upgrade pip
32+
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
33+
34+
- name: Install dependencies on Windows
35+
if: matrix.os == 'windows-latest'
36+
run: |
37+
python -m pip install --upgrade pip
38+
if (Test-Path -Path '.\requirements-dev.txt' -PathType Leaf) {pip install -r requirements-dev.txt}
39+
40+
- name: Install local
41+
run: |
42+
pip install -e ./
43+
44+
- name: 跑测试
45+
if: matrix.os != 'windows-latest'
46+
run: |
47+
cd ./tests/
48+
python -m unittest
49+
50+
- name: 跑测试(Windows系统)
51+
if: matrix.os == 'windows-latest'
52+
run: |
53+
python -m unittest discover -s ./tests -p "test_*.py"

.github/workflows/action_workflow.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
steps:
2020
- uses: actions/checkout@v3
2121
- name: Set up Python 3.11
22-
uses: actions/setup-python@v3
22+
uses: actions/setup-python@v4
2323
with:
2424
python-version: "3.11"
2525

.github/workflows/action_workflow_local.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
steps:
2626
- uses: actions/checkout@v3
2727
- name: Set up Python 3.11
28-
uses: actions/setup-python@v3
28+
uses: actions/setup-python@v4
2929
with:
3030
python-version: "3.11"
3131

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,27 @@
11
name: 发布PYPI
22

33
on:
4-
push:
5-
branches: [ "releases/**" ]
4+
release:
5+
types: [ published ]
66

77
jobs:
8-
build-and-publish:
8+
release:
99
name: 构建并发布 jmcomic 到 PyPI
1010
runs-on: ubuntu-latest
1111
steps:
12-
- uses: actions/checkout@main
12+
- uses: actions/checkout@v3
13+
1314
- name: Set up Python 3.7
14-
uses: actions/setup-python@v3
15+
uses: actions/setup-python@v4
1516
with:
1617
python-version: "3.7"
18+
1719
- name: 构建模块
1820
run: |
1921
python -m pip install build
2022
python -m build
21-
- name: 发布上传
23+
24+
- name: 发布PYPI
2225
uses: pypa/gh-action-pypi-publish@release/v1
2326
with:
24-
password:
25-
${{ secrets.PYPI_JMCOMIC }}
27+
password: ${{ secrets.PYPI_JMCOMIC }}

assets/config/option.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ client_config:
2828
headers:
2929
accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
3030
accept-language: zh-CN,zh;q=0.9
31-
authority: jmcomic1.rocks
31+
authority: jmcomic1.group
3232
sec-ch-ua: '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"'
3333
sec-ch-ua-mobile: ?0
3434
sec-ch-ua-platform: '"Windows"'

assets/config/option_test.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
!!python/object:jmcomic.jm_option.JmOption
2+
dir_tree: !!python/object:jmcomic.jm_option.DownloadDirTree
3+
Bd: $(workspace}/download/
4+
flag: Bd_Id_Image
5+
6+
disable_jm_module_debug: false
7+
download_convert_image_suffix: null
8+
download_image_then_decode: true
9+
download_multi_thread_photo_batch_count: 3
10+
download_multi_thread_photo_len_limit: 5
11+
download_use_disk_cache: true
12+
13+
14+
client_config:
15+
# 请求失败时,重试的次数,配个正数就行
16+
retry_times: 10
17+
domain: jmcomic1.group
18+
postman_type: cffi
19+
postman_type_list:
20+
- requests
21+
- requests_Session
22+
- cffi
23+
- cffi_Session
24+
meta_data:
25+
proxies: null
26+
allow_redirects: true
27+
cookies: null
28+
headers:
29+
accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
30+
accept-language: zh-CN,zh;q=0.9
31+
authority: jmcomic1.group
32+
sec-ch-ua: '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"'
33+
sec-ch-ua-mobile: ?0
34+
sec-ch-ua-platform: '"Windows"'
35+
sec-fetch-dest: document
36+
sec-fetch-mode: navigate
37+
sec-fetch-site: none
38+
sec-fetch-user: ?1
39+
upgrade-insecure-requests: '1'
40+
user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,
41+
like Gecko) Chrome/109.0.0.0 Safari/537.36

requirements-dev.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@ commonX
22
curl_cffi
33
PyYAML
44
Pillow
5+
6+
# for test
7+
pyperclip

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- service <--- option
44

5-
__version__ = '1.0.0'
5+
__version__ = '1.1.0'
66

77
from .api import *

src/jmcomic/api.py

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,11 @@ def download_album(jm_album_id, option=None):
1919
f'获得album_detail成功,准备下载。'
2020
f'本子作者是【{album_detail.author}】,一共有{len(album_detail)}集本子')
2121

22-
def download_photo(index, photo_detail: JmPhotoDetail, debug_topic='download_album_photo'):
23-
jm_client.fill_page_arr(photo_detail)
22+
def download_photo(index: int,
23+
photo_detail: JmPhotoDetail,
24+
debug_topic='download_album_photo',
25+
):
26+
jm_client.update(photo_detail)
2427

2528
jm_debug(debug_topic,
2629
f"下载第[{index + 1}]集: "
@@ -37,16 +40,16 @@ def download_photo(index, photo_detail: JmPhotoDetail, debug_topic='download_alb
3740
f"下载完成:({photo_detail.title}) "
3841
f"-- photo {photo_detail.photo_id}")
3942

40-
multi_thread_launcher(
43+
thread_pool_executor(
4144
iter_objs=enumerate(album_detail),
4245
apply_each_obj_func=download_photo,
43-
wait_finish=True,
4446
)
4547

4648

4749
def download_album_batch(jm_album_id_iter: Union[Iterable, Generator],
4850
option=None,
49-
wait_finish=True) -> List[Thread]:
51+
wait_finish=True,
52+
) -> List[Thread]:
5053
"""
5154
批量下载album,每个album一个线程,使用的是同一个option。
5255
@@ -58,7 +61,7 @@ def download_album_batch(jm_album_id_iter: Union[Iterable, Generator],
5861
if option is None:
5962
option = JmOption.default()
6063

61-
return multi_thread_launcher(
64+
return thread_pool_executor(
6265
iter_objs=((album_id, option) for album_id in jm_album_id_iter),
6366
apply_each_obj_func=download_album,
6467
wait_finish=wait_finish,
@@ -92,18 +95,17 @@ def download_by_photo_detail(photo_detail: JmPhotoDetail,
9295
jm_client.fill_from_album(photo_detail)
9396

9497
if photo_detail.page_arr is None:
95-
jm_client.fill_page_arr(photo_detail)
98+
jm_client.update(photo_detail)
9699

97100
# 下载每个图片的函数
98-
def download_image(index, debug_topic='download_images_of_photo'):
99-
img_detail = photo_detail[index]
101+
def download_image(index,img_detail, debug_topic='download_images_of_photo'):
100102
img_save_path = option.decide_image_filepath(photo_detail, index)
101103

102104
# 已下载过,缓存命中
103105
if use_cache is True and file_exists(img_save_path):
104106
jm_debug(debug_topic, f'photo-{img_detail.aid}: '
105107
f'图片{img_detail.filename}已下载过,'
106-
f'命中磁盘缓存({img_detail.img_url})')
108+
f'命中磁盘缓存({img_save_path})')
107109
return
108110

109111
# 开始下载
@@ -121,26 +123,15 @@ def download_image(index, debug_topic='download_images_of_photo'):
121123
if length <= option.download_multi_thread_photo_len_limit:
122124
# 如果图片数小的话,直接使用多线程下载,一张图一个线程。
123125
multi_thread_launcher(
124-
iter_objs=range(len(photo_detail)),
126+
iter_objs=enumerate(photo_detail),
125127
apply_each_obj_func=download_image,
126-
wait_finish=True
127128
)
128129
else:
129130
# 如果图片数多的话,还是分批下载。
130-
batch_count = option.download_multi_thread_photo_batch_count
131-
batch_times = length // batch_count
132-
133-
for i in range(batch_times):
134-
begin = i * batch_count
135-
multi_thread_launcher(
136-
iter_objs=range(begin, begin + batch_count),
137-
apply_each_obj_func=download_image,
138-
)
139-
140-
multi_thread_launcher(
141-
iter_objs=range(batch_times * batch_count,
142-
length),
131+
multi_task_launcher_batch(
132+
iter_objs=enumerate(photo_detail),
143133
apply_each_obj_func=download_image,
134+
batch_size=option.download_multi_thread_photo_batch_count
144135
)
145136

146137

src/jmcomic/jm_client.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33

44
class JmcomicClient(PostmanProxy):
5-
debug_from_class = 'jm-client'
65
retry_postman_type = RetryPostman
76

87
def __init__(self, postman: Postman, domain, retry_times=None):
@@ -98,11 +97,9 @@ def fill_from_album(self, photo_detail: JmPhotoDetail) -> JmAlbumDetail:
9897
photo_detail.from_album = album_detail
9998
return album_detail
10099

101-
def fill_page_arr(self, photo_detail: JmPhotoDetail):
100+
def update(self, photo_detail: JmPhotoDetail):
102101
new = self.get_photo_detail(photo_detail.photo_id)
103-
photo_detail.page_arr = new.page_arr
104-
photo_detail.data_original_domain = new.data_original_domain
105-
return new
102+
photo_detail.__dict__.update(new.__dict__)
106103

107104
# -- search --
108105

@@ -127,7 +124,7 @@ def jm_get(self, url, is_api=True, require_200=True, **kwargs):
127124
"""
128125
url = self.of_api_url(url) if is_api is True else url
129126
if is_api is True:
130-
self.debug("api", url)
127+
jm_debug("api", url)
131128

132129
resp = self.get(url, **kwargs)
133130

@@ -152,10 +149,6 @@ def jm_get(self, url, is_api=True, require_200=True, **kwargs):
152149
def is_empty_image(cls, resp):
153150
return resp.status_code != 200 or len(resp.content) == 0
154151

155-
@staticmethod
156-
def debug(topic: str, *args, sep='', end='\n', file=None, from_class=debug_from_class):
157-
jm_debug(topic, *args, sep=sep, end=end, file=file, from_class=from_class)
158-
159152
@classmethod
160153
def img_is_not_need_to_decode(cls, data_original: str, _resp):
161154
return data_original.endswith('.gif')

0 commit comments

Comments
 (0)