Skip to content

Commit 4e296d7

Browse files
authored
Add: detect loop in analyzer, bump dependencies (#402)
* Add: detect loop in analyzer
* Change: use skip page error
* Bump deps
1 parent ab1be03 commit 4e296d7

File tree

6 files changed, with 38 additions and 30 deletions.

comiccrawler/analyzer.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,11 @@ def analyze_pages(self):
102102
url = self.mission.url
103103
old_eps = EpisodeList(self.mission.episodes or ())
104104
new_eps = EpisodeList()
105-
105+
106+
visited_urls = set()
107+
106108
while True:
109+
visited_urls.add(url)
107110
try:
108111
eps = list(self.mission.module.get_episodes(self.html, url))
109112
except SkipPageError:
@@ -134,6 +137,8 @@ def analyze_pages(self):
134137
next_url = self.get_next_page(self.html, url)
135138
if not next_url:
136139
break
140+
if next_url in visited_urls:
141+
raise TypeError("Loop detected: {}".format(next_url))
137142
url = next_url
138143
print('Analyzing {}...'.format(url))
139144
sleep(getattr(self.mission.module, "rest_analyze", 0))

comiccrawler/mods/exh.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def has_login(html, page="ep"):
3636
return True
3737

3838
def check_login(html, page="ep"):
39+
# breakpoint()
3940
if not has_login(html, page):
4041
raise PauseDownloadError("You didn't login!")
4142

comiccrawler/mods/twitter.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from ..episode import Episode
1212
from ..grabber import grabber
1313
from ..url import update_qs
14-
from ..error import is_http, SkipEpisodeError
14+
from ..error import is_http, SkipEpisodeError, SkipPageError
1515
from ..session_manager import session_manager
1616
from ..util import get_cookie
1717

@@ -73,7 +73,7 @@ def get_episodes(html, url):
7373

7474
endpoint = user_media_graph if is_media(url) else user_tweets_graph
7575
next_page_cache[url] = endpoint(userId=uid)
76-
return
76+
raise SkipPageError
7777

7878
if any(k in url for k in ["UserTweets", "UserMedia"]):
7979
data = json.loads(html)
@@ -84,7 +84,7 @@ def get_episodes(html, url):
8484
extract_pin_entry(instruction["entry"], url)
8585

8686
if instruction["type"] == "TimelineAddEntries":
87-
yield from reversed(list(extract_added_entries(instruction["entries"], url)))
87+
return reversed(list(extract_added_entries(instruction["entries"], url)))
8888

8989
def tweet_result_to_episode(tweet_result):
9090
try:

requirements-lock.txt

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,45 @@
11
ansicon==1.89.0
2-
astroid==3.3.5
2+
astroid==3.3.9
33
belfrywidgets==1.0.3
44
bidict==0.23.1
55
blessed==1.20.0
66
Brotli==1.1.0
7-
certifi==2024.8.30
7+
certifi==2025.1.31
88
cffi==1.17.1
9-
charset-normalizer==3.4.0
9+
charset-normalizer==3.4.1
1010
colorama==0.4.6
11-
curl_cffi==0.7.3
11+
curl_cffi==0.10.0
1212
deno_vm==0.6.0
1313
desktop3==0.5.3
1414
dill==0.3.9
1515
docopt==0.6.2
1616
docutils==0.21.2
17-
enlighten==1.12.4
17+
enlighten==1.14.1
18+
id==1.5.0
1819
idna==3.10
1920
importlib_metadata==8.5.0
20-
isort==5.13.2
21+
isort==6.0.1
2122
jaraco.classes==3.4.0
2223
jaraco.context==6.0.1
2324
jaraco.functools==4.1.0
2425
jinxed==1.3.0
25-
keyring==25.5.0
26-
livereload==2.7.0
26+
keyring==25.6.0
27+
livereload==2.7.1
2728
markdown-it-py==3.0.0
2829
mccabe==0.7.0
2930
mdurl==0.1.2
30-
more-itertools==10.5.0
31-
nh3==0.2.18
31+
more-itertools==10.6.0
32+
nh3==0.2.21
3233
ordered-set==3.1.1
34+
packaging==24.2
3335
pkginfo==1.10.0
3436
platformdirs==4.3.6
3537
prefixed==0.9.0
3638
puremagic==1.28
3739
pycparser==2.22
3840
pycryptodomex==3.21.0
39-
Pygments==2.18.0
40-
pylint==3.3.1
41+
Pygments==2.19.1
42+
pylint==3.3.5
4143
pyperclip==1.9.0
4244
pythreadworker==0.10.0
4345
pywin32-ctypes==0.2.3
@@ -50,15 +52,15 @@ rich==13.9.4
5052
safeprint==0.2.0
5153
semver==2.13.0
5254
Send2Trash==1.8.3
53-
setuptools==75.5.0
54-
six==1.16.0
55+
setuptools==76.0.0
56+
six==1.17.0
5557
tomlkit==0.13.2
56-
tornado==6.4.1
57-
twine==5.1.1
58+
tornado==6.4.2
59+
twine==6.1.0
5860
typing_extensions==4.8.0
5961
uncurl==0.0.11
60-
urllib3==2.2.3
62+
urllib3==2.3.0
6163
wcwidth==0.2.13
6264
win_unicode_console==0.5
63-
yt-dlp==2024.11.4
65+
yt-dlp==2025.2.19
6466
zipp==3.21.0

requirements.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
certifi==2024.8.30
1+
certifi==2025.1.31
22
docutils==0.21.2
3-
pygments==2.18.0
4-
pylint==3.3.1
3+
pygments==2.19.1
4+
pylint==3.3.5
55
pyxcute==0.8.1
6-
twine==5.1.1
6+
twine==6.1.0

setup.cfg

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,18 @@ install_requires =
3131
belfrywidgets~=1.0
3232
bidict~=0.23.1
3333
brotli~=1.1
34-
curl_cffi~=0.7.3
34+
curl_cffi~=0.10.0
3535
deno-vm~=0.6.0
3636
desktop3~=0.5.3
3737
docopt~=0.6.2
38-
enlighten~=1.12
38+
enlighten~=1.14
3939
puremagic~=1.28
4040
pycryptodomex~=3.21
4141
pythreadworker~=0.10.0
4242
safeprint~=0.2.0
4343
uncurl~=0.0.11
44-
urllib3~=2.2
45-
yt-dlp~=2024.11
44+
urllib3~=2.3
45+
yt-dlp~=2025.2
4646

4747
python_requires = >=3.10
4848

0 commit comments

Comments
 (0)