Skip to content

Commit 58662f9

Browse files
committed
fix(mangafreak): fix image extraction and simplify code
- Fix image URL extraction pattern to handle img tags with id attribute
- Use self.groups pattern instead of custom __init__ methods
- Fix chapter list extraction to use correct table structure
1 parent 8b0e8c6 commit 58662f9

File tree

1 file changed

+26
-50
lines changed

1 file changed

+26
-50
lines changed

gallery_dl/extractor/mangafreak.py

Lines changed: 26 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -25,35 +25,30 @@ class MangafreakChapterExtractor(MangafreakBase, ChapterExtractor):
2525
pattern = BASE_PATTERN + r"(/Read1_(.+)_(\d+[a-z]?))"
2626
example = "https://ww2.mangafreak.me/Read1_Onepunch_Man_1"
2727

28-
def __init__(self, match):
29-
ChapterExtractor.__init__(self, match, self.root + match.group(1))
30-
self.manga_slug, self.chapter = match.groups()[1:]
31-
3228
def metadata(self, page):
3329
extr = text.extract_from(page)
3430
manga = text.unescape(extr("<title>", " Chapter "))
3531
title = text.unescape(extr("", " - MangaFreak"))
36-
chapter_str = extr("# ", " MANGA ONLINE")
3732

38-
# Parse chapter number and minor suffix (e.g., "167e" -> chapter=167, minor="e")
39-
chapter, sep, minor = self.chapter.partition("e") if "e" in self.chapter else (self.chapter, "", "")
33+
chapter_str = self.groups[2]
34+
chapter, sep, minor = chapter_str.partition("e")
4035

4136
return {
4237
"manga" : manga,
4338
"title" : title,
4439
"chapter" : text.parse_int(chapter),
4540
"chapter_minor": sep + minor,
46-
"chapter_string": self.chapter,
47-
"manga_slug" : self.manga_slug,
41+
"chapter_string": chapter_str,
42+
"manga_slug" : self.groups[1],
4843
"lang" : "en",
4944
"language" : "English",
5045
}
5146

5247
def images(self, page):
53-
# Extract all <img> tags pointing to manga images
5448
return [
55-
(url, None)
56-
for url in text.extract_iter(page, '<img src="https://images.mangafreak.me/mangas/', '"')
49+
("https://images.mangafreak.me/mangas/" + path, None)
50+
for path in text.extract_iter(
51+
page, 'src="https://images.mangafreak.me/mangas/', '"')
5752
]
5853

5954

@@ -63,54 +58,35 @@ class MangafreakMangaExtractor(MangafreakBase, MangaExtractor):
6358
pattern = BASE_PATTERN + r"(/Manga/([^/?#]+))"
6459
example = "https://ww2.mangafreak.me/Manga/Onepunch_Man"
6560

66-
def __init__(self, match):
67-
MangaExtractor.__init__(self, match, self.root + match.group(1))
68-
self.manga_slug = match.group(2)
69-
7061
def chapters(self, page):
7162
extr = text.extract_from(page)
7263
manga = text.unescape(extr("<title>", " Manga"))
7364

74-
# Extract chapter list from table
75-
chapter_list = text.extr(page, "<tbody>", "</tbody>")
76-
if not chapter_list:
65+
chapter_table = text.extr(page, "<table>", "</table>")
66+
if not chapter_table:
7767
return []
7868

7969
data = {
80-
"manga" : manga,
81-
"manga_slug" : self.manga_slug,
82-
"lang" : "en",
83-
"language" : "English",
70+
"manga" : manga,
71+
"manga_slug": self.groups[1],
72+
"lang" : "en",
73+
"language" : "English",
8474
}
8575

8676
results = []
87-
for row in text.extract_iter(chapter_list, "<tr>", "</tr>"):
88-
# Extract chapter link and date from each row
89-
chapter_link = text.extr(row, '<a href="', '"')
90-
if not chapter_link:
77+
for row in text.extract_iter(chapter_table, "<tr>", "</tr>"):
78+
href = text.extr(row, '<a href="', '"')
79+
if not href:
9180
continue
92-
93-
# Build full URL if relative
94-
if chapter_link.startswith("/"):
95-
url = self.root + chapter_link
96-
else:
97-
url = self.root + "/" + chapter_link
98-
99-
# Parse chapter info from URL like /Read1_Onepunch_Man_167e
100-
chapter_part = url.rsplit("/", 1)[-1] # Read1_Onepunch_Man_167e
101-
if chapter_part.startswith("Read1_"):
102-
parts = chapter_part.split("_")
103-
if len(parts) >= 3:
104-
chapter_str = parts[-1]
105-
# Parse chapter number and minor suffix
106-
chapter, sep, minor = chapter_str.partition("e") if "e" in chapter_str else (chapter_str, "", "")
107-
108-
chapter_data = {
109-
"chapter" : text.parse_int(chapter),
110-
"chapter_minor": sep + minor,
111-
"chapter_string": chapter_str,
112-
**data,
113-
}
114-
results.append((url, chapter_data))
81+
url = self.root + href
82+
chapter_str = url.rpartition("_")[2]
83+
chapter, sep, minor = chapter_str.partition("e")
84+
85+
results.append((url, {
86+
"chapter" : text.parse_int(chapter),
87+
"chapter_minor": sep + minor,
88+
"chapter_string": chapter_str,
89+
**data,
90+
}))
11591

11692
return results

0 commit comments

Comments
 (0)