Skip to content

Commit 6f6c5bb

Browse files
committed
fix layout
1 parent: ded0fd8 · commit: 6f6c5bb

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

4.cool18-Article2epub/c2epub.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -74,12 +74,16 @@ def extract_title(content, full=False):
7474
title = title.replace(" - cool18.com", "").replace("/",
7575
"-").replace("\\", "-").strip()
7676
else:
77-
title_search = re.search('[《](.*)[】》]', title, re.IGNORECASE)
77+
title_search = re.search('[《](.*?)[》]', title, re.IGNORECASE)
7878
if title_search:
7979
title = title_search.group(1)
8080
else:
81-
title = title.replace(
82-
" - cool18.com", "").replace("/", "-").replace("\\", "-").strip()
81+
title_search = re.search('[【](.*?)[】]', title, re.IGNORECASE)
82+
if title_search:
83+
title = title_search.group(1)
84+
else:
85+
title = title.replace(
86+
" - cool18.com", "").replace("/", "-").replace("\\", "-").strip()
8387

8488
return title
8589

@@ -162,9 +166,14 @@ def download(url):
162166
[s.extract() for s in content_soup('script')]
163167

164168
page_content = str(content_soup.find('body').getText())
165-
page_content = page_content.replace("\n", "")
169+
page_content = page_content.replace(os.linesep, "@@@@@@@@")
166170
page_content = page_content.replace(
167-
'cool18.com', '\n').replace('www.6park.com', '').replace('6park.com', '').replace("\n", "</p><p>").replace("<p></p>", "")
171+
'cool18.com', '@@@@@@@@').replace('www.6park.com', '').replace('6park.com', '')
172+
page_content = page_content.replace("@@@@@@@@@@@@@@@@", "</p><p>")
173+
page_content = page_content.replace("@@@@@@@@", "")
174+
page_content = page_content.replace(" ", "")
175+
page_content = page_content.replace(" ", "")
176+
168177
try:
169178
last_pos = page_content.rindex('评分完成')
170179
page_content = page_content[:last_pos]

0 commit comments

Comments (0)