forked from yeorinhieut/novel-dl
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathscript.js
More file actions
226 lines (216 loc) · 9.38 KB
/
script.js
File metadata and controls
226 lines (216 loc) · 9.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
// Load the JSZip library by injecting a <script> tag that points at the cdnjs
// build into the page's <head>.
const script = Object.assign(document.createElement('script'), {
  src: 'https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js',
});
document.head.append(script);
/**
 * Fetch one episode page and return its cleaned plain-text body.
 *
 * @param {string} url - Address of the episode page to download.
 * @returns {Promise<string|null>} The result of `cleanText` applied to the
 *   inner HTML of the page's `#novel_content` element, or `null` when the
 *   response is not OK or the element is missing (both are logged with
 *   `console.error`).
 */
async function fetchNovelContent(url) {
  const res = await fetch(url);
  if (!res.ok) {
    console.error(`Failed to fetch content from ${url}. Status: ${res.status}`);
    return null;
  }

  const markup = await res.text();
  const pageDoc = new DOMParser().parseFromString(markup, 'text/html');
  const contentNode = pageDoc.querySelector('#novel_content');
  if (contentNode) {
    return cleanText(contentNode.innerHTML);
  }

  console.error(`Failed to find '#novel_content' element on the page: ${url}`);
  return null;
}
/**
 * Replace the HTML character references this script knows about with the
 * characters they stand for.
 *
 * All references are replaced in a single pass, so text produced by one
 * replacement is never unescaped a second time (`&amp;lt;` becomes `&lt;`,
 * not `<`).
 *
 * @param {string} text - Text that may contain HTML character references.
 * @returns {string} The text with every known reference replaced; unknown
 *   references are left untouched.
 */
function unescapeHTML(text) {
  // Typographic characters are written as \u escapes so the table survives
  // any re-encoding of this source file.
  const entities = {
    '&lt;': '<',
    '&gt;': '>',
    '&amp;': '&',
    '&quot;': '"',
    '&apos;': "'",
    '&#039;': "'",
    '&#39;': "'",
    '&nbsp;': ' ', // deliberately a plain space, not U+00A0
    '&ndash;': '\u2013',
    '&mdash;': '\u2014',
    '&lsquo;': '\u2018',
    '&rsquo;': '\u2019',
    '&ldquo;': '\u201C',
    '&rdquo;': '\u201D',
  };
  // None of the keys contain regex metacharacters, so they can be joined
  // directly into one alternation.
  const entityPattern = new RegExp(Object.keys(entities).join('|'), 'g');
  return text.replace(entityPattern, (entity) => entities[entity]);
}
/**
 * Convert an HTML fragment into plain text with one non-empty line per
 * paragraph or line break.
 *
 * Steps, in order: drop bare `<div>`/`</div>` tags, turn bare `<p>`, `</p>`
 * and `<br>` tags into newlines, strip every remaining tag, collapse runs of
 * spaces and newlines, unescape HTML entities via `unescapeHTML`, then trim
 * each line and discard the empty ones.
 *
 * @param {string} text - HTML markup to clean.
 * @returns {string} The cleaned text, lines joined with `\n`.
 */
function cleanText(text) {
  const withoutMarkup = text
    .replace(/<div>/g, '')
    .replace(/<\/div>/g, '')
    .replace(/<p>/g, '\n')
    .replace(/<\/p>/g, '\n')
    .replace(/<br\s*[/]?>/g, '\n')
    .replace(/<[^>]*>/g, '')
    .replace(/ {2,}/g, ' ')
    .replace(/\n{2,}/g, '\n');

  const keptLines = [];
  for (const rawLine of unescapeHTML(withoutMarkup).split('\n')) {
    const line = rawLine.trim();
    if (line.length > 0) {
      keptLines.push(line);
    }
  }

  return keptLines.join('\n').replace(/^\n+|\n+$/g, '');
}
/**
 * Build the (not yet attached) download-progress overlay.
 *
 * @returns {{modal: HTMLDivElement, modalContent: HTMLDivElement}} `modal` is
 *   the full-viewport translucent backdrop with id `downloadProgressModal`;
 *   `modalContent` is the centred white panel already appended inside it.
 */
function createModal() {
  const backdropStyle = {
    display: 'block',
    position: 'fixed',
    zIndex: '1',
    left: '0',
    top: '0',
    width: '100%',
    height: '100%',
    overflow: 'auto',
    backgroundColor: 'rgba(0,0,0,0.4)',
  };
  const panelStyle = {
    backgroundColor: '#fefefe',
    position: 'relative',
    margin: '15% auto 0',
    padding: '20px',
    border: '1px solid #888',
    width: '50%',
    textAlign: 'center',
  };

  const modal = document.createElement('div');
  modal.id = 'downloadProgressModal';
  Object.assign(modal.style, backdropStyle);

  const modalContent = document.createElement('div');
  Object.assign(modalContent.style, panelStyle);

  modal.appendChild(modalContent);
  return { modal, modalContent };
}
/**
 * Fetch one episode, giving the user a chance to solve a CAPTCHA and retry
 * once when the first attempt yields no content.
 *
 * @param {string} episodeUrl - Address of the episode page.
 * @returns {Promise<string|null>} The episode text, or `null` when the
 *   episode should be skipped (user declined, or the retry also failed).
 */
async function fetchEpisodeWithCaptchaRetry(episodeUrl) {
  const firstAttempt = await fetchNovelContent(episodeUrl);
  if (firstAttempt) {
    return firstAttempt;
  }

  console.error(`Failed to fetch content for episode: ${episodeUrl}`);
  const openConfirmed = confirm(`CAPTCHA detected on page!\n${episodeUrl}\nClick OK to open the link in a new tab and solve the CAPTCHA, then return here and click OK again to continue.`);
  if (!openConfirmed) {
    console.log("Download cancelled by user. Skipping this episode.");
    return null;
  }

  // Open a new tab with the CAPTCHA URL only after user clicks OK
  window.open(episodeUrl, '_blank');
  const retryConfirmed = confirm(`Please solve the CAPTCHA in the new tab. Click OK to continue downloading after solving it.`);
  if (!retryConfirmed) {
    console.log("Download cancelled by user. Skipping this episode.");
    return null;
  }

  const secondAttempt = await fetchNovelContent(episodeUrl);
  if (!secondAttempt) {
    console.error(`Failed to fetch content after CAPTCHA: ${episodeUrl}`);
    return null;
  }
  return secondAttempt;
}

/**
 * Download every episode from `startEpisode` onward and save them as a single
 * ZIP archive, showing a progress overlay while the download runs.
 *
 * @param {string} title - Novel title; used (sanitised) for the archive name
 *   and for each episode's file name inside the archive.
 * @param {string[]} episodeLinks - Episode URLs in reading order. Links that
 *   do not start with `https://booktoki` are skipped.
 * @param {number} startEpisode - 1-based number of the first episode to fetch.
 * @returns {Promise<void>} Resolves once the browser download was triggered.
 * @throws {Error} When JSZip never becomes available, or when fetching or
 *   archive generation fails; the overlay is removed in every case.
 */
async function downloadNovel(title, episodeLinks, startEpisode) {
  const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // JSZip is injected through a <script> tag and may still be loading; wait
  // for it (about 10 s at most) instead of failing with a ReferenceError.
  for (let attempt = 0; typeof JSZip === 'undefined'; attempt++) {
    if (attempt >= 100) {
      throw new Error('JSZip library failed to load.');
    }
    await delay(100);
  }

  // Path separators and other reserved characters in the title would create
  // nested folders inside the archive or an invalid download name.
  const safeTitle = title.replace(/[<>:"/\\|?*]/g, '_');

  const { modal, modalContent } = createModal();
  document.body.appendChild(modal);

  try {
    const progressBar = document.createElement('div');
    progressBar.style.width = '0%';
    progressBar.style.height = '10px';
    progressBar.style.backgroundColor = '#008CBA';
    progressBar.style.marginTop = '10px';
    progressBar.style.borderRadius = '3px';
    modalContent.appendChild(progressBar);

    const progressLabel = document.createElement('div');
    progressLabel.style.marginTop = '5px';
    modalContent.appendChild(progressLabel);

    const startTime = Date.now();
    const startingIndex = startEpisode - 1;
    const totalEpisodes = episodeLinks.length - startingIndex;
    const zip = new JSZip();

    for (let i = startingIndex; i < episodeLinks.length; i++) {
      const episodeUrl = episodeLinks[i];
      const episodeNumber = i + 1;

      if (!episodeUrl.startsWith('https://booktoki')) {
        console.log(`Skipping invalid episode link: ${episodeUrl}`);
        continue;
      }

      console.log(`Downloading: ${title} - Episode ${episodeNumber}/${episodeLinks.length}`);
      const episodeContent = await fetchEpisodeWithCaptchaRetry(episodeUrl);
      if (!episodeContent) {
        continue;
      }

      // Add to ZIP instead of individual downloads
      zip.file(`${safeTitle} - Episode ${episodeNumber}.txt`, episodeContent);

      const completed = i - startingIndex + 1;
      const progress = (completed / totalEpisodes) * 100;
      progressBar.style.width = `${progress}%`;

      // Linear extrapolation from the average time per processed episode.
      const elapsedTime = Date.now() - startTime;
      const estimatedTotalTime = (elapsedTime / completed) * totalEpisodes;
      const remainingTime = estimatedTotalTime - elapsedTime;
      const remainingMinutes = Math.floor(remainingTime / (1000 * 60));
      const remainingSeconds = Math.floor((remainingTime % (1000 * 60)) / 1000);
      progressLabel.textContent = `Downloading... ${progress.toFixed(2)}% - Time remaining: ${remainingMinutes}m ${remainingSeconds}s`;

      // Randomised 1-1.5 s pause between requests.
      await delay(Math.random() * 500 + 1000);
    }

    // Generate and download ZIP
    const zipContent = await zip.generateAsync({ type: 'blob' });
    const objectUrl = URL.createObjectURL(zipContent);
    const a = document.createElement('a');
    a.href = objectUrl;
    a.download = `${safeTitle}.zip`;
    a.click();
    // Release the blob once the browser has had time to start the download.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 10000);
  } finally {
    // Always take the overlay down, even when a fetch or the ZIP build throws.
    modal.remove();
  }

  console.log('All chapters downloaded successfully!');
}
// The functions below are unchanged from the original script.
/**
 * Read the novel title from the current page.
 *
 * @returns {string|null} Trimmed text of the first node matched by the XPath
 *   `//*[@id="content_wrapper"]/div[1]/span`, or `null` when nothing matches.
 */
function extractTitle() {
  const titleXPath = '//*[@id="content_wrapper"]/div[1]/span';
  const lookup = document.evaluate(
    titleXPath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null,
  );
  const node = lookup.singleNodeValue;
  if (!node) {
    return null;
  }
  return node.textContent.trim();
}
/**
 * Collect the `href` attribute of every `.item-subject` element on the
 * current page.
 *
 * @returns {Array<string|null>} The attribute values in document order; an
 *   entry is `null` when an element has no `href` attribute.
 */
function extractEpisodeLinks() {
  const anchors = document.querySelectorAll('.item-subject');
  return Array.from(anchors, (anchor) => anchor.getAttribute('href'));
}
/**
 * Fetch a page and parse it into a detached HTML document.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<Document|null>} The document produced by `DOMParser` from
 *   the response body, or `null` (after logging with `console.error`) when
 *   the response is not OK.
 */
async function fetchPage(url) {
  const res = await fetch(url);
  if (res.ok) {
    const markup = await res.text();
    return new DOMParser().parseFromString(markup, 'text/html');
  }

  console.error(`Failed to fetch page: ${url}. Status: ${res.status}`);
  return null;
}
/**
 * Entry point: gather every episode link of the novel whose episode list is
 * currently open, ask the user where to start, and download from there.
 *
 * The user is prompted for the number of list pages and for the starting
 * episode. Both answers must be whole numbers; anything else (including a
 * cancelled prompt, blank input, decimals or negatives) aborts with a console
 * message.
 *
 * @returns {Promise<void>} Resolves when the download has finished or the
 *   run was aborted; rejects if `downloadNovel` fails.
 */
async function runCrawler() {
  // Returns the integer for a digits-only answer, otherwise null. The global
  // isNaN() is not used because it treats ' ', '1.5' and '-3' as numeric.
  const parseWholeNumber = (input) => (
    typeof input === 'string' && /^\s*\d+\s*$/.test(input)
      ? parseInt(input, 10)
      : null
  );

  const novelPageRule = 'https://booktoki';
  const currentUrl = window.location.href.split('?')[0];
  if (!currentUrl.startsWith(novelPageRule)) {
    console.log('This script should be run on the novel episode list page.');
    return;
  }

  const title = extractTitle();
  if (!title) {
    console.log('Failed to extract the novel title.');
    return;
  }

  const totalPages = prompt(`Enter the number of pages in the episode list:\n(Enter 1 if less than 1000 episodes, 2 or more for 1000+ episodes)`, '1');
  const totalPagesNumber = parseWholeNumber(totalPages);
  if (totalPagesNumber === null || totalPagesNumber < 1) {
    console.log('Invalid page number or input cancelled.');
    return;
  }

  // Pages are walked from last to first and each page's links are reversed
  // before being appended to the combined list.
  const allEpisodeLinks = [];
  for (let page = totalPagesNumber; page >= 1; page--) {
    const nextPageUrl = `${currentUrl}?spage=${page}`;
    const nextPageDoc = await fetchPage(nextPageUrl);
    if (nextPageDoc) {
      const nextPageLinks = Array.from(nextPageDoc.querySelectorAll('.item-subject'))
        .map((link) => link.getAttribute('href'));
      allEpisodeLinks.push(...nextPageLinks.reverse());
    }
  }

  if (allEpisodeLinks.length === 0) {
    console.log('No episode links were found on the episode list page(s).');
    return;
  }

  const startEpisode = prompt(`Enter the starting episode number (1 to ${allEpisodeLinks.length}):`, '1');
  const startEpisodeNumber = parseWholeNumber(startEpisode);
  if (startEpisodeNumber === null) {
    console.log('Invalid episode number or input cancelled.');
    return;
  }
  if (startEpisodeNumber < 1 || startEpisodeNumber > allEpisodeLinks.length) {
    console.log('Invalid episode number. Please enter a number between 1 and the total number of episodes.');
    return;
  }

  console.log(`Starting download: ${title} from episode ${startEpisodeNumber}`);
  // Awaited so that a failed download rejects this function's promise instead
  // of becoming a detached, unhandled rejection.
  await downloadNovel(title, allEpisodeLinks, startEpisodeNumber);
}
// Start the crawler. runCrawler is async, so report any failure in the console
// rather than leaving an unhandled promise rejection.
runCrawler().catch((error) => {
  console.error('Novel download failed:', error);
});