Skip to content

Commit 4f768cf

Browse files
committed
fix crawlers
1 parent 814815e commit 4f768cf

File tree

1 file changed

+60
-45
lines changed

1 file changed

+60
-45
lines changed

public/index.js

Lines changed: 60 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -113,61 +113,75 @@ const getAllCharacterNames = async () => {
113113
return validLinks
114114
}
115115
const getAllSfxs = async (url) => {
116-
const response = await fetch(url)
117-
if (response.status === 404) {
118-
return
119-
}
120-
121-
// Create a fake webpage
122-
const websiteDirectoryPage = await response.text()
123-
const tempPage = document.createElement("html");
124-
tempPage.innerHTML = websiteDirectoryPage;
125-
126-
const tags = tempPage.getElementsByTagName('a')
127-
var validLinks = []
128-
for (const link of tags) {
129-
const aTagValue = link.getAttribute('href')
130-
if (IGNORE_VALUES.has(link.innerHTML) || aTagValue == "music/") {
131-
continue
116+
try {
117+
const files = [];
118+
119+
if (url === `${BASE_SOUNDS_URL}` || url === `${BASE_SOUNDS_URL}`.replace(/\/+$/,'')) {
120+
const general = await crawl(`${BASE_SOUNDS_URL}general/`, 0, 99);
121+
const blips = await crawl(`${BASE_SOUNDS_URL}blips/`, 0, 99);
122+
if (Array.isArray(general)) files.push(...general);
123+
if (Array.isArray(blips)) files.push(...blips);
124+
} else {
125+
const res = await crawl(url, 0, 99);
126+
if (Array.isArray(res)) files.push(...res);
132127
}
133128

134-
// Crawl all directories,
135-
if (aTagValue.endsWith('/')) {
136-
const extraLinks = await getAllSfxs(url + aTagValue);
137-
if (extraLinks != null)
138-
validLinks = validLinks.concat(extraLinks);
139-
} else
140-
validLinks.push(decodeURI(url + aTagValue));
129+
const filtered = Array.from(new Set(files
130+
.filter(u => typeof u === 'string')
131+
.filter(u => !u.endsWith('/'))
132+
.map(u => decodeURI(u))
133+
));
134+
135+
return filtered;
136+
} catch (e) {
137+
console.error(`Error getting sfxs from ${url}:`, e);
138+
return [];
141139
}
142-
return validLinks
143140
}
144141

145142
const getAllBackgroundNames = async () => {
146-
const response = await fetch(`${BASE_BACKGROUND_URL}`)
147-
if (response.status === 404) {
148-
return
149-
}
143+
try {
144+
const response = await fetch(`${BASE_BACKGROUND_URL}`)
145+
if (!response.ok) return [];
150146

151-
// Create a fake webpage
152-
const websiteDirectoryPage = await response.text()
153-
const tempPage = document.createElement("html");
154-
tempPage.innerHTML = websiteDirectoryPage;
147+
// Create a fake webpage
148+
const websiteDirectoryPage = await response.text()
149+
const tempPage = document.createElement("html");
150+
tempPage.innerHTML = websiteDirectoryPage;
155151

156-
const tags = tempPage.getElementsByTagName('a')
157-
const validLinks = []
158-
for (const link of tags) {
159-
const aTagValue = link.getAttribute('href')
160-
if (IGNORE_VALUES.has(link.innerHTML)) {
161-
continue
152+
const tags = tempPage.getElementsByTagName('a')
153+
const validLinks = []
154+
const baseUrlObj = new URL(BASE_BACKGROUND_URL);
155+
const basePath = baseUrlObj.pathname;
156+
157+
for (const link of tags) {
158+
const href = link.getAttribute('href')
159+
if (!href) continue;
160+
if (IGNORE_VALUES.has(link.innerHTML)) continue;
161+
162+
try {
163+
const urlObj = new URL(href, BASE_BACKGROUND_URL);
164+
165+
// Only include directories that are inside the base path
166+
if (!urlObj.pathname.startsWith(basePath)) continue;
167+
168+
if (href.endsWith('/')) {
169+
// Extract the folder name relative to the base path
170+
const relative = urlObj.pathname.slice(basePath.length).replace(/\/+$/,'')
171+
if (relative && relative !== '..') {
172+
validLinks.push(decodeURI(relative))
173+
}
174+
}
175+
} catch (e) {
176+
console.log(`Invalid URL in backgrounds index: ${href}`)
177+
}
162178
}
163-
164-
// Crawl all directories,
165-
if (aTagValue.endsWith('/')) {
166-
validLinks.push(decodeURI(aTagValue.slice(0,-1)))
167-
}
168-
179+
180+
return validLinks
181+
} catch (e) {
182+
console.error('Error fetching background names:', e)
183+
return []
169184
}
170-
return validLinks
171185
}
172186

173187
const failureText = document.getElementById('downloadFeedback')
@@ -192,6 +206,7 @@ export const getCharacterUrls = async () => {
192206
const charIni = ini.parse(text.toLowerCase());
193207
console.log(charIni);
194208
const blip = (charIni.options.blips != null) ? charIni.options.blips : (charIni.options.gender != null) ? charIni.options.gender : null;
209+
195210
if (blip !== null && window.sfx.find((element) => element.includes(blip)))
196211
validUrls.push(`${BASE_SOUNDS_URL}` + "blips/" + blip + ".opus");
197212

0 commit comments

Comments
 (0)