Skip to content

Commit 62ef474

Browse files
committed
perf: replace css-tree with pw css-parser in get-css endpoint
1 parent 897e75b commit 62ef474

File tree

3 files changed

+42
-30
lines changed

3 files changed

+42
-30
lines changed

package-lock.json

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
"@projectwallace/css-code-quality": "^3.0.2",
2828
"@projectwallace/css-design-tokens": "^0.10.0",
2929
"@projectwallace/css-layer-tree": "^2.0.2",
30+
"@projectwallace/css-parser": "^0.8.1",
3031
"@projectwallace/format-css": "^2.1.1",
3132
"@sveltejs/enhanced-img": "0.4.4",
3233
"@sveltejs/kit": "^2.49.0",

src/routes/api/get-css/get-css.ts

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { parseHTML } from 'linkedom'
2-
import { parse, walk } from 'css-tree'
2+
import { parse, walk } from '@projectwallace/css-parser'
33
import { resolve_url } from '../../../lib/resolve-url.js'
44

55
export const USER_AGENT = 'Project Wallace CSS Scraper/1.1 (+https://www.projectwallace.com/docs/css-scraper)'
@@ -8,19 +8,22 @@ function is_wayback_url(url: string) {
88
return /^(?:(?:https:)?\/\/)?web\.archive\.org\/web\/\d{14}\/.+/.test(url)
99
}
1010

11+
/**
 * Strip a single leading and/or trailing quote character (`'` or `"`) from a string.
 *
 * Used to normalise the raw value of a `Url` node, which may still carry the
 * quotes from the stylesheet source (e.g. `"foo.css"` becomes `foo.css`).
 * Quotes that are not at the very start or end of the string are left alone.
 *
 * @param str - The possibly-quoted string.
 * @returns The string without its surrounding quote characters.
 */
function unquote(str: string): string {
	// The alternation matters: the previous pattern `(^['"])(['"]$)` demanded that the
	// opening quote be directly followed by the closing quote, so it only ever
	// matched a string consisting of exactly two quote characters.
	return str.replace(/^['"]|['"]$/g, '')
}
14+
1115
function get_import_urls(css: string) {
16+
let urls: string[] = []
1217
let ast = parse(css, {
13-
parseAtrulePrelude: false,
14-
parseRulePrelude: false,
15-
parseValue: false,
16-
parseCustomProperty: false
18+
parse_selectors: false,
19+
parse_values: false
1720
})
18-
let urls: string[] = []
19-
20-
walk(ast, function (node) {
21-
// Can not be a URL inside something else because otherwise this.atrule could never be an import
22-
if (node.type === 'Url' && this.atrule?.name === 'import') {
23-
urls.push(node.value)
21+
walk(ast, (node) => {
22+
if (node.type_name === 'Atrule' && node.name === 'import') {
23+
let url = node.children.find((child) => child.type_name === 'Url')
24+
if (url) {
25+
urls.push(unquote(url.value as string))
26+
}
2427
}
2528
})
2629
return urls
@@ -31,17 +34,17 @@ async function get_css_file(url: string | URL, abort_signal: AbortSignal) {
3134
let response = await fetch(url, {
3235
headers: {
3336
'User-Agent': USER_AGENT,
34-
'Accept': 'text/css,*/*;q=0.1'
37+
Accept: 'text/css,*/*;q=0.1'
3538
},
3639
// If aborted early try to return an empty string so we can continue with just the content we have
37-
signal: abort_signal,
40+
signal: abort_signal
3841
})
3942

4043
if (!response.ok) {
4144
throw new Error(response.statusText)
4245
}
4346
return response.text()
44-
} catch (error: unknown) {
47+
} catch {
4548
return ''
4649
}
4750
}
@@ -66,7 +69,7 @@ function get_styles(nodes: NodeListOf<Element>, base_url: string) {
6669
items.push({
6770
type: 'style',
6871
css,
69-
url: base_url,
72+
url: base_url
7073
})
7174
} else if (node.hasAttribute('style')) {
7275
let declarations = (node.getAttribute('style') || '').trim()
@@ -84,15 +87,15 @@ function get_styles(nodes: NodeListOf<Element>, base_url: string) {
8487
class_name += '.'
8588
class_name += class_attr
8689
.split(/\s+/g)
87-
.filter(s => {
90+
.filter((s) => {
8891
if (s.length === 0) return false
8992
if (s.length === 1) {
9093
let code = s.charCodeAt(0)
9194
if (code < 48 || code > 122) return false
9295
}
9396
return true
9497
})
95-
.map(s => s.replaceAll(/(\[|\]|:|\.|\/)/g, '\\$1'))
98+
.map((s) => s.replaceAll(/(\[|\]|:|\.|\/)/g, '\\$1'))
9699
.join('.')
97100
}
98101
let node_name = node.nodeName.toLocaleLowerCase()
@@ -115,9 +118,7 @@ function get_styles(nodes: NodeListOf<Element>, base_url: string) {
115118
return items
116119
}
117120

118-
export async function get_css(url: string, {
119-
timeout = 10000,
120-
} = {}) {
121+
export async function get_css(url: string, { timeout = 10000 } = {}) {
121122
let resolved_url = resolve_url(url)
122123

123124
if (resolved_url === undefined) {
@@ -140,7 +141,7 @@ export async function get_css(url: string, {
140141
signal: abort_controller.signal,
141142
headers: {
142143
'User-Agent': USER_AGENT,
143-
'Accept': 'text/html,*/*;q=0.1'
144+
Accept: 'text/html,*/*;q=0.1'
144145
}
145146
})
146147

@@ -160,23 +161,24 @@ export async function get_css(url: string, {
160161
error: {
161162
url,
162163
statusCode: 403,
163-
message: "The origin server responded with a 403 Forbidden status code which means that scraping CSS is blocked. Is the URL publicly accessible?"
164+
message:
165+
'The origin server responded with a 403 Forbidden status code which means that scraping CSS is blocked. Is the URL publicly accessible?'
164166
}
165167
}
166168
}
167169

168170
// Examples: localhost, sduhsdf.test
169171
if (error.message === 'fetch failed') {
170-
let message = "The origin server is refusing connections."
172+
let message = 'The origin server is refusing connections.'
171173
if (url.includes('localhost') || url.includes('192.168') || url.includes('127.0.0.1')) {
172-
message += " You are trying to scrape a local server. Make sure to use a public URL."
174+
message += ' You are trying to scrape a local server. Make sure to use a public URL.'
173175
}
174176

175177
return {
176178
error: {
177179
url,
178180
statusCode: 400,
179-
message,
181+
message
180182
}
181183
}
182184
}
@@ -187,7 +189,7 @@ export async function get_css(url: string, {
187189
error: {
188190
url,
189191
statusCode: 404,
190-
message: "The origin server responded with a 404 Not Found status code."
192+
message: 'The origin server responded with a 404 Not Found status code.'
191193
}
192194
}
193195
}
@@ -198,7 +200,7 @@ export async function get_css(url: string, {
198200
error: {
199201
url,
200202
statusCode: 500,
201-
message: 'something went wrong',
203+
message: 'something went wrong'
202204
}
203205
}
204206
}
@@ -238,7 +240,8 @@ export async function get_css(url: string, {
238240

239241
let nodes = document.querySelectorAll('link[rel*="stylesheet"][href], style, [style]')
240242
let baseElement = document.querySelector('base[href]')
241-
let baseUrl = (baseElement !== null && baseElement.hasAttribute('href')) ? baseElement.getAttribute('href') : resolved_url
243+
let baseUrl =
244+
baseElement !== null && baseElement.hasAttribute('href') ? baseElement.getAttribute('href') : resolved_url
242245
let items = get_styles(nodes, baseUrl?.toString() || '') || []
243246
let result = []
244247

@@ -271,7 +274,9 @@ export async function get_css(url: string, {
271274
// And c'mon, don't @import inside your @import.
272275
let importUrls = get_import_urls(item.css)
273276
if (importUrls.length > 0) {
274-
let cssRequests = importUrls.map((importUrl) => get_css_file(resolve_url(importUrl, url)!, abort_controller.signal))
277+
let cssRequests = importUrls.map((importUrl) =>
278+
get_css_file(resolve_url(importUrl, url)!, abort_controller.signal)
279+
)
275280
let importedFiles = await Promise.all(cssRequests)
276281
importedFiles.forEach((css, index) => {
277282
result.push({
@@ -284,7 +289,6 @@ export async function get_css(url: string, {
284289
}
285290
}
286291

287-
288292
clearTimeout(timeout_id)
289293

290294
return result

0 commit comments

Comments
 (0)