Skip to content

Commit b934d2f

Browse files
committed
Add a helper script to run Pagefind
Nominally, `pagefind.js` can only be used on a website. But with a few hacks, as described in Pagefind/pagefind#175 (reply in thread), it can also be run via a node.js script on the command-line. This script will be invaluable in the CI builds, to guarantee that certain search results appear on top (such as `docs/git-log` for the search term "log"). Signed-off-by: Johannes Schindelin <[email protected]>
1 parent 8caabdf commit b934d2f

File tree

1 file changed

+158
-0
lines changed

1 file changed

+158
-0
lines changed

script/run-pagefind.js

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
#!/usr/bin/env node
2+
3+
/**
 * Command-line driver for the browser-oriented `pagefind.js` search bundle.
 *
 * Usage: run-pagefind.js [options] <term>...
 *
 * Options are read until the first argument that matches no known option;
 * that argument and everything after it are treated as search terms. Every
 * term is searched separately and its results are printed to stdout.
 */

/**
 * Build a fresh table of the supported options with their default values.
 *
 * Each entry has the command-line spelling (`option`), an optional `type`
 * (`'int'`, `'float'` or `'boolean'`; anything else is kept as a string) and
 * the current `value`, which `parseArgs()` overwrites in place.
 *
 * @returns {Record<string, {option: string, type?: string, value: *}>}
 */
function createOptions() {
  return {
    basePath: {
      option: '--base-path',
      value: 'public/pagefind/',
    },
    language: {
      option: '--language',
      value: 'en',
    },
    limit: {
      option: '--limit',
      type: 'int',
      value: 10
    },
    termFrequency: {
      option: '--term-frequency',
      type: 'float',
      value: 0.1 // do not favor short pages
    },
    termSimilarity: {
      option: '--term-similarity',
      type: 'float',
      value: 9
    },
    termSaturation: {
      option: '--term-saturation',
      type: 'float',
      value: 2
    },
    pageLength: {
      option: '--page-length',
      type: 'float',
      value: 0.1
    },
    json: {
      option: '--json',
      type: 'boolean',
      value: false
    }
  }
}

/**
 * Convert a raw command-line string according to `option.type` and store it
 * in `option.value`.
 *
 * @param {{option: string, type?: string, value: *}} option entry to update
 * @param {string} value raw text taken from the command line
 * @throws {Error} if a numeric option receives text that is not a number
 */
function handleValue(option, value) {
  if (option.type !== 'int' && option.type !== 'float') {
    option.value = value
    return
  }
  // Always pass the radix so that the input is read as a decimal number.
  const parsed = option.type === 'int' ? Number.parseInt(value, 10) : Number.parseFloat(value)
  // Fail loudly instead of letting NaN travel into the search parameters.
  if (Number.isNaN(parsed)) throw new Error(`\`${option.option}\` requires a numeric argument, got \`${value}\``)
  option.value = parsed
}

/**
 * Parse the command-line arguments (without the node binary and script name).
 *
 * Boolean options are switched on by `--name` and off by `--no-name`. All
 * other options accept both `--name value` and `--name=value`.
 *
 * @param {string[]} argv arguments to parse
 * @param {Record<string, {option: string, type?: string, value: *}>} options
 *   option table as returned by `createOptions()`; updated in place
 * @returns {string[]} the search terms (empty if there are none)
 * @throws {Error} on an unknown option, a missing value or a bad number
 */
function parseArgs(argv, options) {
  const candidates = Object.values(options)
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i]
    let matched = false
    for (const option of candidates) {
      if (option.type === 'boolean') {
        // `option.option` already starts with `--`; strip it so that the
        // negated spelling is `--no-json` rather than `--no---json`.
        const negated = `--no-${option.option.replace(/^--/, '')}`
        if (arg === option.option) option.value = true
        else if (arg === negated) option.value = false
        else continue
      } else if (arg === option.option) {
        if (++i >= argv.length) throw new Error(`\`${option.option}\` requires an argument`)
        handleValue(option, argv[i])
      } else if (arg.startsWith(`${option.option}=`)) {
        handleValue(option, arg.slice(option.option.length + 1))
      } else continue
      matched = true
      break
    }
    if (matched) continue
    if (arg.startsWith('-')) throw new Error(`Unknown option: ${arg}`)
    // First non-option argument: it and everything after it are terms.
    return argv.slice(i)
  }
  return []
}

/**
 * Create a minimal stand-in for the browser's `fetch()` that reads local
 * files through the given `fs` module.
 *
 * Anything from the last `?` onwards is dropped from the requested URL and
 * the remainder is used as a file path. The returned object only offers
 * `arrayBuffer()` and `json()`.
 *
 * @param {{promises: {readFile: function(string): Promise<Buffer>}}} fs
 * @returns {function(string): Promise<{arrayBuffer: function(): Promise<ArrayBuffer>, json: function(): Promise<*>}>}
 */
function createFileFetch(fs) {
  return async url => {
    const match = url.match(/(.*)\?.*$/)
    const contents = await fs.promises.readFile(match ? match[1] : url)
    return {
      // Hand out a real ArrayBuffer holding exactly the bytes of the file.
      arrayBuffer: async () => contents.buffer.slice(contents.byteOffset, contents.byteOffset + contents.byteLength),
      json: async () => JSON.parse(contents.toString('utf8')),
    }
  }
}

/**
 * Render the results of one search term as the text printed to stdout.
 *
 * @param {string} term the search term
 * @param {Array<{index: number, score: *, data: {raw_url: *}}>} results
 *   results whose `index` and `data` have already been filled in
 * @param {boolean} asJson emit pretty-printed JSON instead of a numbered list
 * @returns {string}
 */
function formatResults(term, results, asJson) {
  const body = asJson
    ? JSON.stringify(results, null, 2)
    : results.map(item => `${item.index + 1}. ${item.data.raw_url} (${item.score})`).join('\n')
  return `Results for ${term}:\n${body}`
}

/**
 * Entry point: parse the command line, load `pagefind.js`, emulate the
 * browser globals it reads, then search for every term and print the results.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const options = createOptions()
  const terms = parseArgs(process.argv.slice(2), options)

  if (terms.length === 0) throw new Error('No search terms provided')

  // Make `basePath` an absolute path
  const path = await import('path')
  if (!path.isAbsolute(options.basePath.value)) options.basePath.value = `${path.resolve(process.cwd(), options.basePath.value)}/`

  const pagefindPath = path.join(options.basePath.value, 'pagefind.js')
  const url = await import('url')
  const pagefindUrl = url.pathToFileURL(pagefindPath).href

  // Cannot use `await import('public/pagefind/pagefind.js')` because
  // `pagefind.js` does not have the `.mjs` file extension yet is an ESM
  // module. This would elicit the warning and error:
  //
  // Warning: To load an ES module, set "type": "module" in the package.json or use the .mjs extension.
  // [...]
  // SyntaxError: Cannot use 'import.meta' outside a module
  //
  // Instead, we load the file contents and dynamically import them via a data URL.
  const fs = await import('fs')
  const contents = await fs.promises.readFile(pagefindPath)
  const moduleUrl = `data:application/javascript;base64,${contents.toString('base64')}`
  // Dynamically import the module from the data URL
  const pagefind = await import(moduleUrl)

  // Emulate the globals that the `pagefind.js` script expects to find in the
  // browser environment.
  Object.assign(globalThis, {
    window: {
      location: {
        origin: ''
      }
    },
    document: {
      // Every selector resolves to an element whose attributes all read 'en'.
      querySelector: () => {
        return {
          getAttribute: () => {
            return 'en'
          }
        }
      }
    },
    location: {
      href: pagefindUrl
    },
    fetch: createFileFetch(fs)
  })

  // Only forward ranking parameters that actually have a value.
  const ranking = {}
  for (const key of ['termFrequency', 'termSaturation', 'termSimilarity', 'pageLength']) {
    if (options[key].value !== undefined) ranking[key] = options[key].value
  }
  const pagefindOptions = {
    basePath: options.basePath.value,
    ranking
  }

  // NOTE(review): the return value is deliberately left un-awaited, exactly
  // as before; confirm against the Pagefind API whether it should be awaited.
  pagefind.options(pagefindOptions)
  await pagefind.init(options.language.value)

  for (const term of terms) {
    const response = await pagefind.debouncedSearch(term)
    // A limit below 1 means "no limit".
    const results = options.limit.value < 1 ? response.results : response.results.slice(0, options.limit.value)
    // Load the data of all results in parallel and remember their rank.
    await Promise.all(results.map(async (item, index) => {
      item.index = index
      item.data = await item.data()
    }))
    console.log(formatResults(term, results, options.json.value))
  }
}

main().catch(e => {
  process.stderr.write(`${e}\n`)
  process.exit(1)
})

0 commit comments

Comments
 (0)