|
| 1 | +#!/usr/bin/env node |
| 2 | + |
/**
 * Build a fresh table of the command-line options understood by this script.
 *
 * Each entry holds the flag spelling (`option`), an optional `type`
 * (`'int'`, `'float'` or `'boolean'`; entries without a type are plain
 * strings) and the default `value`, which is overwritten while parsing.
 *
 * @returns {Object<string, {option: string, type?: string, value: *}>}
 */
function createOptions() {
  return {
    basePath: {
      option: '--base-path',
      value: 'public/pagefind/',
    },
    language: {
      option: '--language',
      value: 'en',
    },
    limit: {
      option: '--limit',
      type: 'int',
      value: 10,
    },
    termFrequency: {
      option: '--term-frequency',
      type: 'float',
      value: 0.1, // do not favor short pages
    },
    termSimilarity: {
      option: '--term-similarity',
      type: 'float',
      value: 9,
    },
    termSaturation: {
      option: '--term-saturation',
      type: 'float',
      value: 2,
    },
    pageLength: {
      option: '--page-length',
      type: 'float',
      value: 0.1,
    },
    json: {
      option: '--json',
      type: 'boolean',
      value: false,
    },
  }
}

/**
 * Store the raw command-line string `value` on `option`, converting it
 * according to `option.type`.
 *
 * @param {{option: string, type?: string, value: *}} option - Entry to update.
 * @param {string} value - Raw text taken from the command line.
 * @throws {Error} If a numeric option receives text that is not a number.
 */
function handleValue(option, value) {
  if (option.type !== 'int' && option.type !== 'float') {
    option.value = value
    return
  }
  const parsed = option.type === 'int'
    ? Number.parseInt(value, 10)
    : Number.parseFloat(value)
  // Reject NaN here: a NaN limit would otherwise make `slice(0, NaN)` return
  // no results at all without any diagnostic.
  if (Number.isNaN(parsed)) {
    throw new Error(`\`${option.option}\` requires a numeric argument, got \`${value}\``)
  }
  option.value = parsed
}

/**
 * Parse command-line arguments, updating `options` in place.
 *
 * Options are recognized only before the first search term; the first
 * argument that is not an option, and everything after it, is returned as
 * the list of search terms. Valued options accept both `--flag value` and
 * `--flag=value`; boolean options accept `--flag` and `--no-flag`.
 *
 * @param {string[]} argv - Arguments without the node binary and script path.
 * @param {Object<string, {option: string, type?: string, value: *}>} options
 *   Option table as produced by `createOptions()`.
 * @returns {string[]} The search terms (empty if none were given).
 * @throws {Error} On an unknown option, a missing option argument, or an
 *   invalid numeric value.
 */
function parseArgs(argv, options) {
  const specs = Object.values(options)
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i]
    let matched = false
    for (const spec of specs) {
      if (spec.type === 'boolean') {
        // The negated spelling is `--no-<name>`, so drop the leading dashes
        // of the flag before prefixing it.
        if (arg === spec.option) spec.value = true
        else if (arg === `--no-${spec.option.replace(/^--/, '')}`) spec.value = false
        else continue
      } else if (arg === spec.option) {
        // `--flag value`: consume the following argument as the value.
        if (++i >= argv.length) throw new Error(`\`${spec.option}\` requires an argument`)
        handleValue(spec, argv[i])
      } else if (arg.startsWith(`${spec.option}=`)) {
        // `--flag=value`
        handleValue(spec, arg.slice(spec.option.length + 1))
      } else continue
      matched = true
      break
    }
    if (matched) continue
    if (arg.startsWith('-')) throw new Error(`Unknown option: ${arg}`)
    // First non-option argument: it and everything after it are search terms.
    return argv.slice(i)
  }
  return []
}

// Entry point: parse the command line, load the pagefind bundle from disk,
// emulate the browser globals it uses, then run one search per term and print
// the results. Any error is written to stderr and the process exits with 1.
(async () => {
  const options = createOptions()
  const terms = parseArgs(process.argv.slice(2), options)

  if (terms.length === 0) throw new Error('No search terms provided')

  // Make `basePath` an absolute directory path ending in a slash, whether the
  // supplied path was relative or absolute, so pagefind always receives the
  // same form.
  // NOTE(review): assumes pagefind appends file names directly to `basePath`;
  // confirm against the pagefind documentation.
  const path = await import('path')
  const resolvedBase = path.resolve(process.cwd(), options.basePath.value)
  options.basePath.value = resolvedBase.endsWith('/') ? resolvedBase : `${resolvedBase}/`

  const pagefindPath = path.join(options.basePath.value, 'pagefind.js')
  const url = await import('url')
  const pagefindUrl = url.pathToFileURL(pagefindPath).href

  // Cannot use `await import('public/pagefind/pagefind.js')` because
  // `pagefind.js` does not have the `.mjs` file extension yet is an ESM
  // module. This would elicit the warning and error:
  //
  // Warning: To load an ES module, set "type": "module" in the package.json or use the .mjs extension.
  // [...]
  // SyntaxError: Cannot use 'import.meta' outside a module
  //
  // Instead, we load the file contents and dynamically import them via a data URL.
  const fs = await import('fs')
  const contents = await fs.promises.readFile(pagefindPath)
  const moduleUrl = `data:application/javascript;base64,${contents.toString('base64')}`
  // Dynamically import the module from the data URL
  const pagefind = await import(moduleUrl)

  // Emulate the globals that the `pagefind.js` script expects to find in the
  // browser environment.
  Object.assign(globalThis, {
    window: {
      location: {
        origin: ''
      }
    },
    document: {
      querySelector: () => {
        return {
          getAttribute: () => {
            return 'en'
          }
        }
      }
    },
    location: {
      href: pagefindUrl
    },
    // Minimal `fetch` replacement that serves requests from the local
    // filesystem: any query string is stripped and the remainder is read as a
    // file path.
    fetch: async resource => {
      const match = resource.match(/(.*)\?.*$/)
      const body = await fs.promises.readFile(match ? match[1] : resource)
      return {
        arrayBuffer: () => body,
        json: () => JSON.parse(body.toString('utf8')),
      }
    }
  })

  const pagefindOptions = {
    basePath: options.basePath.value,
    // Forward only the ranking parameters that have a value.
    ranking: ['termFrequency', 'termSaturation', 'termSimilarity', 'pageLength'].reduce((v, c) => {
      if (options[c].value !== undefined) v[c] = options[c].value
      return v
    }, {})
  }

  pagefind.options(pagefindOptions)
  await pagefind.init(options.language.value)

  for (const term of terms) {
    const response = await pagefind.debouncedSearch(term)
    // A limit below 1 means "no limit".
    const hits = options.limit.value < 1
      ? response.results
      : response.results.slice(0, options.limit.value)
    // Load every hit's data in parallel and remember its rank.
    await Promise.all(hits.map(async (item, index) => {
      item.index = index
      item.data = await item.data()
    }))
    console.log(`Results for ${term}:\n${options.json.value
      ? JSON.stringify(hits, null, 2)
      : hits.map(item => `${item.index + 1}. ${item.data.raw_url} (${item.score})`).join('\n')
    }`)
  }
})().catch(e => {
  process.stderr.write(`${e}\n`)
  process.exit(1)
})
0 commit comments