|
1 | | -import { call, type Operation, resource, spawn, useAbortSignal } from "effection"; |
| 1 | +import { |
| 2 | + call, |
| 3 | + type Operation, |
| 4 | + resource, |
| 5 | + spawn, |
| 6 | + until, |
| 7 | + useAbortSignal, |
| 8 | +} from "effection"; |
2 | 9 | import { dirname, join, normalize } from "@std/path"; |
3 | 10 | import { ensureDir } from "@std/fs/ensure-dir"; |
4 | 11 | import { stringify } from "@libs/xml/stringify"; |
5 | 12 | import { fromHtml } from "hast-util-from-html"; |
6 | | -//import { toHtml } from "hast-util-to-html"; |
| 13 | +import { toHtml } from "hast-util-to-html"; |
7 | 14 | import { selectAll } from "hast-util-select"; |
8 | 15 | import { parse } from "@libs/xml/parse"; |
9 | 16 | import { useTaskBuffer } from "./task-buffer.ts"; |
@@ -43,7 +50,7 @@ export function* staticalize(options: StaticalizeOptions): Operation<void> { |
43 | 50 | return Array.isArray(urls) ? urls : [urls]; |
44 | 51 | }); |
45 | 52 |
|
46 | | - let downloader = yield* useDownloader({ host, outdir: dir }); |
| 53 | + let downloader = yield* useDownloader({ host, base, outdir: dir }); |
47 | 54 |
|
48 | 55 | yield* call(() => ensureDir(dir)); |
49 | 56 |
|
@@ -82,13 +89,14 @@ interface Downloader extends Operation<void> { |
82 | 89 |
|
83 | 90 | interface DownloaderOptions { |
84 | 91 | host: URL; |
| 92 | + base: URL; |
85 | 93 | outdir: string; |
86 | 94 | } |
87 | 95 |
|
88 | 96 | function useDownloader(opts: DownloaderOptions): Operation<Downloader> { |
89 | 97 | let seen = new Map<string, boolean>(); |
90 | 98 | return resource(function* (provide) { |
91 | | - let { host, outdir } = opts; |
| 99 | + let { host, base, outdir } = opts; |
92 | 100 |
|
93 | 101 | let buffer = yield* useTaskBuffer(75); |
94 | 102 |
|
@@ -116,27 +124,45 @@ function useDownloader(opts: DownloaderOptions): Operation<Downloader> { |
116 | 124 | if (response.ok) { |
117 | 125 | if (response.headers.get("Content-Type")?.includes("html")) { |
118 | 126 | let destpath = join(path, "index.html"); |
119 | | - let content = yield* call(() => response.text()); |
120 | | - let html = fromHtml(content); |
| 127 | + let content = yield* until(response.text()); |
| 128 | + let html = fromHtml(content); |
121 | 129 |
|
122 | 130 | let links = selectAll("link[href]", html); |
123 | 131 |
|
124 | 132 | for (let link of links) { |
125 | | - let href = link.properties.href as string |
| 133 | + let href = link.properties.href as string; |
126 | 134 | yield* downloader.download(href, source); |
| 135 | + |
| 136 | + // replace self-referencing absolute urls with the destination site |
| 137 | + if (href.startsWith(host.origin)) { |
| 138 | + let url = new URL(href); |
| 139 | + url.host = base.host; |
| 140 | + url.port = base.port; |
| 141 | + url.protocol = base.protocol; |
| 142 | + link.properties.href = url.href; |
| 143 | + } |
127 | 144 | } |
128 | | - |
| 145 | + |
129 | 146 | let assets = selectAll("[src]", html); |
130 | 147 |
|
131 | 148 | for (let element of assets) { |
132 | | - let src = element.properties.src as string; |
| 149 | + let src = element.properties.src as string; |
133 | 150 | yield* downloader.download(src, source); |
| 151 | + |
| 152 | + // replace self-referencing absolute urls with the destination site |
| 153 | + if (src.startsWith(host.origin)) { |
| 154 | + let url = new URL(src); |
| 155 | + url.host = base.host; |
| 156 | + url.port = base.port; |
| 157 | + url.protocol = base.protocol; |
| 158 | + element.properties.src = url.href; |
| 159 | + } |
134 | 160 | } |
135 | 161 |
|
136 | 162 | yield* call(async () => { |
137 | 163 | let destdir = dirname(destpath); |
138 | 164 | await ensureDir(destdir); |
139 | | - await Deno.writeTextFile(destpath, content); |
| 165 | + await Deno.writeTextFile(destpath, toHtml(html)); |
140 | 166 | }); |
141 | 167 | } else { |
142 | 168 | yield* call(async () => { |
|
0 commit comments