1- /// <reference types="@cloudflare/workers-types" />
21import { TextEncoder } from "util" ;
32import { Response } from "@mrbbot/node-fetch" ;
4- import { ReadableStream } from "web-streams-polyfill/ponyfill/es6" ;
5- // This import relies on dist having the same structure as src
63import {
7- HTMLRewriter as LOLHTMLRewriter ,
8- registerPromise ,
9- } from "../../vendor/lol-html" ;
10- import { Mutex } from "../kv/helpers" ;
11- import { ProcessedOptions } from "../options" ;
4+ HTMLRewriter as BaseHTMLRewriter ,
5+ Comment ,
6+ ContentTypeOptions ,
7+ Doctype ,
8+ DocumentEnd ,
9+ DocumentHandlers ,
10+ Element ,
11+ ElementHandlers ,
12+ TextChunk ,
13+ } from "html-rewriter-wasm" ;
14+ import { ReadableStream } from "web-streams-polyfill/ponyfill/es6" ;
1215import { Context , Module } from "./module" ;
1316
14- function wrapHandler < T > ( handler ?: ( arg : T ) => void | Promise < void > ) {
15- if ( handler === undefined ) return undefined ;
16- return function ( arg : T ) {
17- const result = handler ( arg ) ;
18- // If this handler is async and returns a promise, register it and return
19- // its handle so it can be awaited later in WebAssembly
20- if ( typeof result === "object" && typeof result . then === "function" ) {
21- return registerPromise ( result ) ;
22- }
23- // Otherwise, return 0 to signal there's nothing to await
24- return 0 ;
25- } ;
26- }
27-
2817// Based on https://developer.mozilla.org/en-US/docs/Web/API/TransformStream#anything-to-uint8array_stream
2918const encoder = new TextEncoder ( ) ;
3019// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
@@ -49,54 +38,28 @@ export function transformToArray(chunk: any): Uint8Array {
4938 }
5039}
5140
52- /* The WebAssembly version of lol-html used by Miniflare uses asyncify for
53- * async handlers. When a handler returns a promise, the WebAssembly stack is
54- * stored in temporary storage, the promise is awaited, then the stack is
55- * restored and WebAssembly execution continues where it left off. This
56- * temporary storage is currently per module instance and we only have a single
57- * instance because of how wasm-pack generates package code for NodeJS.
58- * TODO: ideally, we would allocate each transform call its own temporary
59- * space for the saved stack.
60- *
61- * This means if you have multiple concurrent transforms in progress, the saved
62- * stacks will be overwritten and lol-html will be unhappy. Therefore, to be
63- * "safe", we need to make sure only one transform operation is in-progress at
64- * any time, hence the mutex.
65- *
66- * However, this problem only occurs when using async handlers with concurrent
67- * transforms. If just using sync handlers, or not doing multiple rewrites
68- * concurrently (very likely), there's no need for the mutex, so we can use the
69- * "unsafe" version. The "safe" version is the default just so people don't see
70- * confusing errors. See the docs for concrete examples of where the "unsafe"
71- * version can be used.
72- */
73-
74- const wasmModuleMutex = new Mutex ( ) ;
75- // Symbol gives us "protected" method only accessible/overridable by subclass
76- const runCriticalSectionSymbol = Symbol ( "HTMLRewriter runCriticalSection" ) ;
77-
78- export class UnsafeHTMLRewriter {
41+ export class HTMLRewriter {
7942 #elementHandlers: [ selector : string , handlers : any ] [ ] = [ ] ;
8043 #documentHandlers: any [ ] = [ ] ;
8144
82- on ( selector : string , handlers : Partial < ElementHandler > ) : this {
45+ on ( selector : string , handlers : ElementHandlers ) : this {
8346 // Ensure `this` is bound correctly (handlers are passed through unwrapped; returned promises are no longer registered here)
8447 const wrappedHandlers = {
85- element : wrapHandler ( handlers . element ?. bind ( handlers ) ) ,
86- comments : wrapHandler ( handlers . comments ?. bind ( handlers ) ) ,
87- text : wrapHandler ( handlers . text ?. bind ( handlers ) ) ,
48+ element : handlers . element ?. bind ( handlers ) ,
49+ comments : handlers . comments ?. bind ( handlers ) ,
50+ text : handlers . text ?. bind ( handlers ) ,
8851 } ;
8952 this . #elementHandlers. push ( [ selector , wrappedHandlers ] ) ;
9053 return this ;
9154 }
9255
93- onDocument ( handlers : Partial < DocumentHandler > ) : this {
56+ onDocument ( handlers : DocumentHandlers ) : this {
9457 // Ensure `this` is bound correctly (handlers are passed through unwrapped; returned promises are no longer registered here)
9558 const wrappedHandlers = {
96- doctype : wrapHandler ( handlers . doctype ?. bind ( handlers ) ) ,
97- comments : wrapHandler ( handlers . comments ?. bind ( handlers ) ) ,
98- text : wrapHandler ( handlers . text ?. bind ( handlers ) ) ,
99- end : wrapHandler ( handlers . end ?. bind ( handlers ) ) ,
59+ doctype : handlers . doctype ?. bind ( handlers ) ,
60+ comments : handlers . comments ?. bind ( handlers ) ,
61+ text : handlers . text ?. bind ( handlers ) ,
62+ end : handlers . end ?. bind ( handlers ) ,
10063 } ;
10164 this . #documentHandlers. push ( wrappedHandlers ) ;
10265 return this ;
@@ -108,7 +71,7 @@ export class UnsafeHTMLRewriter {
10871 start : async ( controller ) => {
10972 // Create a rewriter instance for this transformation that writes its
11073 // output to the transformed response's stream
111- const rewriter = new LOLHTMLRewriter ( ( output : Uint8Array ) => {
74+ const rewriter = new BaseHTMLRewriter ( ( output : Uint8Array ) => {
11275 if ( output . length === 0 ) {
11376 // Free the rewriter once it's finished doing its thing
11477 queueMicrotask ( ( ) => rewriter . free ( ) ) ;
@@ -125,41 +88,32 @@ export class UnsafeHTMLRewriter {
12588 rewriter . onDocument ( handlers ) ;
12689 }
12790
128- await this [ runCriticalSectionSymbol ] ( async ( ) => {
129- // Transform the response body (may be null if empty)
130- if ( response . body ) {
131- for await ( const chunk of response . body ) {
132- await rewriter . write ( transformToArray ( chunk ) ) ;
133- }
91+ // Transform the response body (may be null if empty)
92+ if ( response . body ) {
93+ for await ( const chunk of response . body ) {
94+ await rewriter . write ( transformToArray ( chunk ) ) ;
13495 }
135- await rewriter . end ( ) ;
136- } ) ;
96+ }
97+ await rewriter . end ( ) ;
13798 } ,
13899 } ) ;
139100
140101 // Return a response with the transformed body, copying over headers, etc
141102 return new Response ( transformedStream , response ) ;
142103 }
143-
144- [ runCriticalSectionSymbol ] ( closure : ( ) => Promise < void > ) : Promise < void > {
145- return closure ( ) ;
146- }
147- }
148-
149- // See big comment above for what this does and why it's needed. It's possible
150- // we'll remove this distinction in the future.
151- export class HTMLRewriter extends UnsafeHTMLRewriter {
152- [ runCriticalSectionSymbol ] ( closure : ( ) => Promise < void > ) : Promise < void > {
153- return wasmModuleMutex . run ( closure ) ;
154- }
155104}
156105
157106export class HTMLRewriterModule extends Module {
158- buildSandbox ( options : ProcessedOptions ) : Context {
159- return {
160- HTMLRewriter : options . htmlRewriterUnsafe
161- ? UnsafeHTMLRewriter
162- : HTMLRewriter ,
163- } ;
107+ buildSandbox ( ) : Context {
108+ return { HTMLRewriter } ;
164109 }
165110}
111+
112+ export {
113+ Element ,
114+ Comment ,
115+ TextChunk ,
116+ Doctype ,
117+ DocumentEnd ,
118+ ContentTypeOptions ,
119+ } ;
0 commit comments