11import type { ColumnData } from 'hyparquet'
2- import type { ClientMessage , WorkerMessage , WorkerOptions } from './types.js'
2+ import type { ClientMessage , ParquetReadObjectsWorkerOptions , ParquetReadWorkerOptions , Rows , WorkerMessage } from './types.js'
33
/** Shared worker instance; stays undefined until a worker is first created. */
let worker: Worker | undefined
/** Counter used to give every query sent to the worker a distinct id. */
let nextQueryId = 0
/**
 * Callbacks registered for one in-flight worker query.
 *
 * Only `reject` is mandatory; every other member is optional, so each caller
 * registers just the callbacks it needs.
 */
interface Agent {
  /** Receives the `rows` payload of a worker message. */
  onComplete?: (rows: Rows) => void
  /** Receives the `chunk` payload of a worker message. */
  onChunk?: (chunk: ColumnData) => void
  /** Receives the `page` payload of a worker message. */
  onPage?: (page: ColumnData) => void
  /** Fails the query with the error reported by the worker. */
  reject: (error: Error) => void
  /** Settles a query that produces no value. */
  resolveEmpty?: () => void
  /** Settles a query with the row objects returned by the worker. */
  resolveRowObjects?: (rowObjects: Rows) => void
}
1114
/** Agents of the queries that have not been settled yet, keyed by query id. */
const pendingAgents = new Map<number, Agent>()
1316
1417function getWorker ( ) {
1518 if ( ! worker ) {
1619 worker = new Worker ( new URL ( './parquetWorker.js' , import . meta. url ) , { type : 'module' } )
// Dispatch every message coming back from the worker to the agent that was
// registered under the same queryId.
worker.onmessage = ({ data }: { data: WorkerMessage }) => {
  const pendingAgent = pendingAgents.get(data.queryId)
  if (!pendingAgent) {
    console.warn(
      `Unexpected: no pending promise found for queryId: ${data.queryId.toString()}`
    )
    return
  }

  const { onComplete, onChunk, onPage, reject, resolveEmpty, resolveRowObjects } = pendingAgent
  if ('rows' in data) {
    // Intermediate message: forward it and keep the agent registered.
    onComplete?.(data.rows)
  } else if ('chunk' in data) {
    onChunk?.(data.chunk)
  } else if ('page' in data) {
    onPage?.(data.page)
  } else {
    // Final message for this query: settle the promise one way or the other.
    if ('error' in data) {
      reject(data.error)
    } else if ('rowObjects' in data) {
      resolveRowObjects?.(data.rowObjects)
    } else {
      resolveEmpty?.()
    }
    /* clean up */
    pendingAgents.delete(data.queryId)
    // TODO(SL): maybe terminate the worker when no pending agents left
  }
}
3549 }
@@ -40,14 +54,34 @@ function getWorker() {
/**
 * Presents almost the same interface as parquetRead, but runs in a worker.
 * This is useful for reading large parquet files without blocking the main thread.
 * Instead of taking an AsyncBuffer, it takes an AsyncBufferFrom, because it needs
 * to be serialized to the worker. Also: the worker uses hyparquet-compressors and
 * the default parsers.
 *
 * @param options read options; `onComplete`, `onChunk` and `onPage` are kept on
 *   this side as callbacks, everything else is posted to the worker.
 * @returns a promise that resolves with no value when the worker reports the end
 *   of the query, and rejects with the error reported by the worker.
 */
export function parquetReadWorker(options: ParquetReadWorkerOptions): Promise<void> {
  // The callbacks are split off so that only the remaining options are posted.
  const { onComplete, onChunk, onPage, ...serializableOptions } = options
  return new Promise((resolve, reject) => {
    const queryId = nextQueryId++
    // Register the agent before posting, so a reply always finds it.
    pendingAgents.set(queryId, { resolveEmpty: resolve, reject, onComplete, onChunk, onPage })
    const worker = getWorker()
    const message: ClientMessage = { queryId, ...serializableOptions, kind: 'parquetRead' }
    worker.postMessage(message)
  })
}
70+
/**
 * Presents almost the same interface as parquetReadObjects, but runs in a worker.
 * This is useful for reading large parquet files without blocking the main thread.
 * Instead of taking an AsyncBuffer, it takes an AsyncBufferFrom, because it needs
 * to be serialized to the worker. Also: the worker uses hyparquet-compressors and
 * the default parsers.
 *
 * @param options read options; `onChunk` and `onPage` are kept on this side as
 *   callbacks, everything else is posted to the worker.
 * @returns a promise that resolves with the row objects sent back by the worker,
 *   and rejects with the error reported by the worker.
 */
export function parquetReadObjectsWorker(options: ParquetReadObjectsWorkerOptions): Promise<Rows> {
  // The callbacks are split off so that only the remaining options are posted.
  const { onChunk, onPage, ...serializableOptions } = options
  return new Promise((resolve, reject) => {
    const queryId = nextQueryId++
    // Register the agent before posting, so a reply always finds it.
    pendingAgents.set(queryId, { resolveRowObjects: resolve, reject, onChunk, onPage })
    const worker = getWorker()
    const message: ClientMessage = { queryId, ...serializableOptions, kind: 'parquetReadObjects' }
    worker.postMessage(message)
  })
}
0 commit comments