Skip to content

Commit 5f16b3b

Browse files
committed
Move parsing to web worker
1 parent ca429b8 commit 5f16b3b

File tree

12 files changed

+110
-33
lines changed

12 files changed

+110
-33
lines changed

.gitattributes

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
1-
public/build/app.js -diff
21
*.js.map -diff
32
*.min.js -diff

package.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@
3636
"@testing-library/react": "16.0.1",
3737
"@types/node": "22.7.5",
3838
"@types/react": "18.3.11",
39-
"@types/react-dom": "18.3.0",
39+
"@types/react-dom": "18.3.1",
4040
"@vitejs/plugin-react": "4.3.2",
41-
"@vitest/coverage-v8": "2.1.2",
41+
"@vitest/coverage-v8": "2.1.3",
4242
"eslint": "9.12.0",
4343
"globals": "15.11.0",
4444
"jsdom": "25.0.1",
@@ -48,7 +48,7 @@
4848
"rollup-plugin-postcss": "4.0.2",
4949
"tslib": "2.7.0",
5050
"typescript": "5.6.3",
51-
"typescript-eslint": "8.8.1",
52-
"vitest": "2.1.2"
51+
"typescript-eslint": "8.9.0",
52+
"vitest": "2.1.3"
5353
}
5454
}

public/build/app.min.js

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

public/build/app.min.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

public/build/worker.min.js

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

public/build/worker.min.js.map

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rollup.config.js

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,19 @@ export default [
3333
}),
3434
],
3535
},
36+
// web worker
37+
{
38+
input: 'src/workers/parquetWorker.ts',
39+
output: {
40+
file: 'public/build/worker.min.js',
41+
format: 'umd',
42+
sourcemap: true,
43+
},
44+
plugins: [
45+
commonjs(),
46+
resolve({ browser: true }),
47+
terser(),
48+
typescript(),
49+
],
50+
},
3651
]

src/components/Cell.tsx

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import React, { useEffect, useState } from 'react'
2-
import { asyncBufferFrom, parquetDataFrame } from '../tableProvider.js'
3-
import Highlight from './Highlight.js'
2+
import { parquetDataFrame } from '../tableProvider.js'
43
import Layout from './Layout.js'
4+
import { asyncBufferFromUrl, parquetMetadataAsync } from 'hyparquet'
55

66
enum LoadingState {
77
NotLoaded,
@@ -36,10 +36,12 @@ export default function CellView() {
3636
try {
3737
// TODO: handle first row > 100kb
3838
setProgress(0.25)
39-
const asyncBuffer = await asyncBufferFrom(url)
39+
const asyncBuffer = await asyncBufferFromUrl(url)
40+
const from = { url, byteLength: asyncBuffer.byteLength }
4041
setProgress(0.5)
41-
const df = await parquetDataFrame(asyncBuffer)
42+
const metadata = await parquetMetadataAsync(asyncBuffer)
4243
setProgress(0.75)
44+
const df = await parquetDataFrame(from, metadata)
4345
const rows = await df.rows(row, row + 1)
4446
const colName = df.header[col]
4547
const cell = rows[0][colName]

src/components/viewers/ParquetView.tsx

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import HighTable, { DataFrame, rowCache } from 'hightable'
22
import React, { useCallback, useEffect, useState } from 'react'
3-
import { asyncBufferFrom, parquetDataFrame } from '../../tableProvider.js'
3+
import { parquetDataFrame } from '../../tableProvider.js'
44
import { Spinner } from '../Layout.js'
55
import ContentHeader from './ContentHeader.js'
6+
import { asyncBufferFromUrl, FileMetaData, parquetMetadataAsync } from 'hyparquet'
67

78
enum LoadingState {
89
NotLoaded,
@@ -27,6 +28,7 @@ interface Content {
2728
export default function ParquetView({ file, setProgress, setError }: ViewerProps) {
2829
const [loading, setLoading] = useState<LoadingState>(LoadingState.NotLoaded)
2930
const [content, setContent] = useState<Content>()
31+
const [metadata, setMetadata] = useState<FileMetaData>()
3032

3133
const isUrl = file.startsWith('http://') || file.startsWith('https://')
3234
const url = isUrl ? file : '/api/store/get?key=' + file
@@ -35,9 +37,12 @@ export default function ParquetView({ file, setProgress, setError }: ViewerProps
3537
async function loadParquetDataFrame() {
3638
try {
3739
setProgress(0.33)
38-
const asyncBuffer = await asyncBufferFrom(url)
40+
const asyncBuffer = await asyncBufferFromUrl(url)
41+
const from = { url, byteLength: asyncBuffer.byteLength }
3942
setProgress(0.66)
40-
let dataframe = await parquetDataFrame(asyncBuffer)
43+
const metadata = await parquetMetadataAsync(asyncBuffer)
44+
setMetadata(metadata)
45+
let dataframe = await parquetDataFrame(from, metadata)
4146
dataframe = rowCache(dataframe)
4247
const fileSize = asyncBuffer.byteLength
4348
setContent({ dataframe, fileSize })

src/tableProvider.ts

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,18 @@
11
import type { DataFrame } from 'hightable'
2-
import {
3-
AsyncBuffer, SchemaTree, parquetMetadataAsync, parquetQuery, parquetSchema,
4-
} from 'hyparquet'
5-
import { compressors } from 'hyparquet-compressors'
2+
import { AsyncBuffer, FileMetaData, parquetSchema } from 'hyparquet'
63
import { readableStreamToArrayBuffer } from './streamConverters.js'
4+
import { AsyncBufferFrom, parquetQueryWorker } from './workers/parquetWorkerClient.js'
75

86
/**
9-
* Construct a dataframe from a parquet file asynchronously.
7+
* Convert a parquet file into a dataframe.
108
*/
11-
export async function parquetDataFrame(asyncBuffer: AsyncBuffer): Promise<DataFrame> {
12-
// load parquet metadata
13-
const metadata = await parquetMetadataAsync(asyncBuffer)
14-
15-
// construct dataframe
16-
const { children }: SchemaTree = parquetSchema(metadata)
17-
// TODO: concat child names for like-like columns?
18-
const header = children.map(child => child.element.name)
19-
const numRows = Number(metadata.num_rows)
9+
export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData): DataFrame {
10+
const { children } = parquetSchema(metadata)
2011
return {
21-
header,
22-
numRows,
23-
rows(rowStart?: number, rowEnd?: number, orderBy?: string) {
24-
return parquetQuery({ metadata, compressors, file: asyncBuffer, rowStart, rowEnd, orderBy })
12+
header: children.map(child => child.element.name),
13+
numRows: Number(metadata.num_rows),
14+
rows(rowStart: number, rowEnd: number, orderBy?: string) {
15+
return parquetQueryWorker({ asyncBuffer: from, rowStart, rowEnd, orderBy })
2516
},
2617
sortable: true,
2718
}

0 commit comments

Comments
 (0)