Skip to content

Commit 6e93354

Browse files
committed
remove unused headers and depend on hyparquet
1 parent d61c1f8 commit 6e93354

File tree

8 files changed

+43
-157
lines changed

8 files changed

+43
-157
lines changed

packages/components/src/components/Cell.tsx

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
import { asyncRows } from 'hightable'
2-
import { parquetMetadataAsync } from 'hyparquet'
2+
import { asyncBufferFromUrl, parquetMetadataAsync } from 'hyparquet'
33
import { useEffect, useState } from 'react'
44
import { FileKey, UrlKey } from '../lib/key.ts'
55
import { parquetDataFrame } from '../lib/tableProvider.js'
6-
import { asyncBufferFromUrl } from '../lib/utils.ts'
76
import Breadcrumb from './Breadcrumb.tsx'
87
import Layout from './Layout.tsx'
98

@@ -37,15 +36,8 @@ export default function CellView({ parsedKey, row, col }: CellProps) {
3736
try {
3837
// TODO: handle first row > 100kb
3938
setProgress(0.25)
40-
const asyncBuffer = await asyncBufferFromUrl({
41-
url: resolveUrl,
42-
headers: {},
43-
})
44-
const from = {
45-
url: resolveUrl,
46-
byteLength: asyncBuffer.byteLength,
47-
headers: {},
48-
}
39+
const asyncBuffer = await asyncBufferFromUrl(resolveUrl)
40+
const from = { url: resolveUrl, byteLength: asyncBuffer.byteLength }
4941
setProgress(0.5)
5042
const metadata = await parquetMetadataAsync(asyncBuffer)
5143
setProgress(0.75)

packages/components/src/components/viewers/ParquetView.tsx

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
import HighTable, { DataFrame, rowCache } from 'hightable'
2-
import { parquetMetadataAsync } from 'hyparquet'
2+
import { asyncBufferFromUrl, parquetMetadataAsync } from 'hyparquet'
33
import React, { useCallback, useEffect, useState } from 'react'
44
import { FileKey, UrlKey } from '../../lib/key.ts'
55
import { parquetDataFrame } from '../../lib/tableProvider.ts'
6-
import { asyncBufferFromUrl } from '../../lib/utils.ts'
76
import { Spinner } from '../Layout.tsx'
87
import ContentHeader, { ContentSize } from './ContentHeader.tsx'
98

@@ -35,8 +34,8 @@ export default function ParquetView({ parsedKey, setProgress, setError }: Viewer
3534
async function loadParquetDataFrame() {
3635
try {
3736
setProgress(0.33)
38-
const asyncBuffer = await asyncBufferFromUrl({ url: resolveUrl, headers: {} })
39-
const from = { url: resolveUrl, byteLength: asyncBuffer.byteLength, headers: {} }
37+
const asyncBuffer = await asyncBufferFromUrl(resolveUrl)
38+
const from = { url: resolveUrl, byteLength: asyncBuffer.byteLength }
4039
setProgress(0.66)
4140
const metadata = await parquetMetadataAsync(asyncBuffer)
4241
let dataframe = parquetDataFrame(from, metadata)

packages/components/src/lib/tableProvider.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
import { DataFrame, ResolvablePromise, resolvablePromise } from 'hightable'
22
import { FileMetaData, parquetSchema } from 'hyparquet'
33
import { parquetQueryWorker, parquetSortIndexWorker } from '../workers/parquetWorkerClient.ts'
4-
import type { AsyncBufferFrom } from '../workers/types.d.ts'
4+
import type { AsyncBufferFromUrl } from '../workers/types.d.ts'
55

66
type ResolvableRow = Record<string, ResolvablePromise<unknown>>;
77

88
/**
99
* Convert a parquet file into a dataframe.
1010
*/
11-
export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData): DataFrame {
11+
export function parquetDataFrame(from: AsyncBufferFromUrl, metadata: FileMetaData): DataFrame {
1212
const { children } = parquetSchema(metadata)
1313
const header = children.map(child => child.element.name)
1414
const sortCache = new Map<string, Promise<number[]>>()
Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,7 @@
1-
import { AsyncBuffer } from 'hyparquet'
21

32
/**
43
* Helper function to join class names
54
*/
65
export function cn(...names: (string | undefined | false)[]): string {
76
return names.filter((n) => n).join(' ')
87
}
9-
10-
interface AsyncBufferFromUrlOptions {
11-
url: string;
12-
byteLength?: number;
13-
headers?: Record<string, string>;
14-
}
15-
16-
/**
17-
* Get the byte length of a URL using a HEAD request.
18-
*
19-
* @param {string} url
20-
* @returns {Promise<number>}
21-
*/
22-
export async function byteLengthFromUrl(
23-
url: globalThis.RequestInfo | URL,
24-
init?: globalThis.RequestInit,
25-
): Promise<number> {
26-
return await fetch(url, { ...init, method: 'HEAD' }).then((res) => {
27-
if (!res.ok) throw new Error(`fetch head failed ${res.status.toString()}`)
28-
const length = res.headers.get('Content-Length')
29-
if (!length) throw new Error('missing content length')
30-
return parseInt(length)
31-
})
32-
}
33-
34-
export async function asyncBufferFromUrl({
35-
url,
36-
byteLength,
37-
headers,
38-
}: AsyncBufferFromUrlOptions): Promise<AsyncBuffer> {
39-
// byte length from HEAD request
40-
byteLength ??= await byteLengthFromUrl(url, { headers })
41-
return {
42-
byteLength,
43-
async slice(start, end) {
44-
// fetch byte range from url
45-
const endStr = end === undefined ? '' : end - 1
46-
const res = await fetch(url, {
47-
headers: {
48-
...headers,
49-
range: `bytes=${start.toString()}-${endStr.toString()}`,
50-
},
51-
})
52-
if (!res.ok || !res.body)
53-
throw new Error(`fetch failed ${res.status.toString()}`)
54-
return res.arrayBuffer()
55-
},
56-
}
57-
}

packages/components/src/workers/parquetWorker.ts

Lines changed: 5 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
import { AsyncBuffer, ColumnData, parquetQuery } from 'hyparquet'
1+
import { AsyncBuffer, ColumnData, asyncBufferFromUrl, cachedAsyncBuffer, parquetQuery } from 'hyparquet'
22
import { compressors } from 'hyparquet-compressors'
3-
import { asyncBufferFromUrl } from '../lib/utils.ts'
3+
// import { asyncBufferFromUrl } from '../lib/utils.ts'
44
import type {
5-
AsyncBufferFrom,
5+
AsyncBufferFromUrl,
66
ChunkMessage,
77
ErrorMessage,
88
IndicesMessage,
@@ -97,66 +97,12 @@ function compare<T>(a: T, b: T): number {
9797
* Convert AsyncBufferFromUrl to AsyncBuffer and cache results.
9898
*/
9999
function asyncBufferFrom(
100-
from: AsyncBufferFrom,
100+
from: AsyncBufferFromUrl,
101101
): Promise<AsyncBuffer> {
102102
const key = JSON.stringify(from)
103103
const cached = cache.get(key)
104104
if (cached) return cached
105-
const asyncBuffer = asyncBufferFromUrl(from).then(cachedAsyncBuffer)
105+
const asyncBuffer = asyncBufferFromUrl(from.url, from.byteLength).then(cachedAsyncBuffer)
106106
cache.set(key, asyncBuffer)
107107
return asyncBuffer
108108
}
109-
110-
type Awaitable<T> = T | Promise<T>;
111-
112-
function cachedAsyncBuffer(asyncBuffer: AsyncBuffer): AsyncBuffer {
113-
const cache = new Map<string, Awaitable<ArrayBuffer>>()
114-
const { byteLength } = asyncBuffer
115-
return {
116-
byteLength,
117-
/**
118-
* @param {number} start
119-
* @param {number} [end]
120-
* @returns {Awaitable<ArrayBuffer>}
121-
*/
122-
slice(start: number, end?: number): Awaitable<ArrayBuffer> {
123-
const key = cacheKey(start, end, byteLength)
124-
const cached = cache.get(key)
125-
if (cached) return cached
126-
// cache miss, read from file
127-
const promise = asyncBuffer.slice(start, end)
128-
cache.set(key, promise)
129-
return promise
130-
},
131-
}
132-
}
133-
134-
/**
135-
* Returns canonical cache key for a byte range 'start,end'.
136-
* Normalize int-range and suffix-range requests to the same key.
137-
*
138-
* @param {number} start start byte of range
139-
* @param {number} [end] end byte of range, or undefined for suffix range
140-
* @param {number} [size] size of file, or undefined for suffix range
141-
* @returns {string}
142-
*/
143-
function cacheKey(start: number, end?: number, size?: number): string {
144-
if (start < 0) {
145-
if (end !== undefined)
146-
throw new Error(
147-
`invalid suffix range [${start.toString()}, ${end.toString()}]`,
148-
)
149-
if (size === undefined) return `${start.toString()},`
150-
return `${(size + start).toString()},${size.toString()}`
151-
} else if (end !== undefined) {
152-
if (start > end)
153-
throw new Error(
154-
`invalid empty range [${start.toString()}, ${end.toString()}]`,
155-
)
156-
return `${start.toString()},${end.toString()}`
157-
} else if (size === undefined) {
158-
return `${start.toString()},`
159-
} else {
160-
return `${start.toString()},${size.toString()}`
161-
}
162-
}
Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,40 @@
11
import { ColumnData, FileMetaData, ParquetReadOptions } from 'hyparquet'
22

33
// Serializable constructor for AsyncBuffers
4-
export interface AsyncBufferFrom {
5-
url: string
6-
byteLength: number
7-
headers?: Record<string, string>
8-
}
9-
// Same as ParquetReadOptions, but AsyncBufferFrom instead of AsyncBuffer
4+
export interface AsyncBufferFromUrl {
5+
url: string
6+
byteLength: number
7+
}
8+
// Same as ParquetReadOptions, but AsyncBufferFromUrl instead of AsyncBuffer
109
export interface ParquetReadWorkerOptions extends Omit<ParquetReadOptions, 'file'> {
11-
from: AsyncBufferFrom
12-
orderBy?: string
13-
sortIndex?: boolean
14-
}
15-
// Row is defined in hightable, but not exported + we change any to unknown
16-
export type Row = Record<string, unknown>;
10+
from: AsyncBufferFromUrl
11+
orderBy?: string
12+
sortIndex?: boolean
13+
}
14+
// Row is defined in hightable but not exported; we redeclare it here with `unknown` instead of `any`
15+
export type Row = Record<string, unknown> ;
1716

18-
interface Message {
19-
queryId: number
20-
}
17+
interface Message {
18+
queryId: number
19+
}
2120
export interface ChunkMessage extends Message {
22-
chunk: ColumnData
23-
}
21+
chunk: ColumnData
22+
}
2423
export interface ResultMessage extends Message {
25-
result: Row[]
26-
}
24+
result: Row[]
25+
}
2726
export interface IndicesMessage extends Message {
28-
indices: number[]
29-
}
27+
indices: number[]
28+
}
3029
export interface ErrorMessage extends Message {
31-
error: Error
32-
}
30+
error: Error
31+
}
3332

3433
export type ParquetMessage = ChunkMessage | ResultMessage | ErrorMessage
3534
export type SortParquetMessage = IndicesMessage | ErrorMessage
3635

3736
export interface ParquetSortIndexOptions {
38-
metadata: FileMetaData
39-
from: AsyncBufferFrom
40-
orderBy: string
41-
}
37+
metadata: FileMetaData
38+
from: AsyncBufferFromUrl
39+
orderBy: string
40+
}

0 commit comments

Comments
 (0)