Skip to content

Commit 96f533e

Browse files
committed
update code related to hightable 0.19.1
1 parent 1208edf commit 96f533e

File tree

1 file changed

+15
-38
lines changed

1 file changed

+15
-38
lines changed

src/lib/tableProvider.ts

Lines changed: 15 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { DataFrame, DataFrameEvents, ResolvedValue, UnsortableDataFrame, createEventTarget, sortableDataFrame } from 'hightable'
1+
import { DataFrame, DataFrameEvents, ResolvedValue, checkSignal, createEventTarget, sortableDataFrame, validateFetchParams, validateGetCellParams, validateGetRowNumberParams } from 'hightable'
22
import type { ColumnData } from 'hyparquet'
33
import { FileMetaData, ParquetReadOptions, parquetSchema } from 'hyparquet'
44
import { parquetReadWorker } from './workers/parquetWorkerClient.js'
@@ -20,13 +20,15 @@ interface VirtualRowGroup {
2020

2121
/**
2222
* Convert a parquet file into a dataframe.
23+
*
24+
* It's sortable on all the columns, and fetches data on demand in chunks of 1000 rows.
2325
*/
24-
export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData, options?: Pick<ParquetReadOptions, 'utf8'>): DataFrame {
26+
export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData, options?: Pick<ParquetReadOptions, 'utf8'>): DataFrame<{parquet: FileMetaData}> {
2527
const { children } = parquetSchema(metadata)
26-
const header = children.map(child => child.element.name)
28+
const columnDescriptors = children.map(child => ({ name: child.element.name }))
2729
const eventTarget = createEventTarget<DataFrameEvents>()
2830

29-
const cellCache = new Map<string, ResolvedValue<unknown>[]>(header.map(name => [name, []]))
31+
const cellCache = new Map<string, ResolvedValue<unknown>[]>(columnDescriptors.map(({ name }) => [name, []]))
3032

3133
// virtual row groups are up to 1000 rows within row group boundaries
3234
const groups: VirtualRowGroup[] = []
@@ -39,7 +41,7 @@ export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData,
3941
groups.push({
4042
groupStart,
4143
groupEnd,
42-
state: new Map(header.map(name => [name, { kind: 'unfetched' }])),
44+
state: new Map(columnDescriptors.map(({ name }) => [name, { kind: 'unfetched' }])),
4345
})
4446
groupStart = groupEnd
4547
}
@@ -84,22 +86,21 @@ export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData,
8486

8587
const numRows = Number(metadata.num_rows)
8688

87-
const unsortableDataFrame: UnsortableDataFrame = {
88-
header,
89+
const unsortableDataFrame: DataFrame<{parquet: FileMetaData}> = {
90+
columnDescriptors,
8991
numRows,
90-
metadata,
92+
metadata: { parquet: metadata },
9193
eventTarget,
92-
getRowNumber({ row }) {
93-
validateRow({ row, data: { numRows } })
94+
getRowNumber({ row, orderBy }) {
95+
validateGetRowNumberParams({ row, orderBy, data: { numRows, columnDescriptors } })
9496
return { value: row }
9597
},
96-
getCell({ row, column }) {
97-
validateRow({ row, data: { numRows } })
98-
validateColumn({ column, data: { header } })
98+
getCell({ row, column, orderBy }) {
99+
validateGetCellParams({ row, column, orderBy, data: { numRows, columnDescriptors } })
99100
return cellCache.get(column)?.[row]
100101
},
101102
fetch: async ({ rowStart, rowEnd, columns, signal }) => {
102-
validateFetchParams({ rowStart, rowEnd, columns, data: { numRows, header } })
103+
validateFetchParams({ rowStart, rowEnd, columns, data: { numRows, columnDescriptors } })
103104
checkSignal(signal)
104105

105106
if (!columns || columns.length === 0) {
@@ -128,27 +129,3 @@ export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData,
128129

129130
return sortableDataFrame(unsortableDataFrame)
130131
}
131-
132-
function validateFetchParams({ rowStart, rowEnd, columns, data: { numRows, header } }: {rowStart: number, rowEnd: number, columns?: string[], data: Pick<DataFrame, 'numRows' | 'header'>}): void {
133-
if (rowStart < 0 || rowEnd > numRows || !Number.isInteger(rowStart) || !Number.isInteger(rowEnd) || rowStart > rowEnd) {
134-
throw new Error(`Invalid row range: ${rowStart} - ${rowEnd}, numRows: ${numRows}`)
135-
}
136-
if (columns?.some(column => !header.includes(column))) {
137-
throw new Error(`Invalid columns: ${columns.join(', ')}. Available columns: ${header.join(', ')}`)
138-
}
139-
}
140-
function validateRow({ row, data: { numRows } }: {row: number, data: Pick<DataFrame, 'numRows'>}): void {
141-
if (row < 0 || row >= numRows || !Number.isInteger(row)) {
142-
throw new Error(`Invalid row index: ${row}, numRows: ${numRows}`)
143-
}
144-
}
145-
function validateColumn({ column, data: { header } }: {column: string, data: Pick<DataFrame, 'header'>}): void {
146-
if (!header.includes(column)) {
147-
throw new Error(`Invalid column: ${column}. Available columns: ${header.join(', ')}`)
148-
}
149-
}
150-
function checkSignal(signal?: AbortSignal): void {
151-
if (signal?.aborted) {
152-
throw new DOMException('The operation was aborted.', 'AbortError')
153-
}
154-
}

0 commit comments

Comments
 (0)