Skip to content

Commit e928e97

Browse files
committed
remove the concept of virtual row groups
1 parent aa61ba8 commit e928e97

File tree

1 file changed

+17
-22
lines changed

1 file changed

+17
-22
lines changed

src/lib/tableProvider.ts

Lines changed: 17 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ type GroupStatus = {
1212
} | {
1313
kind: 'fetched'
1414
}
15-
interface VirtualRowGroup {
15+
interface RowGroup {
1616
groupStart: number
1717
groupEnd: number
1818
state: Map<string, GroupStatus>
@@ -28,29 +28,24 @@ export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData):
2828

2929
const cellCache = new Map<string, ResolvedValue<unknown>[]>(header.map(name => [name, []]))
3030

31-
// virtual row groups are up to 1000 rows within row group boundaries
32-
const groups: VirtualRowGroup[] = []
31+
const rowGroups: RowGroup[] = []
3332
let groupStart = 0
3433
for (const rg of metadata.row_groups) {
35-
// make virtual row groups of size 1000
36-
for (let j = 0; j < rg.num_rows; j += 1000) {
37-
const groupSize = Math.min(1000, Number(rg.num_rows) - j)
38-
const groupEnd = groupStart + groupSize
39-
groups.push({
40-
groupStart,
41-
groupEnd,
42-
state: new Map(header.map(name => [name, { kind: 'unfetched' }])),
43-
})
44-
groupStart = groupEnd
45-
}
34+
const groupEnd = groupStart + Number(rg.num_rows)
35+
rowGroups.push({
36+
groupStart,
37+
groupEnd,
38+
state: new Map(header.map(name => [name, { kind: 'unfetched' }])),
39+
})
40+
groupStart = groupEnd
4641
}
4742

48-
async function fetchVirtualRowGroup({ group, columns }: {
49-
group: VirtualRowGroup, columns: string[]
43+
async function fetchRowGroup({ rowGroup, columns }: {
44+
rowGroup: RowGroup, columns: string[]
5045
}): Promise<void> {
51-
const { groupStart, groupEnd, state } = group
46+
const { groupStart, groupEnd, state } = rowGroup
5247
const columnsToFetch = columns.filter(column => state.get(column)?.kind === 'unfetched')
53-
const promises = [...group.state.values()].filter((status): status is { kind: 'fetching', promise: Promise<void> } => status.kind === 'fetching').map(status => status.promise)
48+
const promises = [...rowGroup.state.values()].filter((status): status is { kind: 'fetching', promise: Promise<void> } => status.kind === 'fetching').map(status => status.promise)
5449

5550
// TODO(SL): pass AbortSignal to the worker?
5651
if (columnsToFetch.length > 0) {
@@ -107,12 +102,12 @@ export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData):
107102

108103
const promises: Promise<void>[] = []
109104

110-
groups.forEach((group) => {
111-
const { groupStart, groupEnd } = group
105+
rowGroups.forEach((rowGroup) => {
106+
const { groupStart, groupEnd } = rowGroup
112107
if (groupStart < rowEnd && groupEnd > rowStart) {
113108
promises.push(
114-
fetchVirtualRowGroup({
115-
group,
109+
fetchRowGroup({
110+
rowGroup,
116111
columns,
117112
}).then(() => {
118113
checkSignal(signal)

0 commit comments

Comments
 (0)