diff --git a/.github/workflows/ci_apps_cli.yml b/.github/workflows/ci_apps_cli.yml index aa3e0d30..faa57375 100644 --- a/.github/workflows/ci_apps_cli.yml +++ b/.github/workflows/ci_apps_cli.yml @@ -15,6 +15,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils @@ -29,6 +32,9 @@ jobs: working-directory: ./ steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils @@ -41,6 +47,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils @@ -55,6 +64,9 @@ jobs: working-directory: ./ steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils diff --git a/.github/workflows/ci_apps_hightable_demo.yml b/.github/workflows/ci_apps_hightable_demo.yml index 3703d078..01d7faa1 100644 --- a/.github/workflows/ci_apps_hightable_demo.yml +++ b/.github/workflows/ci_apps_hightable_demo.yml @@ -14,6 +14,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run lint @@ -21,6 +24,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: tsc @@ -28,6 +34,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run coverage @@ -35,5 +44,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build \ No newline at end of file diff --git a/.github/workflows/ci_apps_hyparquet_demo.yml b/.github/workflows/ci_apps_hyparquet_demo.yml index fa0f3ff1..e8bc73c7 100644 --- a/.github/workflows/ci_apps_hyparquet_demo.yml +++ b/.github/workflows/ci_apps_hyparquet_demo.yml @@ -14,6 +14,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils @@ -23,6 +26,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils @@ -32,6 +38,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils @@ -41,6 +50,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils diff --git a/.github/workflows/ci_packages_components.yml b/.github/workflows/ci_packages_components.yml index 5a44a632..0cd317dd 100644 --- a/.github/workflows/ci_packages_components.yml +++ b/.github/workflows/ci_packages_components.yml @@ -14,6 +14,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils @@ -23,6 +26,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils @@ -32,6 +38,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils @@ -41,6 +50,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build working-directory: ./packages/utils diff --git a/.github/workflows/ci_packages_utils.yml b/.github/workflows/ci_packages_utils.yml index 244e9448..e4c1cc29 100644 --- a/.github/workflows/ci_packages_utils.yml +++ b/.github/workflows/ci_packages_utils.yml @@ -14,6 +14,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run lint @@ -21,6 +24,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: tsc @@ -28,6 +34,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run coverage @@ -35,5 +44,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 - run: npm i - run: npm run build diff --git a/apps/hyparquet-demo/src/App.tsx b/apps/hyparquet-demo/src/App.tsx index 4b500baa..80c28b98 100644 --- a/apps/hyparquet-demo/src/App.tsx +++ b/apps/hyparquet-demo/src/App.tsx @@ -2,8 +2,7 @@ import { ReactNode } from 'react' import Page, { PageProps } from './Page.js' import Welcome from './Welcome.js' -import { AsyncBufferFrom, Row, asyncBufferFrom, parquetQueryWorker } from '@hyparam/utils' -import { DataFrame, rowCache } from 'hightable' +import { AsyncBufferFrom, DataFrame, Row, asyncBufferFrom, parquetQueryWorker, rowCache } from '@hyparam/utils' import { FileMetaData, byteLengthFromUrl, parquetMetadataAsync, parquetSchema } from 'hyparquet' import { useCallback, useEffect, useState } from 'react' import Dropzone from './Dropzone.js' diff --git a/apps/hyparquet-demo/src/Page.tsx b/apps/hyparquet-demo/src/Page.tsx index 1d3735e7..60cff72a 100644 --- a/apps/hyparquet-demo/src/Page.tsx +++ b/apps/hyparquet-demo/src/Page.tsx @@ -1,4 +1,5 @@ -import HighTable, { DataFrame } from 'hightable' +import { DataFrame } from '@hyparam/utils' +import HighTable from 'hightable' import { FileMetaData } from 'hyparquet' import { ReactNode, useState } from 'react' import Dropdown from './Dropdown.js' diff --git a/packages/components/src/viewers/ParquetView.tsx b/packages/components/src/viewers/ParquetView.tsx index c9270ca2..778a5ee4 100644 --- a/packages/components/src/viewers/ParquetView.tsx +++ b/packages/components/src/viewers/ParquetView.tsx @@ -1,5 +1,5 @@ -import { FileKey, UrlKey, parquetDataFrame } from '@hyparam/utils' -import HighTable, { DataFrame, rowCache } from 'hightable' +import { DataFrame, FileKey, UrlKey, parquetDataFrame, rowCache } from '@hyparam/utils' +import HighTable from 'hightable' import { asyncBufferFromUrl, parquetMetadataAsync } from 'hyparquet' import React, { useCallback, useEffect, useState } from 'react' import { Spinner } from '../Layout.js' diff --git a/packages/utils/eslint.config.js b/packages/utils/eslint.config.js index 7e636c78..222b3993 100644 --- a/packages/utils/eslint.config.js +++ b/packages/utils/eslint.config.js @@ -9,7 +9,7 @@ export default tseslint.config( extends: [js.configs.recommended, ...tseslint.configs.strictTypeChecked, ...tseslint.configs.stylisticTypeChecked], files: ['**/*.{ts,js}'], languageOptions: { - ecmaVersion: 2020, + ecmaVersion: globals.esnext, globals: globals.browser, parserOptions: { project: ['./tsconfig.json', './tsconfig.eslint.json'], @@ -26,7 +26,7 @@ export default tseslint.config( { files: ['test/**/*.{ts}', '*.{js,ts}'], languageOptions: { - ecmaVersion: 2020, + ecmaVersion: globals.esnext, globals: { ...globals.node, ...globals.browser, diff --git a/packages/utils/package.json b/packages/utils/package.json index 8243095e..c8017b3d 100644 --- a/packages/utils/package.json +++ b/packages/utils/package.json @@ -36,7 +36,6 @@ "test": "vitest run" }, "dependencies": { - "hightable": "0.6.3", "hyparquet": "1.5.0", "hyparquet-compressors": "0.1.4" }, diff --git a/packages/utils/src/dataframe.ts b/packages/utils/src/dataframe.ts new file mode 100644 index 00000000..7c6c5042 --- /dev/null +++ b/packages/utils/src/dataframe.ts @@ -0,0 +1,169 @@ +type WrappedPromise = Promise & { + resolved?: T + rejected?: Error +} + +/** + * A row where each cell is a promise. + * The promise must be wrapped with `wrapPromise` so that HighTable can render + * the state synchronously. + */ +export type AsyncRow = Record> + +/** + * A row where each cell is a resolved value. + */ +export type Row = Record + +/** + * Streamable row data + */ +export interface DataFrame { + header: string[] + numRows: number + // Rows are 0-indexed, excludes the header, end is exclusive + rows(start: number, end: number, orderBy?: string): AsyncRow[] | Promise + sortable?: boolean +} + +export function resolvableRow(header: string[]): Record> { + return Object.fromEntries(header.map(key => [key, resolvablePromise()])) +} + +/** + * Helper method to wrap future rows into AsyncRows. + * Helpful when you want to define a DataFrame with simple async fetching of rows. + * This function turns future data into a "grid" of wrapped promises. + */ +export function asyncRows(rows: AsyncRow[] | Promise, numRows: number, header: string[]): AsyncRow[] { + if (Array.isArray(rows)) return rows + // Make grid of resolvable promises + const wrapped = new Array(numRows).fill(null).map(() => resolvableRow(header)) + rows.then(rows => { + if (rows.length !== numRows) { + console.warn(`Expected ${numRows} rows, got ${rows.length}`) + } + for (let i = 0; i < rows.length; i++) { + const row = rows[i] + for (const key of header) { + wrapped[i][key].resolve(row[key]) + } + } + }).catch((error: unknown) => { + const rejected = error instanceof Error ? error : new Error(String(error)) + // Reject all promises on error + for (let i = 0; i < numRows; i++) { + for (const key of header) { + wrapped[i][key].reject(rejected) + } + } + }) + return wrapped +} + +/** + * Wrap a promise to save the resolved value and error. + * Note: you can't await on a WrappedPromise, you must use then. + */ +export function wrapPromise(promise: Promise | T): WrappedPromise { + if (!(promise instanceof Promise)) { + promise = Promise.resolve(promise) + } + const wrapped: WrappedPromise = promise.then(resolved => { + wrapped.resolved = resolved + return resolved + }).catch((error: unknown) => { + wrapped.rejected = error instanceof Error ? error : new Error(String(error)) + throw error + }) + return wrapped +} + +export type ResolvablePromise = Promise & { + resolve: (value: T) => void + reject: (error: Error) => void +} + +/** + * Create a promise that can be resolved or rejected later using the resolve and reject methods. + * It's also a wrapped promise, with resolved and rejected properties. + */ +export function resolvablePromise(): ResolvablePromise & WrappedPromise { + const promise = Promise.withResolvers() + const wrapped = Object.assign( + wrapPromise(promise.promise), + { + resolve: promise.resolve, + reject : promise.reject, + }, + ) + return wrapped +} + +/** + * Wraps a DataFrame to make it sortable. + * Requires fetching all rows to sort. + */ +export function sortableDataFrame(data: DataFrame): DataFrame { + if (data.sortable) return data // already sortable + // Fetch all rows and add __index__ column + let all: Promise | undefined + return { + ...data, + rows(start: number, end: number, orderBy?: string): AsyncRow[] | Promise { + if (orderBy) { + if (!data.header.includes(orderBy)) { + throw new Error(`Invalid orderBy field: ${orderBy}`) + } + if (!all) { + // Fetch all rows and add __index__ column + all = awaitRows(data.rows(0, data.numRows)) + .then(rows => rows.map((row, i) => ({ __index__: i, ...row }))) + } + const sorted = all.then(all => { + return all.sort((a, b) => { + /// TODO(SL): rewrite the function, handling every case + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const valueA: any = a[orderBy] + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const valueB: any = b[orderBy] + if (valueA < valueB) return -1 + if (valueA > valueB) return 1 + return 0 + }).slice(start, end) + }) + return sorted + } else { + return data.rows(start, end) + } + }, + sortable: true, + } +} + +/** + * Await all promises in an AsyncRow and return resolved row. + */ +export async function awaitRow(row: AsyncRow): Promise { + const values = await Promise.all(Object.values(row)) + return Object.fromEntries(Object.keys(row).map((key, i) => [key, values[i]])) +} + +/** + * Await all promises in list of AsyncRows and return resolved rows. + */ +export function awaitRows(rows: AsyncRow[] | Promise): Promise { + if (rows instanceof Promise) return rows + return Promise.all(rows.map(awaitRow)) +} + +export function arrayDataFrame(data: Row[]): DataFrame { + if (!data.length) return { header: [], numRows: 0, rows: () => Promise.resolve([]) } + return { + header: Object.keys(data[0]), + numRows: data.length, + rows(start: number, end: number): Promise { + return Promise.resolve(data.slice(start, end)) + }, + } +} diff --git a/packages/utils/src/index.ts b/packages/utils/src/index.ts index 239674ea..c92b5cab 100644 --- a/packages/utils/src/index.ts +++ b/packages/utils/src/index.ts @@ -1,9 +1,12 @@ +export { arrayDataFrame, asyncRows, awaitRow, awaitRows, resolvablePromise, resolvableRow, sortableDataFrame, wrapPromise } from './dataframe.js' +export type { AsyncRow, DataFrame, ResolvablePromise, Row } from './dataframe.js' export { contentTypes, formatFileSize, getFileDate, getFileDateShort, getFileSize, imageTypes, listFiles, parseFileSize } from './files.js' export type { FileMetadata } from './files.js' export { parseKey } from './key.js' export type { FileKey, FolderKey, ParsedKey, UrlKey } from './key.js' +export { rowCache } from './rowCache.js' export { parquetDataFrame } from './tableProvider.js' export { asyncBufferFrom, cn } from './utils.js' export { parquetQueryWorker } from './workers/parquetWorkerClient.js' -export type { AsyncBufferFrom, Row } from './workers/types.js' +export type { AsyncBufferFrom } from './workers/types.js' diff --git a/packages/utils/src/rowCache.ts b/packages/utils/src/rowCache.ts new file mode 100644 index 00000000..1f5ebbbc --- /dev/null +++ b/packages/utils/src/rowCache.ts @@ -0,0 +1,50 @@ +import { AsyncRow, DataFrame, asyncRows } from './dataframe.js' + +/** + * Wrap a dataframe with cached rows. + * + * @param df async dataframe to wrap + */ +export function rowCache(df: DataFrame): DataFrame { + // Row cache is stored as a sorted array of RowGroups, per sort order + const caches: Record = {} + + let hits = 0 + let misses = 0 + + return { + ...df, + rows(start: number, end: number, orderBy?: string): AsyncRow[] { + // Cache per sort order + const cache = caches[orderBy ?? ''] ??= new Array(df.numRows) + const n = hits + misses + if (n && !(n % 10)) { + console.log(`Cache hits: ${hits} / ${hits + misses} (${(100 * hits / (hits + misses)).toFixed(1)}%)`) + } + + // Fetch missing rows in contiguous blocks + let blockStart: number | undefined + let hasCacheMiss = false + for (let i = start; i <= end; i++) { + if (i < end && !cache[i]) { + if (blockStart === undefined) { + blockStart = i + } + } else if (blockStart !== undefined) { + const blockEnd = i + const numRows = blockEnd - blockStart + const futureRows = asyncRows(df.rows(blockStart, blockEnd, orderBy), numRows, df.header) + for (let j = 0; j < blockEnd - blockStart; j++) { + cache[blockStart + j] = futureRows[j] + } + blockStart = undefined + hasCacheMiss = true + } + } + if (hasCacheMiss) misses++ + else hits++ + + return cache.slice(start, end) + }, + } +} diff --git a/packages/utils/src/tableProvider.ts b/packages/utils/src/tableProvider.ts index 5791a853..24faf9bd 100644 --- a/packages/utils/src/tableProvider.ts +++ b/packages/utils/src/tableProvider.ts @@ -1,5 +1,5 @@ -import { DataFrame, ResolvablePromise, resolvablePromise } from 'hightable' import { FileMetaData, parquetSchema } from 'hyparquet' +import { DataFrame, ResolvablePromise, resolvablePromise } from './dataframe.js' import { parquetQueryWorker, parquetSortIndexWorker } from './workers/parquetWorkerClient.js' import type { AsyncBufferFrom } from './workers/types.d.ts' @@ -26,7 +26,6 @@ export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData): for (let i = rowStart; i < rowEnd; i++) { data[i] = Object.fromEntries( header.map((key) => [key, resolvablePromise()]), - // ^ for type: resolvableRow uses any, not unknown ) } parquetQueryWorker({ from, metadata, rowStart, rowEnd }) diff --git a/packages/utils/src/workers/parquetWorkerClient.ts b/packages/utils/src/workers/parquetWorkerClient.ts index 66053ced..29998cb5 100644 --- a/packages/utils/src/workers/parquetWorkerClient.ts +++ b/packages/utils/src/workers/parquetWorkerClient.ts @@ -2,7 +2,8 @@ import ParquetWorker from './parquetWorker?worker&inline' /// ^ the worker is bundled with the main thread code (inline) which is easier for users to import /// (no need to copy the worker file to the right place) import { ColumnData } from 'hyparquet' -import type { ParquetMessage, ParquetReadWorkerOptions, ParquetSortIndexOptions, Row, SortParquetMessage } from './types.js' +import type { Row } from '../dataframe.js' +import type { ParquetMessage, ParquetReadWorkerOptions, ParquetSortIndexOptions, SortParquetMessage } from './types.js' let worker: Worker | undefined let nextQueryId = 0 diff --git a/packages/utils/src/workers/types.ts b/packages/utils/src/workers/types.ts index 6915993c..fabd377d 100644 --- a/packages/utils/src/workers/types.ts +++ b/packages/utils/src/workers/types.ts @@ -1,4 +1,5 @@ import { ColumnData, FileMetaData, ParquetReadOptions } from 'hyparquet' +import type { Row } from '../dataframe.js' // Serializable constructors for AsyncBuffers interface AsyncBufferFromFile { @@ -17,8 +18,6 @@ export interface ParquetReadWorkerOptions extends Omit ; interface Message { queryId: number diff --git a/packages/utils/test/dataframe.test.ts b/packages/utils/test/dataframe.test.ts new file mode 100644 index 00000000..986aa165 --- /dev/null +++ b/packages/utils/test/dataframe.test.ts @@ -0,0 +1,143 @@ +import { describe, expect, it } from 'vitest' +import { + AsyncRow, + DataFrame, + arrayDataFrame, awaitRows, resolvablePromise, sortableDataFrame, wrapPromise, +} from '../src/dataframe.js' + +function wrapObject(obj: Record): AsyncRow { + return Object.fromEntries( + Object.entries(obj).map(([key, value]) => [key, wrapPromise(value)]), + ) +} + +describe('resolvablePromise', () => { + it('should resolve with a value', async () => { + const promise = resolvablePromise() + promise.resolve(42) + expect(await promise).toBe(42) + }) + it('should reject with an error', async () => { + const promise = resolvablePromise() + promise.reject(new Error('Failed')) + await expect(promise).rejects.toThrow('Failed') + }) +}) + +describe('awaitRows', () => { + it('should resolve with a row', async () => { + const row = wrapObject({ id: 1, name: 'Alice', age: 30, __index__: 0 }) + const result = await awaitRows([row]) + expect(result).toEqual([{ id: 1, name: 'Alice', age: 30, __index__: 0 }]) + }) +}) + +describe('sortableDataFrame', () => { + const data = [ + { id: 3, name: 'Charlie', age: 25 }, + { id: 1, name: 'Alice', age: 30 }, + { id: 2, name: 'Bob', age: 20 }, + { id: 4, name: 'Dani', age: 20 }, + ] + + const dataFrame: DataFrame = { + header: ['id', 'name', 'age'], + numRows: data.length, + rows(start: number, end: number): AsyncRow[] { + // Return the slice of data between start and end indices + return data.slice(start, end).map(wrapObject) + }, + sortable: false, + } + + const sortableDf = sortableDataFrame(dataFrame) + + it('should set sortable to true', () => { + expect(sortableDf.sortable).toBe(true) + }) + + it('should preserve header and numRows', () => { + expect(sortableDf.header).toEqual(dataFrame.header) + expect(sortableDf.numRows).toBe(dataFrame.numRows) + }) + + it('should return unsorted data when orderBy is not provided', async () => { + const rows = await awaitRows(sortableDf.rows(0, 3)) + expect(rows).toEqual([ + { id: 3, name: 'Charlie', age: 25 }, + { id: 1, name: 'Alice', age: 30 }, + { id: 2, name: 'Bob', age: 20 }, + ]) + }) + + it('should return data sorted by column "age"', async () => { + const rows = await awaitRows(sortableDf.rows(0, 4, 'age')) + expect(rows).toEqual([ + { id: 2, name: 'Bob', age: 20, __index__: 2 }, + { id: 4, name: 'Dani', age: 20, __index__: 3 }, + { id: 3, name: 'Charlie', age: 25, __index__: 0 }, + { id: 1, name: 'Alice', age: 30, __index__: 1 }, + ]) + }) + + it('should slice the sorted data correctly', async () => { + const rows = await awaitRows(sortableDf.rows(1, 3, 'id')) + expect(rows).toEqual([ + { id: 2, name: 'Bob', age: 20, __index__: 2 }, + { id: 3, name: 'Charlie', age: 25, __index__: 0 }, + ]) + }) + + it('returns self if already sortable', () => { + const sortableDf2 = sortableDataFrame(sortableDf) + expect(sortableDf2).toBe(sortableDf) + }) + + it('should throw for invalid orderBy field', () => { + expect(() => sortableDf.rows(0, 3, 'invalid')) + .toThrowError('Invalid orderBy field: invalid') + }) +}) + +describe('arrayDataFrame', () => { + const testData = [ + { id: 1, name: 'Alice', age: 30 }, + { id: 2, name: 'Bob', age: 25 }, + { id: 3, name: 'Charlie', age: 35 }, + ] + + it('should create a DataFrame with correct header and numRows', () => { + const df = arrayDataFrame(testData) + expect(df.header).toEqual(['id', 'name', 'age']) + expect(df.numRows).toBe(3) + }) + + it('should handle empty data array', async () => { + const df = arrayDataFrame([]) + expect(df.header).toEqual([]) + expect(df.numRows).toBe(0) + const rows = await df.rows(0, 1) + expect(rows).toEqual([]) + }) + + it('should return correct rows for given range', async () => { + const df = arrayDataFrame(testData) + const rows = await df.rows(0, 2) + expect(rows).toEqual([ + { id: 1, name: 'Alice', age: 30 }, + { id: 2, name: 'Bob', age: 25 }, + ]) + }) + + it('should handle start index equal to end index', async () => { + const df = arrayDataFrame(testData) + const rows = await df.rows(1, 1) + expect(rows).toEqual([]) + }) + + it('should return all rows when end index exceeds array length', async () => { + const df = arrayDataFrame(testData) + const rows = await df.rows(0, 10) + expect(rows).toEqual(testData) + }) +}) diff --git a/packages/utils/test/rowCache.test.ts b/packages/utils/test/rowCache.test.ts new file mode 100644 index 00000000..4ef63cea --- /dev/null +++ b/packages/utils/test/rowCache.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, it, vi } from 'vitest' +import { AsyncRow, awaitRows } from '../src/dataframe.js' +import { rowCache } from '../src/rowCache.js' + +// Mock DataFrame +function makeDf() { + return { + header: ['id'], + numRows: 10, + rows: vi.fn((start: number, end: number): AsyncRow[] => { + return new Array(end - start).fill(null) + .map((_, index) => ({ id: Promise.resolve(start + index) })) + }), + } +} + +describe('rowCache', () => { + it('should fetch uncached rows', async () => { + const df = makeDf() + const dfCached = rowCache(df) + const rows = await awaitRows(dfCached.rows(0, 3)) + expect(rows).toEqual([{ id: 0 }, { id: 1 }, { id: 2 }]) + expect(df.rows).toHaveBeenCalledTimes(1) + expect(df.rows).toHaveBeenCalledWith(0, 3, undefined) + }) + + it('should return cached rows', async () => { + const df = makeDf() + const dfCached = rowCache(df) + + // Initial fetch to cache rows + const rowsPre = await awaitRows(dfCached.rows(3, 6)) + expect(rowsPre).toEqual([{ id: 3 }, { id: 4 }, { id: 5 }]) + expect(df.rows).toHaveBeenCalledTimes(1) + expect(df.rows).toHaveBeenCalledWith(3, 6, undefined) + + // Subsequent fetch should use cache + const rowsPost = await awaitRows(dfCached.rows(3, 6)) + expect(rowsPost).toEqual([{ id: 3 }, { id: 4 }, { id: 5 }]) + expect(df.rows).toHaveBeenCalledTimes(1) + }) + + it('should handle adjacent cached blocks', async () => { + const df = makeDf() + const dfCached = rowCache(df) + + // Cache first block + await dfCached.rows(0, 3) + expect(df.rows).toHaveBeenCalledTimes(1) + // Cache adjacent block + await dfCached.rows(3, 6) + expect(df.rows).toHaveBeenCalledTimes(2) + // Fetch combined block + const adjacentRows = await awaitRows(dfCached.rows(0, 6)) + + expect(adjacentRows).toEqual([ + { id: 0 }, { id: 1 }, { id: 2 }, { id: 3 }, { id: 4 }, { id: 5 }, + ]) + expect(df.rows).toHaveBeenCalledTimes(2) + }) + + it('should handle a gap in cached blocks', async () => { + const df = makeDf() + const dfCached = rowCache(df) + + // Cache first block + await dfCached.rows(0, 2) + // Cache second block + await dfCached.rows(4, 6) + // Fetch combined block + const gapRows = await awaitRows(dfCached.rows(0, 6)) + expect(gapRows).toEqual([ + { id: 0 }, { id: 1 }, { id: 2 }, { id: 3 }, { id: 4 }, { id: 5 }, + ]) + expect(df.rows).toHaveBeenCalledTimes(3) + expect(df.rows).toHaveBeenCalledWith(2, 4, undefined) + }) + + it('should handle overlapping cached blocks', async () => { + const df = makeDf() + const dfCached = rowCache(df) + + // Cache first block + await dfCached.rows(6, 9) + + // Fetch overlapping block + const overlappingRows = await awaitRows(dfCached.rows(8, 11)) + expect(overlappingRows).toEqual([ + { id: 8 }, { id: 9 }, { id: 10 }, + ]) + expect(df.rows).toHaveBeenCalledTimes(2) + expect(df.rows).toHaveBeenCalledWith(9, 11, undefined) + }) +}) diff --git a/packages/utils/tsconfig.json b/packages/utils/tsconfig.json index c3e5ca28..97c94b26 100644 --- a/packages/utils/tsconfig.json +++ b/packages/utils/tsconfig.json @@ -3,7 +3,7 @@ "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.tsbuildinfo", "target": "ES2020", "useDefineForClassFields": true, - "lib": ["ES2020", "DOM", "DOM.Iterable"], + "lib": ["ESNext", "DOM", "DOM.Iterable"], "module": "NodeNext", "skipLibCheck": true,