Skip to content

Commit 92670c0

Browse files
authored
CSV and JSONL support (#371)
* CSV and JSONL table providers * Rename ParquetView to TableView
1 parent afca623 commit 92670c0

File tree

10 files changed

+225
-30
lines changed

10 files changed

+225
-30
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ package-lock.json
2828
*.tgz
2929
.vscode
3030
*.parquet
31+
*.csv
32+
*.jsonl
3133
/coverage/
3234

3335
/lib/

package.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,25 +62,25 @@
6262
"squirreling": "0.7.9"
6363
},
6464
"devDependencies": {
65-
"@storybook/react-vite": "10.2.2",
65+
"@storybook/react-vite": "10.2.3",
6666
"@testing-library/react": "16.3.2",
67-
"@types/node": "25.1.0",
67+
"@types/node": "25.2.0",
6868
"@types/react": "19.2.10",
6969
"@types/react-dom": "19.2.3",
7070
"@vitejs/plugin-react": "5.1.2",
7171
"@vitest/coverage-v8": "4.0.18",
7272
"eslint": "9.39.2",
7373
"eslint-plugin-react": "7.37.5",
7474
"eslint-plugin-react-hooks": "7.0.1",
75-
"eslint-plugin-react-refresh": "0.4.26",
76-
"eslint-plugin-storybook": "10.2.2",
77-
"globals": "17.2.0",
75+
"eslint-plugin-react-refresh": "0.5.0",
76+
"eslint-plugin-storybook": "10.2.3",
77+
"globals": "17.3.0",
7878
"jsdom": "27.4.0",
7979
"nodemon": "3.1.11",
8080
"npm-run-all": "4.1.5",
8181
"react": "19.2.4",
8282
"react-dom": "19.2.4",
83-
"storybook": "10.2.2",
83+
"storybook": "10.2.3",
8484
"typescript": "5.9.3",
8585
"typescript-eslint": "8.54.0",
8686
"vite": "7.3.1",
File renamed without changes.

src/components/ParquetView/ParquetView.tsx renamed to src/components/TableView/TableView.tsx

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
1-
import HighTable, { DataFrame, sortableDataFrame } from 'hightable'
1+
import HighTable, { DataFrame } from 'hightable'
22
import 'hightable/src/HighTable.css'
3-
import { asyncBufferFromUrl, parquetMetadataAsync } from 'hyparquet'
43
import React, { useCallback, useEffect, useState } from 'react'
54
import { useConfig } from '../../hooks/useConfig.js'
65
import { appendSearchParams } from '../../lib/routes.js'
76
import { FileSource } from '../../lib/sources/types.js'
8-
import { parquetDataFrame } from '../../lib/tableProvider.js'
7+
import { tableProvider } from '../../lib/tableProvider.js'
98
import { cn } from '../../lib/utils.js'
109
import CellPanel from '../CellPanel/CellPanel.js'
1110
import ContentWrapper, { ContentSize } from '../ContentWrapper/ContentWrapper.js'
1211
import SlidePanel from '../SlidePanel/SlidePanel.js'
13-
import styles from './ParquetView.module.css'
12+
import styles from './TableView.module.css'
1413

1514
interface ViewerProps {
1615
source: FileSource
@@ -23,35 +22,30 @@ interface Content extends ContentSize {
2322
}
2423

2524
/**
26-
* Parquet file viewer
25+
* Table file viewer for parquet, CSV, and JSONL files
2726
*/
28-
export default function ParquetView({ source, setProgress, setError }: ViewerProps) {
27+
export default function TableView({ source, setProgress, setError }: ViewerProps) {
2928
const [isLoading, setIsLoading] = useState<boolean>(true)
3029
const [content, setContent] = useState<Content>()
3130
const [cell, setCell] = useState<{ row: number, col: number } | undefined>()
3231
const { customClass, routes } = useConfig()
3332

3433
useEffect(() => {
35-
async function loadParquetDataFrame() {
34+
async function loadDataFrame() {
3635
try {
3736
setIsLoading(true)
38-
setProgress(0.33)
39-
const { resolveUrl, requestInit } = source
40-
const asyncBuffer = await asyncBufferFromUrl({ url: resolveUrl, requestInit })
41-
const from = { url: resolveUrl, byteLength: asyncBuffer.byteLength, requestInit }
42-
setProgress(0.66)
43-
const metadata = await parquetMetadataAsync(asyncBuffer)
44-
const dataframe = sortableDataFrame(parquetDataFrame(from, metadata))
45-
const fileSize = asyncBuffer.byteLength
46-
setContent({ dataframe, fileSize })
37+
setProgress(0.5)
38+
const { resolveUrl, fileName, requestInit } = source
39+
const dataframe = await tableProvider({ url: resolveUrl, fileName, requestInit })
40+
setContent({ dataframe })
4741
} catch (error) {
4842
setError(error)
4943
} finally {
5044
setIsLoading(false)
5145
setProgress(1)
5246
}
5347
}
54-
void loadParquetDataFrame()
48+
void loadDataFrame()
5549
}, [setError, setProgress, source])
5650

5751
// Close cell view on escape key

src/components/Viewer/Viewer.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import AvroView from '../AvroView/AvroView.js'
44
import ImageView from '../ImageView/ImageView.js'
55
import JsonView from '../JsonView/JsonView.js'
66
import MarkdownView from '../MarkdownView/MarkdownView.js'
7-
import TableView from '../ParquetView/ParquetView.js'
7+
import TableView from '../TableView/TableView.js'
88
import TextView from '../TextView/TextView.js'
99

1010
interface ViewerProps {
@@ -21,7 +21,7 @@ export default function Viewer({ source, setError, setProgress }: ViewerProps) {
2121
const { fileName } = source
2222
if (fileName.endsWith('.md')) {
2323
return <MarkdownView source={source} setError={setError} />
24-
} else if (fileName.endsWith('.parquet')) {
24+
} else if (fileName.endsWith('.parquet') || fileName.endsWith('.csv') || fileName.endsWith('.jsonl')) {
2525
return <TableView source={source} setError={setError} setProgress={setProgress} />
2626
} else if (fileName.endsWith('.json')) {
2727
return <JsonView source={source} setError={setError} />

src/components/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import Layout from './Layout/Layout.js'
1414
import Markdown from './Markdown/Markdown.js'
1515
import MarkdownView from './MarkdownView/MarkdownView.js'
1616
import Page from './Page/Page.js'
17-
import ParquetView from './ParquetView/ParquetView.js'
17+
import TableView from './TableView/TableView.js'
1818
import ProgressBar from './ProgressBar/ProgressBar.js'
1919
import SlidePanel from './SlidePanel/SlidePanel.js'
2020
import Spinner from './Spinner/Spinner.js'
@@ -40,7 +40,7 @@ export {
4040
Markdown,
4141
MarkdownView,
4242
Page,
43-
ParquetView,
43+
TableView,
4444
ProgressBar,
4545
SlidePanel,
4646
Spinner,

src/lib/csv.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/**
 * Parse CSV text into a nested array of rows and columns.
 *
 * Fields are separated by commas and rows by `\n` or `\r\n`. A field may be
 * wrapped in double quotes, in which case commas and line breaks inside it are
 * kept literally and a doubled quote (`""`) stands for one quote character.
 * An empty trailing line is not emitted as a row. If the input ends inside an
 * open quoted field, the problem is reported with `console.error` and the
 * partial field is still returned.
 *
 * @param text CSV content to parse
 * @returns one array of field strings per row, in input order
 */
export function parseCsv(text: string): string[][] {
  const rows: string[][] = []
  let row: string[] = []
  let field = ''
  let inQuotes = false
  // set right after a quote is seen inside a quoted field: that quote is
  // either the first half of an escaped quote or the closing quote
  let previousCharWasQuote = false

  for (let i = 0; i < text.length; i++) {
    const char = text.charAt(i)

    if (inQuotes && char === '"' && !previousCharWasQuote) {
      // first quote, wait to see if it's escaped or end of field
      previousCharWasQuote = true
    } else if (inQuotes && char === '"' && previousCharWasQuote) {
      // escaped quote ("") inside a quoted field
      field += char
      previousCharWasQuote = false
    } else if (inQuotes && !previousCharWasQuote) {
      // append quoted character to field
      field += char
    } else {
      // not in quotes (or the previous quote just closed the field)
      inQuotes = false
      previousCharWasQuote = false
      switch (char) {
        case ',':
          // emit column
          row.push(field)
          field = ''
          break
        case '\r':
          // CRLF line ending: drop the CR and let the following LF emit the
          // row; a lone CR is kept as ordinary field content
          if (text.charAt(i + 1) !== '\n') {
            field += char
          }
          break
        case '\n':
          // emit row
          row.push(field)
          rows.push(row)
          row = []
          field = ''
          break
        case '"':
          inQuotes = true
          break
        default:
          field += char
      }
    }
  }

  if (inQuotes && !previousCharWasQuote) {
    console.error('csv unterminated quote')
  }

  // handle last field and row, but skip empty last line
  if (field || row.length) {
    row.push(field)
    rows.push(row)
  }

  return rows
}

src/lib/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
export { appendSearchParams, replaceSearchParams } from './routes.js'
22
export * from './sources/index.js'
3-
export { parquetDataFrame } from './tableProvider.js'
3+
export { parseCsv } from './csv.js'
4+
export { csvDataFrame, jsonLinesDataFrame, parquetDataFrame, tableProvider } from './tableProvider.js'
45
export { asyncBufferFrom, cn, contentTypes, formatFileSize, getFileDate, getFileDateShort, imageTypes, parseFileSize } from './utils.js'
56
export { parquetQueryWorker, parquetReadObjectsWorker, parquetReadWorker } from './workers/parquetWorkerClient.js'
67
export type { AsyncBufferFrom } from './workers/types.js'

src/lib/tableProvider.ts

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,38 @@
1-
import { DataFrame, DataFrameEvents, ResolvedValue, checkSignal, createEventTarget, validateFetchParams, validateGetCellParams, validateGetRowNumberParams } from 'hightable'
1+
import { DataFrame, DataFrameEvents, ResolvedValue, arrayDataFrame, checkSignal, createEventTarget, sortableDataFrame, validateFetchParams, validateGetCellParams, validateGetRowNumberParams } from 'hightable'
22
import type { ColumnData } from 'hyparquet'
3-
import { FileMetaData, ParquetReadOptions, parquetSchema } from 'hyparquet'
3+
import { FileMetaData, ParquetReadOptions, asyncBufferFromUrl, parquetMetadataAsync, parquetSchema } from 'hyparquet'
4+
import { parseCsv } from './csv.js'
45
import { parquetReadWorker } from './workers/parquetWorkerClient.js'
56
import type { AsyncBufferFrom } from './workers/types.d.ts'
67

8+
/**
 * Options accepted by tableProvider.
 */
interface TableProviderOptions {
  /** Location the file is loaded from */
  url: string
  /** File name; its extension (compared case-insensitively) selects the parser */
  fileName: string
  /** Optional fetch settings forwarded with every request for the file */
  requestInit?: RequestInit
}

/**
 * Build a dataframe for the file at `url`, choosing the parser from the
 * extension of `fileName`: `.csv` and `.jsonl` get their dedicated loaders,
 * anything else is read as parquet.
 */
export async function tableProvider({ url, fileName, requestInit }: TableProviderOptions): Promise<DataFrame> {
  // the async buffer is opened up front for every file type, because its
  // byte length is part of the source descriptor handed to the loaders
  const remoteBuffer = await asyncBufferFromUrl({ url, requestInit })
  const { byteLength } = remoteBuffer
  const from = { url, byteLength, requestInit }

  const normalizedName = fileName.toLowerCase()
  const hasExtension = (extension: string): boolean => normalizedName.endsWith(extension)

  if (hasExtension('.csv')) return csvDataFrame(from)
  if (hasExtension('.jsonl')) return jsonLinesDataFrame(from)

  // fall back to parquet for every other extension
  const metadata = await parquetMetadataAsync(remoteBuffer)
  const parquet = parquetDataFrame(from, metadata)
  return sortableDataFrame(parquet)
}
35+
736
type GroupStatus = {
837
kind: 'unfetched'
938
} | {
@@ -130,3 +159,49 @@ export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData,
130159

131160
return unsortableDataFrame
132161
}
162+
163+
/**
 * Convert a CSV file into a sortable dataframe.
 *
 * Reads the whole file (from `from.file` when present, otherwise by fetching
 * `from.url` with `from.requestInit`), decodes it with TextDecoder's default
 * encoding and parses it with parseCsv. The first row is treated as the
 * header; every following row becomes an object keyed by the header names.
 * Cells missing from a short row are `undefined`.
 *
 * @param from source descriptor of the CSV file
 * @returns a sortable dataframe over the parsed rows
 * @throws Error when the HTTP response status is not successful
 */
export async function csvDataFrame(from: AsyncBufferFrom): Promise<DataFrame> {
  let buffer: ArrayBuffer
  if ('file' in from) {
    buffer = await from.file.arrayBuffer()
  } else {
    const response = await fetch(from.url, from.requestInit)
    // without this check an HTTP error page would be parsed and shown as data
    if (!response.ok) {
      throw new Error(`Failed to fetch CSV file: ${response.status} ${response.statusText}`)
    }
    buffer = await response.arrayBuffer()
  }

  const text = new TextDecoder().decode(buffer)
  const lines = parseCsv(text)
  const header = lines[0] ?? []
  const rows = lines.slice(1).map(row => {
    return Object.fromEntries(header.map((key, i) => [key, row[i]]))
  })
  return sortableDataFrame(arrayDataFrame(rows))
}
186+
187+
/**
 * Convert a JSONL file into a sortable dataframe.
 *
 * Reads the whole file (from `from.file` when present, otherwise by fetching
 * `from.url` with `from.requestInit`), decodes it with TextDecoder's default
 * encoding and parses every non-blank line with JSON.parse. Each line is
 * assumed, not validated, to hold a JSON object.
 *
 * @param from source descriptor of the JSONL file
 * @returns a sortable dataframe over the parsed rows
 * @throws Error when the HTTP response status is not successful
 * @throws SyntaxError when a line is not valid JSON
 */
export async function jsonLinesDataFrame(from: AsyncBufferFrom): Promise<DataFrame> {
  let buffer: ArrayBuffer
  if ('file' in from) {
    buffer = await from.file.arrayBuffer()
  } else {
    const response = await fetch(from.url, from.requestInit)
    // without this check an HTTP error page would be handed to JSON.parse
    if (!response.ok) {
      throw new Error(`Failed to fetch JSONL file: ${response.status} ${response.statusText}`)
    }
    buffer = await response.arrayBuffer()
  }

  const text = new TextDecoder().decode(buffer).trimEnd()
  // blank and whitespace-only lines are dropped, so every remaining line is non-empty
  const lines = text.split('\n').filter(line => line.trim())
  const rows: Record<string, unknown>[] = lines.map(line => {
    return JSON.parse(line) as Record<string, unknown>
  })
  return sortableDataFrame(arrayDataFrame(rows))
}

test/lib/csv.test.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { describe, expect, it, vi } from 'vitest'
2+
import { parseCsv } from '../../src/index.js'
3+
4+
// Unit tests for parseCsv: plain rows, trailing newline, quoted fields,
// escaped quotes, embedded newlines and unterminated quotes.
describe('parseCsv', () => {
  it('parses simple CSV', () => {
    const csv = 'Name,Age,Occupation\nAlice,30,Engineer\nBob,25,Designer'
    const expected = [
      ['Name', 'Age', 'Occupation'],
      ['Alice', '30', 'Engineer'],
      ['Bob', '25', 'Designer'],
    ]
    expect(parseCsv(csv)).toEqual(expected)
  })

  it('ignores empty last line', () => {
    const csv = 'Name,Age,Occupation\nAlice,30,Engineer\n'
    const expected = [
      ['Name', 'Age', 'Occupation'],
      ['Alice', '30', 'Engineer'],
    ]
    expect(parseCsv(csv)).toEqual(expected)
  })

  it('handles quoted fields', () => {
    const csv = 'Name,Age,Occupation\n"Alice, PhD",30,Engineer\nBob,25,"Designer, Senior"'
    const expected = [
      ['Name', 'Age', 'Occupation'],
      ['Alice, PhD', '30', 'Engineer'],
      ['Bob', '25', 'Designer, Senior'],
    ]
    expect(parseCsv(csv)).toEqual(expected)
  })

  it('handles escaped quotes', () => {
    const csv = 'Name,Quote\nAlice,"She said, ""Hello world"""\nBob,"This is ""an example"" of quotes"'
    const expected = [
      ['Name', 'Quote'],
      ['Alice', 'She said, "Hello world"'],
      ['Bob', 'This is "an example" of quotes'],
    ]
    expect(parseCsv(csv)).toEqual(expected)
  })

  it('handles newlines within quoted fields', () => {
    const csv = 'Name,Address\nAlice,"123 Main St.\nAnytown, USA"'
    const expected = [
      ['Name', 'Address'],
      ['Alice', '123 Main St.\nAnytown, USA'],
    ]
    expect(parseCsv(csv)).toEqual(expected)
  })

  it('handles unterminated quotes', () => {
    const csv = 'Name,Quote\nAlice,"This is an unterminated quote\n'
    const expected = [
      ['Name', 'Quote'],
      ['Alice', 'This is an unterminated quote\n'],
    ]
    // silence the expected error so it does not clutter the test output
    const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => undefined)
    try {
      expect(parseCsv(csv)).toEqual(expected)
      expect(errorSpy).toHaveBeenCalledWith('csv unterminated quote')
    } finally {
      // always put the real console.error back so the spy cannot leak into other tests
      errorSpy.mockRestore()
    }
  })
})

0 commit comments

Comments
 (0)