Skip to content

Commit 87c84c6

Browse files
ryan-williams and claude committed
fix: read parquet by column name instead of index
Frontend was reading parquet columns by array index, which broke when February 2026 file was created with different column order.

Changes:
- Use `rowFormat: 'object'` in hyparquet to get columns by name
- Convert all numeric fields with `Number()` to handle BigInt from parquet
- Makes reader independent of column order in parquet schema

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 41bd037 commit 87c84c6

File tree

2 files changed

+25
-12
lines changed

2 files changed

+25
-12
lines changed

www/src/services/dataSources/hyparquetSource.ts

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,18 @@ import { ParquetCache } from '../parquetCache'
1111
import type { AwairRecord } from '../../types/awair'
1212
import type { DataSource, FetchOptions, FetchResult, FetchTiming } from '../dataSource'
1313

14-
// Parquet row tuple type (match column order: timestamp, temp, co2, pm10, pm25, humid, voc)
15-
// Note: timestamp is Date object, temp/humid are float (number), others are BigInt
16-
type AwairRow = [Date, number, bigint, bigint, bigint, number, bigint]
14+
// Parquet row object type (hyparquet returns objects when rowFormat: 'object')
15+
// Note: hyparquet may return numbers OR BigInts depending on parquet column types
16+
// We explicitly convert all numeric fields to Number on read to ensure consistency
17+
interface AwairRowObject {
18+
timestamp: Date
19+
temp: number | bigint
20+
co2: number | bigint
21+
pm10: number | bigint
22+
pm25: number | bigint
23+
humid: number | bigint
24+
voc: number | bigint
25+
}
1726

1827
/** Global cache manager - one ParquetCache per URL */
1928
const cacheManager = new Map<string, ParquetCache>()
@@ -124,23 +133,26 @@ export class HyparquetSource implements DataSource {
124133
}
125134

126135
// Read rows (cache handles fetching missing RGs)
127-
let rows: AwairRow[] = []
128-
await cache.readRows<AwairRow>(rowStart, rowEnd, (data) => {
136+
// Using rowFormat: 'object' to get column names as keys (order-independent)
137+
let rows: AwairRowObject[] = []
138+
await cache.readRows<AwairRowObject>(rowStart, rowEnd, (data) => {
129139
rows = data
130140
})
131141

132142
const stats = cache.getStats()
133143

134144
// Convert to typed records and filter by time range
145+
// Explicitly convert ALL numeric fields to Number to handle BigInt from parquet
146+
// Using object property access (not array index) to be independent of column order
135147
const records: AwairRecord[] = rows
136148
.map((row) => ({
137-
timestamp: row[0],
138-
temp: row[1],
139-
co2: Number(row[2]),
140-
pm10: Number(row[3]),
141-
pm25: Number(row[4]),
142-
humid: row[5],
143-
voc: Number(row[6]),
149+
timestamp: row.timestamp,
150+
temp: Number(row.temp),
151+
co2: Number(row.co2),
152+
pm10: Number(row.pm10),
153+
pm25: Number(row.pm25),
154+
humid: Number(row.humid),
155+
voc: Number(row.voc),
144156
}))
145157
.filter(record => {
146158
const ts = new Date(record.timestamp).getTime()

www/src/services/parquetCache.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -301,6 +301,7 @@ export class ParquetCache {
301301
metadata: this.metadata,
302302
rowStart,
303303
rowEnd,
304+
rowFormat: 'object',
304305
onComplete: (data) => {
305306
if (Array.isArray(data)) {
306307
onComplete(data as T[])

0 commit comments

Comments
 (0)