Skip to content

Commit c7063dd

Browse files
committed
Add duration support (#73)
1 parent 3760085 commit c7063dd

File tree

7 files changed

+104
-2
lines changed

7 files changed

+104
-2
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ Most of the unsupported types should be pretty straightforward to implement; the
139139
- [x] Time32
140140
- [x] Time64
141141
- [x] Timestamp (with timezone)
142-
- [ ] Duration
142+
- [x] Duration
143143
- [ ] Interval
144144

145145
### Nested Types

src/field.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,31 @@ export function parseField(buffer: ArrayBuffer, ptr: number): arrow.Field {
118118
return new arrow.Field(name, type, flags.nullable, metadata);
119119
}
120120

121+
// duration
// Format strings beginning with "tD" describe a Duration; the third
// character selects the time unit (s, m, u or n).
if (formatString.slice(0, 2) === "tD") {
  let timeUnit: arrow.TimeUnit;
  switch (formatString[2]) {
    case "s":
      timeUnit = arrow.TimeUnit.SECOND;
      break;
    case "m":
      timeUnit = arrow.TimeUnit.MILLISECOND;
      break;
    case "u":
      timeUnit = arrow.TimeUnit.MICROSECOND;
      break;
    case "n":
      timeUnit = arrow.TimeUnit.NANOSECOND;
      break;

    default:
      // Name the type actually being parsed so the failure is not mistaken
      // for a problem in the timestamp branch.
      throw new Error(`invalid duration ${formatString}`);
  }

  const type = new arrow.Duration(timeUnit);
  return new arrow.Field(name, type, flags.nullable, metadata);
}
145+
121146
// struct
122147
if (formatString === "+s") {
123148
const type = new arrow.Struct(childrenFields);

src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export enum Type {
2020
FixedSizeBinary = 15 /** Fixed-size binary. Each value occupies the same number of bytes */,
2121
FixedSizeList = 16 /** Fixed-size list. Each value occupies the same number of bytes */,
2222
Map = 17 /** Map of named logical types */,
23+
Duration = 18 /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. */,
2324

2425
// These 3 are not included in the upstream enum
2526
LargeList = 30,

src/vector.ts

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,31 @@ export function parseData<T extends DataType>(
252252
});
253253
}
254254

255+
// Duration: the first buffer pointer is the validity bitmap, the second the
// values buffer.
if (DataType.isDuration(dataType)) {
  const [validityPtr, dataPtr] = bufferPtrs;
  const nullBitmap = parseNullBitmap(
    dataView.buffer,
    validityPtr,
    length,
    copy,
  );

  // Size the copy from the typed array's own element width so the copied
  // buffer always holds exactly `length` elements, matching the zero-copy
  // view below. Duration values are 64-bit for every time unit.
  const byteWidth = dataType.ArrayType.BYTES_PER_ELEMENT;
  const data = copy
    ? new dataType.ArrayType(
        copyBuffer(dataView.buffer, dataPtr, length * byteWidth),
      )
    : new dataType.ArrayType(dataView.buffer, dataPtr, length);
  return arrow.makeData({
    type: dataType,
    offset,
    length,
    nullCount,
    data,
    nullBitmap,
  });
}
279+
255280
if (DataType.isInterval(dataType)) {
256281
const [validityPtr, dataPtr] = bufferPtrs;
257282
const nullBitmap = parseNullBitmap(
@@ -642,7 +667,9 @@ function getDateByteWidth(type: arrow.Date_): number {
642667
assertUnreachable();
643668
}
644669

645-
function getTimeByteWidth(type: arrow.Time | arrow.Timestamp): number {
670+
function getTimeByteWidth(
671+
type: arrow.Time | arrow.Timestamp | arrow.Duration,
672+
): number {
646673
switch (type.unit) {
647674
case arrow.TimeUnit.SECOND:
648675
case arrow.TimeUnit.MILLISECOND:

tests/ffi.test.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,39 @@ describe("date32", (t) => {
628628
// }
629629
// });
630630

631+
describe("duration", (t) => {
  /**
   * Parses the "duration" column through the FFI pointers and checks that the
   * resulting field and values match the reference table.
   *
   * @param copy forwarded to `parseVector` as its copy flag.
   */
  function test(copy: boolean) {
    const columnIndex = TEST_TABLE.schema.fields.findIndex(
      (field) => field.name === "duration",
    );

    const originalField = TEST_TABLE.schema.fields[columnIndex];
    // getChildAt's result is cast to a plain Vector for the comparisons below.
    const originalVector = TEST_TABLE.getChildAt(columnIndex) as arrow.Vector;
    const fieldPtr = FFI_TABLE.schemaAddr(columnIndex);
    const field = parseField(WASM_MEMORY.buffer, fieldPtr);

    expect(field.name).toStrictEqual(originalField.name);
    expect(field.typeId).toStrictEqual(originalField.typeId);
    expect(field.nullable).toStrictEqual(originalField.nullable);

    const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
    const wasmVector = parseVector(
      WASM_MEMORY.buffer,
      arrayPtr,
      field.type,
      copy,
    );

    // `expect(a, b)` without a matcher asserts nothing (the second argument
    // is only a failure message), so compare the values explicitly.
    for (let i = 0; i < 3; i++) {
      expect(wasmVector.get(i)).toStrictEqual(originalVector.get(i));
    }
  }

  it("copy=false", () => test(false));
  it("copy=true", () => test(true));
});
663+
631664
describe("nullable int", (t) => {
632665
function test(copy: boolean) {
633666
let columnIndex = TEST_TABLE.schema.fields.findIndex(

tests/pyarrow_generate_data.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from decimal import Decimal
33

44
import numpy as np
5+
import pandas as pd
56
import pyarrow as pa
67
import pyarrow.feather as feather
78

@@ -115,6 +116,20 @@ def timestamp_array() -> pa.Array:
115116
return arr
116117

117118

119+
def duration_array() -> pa.Array:
    """Build the three-element duration column from pandas Timedelta values.

    The result is asserted to be a ``pa.DurationArray`` whose unit is ``"us"``.
    """
    spans = [pd.Timedelta(text) for text in ("2d", "1d", "1w")]
    arr = pa.DurationArray.from_pandas(spans)

    assert isinstance(arr, pa.DurationArray)
    assert arr.type.unit == "us"
    return arr
132+
118133
def nullable_int() -> pa.Array:
119134
# True means null
120135
mask = [True, False, True]
@@ -227,6 +242,7 @@ def table() -> pa.Table:
227242
"nullable_int": nullable_int(),
228243
"sparse_union": sparse_union_array(),
229244
"dense_union": dense_union_array(),
245+
"duration": duration_array(),
230246
}
231247
)
232248

tests/table.arrow

176 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)