Skip to content

Commit 5716faf

Browse files
authored
Parse field nullability (#27)
1 parent 2d2a202 commit 5716faf

File tree

2 files changed

+87
-8
lines changed

2 files changed

+87
-8
lines changed

src/field.ts

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@
33
import * as arrow from "apache-arrow";
44
import { assert } from "./vector";
55

6+
/**
 * Boolean breakdown of a field's flags value, as produced by `parseFlags`.
 * `parseField` forwards `nullable` to the `arrow.Field` constructor.
 */
interface Flags {
7+
nullable: boolean;
8+
dictionaryOrdered: boolean;
9+
mapKeysSorted: boolean;
10+
}
11+
612
const UTF8_DECODER = new TextDecoder("utf-8");
713
// Note: it looks like duration types don't yet exist in Arrow JS
814
const formatMapping: Record<string, arrow.DataType | undefined> = {
@@ -49,7 +55,7 @@ export function parseField(buffer: ArrayBuffer, ptr: number) {
4955
const metadata = parseMetadata(dataView, metadataPtr);
5056

5157
// Extra 4 to be 8-byte aligned
52-
const flags = dataView.getBigInt64(ptr + 16, true);
58+
const flags = parseFlags(dataView.getBigInt64(ptr + 16, true));
5359
const nChildren = dataView.getBigInt64(ptr + 24, true);
5460

5561
const ptrToChildrenPtrs = dataView.getUint32(ptr + 32, true);
@@ -63,7 +69,7 @@ export function parseField(buffer: ArrayBuffer, ptr: number) {
6369

6470
const primitiveType = formatMapping[formatString];
6571
if (primitiveType) {
66-
return new arrow.Field(name, primitiveType, undefined, metadata);
72+
return new arrow.Field(name, primitiveType, flags.nullable, metadata);
6773
}
6874

6975
// decimal
@@ -74,7 +80,7 @@ export function parseField(buffer: ArrayBuffer, ptr: number) {
7480
const bitWidth = parts[2] ? parseInt(parts[2]) : undefined;
7581

7682
const type = new arrow.Decimal(scale, precision, bitWidth);
77-
return new arrow.Field(name, type, undefined, metadata);
83+
return new arrow.Field(name, type, flags.nullable, metadata);
7884
}
7985

8086
// timestamp
@@ -105,28 +111,28 @@ export function parseField(buffer: ArrayBuffer, ptr: number) {
105111
}
106112

107113
const type = new arrow.Timestamp(timeUnit, timezone);
108-
return new arrow.Field(name, type, undefined, metadata);
114+
return new arrow.Field(name, type, flags.nullable, metadata);
109115
}
110116

111117
// struct
112118
if (formatString === "+s") {
113119
const type = new arrow.Struct(childrenFields);
114-
return new arrow.Field(name, type, undefined, metadata);
120+
return new arrow.Field(name, type, flags.nullable, metadata);
115121
}
116122

117123
// list
118124
if (formatString === "+l") {
119125
assert(childrenFields.length === 1);
120126
const type = new arrow.List(childrenFields[0]);
121-
return new arrow.Field(name, type, undefined, metadata);
127+
return new arrow.Field(name, type, flags.nullable, metadata);
122128
}
123129

124130
// FixedSizeBinary
125131
if (formatString.slice(0, 2) === "w:") {
126132
// The size of the binary is the integer after the colon
127133
const byteWidth = parseInt(formatString.slice(2));
128134
const type = new arrow.FixedSizeBinary(byteWidth);
129-
return new arrow.Field(name, type, undefined, metadata);
135+
return new arrow.Field(name, type, flags.nullable, metadata);
130136
}
131137

132138
// FixedSizeList
@@ -135,12 +141,30 @@ export function parseField(buffer: ArrayBuffer, ptr: number) {
135141
// The size of the list is the integer after the colon
136142
const innerSize = parseInt(formatString.slice(3));
137143
const type = new arrow.FixedSizeList(innerSize, childrenFields[0]);
138-
return new arrow.Field(name, type, undefined, metadata);
144+
return new arrow.Field(name, type, flags.nullable, metadata);
139145
}
140146

141147
throw new Error(`Unsupported format: ${formatString}`);
142148
}
143149

150+
// https://stackoverflow.com/a/9954810
151+
/**
 * Decode the `flags` bitfield of an Arrow C Data Interface `ArrowSchema`.
 *
 * Each flag occupies a fixed bit, independent of which other flags are set:
 *   - ARROW_FLAG_DICTIONARY_ORDERED = 1
 *   - ARROW_FLAG_NULLABLE           = 2
 *   - ARROW_FLAG_MAP_KEYS_SORTED    = 4
 *
 * The bits are tested with masks rather than by indexing into the binary
 * string representation: `toString(2)` puts the most significant bit first,
 * so positional indexing reads the wrong flag whenever the highest set bit
 * changes (e.g. `1n` or `4n` would be misreported as nullable).
 *
 * @param flag - Raw 64-bit flags value read from the schema struct.
 * @returns The three known flags as booleans; unknown bits are ignored.
 */
function parseFlags(flag: bigint): Flags {
  const DICTIONARY_ORDERED = 1n;
  const NULLABLE = 2n;
  const MAP_KEYS_SORTED = 4n;

  return {
    nullable: (flag & NULLABLE) !== 0n,
    dictionaryOrdered: (flag & DICTIONARY_ORDERED) !== 0n,
    mapKeysSorted: (flag & MAP_KEYS_SORTED) !== 0n,
  };
}
167+
144168
/** Parse a null-terminated C-style string */
145169
function parseNullTerminatedString(
146170
dataView: DataView,

tests/ffi.ts

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,11 @@ test("fixed size list", (t) => {
136136

137137
t.equals(field.name, originalField.name, "Field name should be equal.");
138138
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
139+
t.equals(
140+
field.nullable,
141+
originalField.nullable,
142+
"Field nullability should be equal."
143+
);
139144

140145
const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
141146
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
@@ -162,6 +167,11 @@ test("struct", (t) => {
162167

163168
t.equals(field.name, originalField.name, "Field name should be equal.");
164169
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
170+
t.equals(
171+
field.nullable,
172+
originalField.nullable,
173+
"Field nullability should be equal."
174+
);
165175

166176
const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
167177
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
@@ -197,6 +207,11 @@ test("binary", (t) => {
197207

198208
t.equals(field.name, originalField.name, "Field name should be equal.");
199209
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
210+
t.equals(
211+
field.nullable,
212+
originalField.nullable,
213+
"Field nullability should be equal."
214+
);
200215

201216
const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
202217
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
@@ -229,6 +244,11 @@ test("string", (t) => {
229244

230245
t.equals(field.name, originalField.name, "Field name should be equal.");
231246
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
247+
t.equals(
248+
field.nullable,
249+
originalField.nullable,
250+
"Field nullability should be equal."
251+
);
232252

233253
const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
234254
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
@@ -254,6 +274,11 @@ test("boolean", (t) => {
254274

255275
t.equals(field.name, originalField.name, "Field name should be equal.");
256276
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
277+
t.equals(
278+
field.nullable,
279+
originalField.nullable,
280+
"Field nullability should be equal."
281+
);
257282

258283
const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
259284
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
@@ -279,6 +304,11 @@ test("null array", (t) => {
279304

280305
t.equals(field.name, originalField.name, "Field name should be equal.");
281306
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
307+
t.equals(
308+
field.nullable,
309+
originalField.nullable,
310+
"Field nullability should be equal."
311+
);
282312

283313
const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
284314
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
@@ -304,6 +334,11 @@ test("list array", (t) => {
304334

305335
t.equals(field.name, originalField.name, "Field name should be equal.");
306336
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
337+
t.equals(
338+
field.nullable,
339+
originalField.nullable,
340+
"Field nullability should be equal."
341+
);
307342

308343
const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
309344
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
@@ -339,6 +374,11 @@ test("extension array", (t) => {
339374

340375
t.equals(field.name, originalField.name, "Field name should be equal.");
341376
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
377+
t.equals(
378+
field.nullable,
379+
originalField.nullable,
380+
"Field nullability should be equal."
381+
);
342382
t.equals(
343383
field.metadata.size,
344384
originalField.metadata.size,
@@ -379,6 +419,11 @@ test.skip("decimal128", (t) => {
379419

380420
t.equals(field.name, originalField.name, "Field name should be equal.");
381421
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
422+
t.equals(
423+
field.nullable,
424+
originalField.nullable,
425+
"Field nullability should be equal."
426+
);
382427

383428
const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
384429
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
@@ -406,6 +451,11 @@ test("date32", (t) => {
406451

407452
t.equals(field.name, originalField.name, "Field name should be equal.");
408453
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
454+
t.equals(
455+
field.nullable,
456+
originalField.nullable,
457+
"Field nullability should be equal."
458+
);
409459

410460
const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
411461
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
@@ -431,6 +481,11 @@ test.skip("timestamp", (t) => {
431481

432482
t.equals(field.name, originalField.name, "Field name should be equal.");
433483
t.equals(field.typeId, originalField.typeId, "Type id should be equal.");
484+
t.equals(
485+
field.nullable,
486+
originalField.nullable,
487+
"Field nullability should be equal."
488+
);
434489

435490
const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
436491
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);

0 commit comments

Comments
 (0)