Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/constants.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
/** Magic bytes 'ARROW1' indicating the Arrow 'file' format. */
export const MAGIC = new Uint8Array([0x41, 0x52, 0x52, 0x4f, 0x57, 0x31]);

/** Bytes for an 'end of stream' message. */
export const EOS = new Uint8Array([0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00]);

/**
* Apache Arrow version.
*/
Expand Down
12 changes: 4 additions & 8 deletions src/encode/data-type.js
Original file line number Diff line number Diff line change
Expand Up @@ -133,17 +133,13 @@ function encodeUnion(builder, type) {
}

/**
 * Encode a dictionary data type as a flatbuffer object.
 * @param {*} builder The flatbuffer builder.
 * @param {*} type The dictionary data type to encode.
 * @returns {number} The offset of the encoded object.
 */
function encodeDictionary(builder, type) {
  // The Arrow spec uses signed 32-bit integers as the default index type.
  // However, multiple 3rd party tools fail on a null (default) index type,
  // so we always encode the index data type explicitly here.
  return builder.addObject(4, b => {
    b.addInt64(0, type.id, 0);
    b.addOffset(1, encodeDataType(builder, type.indices), 0);
    b.addInt8(2, +type.ordered, 0);
    // NOT SUPPORTED: 3, dictionaryKind (defaults to dense array)
  });
}

/**
 * Test if a data type is a signed 32-bit integer type.
 * @param {*} type The data type to test.
 * @returns {boolean} True if the type is a signed int32, false otherwise.
 */
function isInt32(type) {
  const { typeId, bitWidth, signed } = type;
  return typeId === Type.Int && bitWidth === 32 && signed;
}
4 changes: 3 additions & 1 deletion src/encode/encode-ipc.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { MAGIC, MessageHeader } from '../constants.js';
import { EOS, MAGIC, MessageHeader } from '../constants.js';
import { Builder } from './builder.js';
import { encodeDictionaryBatch } from './dictionary-batch.js';
import { writeFooter } from './footer.js';
Expand Down Expand Up @@ -64,6 +64,8 @@ export function encodeIPC(data, { sink, format = STREAM } = {}) {

if (file) {
writeFooter(builder, schema, dictBlocks, recordBlocks, metadata);
} else {
builder.addBuffer(EOS);
}

return builder.sink;
Expand Down
43 changes: 43 additions & 0 deletions test/duckdb-compat-test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import assert from 'node:assert';
import { DuckDB } from '@uwdata/mosaic-duckdb';
import { tableFromArrays, tableFromIPC, tableToIPC } from '../src/index.js';
import * as dataMethods from './util/data.js';

// Arrow types not supported by DuckDB.
// Data generator methods with these names are excluded from the
// compatibility tests below.
const skip = new Set([
  'binaryView', 'empty', 'largeListView', 'listView',
  'runEndEncoded32', 'runEndEncoded64', 'utf8View'
]);

describe('DuckDB compatibility', () => {
  // Exercise every test data generator, except those DuckDB can't ingest.
  const cases = Object.entries(dataMethods)
    .filter(([name]) => !skip.has(name));

  for (const [name, method] of cases) {
    it(`includes ${name} data`, async () => {
      const entries = await method();
      const results = await Promise.all(
        entries.map(({ bytes }) => loadIPC(tableFromIPC(bytes)))
      );
      assert.deepStrictEqual(results, Array(entries.length).fill(true));
    });
  }

  it('includes default dictionary types', async () => {
    const table = tableFromArrays({ foo: ['x', 'y', 'z'] });
    assert.strictEqual(await loadIPC(table), true);
  });
});

/**
 * Serialize a table to the Arrow IPC stream format and register the
 * resulting bytes with a fresh DuckDB instance.
 * @param {*} table An Arrow table instance.
 * @returns {Promise<boolean>} Resolves to true if DuckDB accepts the
 *  buffer, false (after logging the error) otherwise. Never rejects.
 */
function loadIPC(table) {
  const bytes = tableToIPC(table, { format: 'stream' });
  const db = new DuckDB();
  return new Promise((resolve) => {
    db.db.register_buffer('arrow_ipc', [bytes], true, (err) => {
      if (err) {
        console.error(err);
      }
      resolve(!err);
    });
  });
}
Loading