Skip to content

Commit 63ebe4c

Browse files
authored
fix: decoder bug when a null column is not last (#50)
* fix: decoder bug when a null column is not last 2.2.4 added null column support (7b9b5eb), but the encoder/decoder got out of sync. Encoder wrote field nodes for null columns, decoder didn't read them. Result: columns after null-type columns read wrong field nodes and got corrupted. Added tests and test file as well. * Fix linter
1 parent 829c093 commit 63ebe4c

File tree

3 files changed

+80
-6
lines changed

3 files changed

+80
-6
lines changed

src/decode/table-from-ipc.js

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -137,17 +137,17 @@ function contextGenerator(options, version, dictionaryMap) {
137137
*/
138138
function visit(type, ctx) {
139139
const { typeId } = type;
140-
const { length, options, node, buffer, variadic, version } = ctx;
140+
const { options, node, buffer, variadic, version } = ctx;
141141
const BatchType = batchType(type, options);
142142

143+
// extract the next { length, nullCount } field node - ALL fields have field nodes
144+
const base = { ...node(), type };
145+
143146
if (typeId === Type.Null) {
144-
// no field node, no buffers
145-
return new BatchType({ length, nullCount: length, type });
147+
// null fields have field nodes but no data buffers
148+
return new BatchType({ ...base, nullCount: base.length });
146149
}
147150

148-
// extract the next { length, nullCount } field node
149-
const base = { ...node(), type };
150-
151151
switch (typeId) {
152152
// validity and data value buffers
153153
case Type.Bool:

test/data/null_not_last.arrows

712 Bytes
Binary file not shown.

test/null-field-test.js

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,4 +98,78 @@ describe('Null field compatibility', () => {
9898
assert.deepStrictEqual(Array.from(mixedData), [1, null, 3, null, 5]);
9999
assert.deepStrictEqual(Array.from(nullData), [null, null, null, null, null]);
100100
});
101+
102+
it('handles null-type column NOT in last position (field node alignment bug)', async () => {
103+
// Regression test: 2.2.4 added null column support (7b9b5eb), but the encoder/decoder
104+
// got out of sync. Encoder wrote field nodes for null columns, decoder didn't read them.
105+
// Result: columns after null-type columns read wrong field nodes and got corrupted.
106+
107+
// Test data generated with flechette itself, contains:
108+
// - strings column: ['s1', 's2', 's3']
109+
// - nulls column: [null, null, null] (Type.Null)
110+
// - floats column: [3.14, 3.14, 3.14]
111+
const bytes = new Uint8Array(await readFile('test/data/null_not_last.arrows'));
112+
const table = tableFromIPC(bytes);
113+
114+
assert.strictEqual(table.numRows, 3);
115+
assert.strictEqual(table.numCols, 3);
116+
117+
const fields = table.schema.fields;
118+
assert.strictEqual(fields[0].name, 'strings');
119+
assert.strictEqual(fields[1].name, 'nulls');
120+
assert.strictEqual(fields[2].name, 'floats');
121+
assert.strictEqual(fields[1].type.typeId, 1); // Type.Null
122+
123+
const strings = table.getChild('strings');
124+
const nulls = table.getChild('nulls');
125+
const floats = table.getChild('floats');
126+
127+
// Validate strings column
128+
assert.deepStrictEqual(Array.from(strings.toArray()), ['s1', 's2', 's3']);
129+
assert.strictEqual(strings.data[0].nullCount, 0);
130+
131+
// Validate nulls column
132+
assert.deepStrictEqual(Array.from(nulls.toArray()), [null, null, null]);
133+
assert.strictEqual(nulls.data[0].nullCount, 3);
134+
135+
// Critical: floats column after null-type column must decode correctly
136+
// Bug would cause: nullCount=3, values=[null, null, null]
137+
assert.strictEqual(floats.data[0].nullCount, 0);
138+
assert.strictEqual(floats.data[0].length, 3);
139+
assert.deepStrictEqual(Array.from(floats.toArray()), [3.14, 3.14, 3.14]);
140+
});
141+
142+
it('handles multiple consecutive null-type columns', () => {
143+
// Edge case: multiple null-type columns in a row
144+
const table = tableFromColumns({
145+
str1: columnFromArray(['a', 'b']),
146+
null1: columnFromArray([null, null]),
147+
null2: columnFromArray([null, null]),
148+
float: columnFromArray([3.14, 3.14])
149+
});
150+
151+
const bytes = tableToIPC(table, { format: 'stream' });
152+
const decoded = tableFromIPC(bytes);
153+
154+
assert.strictEqual(decoded.numRows, 2);
155+
assert.strictEqual(decoded.numCols, 4);
156+
157+
// All columns must decode correctly despite two null columns in between
158+
const str1 = decoded.getChild('str1');
159+
const null1 = decoded.getChild('null1');
160+
const null2 = decoded.getChild('null2');
161+
const float = decoded.getChild('float');
162+
163+
assert.deepStrictEqual(Array.from(str1.toArray()), ['a', 'b']);
164+
assert.strictEqual(str1.data[0].nullCount, 0);
165+
166+
assert.deepStrictEqual(Array.from(null1.toArray()), [null, null]);
167+
assert.strictEqual(null1.data[0].nullCount, 2);
168+
169+
assert.deepStrictEqual(Array.from(null2.toArray()), [null, null]);
170+
assert.strictEqual(null2.data[0].nullCount, 2);
171+
172+
assert.deepStrictEqual(Array.from(float.toArray()), [3.14, 3.14]);
173+
assert.strictEqual(float.data[0].nullCount, 0);
174+
});
101175
});

0 commit comments

Comments
 (0)