feat: Add 32- and 64-bit decimal support.

jheer · jheer · commit 29ee34508a07 · 2025-02-26T20:18:21.000-08:00
diff --git a/docs/api/data-types.md b/docs/api/data-types.md
@@ -50,7 +50,7 @@ Each of the methods below returns a `DataType` instance as a standard JavaScript
 * [binary](#binary)
 * [utf8](#utf8)
 * [bool](#bool)
-* [decimal](#decimal)
+* [decimal](#decimal), [decimal32](#decimal32), [decimal64](#decimal64), [decimal128](#decimal128), [decimal256](#decimal256)
 * [date](#date), [dateDay](#dateDay), [dateMillisecond](#dateMillisecond)
 * [time](#time), [timeSecond](#timeSecond), [timeMillisecond](#timeMillisecond), [timeMicrosecond](#timeMicrosecond), [timeNanosecond](#timeNanosecond)
 * [timestamp](#timestamp)
@@ -239,21 +239,53 @@ bool()
 <hr/><a id="decimal" href="#decimal">#</a>
 <b>decimal</b>(<i>precision</i>, <i>scale</i>[, <i>bitWidth</i>])
 
-Create an Decimal data type instance for exact decimal values, represented as a 128 or 256-bit integer value in two's complement. Decimals are fixed point numbers with a set *precision* (total number of decimal digits) and *scale* (number of fractional digits). For example, the number `35.42` can be represented as `3542` with *precision* ≥ 4 and *scale* = 2.
+Create a Decimal data type instance for exact decimal values, represented as a 32, 64, 128, or 256-bit integer value in two's complement. Decimals are fixed point numbers with a set *precision* (total number of decimal digits) and *scale* (number of fractional digits). For example, the number `35.42` can be represented as `3542` with *precision* ≥ 4 and *scale* = 2.
 
-By default, Flechette converts decimals to 64-bit floating point numbers upon extraction (e.g., mapping `3542` back to `35.42`). While useful for many downstream applications, this conversion may be lossy and introduce inaccuracies. Pass the `useDecimalBigInt` extraction option (e.g., to [`tableFromIPC`](/flechette/api/#tableFromIPC) or [`tableFromArrays`](/flechette/api/#tableFromArrays)) to instead extract decimal data as `BigInt` values.
+By default, Flechette converts decimals to 64-bit floating point numbers upon extraction (e.g., mapping `3542` back to `35.42`). While useful for many downstream applications, this conversion may be lossy and introduce inaccuracies. Pass the `useDecimalBigInt` extraction option (e.g., to [`tableFromIPC`](/flechette/api/#tableFromIPC) or [`tableFromArrays`](/flechette/api/#tableFromArrays)) to instead extract decimal data as `BigInt` values (64-bit or larger decimals) or integer `number` values (32-bit decimals).
 
 * *precision* (`number`): The total number of decimal digits that can be represented.
 * *scale* (`number`): The number of fractional digits, beyond the decimal point.
-* *bitWidth* (`number`): The decimal bit width, one of `128` (default) or `256`.
+* *bitWidth* (`number`): The decimal bit width, one of `32`, `64`, `128` (default) or `256`.
 
 ```js
-import { utf8 } from '@uwdata/flechette';
+import { decimal } from '@uwdata/flechette';
 // decimal with 18 total digits, including 3 fractional digits
 // { typeId: 7, precision: 18, scale: 3, bitWidth: 128, ... }
 decimal(18, 3)
 ```
 
+<hr/><a id="decimal32" href="#decimal32">#</a>
+<b>decimal32</b>(<i>precision</i>, <i>scale</i>)
+
+Create a Decimal data type instance that uses 32 bits per decimal. 32-bit decimals are stored within an `Int32Array`.
+
+* *precision* (`number`): The total number of decimal digits that can be represented.
+* *scale* (`number`): The number of fractional digits, beyond the decimal point.
+
+<hr/><a id="decimal64" href="#decimal64">#</a>
+<b>decimal64</b>(<i>precision</i>, <i>scale</i>)
+
+Create a Decimal data type instance that uses 64 bits per decimal. 64-bit decimals are stored within a `BigUint64Array`.
+
+* *precision* (`number`): The total number of decimal digits that can be represented.
+* *scale* (`number`): The number of fractional digits, beyond the decimal point.
+
+<hr/><a id="decimal128" href="#decimal128">#</a>
+<b>decimal128</b>(<i>precision</i>, <i>scale</i>)
+
+Create a Decimal data type instance that uses 128 bits per decimal. 128-bit decimals are stored within a `BigUint64Array` with a stride of 2 (two array entries per decimal value).
+
+* *precision* (`number`): The total number of decimal digits that can be represented.
+* *scale* (`number`): The number of fractional digits, beyond the decimal point.
+
+<hr/><a id="decimal256" href="#decimal256">#</a>
+<b>decimal256</b>(<i>precision</i>, <i>scale</i>)
+
+Create a Decimal data type instance that uses 256 bits per decimal. 256-bit decimals are stored within a `BigUint64Array` with a stride of 4 (four array entries per decimal value).
+
+* *precision* (`number`): The total number of decimal digits that can be represented.
+* *scale* (`number`): The number of fractional digits, beyond the decimal point.
+
 ### Date
 
 <hr/><a id="date" href="#date">#</a>
diff --git a/src/batch-type.js b/src/batch-type.js
@@ -1,4 +1,4 @@
-import { BinaryBatch, BinaryViewBatch, BoolBatch, DateBatch, DateDayBatch, DateDayMillisecondBatch, DecimalBigIntBatch, DecimalNumberBatch, DenseUnionBatch, DictionaryBatch, DirectBatch, FixedBinaryBatch, FixedListBatch, Float16Batch, Int64Batch, IntervalDayTimeBatch, IntervalMonthDayNanoBatch, LargeBinaryBatch, LargeListBatch, LargeListViewBatch, LargeUtf8Batch, ListBatch, ListViewBatch, MapBatch, MapEntryBatch, NullBatch, RunEndEncodedBatch, SparseUnionBatch, StructBatch, StructProxyBatch, TimestampMicrosecondBatch, TimestampMillisecondBatch, TimestampNanosecondBatch, TimestampSecondBatch, Utf8Batch, Utf8ViewBatch } from './batch.js';
+import { BinaryBatch, BinaryViewBatch, BoolBatch, DateBatch, DateDayBatch, DateDayMillisecondBatch, Decimal32NumberBatch, DecimalBigIntBatch, DecimalNumberBatch, DenseUnionBatch, DictionaryBatch, DirectBatch, FixedBinaryBatch, FixedListBatch, Float16Batch, Int64Batch, IntervalDayTimeBatch, IntervalMonthDayNanoBatch, LargeBinaryBatch, LargeListBatch, LargeListViewBatch, LargeUtf8Batch, ListBatch, ListViewBatch, MapBatch, MapEntryBatch, NullBatch, RunEndEncodedBatch, SparseUnionBatch, StructBatch, StructProxyBatch, TimestampMicrosecondBatch, TimestampMillisecondBatch, TimestampNanosecondBatch, TimestampSecondBatch, Utf8Batch, Utf8ViewBatch } from './batch.js';
 import { DateUnit, IntervalUnit, TimeUnit, Type } from './constants.js';
 import { invalidDataType } from './data-types.js';
 
@@ -29,7 +29,9 @@ export function batchType(type, options = {}) {
         useDate && DateBatch
       );
     case Type.Decimal:
-      return useDecimalBigInt ? DecimalBigIntBatch : DecimalNumberBatch;
+      return bitWidth === 32
+        ? (useDecimalBigInt ? DirectBatch : Decimal32NumberBatch)
+        : (useDecimalBigInt ? DecimalBigIntBatch : DecimalNumberBatch);
     case Type.Interval:
       return unit === IntervalUnit.DAY_TIME ? IntervalDayTimeBatch
         : unit === IntervalUnit.YEAR_MONTH ? DirectBatch
diff --git a/src/batch.js b/src/batch.js
@@ -1,5 +1,5 @@
 import { bisect, float64Array } from './util/arrays.js';
-import { divide, fromDecimal128, fromDecimal256, toNumber } from './util/numbers.js';
+import { divide, fromDecimal128, fromDecimal256, fromDecimal64, toNumber } from './util/numbers.js';
 import { decodeBit, readInt32, readInt64 } from './util/read.js';
 import { decodeUtf8 } from './util/strings.js';
 import { objectFactory, proxyFactory } from './util/struct.js';
@@ -213,7 +213,7 @@ export class NullBatch extends ArrayBatch {
 
 /**
  * A batch that coerces BigInt values to 64-bit numbers.
- * * @extends {NumberBatch}
+ * @extends {NumberBatch}
  */
 export class Int64Batch extends NumberBatch {
   /**
@@ -259,7 +259,27 @@ export class BoolBatch extends ArrayBatch {
 }
 
 /**
- * An abstract class for a batch of 128- or 256-bit decimal numbers,
+ * A batch of 32-bit decimal numbers, returned as converted 64-bit floating
+ * point numbers. Number coercion may be lossy if the decimal precision can
+ * not be represented in a 64-bit floating point format.
+ * @extends {NumberBatch}
+ */
+export class Decimal32NumberBatch extends NumberBatch {
+  constructor(options) {
+    super(options);
+    const { scale } = /** @type {import('./types.js').DecimalType} */ (this.type);
+    this.scale = 10 ** scale;
+  }
+  /**
+   * @param {number} index The value index
+   */
+  value(index) {
+    return /** @type {number} */(this.values[index]) / this.scale;
+  }
+}
+
+/**
+ * An abstract class for a batch of 64-, 128- or 256-bit decimal numbers,
  * accessed in strided BigUint64Arrays.
  * @template T
  * @extends {Batch<T>}
@@ -268,14 +288,16 @@ export class DecimalBatch extends Batch {
   constructor(options) {
     super(options);
     const { bitWidth, scale } = /** @type {import('./types.js').DecimalType} */ (this.type);
-    this.decimal = bitWidth === 128 ? fromDecimal128 : fromDecimal256;
+    this.decimal = bitWidth === 64 ? fromDecimal64
+      : bitWidth === 128 ? fromDecimal128
+      : fromDecimal256;
     this.scale = 10n ** BigInt(scale);
   }
 }
 
 /**
- * A batch of 128- or 256-bit decimal numbers, returned as converted
- * 64-bit numbers. The number coercion may be lossy if the decimal
+ * A batch of 64-, 128- or 256-bit decimal numbers, returned as converted
+ * 64-bit floating point numbers. Number coercion may be lossy if the decimal
  * precision can not be represented in a 64-bit floating point format.
  * @extends {DecimalBatch<number>}
  */
@@ -293,7 +315,7 @@ export class DecimalNumberBatch extends DecimalBatch {
 }
 
 /**
- * A batch of 128- or 256-bit decimal numbers, returned as scaled
+ * A batch of 64-, 128- or 256-bit decimal numbers, returned as scaled
  * bigint values, such that all fractional digits have been shifted
  * to integer places by the decimal type scale factor.
  * @extends {DecimalBatch<bigint>}
diff --git a/src/build/builder.js b/src/build/builder.js
@@ -2,7 +2,7 @@ import { batchType } from '../batch-type.js';
 import { IntervalUnit, Type } from '../constants.js';
 import { invalidDataType } from '../data-types.js';
 import { isInt64ArrayType } from '../util/arrays.js';
-import { toBigInt, toDateDay, toFloat16, toTimestamp } from '../util/numbers.js';
+import { toBigInt, toDateDay, toDecimal32, toFloat16, toTimestamp } from '../util/numbers.js';
 import { BinaryBuilder } from './builders/binary.js';
 import { BoolBuilder } from './builders/bool.js';
 import { DecimalBuilder } from './builders/decimal.js';
@@ -65,7 +65,9 @@ export function builder(type, ctx = builderContext()) {
     case Type.Bool:
       return new BoolBuilder(type, ctx);
     case Type.Decimal:
-      return new DecimalBuilder(type, ctx);
+      return type.bitWidth === 32
+        ? new TransformBuilder(type, ctx, toDecimal32(type.scale))
+        : new DecimalBuilder(type, ctx);
     case Type.Date:
       return new TransformBuilder(type, ctx, type.unit ? toBigInt : toDateDay);
     case Type.Timestamp:
diff --git a/src/build/builders/values.js b/src/build/builders/values.js
@@ -26,6 +26,7 @@ export class DirectBuilder extends ValidityBuilder {
       this.values.set(value, index);
     }
   }
+
   done() {
     return {
       ...super.done(),
diff --git a/src/data-types.js b/src/data-types.js
@@ -221,26 +221,62 @@ export const utf8 = () => ({
 export const bool = () => basicType(Type.Bool);
 
 /**
- * Return a Decimal data type instance. Decimal values are represented as 128
- * or 256 bit integers in two's complement. Decimals are fixed point numbers
- * with a set *precision* (total number of decimal digits) and *scale*
+ * Return a Decimal data type instance. Decimal values are represented as 32,
+ * 64, 128, or 256 bit integers in two's complement. Decimals are fixed point
+ * numbers with a set *precision* (total number of decimal digits) and *scale*
  * (number of fractional digits). For example, the number `35.42` can be
  * represented as `3542` with *precision* ≥ 4 and *scale* = 2.
  * @param {number} precision The decimal precision: the total number of
  *  decimal digits that can be represented.
  * @param {number} scale The number of fractional digits, beyond the
  *  decimal point.
- * @param {128 | 256} [bitWidth] The decimal bit width.
- *  One of 128 (default) or 256.
+ * @param {32 | 64 | 128 | 256} [bitWidth] The decimal bit width.
+ *  One of 32, 64, 128 (default), or 256.
  * @returns {import('./types.js').DecimalType} The decimal data type.
  */
 export const decimal = (precision, scale, bitWidth = 128) => ({
   typeId: Type.Decimal,
   precision,
   scale,
-  bitWidth: checkOneOf(bitWidth, [128, 256]),
-  values: uint64Array
+  bitWidth: checkOneOf(bitWidth, [32, 64, 128, 256]),
+  values: bitWidth === 32 ? int32Array : uint64Array
 });
+/**
+ * Return a Decimal data type instance with a bit width of 32.
+ * @param {number} precision The decimal precision: the total number of
+ *  decimal digits that can be represented.
+ * @param {number} scale The number of fractional digits, beyond the
+ *  decimal point.
+ * @returns {import('./types.js').DecimalType} The decimal data type.
+ */
+export const decimal32 = (precision, scale) => decimal(precision, scale, 32);
+/**
+ * Return a Decimal data type instance with a bit width of 64.
+ * @param {number} precision The decimal precision: the total number of
+ *  decimal digits that can be represented.
+ * @param {number} scale The number of fractional digits, beyond the
+ *  decimal point.
+ * @returns {import('./types.js').DecimalType} The decimal data type.
+ */
+export const decimal64 = (precision, scale) => decimal(precision, scale, 64);
+/**
+ * Return a Decimal data type instance with a bit width of 128.
+ * @param {number} precision The decimal precision: the total number of
+ *  decimal digits that can be represented.
+ * @param {number} scale The number of fractional digits, beyond the
+ *  decimal point.
+ * @returns {import('./types.js').DecimalType} The decimal data type.
+ */
+export const decimal128 = (precision, scale) => decimal(precision, scale, 128);
+/**
+ * Return a Decimal data type instance with a bit width of 256.
+ * @param {number} precision The decimal precision: the total number of
+ *  decimal digits that can be represented.
+ * @param {number} scale The number of fractional digits, beyond the
+ *  decimal point.
+ * @returns {import('./types.js').DecimalType} The decimal data type.
+ */
+export const decimal256 = (precision, scale) => decimal(precision, scale, 256);
 
 /**
  * Return a Date data type instance. Date values are 32-bit or 64-bit signed
diff --git a/src/index.js b/src/index.js
@@ -17,7 +17,7 @@ export {
   binary,
   utf8,
   bool,
-  decimal,
+  decimal, decimal32, decimal64, decimal128, decimal256,
   date, dateDay, dateMillisecond,
   dictionary,
   time, timeSecond, timeMillisecond, timeMicrosecond, timeNanosecond,
diff --git a/src/types.ts b/src/types.ts
@@ -75,6 +75,10 @@ export type DateTimeArrayConstructor =
   | Int32ArrayConstructor
   | BigInt64ArrayConstructor;
 
+export type DecimalArrayConstructor =
+  | Int32ArrayConstructor
+  | BigUint64ArrayConstructor;
+
 export type TypedArrayConstructor =
   | Uint8ArrayConstructor
   | Uint16ArrayConstructor
@@ -146,7 +150,7 @@ export type Utf8Type = { typeId: 5, offsets: Int32ArrayConstructor };
 export type BoolType = { typeId: 6 };
 
 /** Fixed decimal number data type. */
-export type DecimalType = { typeId: 7, precision: number, scale: number, bitWidth: 128 | 256, values: BigUint64ArrayConstructor };
+export type DecimalType = { typeId: 7, precision: number, scale: number, bitWidth: 32 | 64 | 128 | 256, values: DecimalArrayConstructor };
 
 /** Date data type. */
 export type DateType = { typeId: 8, unit: DateUnit_, values: DateTimeArrayConstructor };
diff --git a/src/util/numbers.js b/src/util/numbers.js
@@ -95,6 +95,17 @@ export function divide(num, div) {
   return Number(num / div) + Number(num % div) / Number(div);
 }
 
+/**
+ * Return a 32-bit decimal conversion method for the given decimal scale.
+ * @param {number} scale The decimal scale: the number of fractional digits. Input values are multiplied by `10 ** scale`.
+ * @returns {(value: number) => number} A conversion method that maps
+ *  floating point numbers to 32-bit decimals.
+ */
+export function toDecimal32(scale) {
+  const s = 10 ** scale;
+  return (value) => Math.round(value * s) | 0;
+}
+
 /**
  * Convert a floating point number or bigint to decimal bytes.
  * @param {number|bigint} value The number to encode. If a bigint, we assume
@@ -111,16 +122,29 @@ export function toDecimal(value, buf, offset, stride, scale) {
     : toBigInt(Math.trunc(value * scale));
   // assignment into uint64array performs needed truncation for us
   buf[offset] = v;
-  buf[offset + 1] = (v >> 64n);
-  if (stride > 2) {
-    buf[offset + 2] = (v >> 128n);
-    buf[offset + 3] = (v >> 192n);
+  if (stride > 1) {
+    buf[offset + 1] = (v >> 64n);
+    if (stride > 2) {
+      buf[offset + 2] = (v >> 128n);
+      buf[offset + 3] = (v >> 192n);
+    }
   }
 }
 
 // helper method to extract uint64 values from bigints
 const asUint64 = v => BigInt.asUintN(64, v);
 
+/**
+ * Convert a 64-bit decimal value to a bigint.
+ * @param {BigUint64Array} buf The uint64 array containing the decimal bytes.
+ * @param {number} offset The starting index offset into the array.
+ * @returns {bigint} The converted decimal as a bigint, such that all
+ *  fractional digits are scaled up to integers (for example, 1.12 -> 112).
+ */
+export function fromDecimal64(buf, offset) {
+  return BigInt.asIntN(64, buf[offset]);
+}
+
 /**
  * Convert a 128-bit decimal value to a bigint.
  * @param {BigUint64Array} buf The uint64 array containing the decimal bytes.
diff --git a/test/data/decimal128.arrows b/test/data/decimal128.arrows
diff --git a/test/data/decimal256.arrows b/test/data/decimal256.arrows
diff --git a/test/data/decimal32.arrows b/test/data/decimal32.arrows
diff --git a/test/data/decimal64.arrows b/test/data/decimal64.arrows
diff --git a/test/duckdb-compat-test.js b/test/duckdb-compat-test.js
@@ -6,7 +6,8 @@ import * as dataMethods from './util/data.js';
 // Arrow types not supported by DuckDB
 const skip = new Set([
   'binaryView', 'empty', 'largeListView', 'listView',
-  'runEndEncoded32', 'runEndEncoded64', 'utf8View'
+  'runEndEncoded32', 'runEndEncoded64', 'utf8View',
+  'decimal32', 'decimal64', 'decimal128', 'decimal256'
 ]);
 
 describe('DuckDB compatibility', () => {
diff --git a/test/table-from-ipc-test.js b/test/table-from-ipc-test.js
@@ -1,12 +1,14 @@
 import assert from 'node:assert';
 import { tableFromIPC } from '../src/index.js';
 import { arrowFromDuckDB } from './util/arrow-from-duckdb.js';
-import { binaryView, bool, dateDay, decimal, empty, fixedListInt32, fixedListUtf8, float32, float64, int16, int32, int64, int8, intervalMonthDayNano, largeListView, listInt32, listUtf8, listView, map, runEndEncoded32, runEndEncoded64, struct, timestampMicrosecond, timestampMillisecond, timestampNanosecond, timestampSecond, uint16, uint32, uint64, uint8, union, utf8, utf8View } from './util/data.js';
+import { binaryView, bool, dateDay, decimal, decimal32, decimal128, decimal256, decimal64, empty, fixedListInt32, fixedListUtf8, float32, float64, int16, int32, int64, int8, intervalMonthDayNano, largeListView, listInt32, listUtf8, listView, map, runEndEncoded32, runEndEncoded64, struct, timestampMicrosecond, timestampMillisecond, timestampNanosecond, timestampSecond, uint16, uint32, uint64, uint8, union, utf8, utf8View } from './util/data.js';
 import { RowIndex } from '../src/util/struct.js';
 
 const toBigInt = v => BigInt(v);
 const toDate = v => new Date(v);
 const toFloat32 = v => Math.fround(v);
+const toDecimalInt = v => Math.round(v * 100);
+const toDecimalBigInt = v => BigInt(toDecimalInt(v));
 
 async function test(dataMethod, arrayType, opt, transform) {
   const data = await dataMethod();
@@ -73,6 +75,14 @@ describe('tableFromIPC', () => {
   it('decodes boolean data', () => test(bool));
 
   it('decodes decimal data', () => test(decimal, Float64Array));
+  it('decodes decimal32 data', () => test(decimal32, Float64Array));
+  it('decodes decimal64 data', () => test(decimal64, Float64Array));
+  it('decodes decimal128 data', () => test(decimal128, Float64Array));
+  it('decodes decimal256 data', () => test(decimal256, Float64Array));
+  it('decodes decimal32 data to int', () => test(decimal32, Int32Array, { useDecimalBigInt: true }, toDecimalInt));
+  it('decodes decimal64 data to bigint', () => test(decimal64, Array, { useDecimalBigInt: true }, toDecimalBigInt));
+  it('decodes decimal128 data to bigint', () => test(decimal128, Array, { useDecimalBigInt: true }, toDecimalBigInt));
+  it('decodes decimal256 data to bigint', () => test(decimal256, Array, { useDecimalBigInt: true }, toDecimalBigInt));
 
   it('decodes date day data', () => test(dateDay, Float64Array));
   it('decodes date day data to dates', () => test(dateDay, Array, { useDate: true }, toDate));
diff --git a/test/util/data.js b/test/util/data.js

Original file line number	Diff line number	Diff line change
`@@ -26,6 +26,7 @@ export class DirectBuilder extends ValidityBuilder {`
`26`	`26`	`this.values.set(value, index);`
`27`	`27`	`}`
`28`	`28`	`}`
	`29`	`+`
`29`	`30`	`done() {`
`30`	`31`	`return {`
`31`	`32`	`...super.done(),`