Skip to content

Commit 29540e2

Browse files
authored
NODEJS-655: Initial cut at vector support (#407)
1 parent fa0a75b commit 29540e2

File tree

5 files changed

+288
-22
lines changed

5 files changed

+288
-22
lines changed

doc/features/datatypes/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ tuple|[Tuple](tuples)
2929
uuid|[Uuid](uuids)
3030
varchar|String
3131
varint|[Integer](numerical)
32+
vector|[Float32Array](collections)
3233

3334
## Encoding data
3435

doc/features/datatypes/collections/README.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,23 @@ client.execute('SELECT map_val FROM tbl')
6969
.then(function (result) {
7070
console.log(result.rows[0]['map_val'] instanceof Map); // true
7171
});
72-
```
72+
```
73+
74+
### Vector
75+
76+
As of version 4.7.0, the driver also includes support for the vector type available in Cassandra 5.0. Vectors are represented as instances of
77+
the [Float32Array] class. For example, to create and write to a vector with three dimensions you might do something like the following:
78+
79+
```javascript
80+
await c.connect()
81+
.then(() => c.execute("drop keyspace if exists test"))
82+
.then(() => c.execute("create KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}"))
83+
.then(() => c.execute("create table test.foo(i varint primary key, j vector<float,3>)"))
84+
.then(() => c.execute("create custom index ann_index on test.foo(j) using 'StorageAttachedIndex'"))
85+
86+
// Base inserts using simple and prepared statements
87+
.then(() => c.execute(`insert into test.foo (i, j) values (?, ?)`, [cassandra.types.Integer.fromInt(1), new Float32Array([8, 2.3, 58])]))
88+
.then(() => c.execute(`insert into test.foo (i, j) values (?, ?)`, [cassandra.types.Integer.fromInt(5), new Float32Array([23, 18, 3.9])], {prepare: true}));
89+
```
90+
91+
[Float32Array]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Float32Array

lib/encoder.js

Lines changed: 168 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ const cqlNames = Object.freeze({
6969
map: 'map',
7070
tuple: 'tuple',
7171
empty: 'empty',
72-
duration: 'duration'
72+
duration: 'duration',
73+
vector: 'vector'
7374
});
7475
const singleTypeNames = Object.freeze({
7576
'org.apache.cassandra.db.marshal.UTF8Type': dataTypes.varchar,
@@ -93,6 +94,7 @@ const singleTypeNames = Object.freeze({
9394
'org.apache.cassandra.db.marshal.IntegerType': dataTypes.varint,
9495
'org.apache.cassandra.db.marshal.CounterColumnType': dataTypes.counter
9596
});
97+
const singleTypeNamesByDataType = invertObject(singleTypeNames);
9698
const singleFqTypeNamesLength = Object.keys(singleTypeNames).reduce(function (previous, current) {
9799
return current.length > previous ? current.length : previous;
98100
}, 0);
@@ -102,7 +104,8 @@ const customTypeNames = Object.freeze({
102104
lineString: 'org.apache.cassandra.db.marshal.LineStringType',
103105
point: 'org.apache.cassandra.db.marshal.PointType',
104106
polygon: 'org.apache.cassandra.db.marshal.PolygonType',
105-
dateRange: 'org.apache.cassandra.db.marshal.DateRangeType'
107+
dateRange: 'org.apache.cassandra.db.marshal.DateRangeType',
108+
vector: 'org.apache.cassandra.db.marshal.VectorType'
106109
});
107110

108111
const nullValueBuffer = utils.allocBufferFromArray([255, 255, 255, 255]);
@@ -186,6 +189,16 @@ function defineInstanceMembers() {
186189
return this.handleBuffer(bytes);
187190
};
188191
this.decodeCustom = function (bytes, typeName) {
192+
193+
// Make sure we actually have something to process in typeName before we go any further
194+
if (!typeName || typeName.length === 0) {
195+
return this.handleBuffer(bytes);
196+
}
197+
198+
// Special handling for vector custom types (since they have args)
199+
if (typeName.startsWith(customTypeNames.vector)) {
200+
return this.decodeVector(bytes, this.parseVectorTypeArgs(typeName, customTypeNames.vector, this.parseFqTypeName));
201+
}
189202
const handler = customDecoders[typeName];
190203
if (handler) {
191204
return handler.call(this, bytes);
@@ -695,12 +708,17 @@ function defineInstanceMembers() {
695708
}
696709
return value;
697710
};
698-
this.encodeCustom = function (value, name) {
699-
const handler = customEncoders[name];
711+
this.encodeCustom = function (value, customTypeName) {
712+
713+
// Special handling for vector custom types (since they have args)
714+
if (customTypeName.startsWith(customTypeNames.vector)) {
715+
return this.encodeVector(value, this.parseVectorTypeArgs(customTypeName, customTypeNames.vector, this.parseFqTypeName));
716+
}
717+
const handler = customEncoders[customTypeName];
700718
if (handler) {
701719
return handler.call(this, value);
702720
}
703-
throw new TypeError('No encoding handler found for type ' + name);
721+
throw new TypeError('No encoding handler found for type ' + customTypeName);
704722
};
705723
/**
706724
* @param {Boolean} value
@@ -890,6 +908,92 @@ function defineInstanceMembers() {
890908
return Buffer.concat(parts, totalLength);
891909
};
892910

911+
/**
 * Decodes a serialized vector into a Float32Array by decoding each fixed-width element in turn.
 *
 * @param {Buffer} buffer Serialized vector elements, laid out back to back
 * @param {Object} params Vector type arguments: <code>subtype</code> (element type handed to
 * <code>decode()</code>) and <code>dimensions</code> (number of elements expected)
 * @returns {Float32Array}
 * @throws {TypeError} When the buffer length is not exactly 4 bytes per dimension
 */
this.decodeVector = function(buffer, params) {
  const subtype = params["subtype"];
  const dimensions = params["dimensions"];
  const elemLength = 4; // TODO: figure this out based on the subtype
  // The expected size comes from the declared type, not from the buffer being validated
  const expectedLength = elemLength * dimensions;
  if (expectedLength !== buffer.length) {
    throw new TypeError(`Expected buffer of subtype ${subtype} with dimensions ${dimensions} to be of size ${expectedLength}, observed size ${buffer.length}`);
  }
  const rv = [];
  for (let i = 0; i < dimensions; i++) {
    const offset = i * elemLength;
    rv[i] = this.decode(buffer.slice(offset, offset + elemLength), subtype);
  }
  return new Float32Array(rv);
};
927+
928+
/**
 * Encodes a vector as the concatenation of its individually encoded elements.
 *
 * @param {Float32Array} value Vector to encode; only 4 byte floating point elements are supported
 * @param {Object} [params] Vector type arguments. When both <code>subtype</code> and
 * <code>dimensions</code> are present the value is validated against them.
 * @returns {Buffer}
 * @throws {TypeError} When the value is not a non-empty Float32Array or does not match the
 * supplied type arguments
 */
this.encodeVector = function(value, params) {

  if (!Encoder.isTypedArray(value)) {
    throw new TypeError('Expected TypedArray subclass, obtained ' + util.inspect(value));
  }
  if (value.length === 0) {
    throw new TypeError("Cannot encode empty array as vector");
  }
  if (!(value instanceof Float32Array)) {
    throw new TypeError("Driver only supports vectors of 4 byte floating point values");
  }

  // params may be missing or may not inherit from Object.prototype, so never call
  // params.hasOwnProperty() directly
  const hasParam = (name) =>
    params !== null && params !== undefined && Object.prototype.hasOwnProperty.call(params, name);

  // Without type info the elements are encoded as floats
  let subtype = { code: dataTypes.float };

  // Perform client-side validation iff we were actually supplied with meaningful type info. In practice
  // this will only occur when using prepared statements.
  if (hasParam("subtype") && hasParam("dimensions")) {
    const dimensions = params["dimensions"];
    if (value.length !== dimensions) {
      throw new TypeError(`Expected vector with ${dimensions} dimensions, observed size of ${value.length}`);
    }
    if (params["subtype"].code !== dataTypes.float) {
      throw new TypeError("Driver only supports vectors of 4 byte floating point values");
    }
    subtype = params["subtype"];
  }

  // TypedArrays are _not_ JS arrays so explicitly convert them here before trying to write them
  // into a buffer
  const elems = [];
  for (const elem of value) {
    elems.push(this.encode(elem, subtype));
  }
  return Buffer.concat(elems);
};
976+
977+
/**
 * Extract the (typed) arguments from a vector type
 *
 * @param {String} typeName
 * @param {String} stringToExclude Leading string indicating this is a vector type (to be excluded when eval'ing args)
 * @param {Function} subtypeResolveFn Function used to resolve subtype type; varies depending on type naming convention.
 * It is invoked with this encoder as <code>this</code> so instance methods can be passed directly.
 * @returns {Object} Object with the resolved <code>subtype</code> and the numeric <code>dimensions</code>
 * @throws {TypeError} When the type does not carry exactly two arguments or the dimension is not a positive integer
 * @internal
 */
this.parseVectorTypeArgs = function(typeName, stringToExclude, subtypeResolveFn) {

  // Skip the leading type name plus the opening delimiter; drop the closing delimiter as well
  const argsStartIndex = stringToExclude.length + 1;
  const argsLength = typeName.length - (stringToExclude.length + 2);
  const params = parseParams(typeName, argsStartIndex, argsLength);
  if (params.length === 2) {
    const dimensions = parseInt(params[1], 10);
    // parseInt yields NaN for a non-numeric dimension; treat that as an invalid type up front
    if (Number.isInteger(dimensions) && dimensions > 0) {
      // Resolvers such as parseFqTypeName recurse through `this`, so keep the receiver bound
      return {subtype: subtypeResolveFn.call(this, params[0]), dimensions: dimensions};
    }
  }
  throw new TypeError('Not a valid type ' + typeName);
};
996+
893997
/**
894998
* If not provided, it uses the array of buffers or the parameters and hints to build the routingKey
895999
* @param {Array} params
@@ -1139,6 +1243,19 @@ function defineInstanceMembers() {
11391243
return dataType;
11401244
}
11411245

1246+
if (typeName.indexOf(cqlNames.vector, startIndex) === startIndex) {
1247+
// It's a vector, so record the subtype and dimension.
1248+
dataType.code = dataTypes.custom;
1249+
1250+
// parseVectorTypeArgs is not an async function but we are. To keep things simple let's ask the
1251+
// function to just return whatever it finds for an arg and we'll eval it after the fact
1252+
const params = this.parseVectorTypeArgs(typeName, cqlNames.vector, (arg) => arg );
1253+
params["subtype"] = await this.parseTypeName(keyspace, params["subtype"]);
1254+
dataType.info = params;
1255+
1256+
return dataType;
1257+
}
1258+
11421259
const quoted = typeName.indexOf('"', startIndex) === startIndex;
11431260
if (quoted) {
11441261
// Remove quotes
@@ -1214,7 +1331,7 @@ function defineInstanceMembers() {
12141331
}
12151332
};
12161333
startIndex = startIndex || 0;
1217-
let innerTypes;
1334+
let params;
12181335
if (!length) {
12191336
length = typeName.length;
12201337
}
@@ -1254,12 +1371,12 @@ function defineInstanceMembers() {
12541371
//move cursor across the name and bypass the parenthesis
12551372
startIndex += complexTypeNames.list.length + 1;
12561373
length -= complexTypeNames.list.length + 2;
1257-
innerTypes = parseParams(typeName, startIndex, length);
1258-
if (innerTypes.length !== 1) {
1374+
params = parseParams(typeName, startIndex, length);
1375+
if (params.length !== 1) {
12591376
throw new TypeError('Not a valid type ' + typeName);
12601377
}
12611378
dataType.code = dataTypes.list;
1262-
dataType.info = this.parseFqTypeName(innerTypes[0]);
1379+
dataType.info = this.parseFqTypeName(params[0]);
12631380
return dataType;
12641381
}
12651382
if (typeName.indexOf(complexTypeNames.set, startIndex) === startIndex) {
@@ -1268,27 +1385,27 @@ function defineInstanceMembers() {
12681385
//move cursor across the name and bypass the parenthesis
12691386
startIndex += complexTypeNames.set.length + 1;
12701387
length -= complexTypeNames.set.length + 2;
1271-
innerTypes = parseParams(typeName, startIndex, length);
1272-
if (innerTypes.length !== 1)
1388+
params = parseParams(typeName, startIndex, length);
1389+
if (params.length !== 1)
12731390
{
12741391
throw new TypeError('Not a valid type ' + typeName);
12751392
}
12761393
dataType.code = dataTypes.set;
1277-
dataType.info = this.parseFqTypeName(innerTypes[0]);
1394+
dataType.info = this.parseFqTypeName(params[0]);
12781395
return dataType;
12791396
}
12801397
if (typeName.indexOf(complexTypeNames.map, startIndex) === startIndex) {
12811398
//org.apache.cassandra.db.marshal.MapType(keyType,valueType)
12821399
//move cursor across the name and bypass the parenthesis
12831400
startIndex += complexTypeNames.map.length + 1;
12841401
length -= complexTypeNames.map.length + 2;
1285-
innerTypes = parseParams(typeName, startIndex, length);
1402+
params = parseParams(typeName, startIndex, length);
12861403
//It should contain the key and value types
1287-
if (innerTypes.length !== 2) {
1404+
if (params.length !== 2) {
12881405
throw new TypeError('Not a valid type ' + typeName);
12891406
}
12901407
dataType.code = dataTypes.map;
1291-
dataType.info = [this.parseFqTypeName(innerTypes[0]), this.parseFqTypeName(innerTypes[1])];
1408+
dataType.info = [this.parseFqTypeName(params[0]), this.parseFqTypeName(params[1])];
12921409
return dataType;
12931410
}
12941411
if (typeName.indexOf(complexTypeNames.udt, startIndex) === startIndex) {
@@ -1301,12 +1418,19 @@ function defineInstanceMembers() {
13011418
//move cursor across the name and bypass the parenthesis
13021419
startIndex += complexTypeNames.tuple.length + 1;
13031420
length -= complexTypeNames.tuple.length + 2;
1304-
innerTypes = parseParams(typeName, startIndex, length);
1305-
if (innerTypes.length < 1) {
1421+
params = parseParams(typeName, startIndex, length);
1422+
if (params.length < 1) {
13061423
throw new TypeError('Not a valid type ' + typeName);
13071424
}
13081425
dataType.code = dataTypes.tuple;
1309-
dataType.info = innerTypes.map(x => this.parseFqTypeName(x));
1426+
dataType.info = params.map(x => this.parseFqTypeName(x));
1427+
return dataType;
1428+
}
1429+
1430+
if (typeName.indexOf(customTypeNames.vector, startIndex) === startIndex) {
1431+
// It's a vector, so record the subtype and dimension.
1432+
dataType.code = dataTypes.custom;
1433+
dataType.info = this.parseVectorTypeArgs(typeName, customTypeNames.vector, this.parseFqTypeName);
13101434
return dataType;
13111435
}
13121436

@@ -1628,6 +1752,12 @@ Encoder.guessDataType = function (value) {
16281752
code = dataTypes.custom;
16291753
info = customTypeNames.duration;
16301754
}
1755+
// Map JS TypedArrays onto vectors
1756+
else if (Encoder.isTypedArray(value)) {
1757+
code = dataTypes.custom;
1758+
// TODO: another area that we have to generalize if we ever need to support vector subtypes other than float
1759+
info = buildParameterizedCustomType(customTypeNames.vector, [singleTypeNamesByDataType[dataTypes.float], value.length]);
1760+
}
16311761
else if (Array.isArray(value)) {
16321762
code = dataTypes.list;
16331763
}
@@ -1862,4 +1992,24 @@ function concatRoutingKey(parts, totalLength) {
18621992
return routingKey;
18631993
}
18641994

1995+
/**
 * Builds the name of a parameterized custom type in the form <code>name(arg1,arg2,...)</code>.
 *
 * @param {String} customTypeName Name placed before the parenthesized argument list
 * @param {Array} args Arguments, joined with commas (no surrounding whitespace)
 * @returns {String}
 * @private
 */
function buildParameterizedCustomType(customTypeName, args) {
  return `${customTypeName}(${args.join(',')})`;
}
1998+
1999+
/**
 * Returns a new object whose keys are the values of the given object and whose values are the
 * corresponding keys. Only own enumerable properties are considered; when several keys share a
 * value, the key visited last wins.
 *
 * @param {Object} obj
 * @returns {Object}
 * @private
 */
function invertObject(obj) {
  const rv = {};
  // Object.keys() only yields own enumerable properties, so no per-key ownership check is needed
  // (and Object.hasOwn is not available on older runtimes)
  for (const k of Object.keys(obj)) {
    rv[obj[k]] = k;
  }
  return rv;
}
2008+
/**
 * Determines whether the argument is an instance of any TypedArray subclass.
 *
 * @param {*} arg
 * @returns {Boolean}
 */
Encoder.isTypedArray = function(arg) {
  // The TypedArray superclass isn't available directly so to detect an instance of a TypedArray
  // subclass we have to access the prototype of a concrete instance. There's nothing magical about
  // Uint8Array here; we could just as easily use any of the other TypedArray subclasses.
  return (arg instanceof Object.getPrototypeOf(Uint8Array));
};
2014+
18652015
module.exports = Encoder;

lib/types/index.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ const utils = require('../utils');
2929
*/
3030
const Long = require('long');
3131

32-
3332
/**
3433
* Consistency levels
3534
* @type {Object}

0 commit comments

Comments
 (0)