Skip to content

Commit 791cbe4

Browse files
Store binary as numeric array for Json data structure
Enable vector indexing
1 parent 2185348 commit 791cbe4

File tree

8 files changed

+137
-36
lines changed

8 files changed

+137
-36
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ const point = { longitude: 12.34, latitude: 56.78 }
236236

237237
A `text` field is a lot like a `string`. If you're just reading and writing objects, they are identical. But if you want to *search* on them, they are very, very different. I'll cover that in detail when I talk about [using RediSearch](#-using-redisearch) but the tl;dr is that `string` fields can only be matched on their whole value—no partial matches—and are best for keys while `text` fields have full-text search enabled on them and are optimized for human-readable text.
238238

239-
A `binary` field is a binary blob of data using a `Buffer` object. For Hash data structures it will be stored as a binary field in Redis, for JSON data structures it will be serialized to a Base64 string. The `binary` field stored in a Hash data structure can be indexed as a [Vector Similarity](https://redis.io/docs/stack/search/reference/vectors/) field. When stored in a JSON data structure it will be automatically unindexed.
239+
A `binary` field is a binary blob of data using a `Buffer` object. For Hash data structures it will be stored as a binary field in Redis; for JSON data structures it will be serialized to a numeric array. The `binary` field can be indexed as a [Vector Similarity](https://redis.io/docs/stack/search/reference/vectors/) field.
240240

241241
Additional field options can be set depending on the field type. These correspond to the [Field Options](https://redis.io/commands/ft.create/#field-options) available when creating a RediSearch full-text index. Other than the `separator` option, these only affect how content is indexed and searched.
242242

lib/entity/fields/entity-binary-field.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,15 @@ import { EntityValue } from "../entity-value";
55
export class EntityBinaryField extends EntityField {
66
toRedisJson(): RedisJsonData {
77
const data: RedisJsonData = {};
8-
if (this.value !== null) data[this.name] = this.valueAsBuffer.toString('base64')
8+
if (this.value !== null) data[this.name] = [...this.valueAsBuffer]
99
return data;
1010
}
1111

1212
fromRedisJson(value: any) {
13-
if (value !== null) this.value = Buffer.from(value, 'base64');
13+
if (!this.isBuffer(value)) {
14+
throw Error(`Non-binary value of '${value}' read from Redis for binary field.`)
15+
}
16+
this.value = Buffer.from([...value]);
1417
}
1518

1619
toRedisHash(): RedisHashData {

lib/schema/builders/json-schema-builder.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,9 @@ export class JsonSchemaBuilder<TEntity extends Entity> extends SchemaBuilder<TEn
5757
...this.buildIndexed(fieldDef),
5858
]
5959
case 'binary':
60-
if (fieldDef.vector)
61-
console.warn(`You have marked the ${fieldDef.type} field '${field}' as vector but RediSearch doesn't support the VECTOR argument on a field for JSON. Ignored.`);
6260
return [
63-
...fieldInfo, 'NOINDEX',
61+
...fieldInfo,
62+
...this.buildVector(fieldDef),
6463
]
6564
};
6665
}

spec/functional/vector.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ describe("Vector", () => {
7979
const results = await redis.sendCommand([
8080
'FT.SEARCH', 'Product:index', '*=>[KNN 2 @image $query_vector]', 'PARAMS', '2',
8181
'query_vector', Buffer.from(products[0].image, 'hex'),
82-
'RETURN', '3', '__image_score name price',
82+
'RETURN', '3', '__image_score", "name", "price',
8383
'SORTBY', '__image_score',
8484
'DIALECT', '2'
8585
]) as any[]

spec/helpers/example-data.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ export const SOME_MORE_STRINGS: Array<string> = ['charlie', 'delta', 'echo'];
5959
export const SOME_MORE_STRINGS_JSON: string = JSON.stringify(SOME_MORE_STRINGS);
6060
export const SOME_MORE_STRINGS_JOINED: string = SOME_MORE_STRINGS.join('|');
6161

62-
export const A_BUFFER = Buffer.from([1, 2, 3, 4, 5, 6])
63-
export const A_BUFFER_BASE64 = 'AQIDBAUG'
62+
export const A_BUFFER_VALUES = [1, 2, 3, 4, 5, 6]
63+
export const A_BUFFER = Buffer.from(A_BUFFER_VALUES)
6464

6565
export type SampleEntityData = {
6666
aString: string | null;

spec/unit/entity/entity-binary-field.spec.ts

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import { FieldDefinition } from "../../../lib";
22
import { EntityBinaryField } from "$lib/entity/fields";
3-
import { A_DATE, A_NUMBER, A_NUMBER_STRING, A_POINT, A_STRING, SOME_STRINGS, A_BUFFER, A_BUFFER_BASE64 } from "../../helpers/example-data";
3+
import { A_DATE, A_NUMBER, A_NUMBER_STRING, A_POINT, A_STRING, SOME_STRINGS, A_BUFFER_VALUES, A_BUFFER } from "../../helpers/example-data";
44

55
const FIELD_NAME = 'foo';
66
const FIELD_DEF: FieldDefinition = { type: 'binary' };
77
const EXPECTED_NULL_JSON_DATA = {};
88
const EXPECTED_NULL_HASH_DATA = {};
9-
const EXPECTED_JSON_DATA = { foo: A_BUFFER_BASE64 };
9+
const EXPECTED_JSON_DATA = { foo: A_BUFFER_VALUES };
1010
const EXPECTED_HASH_DATA = { foo: A_BUFFER };
1111

1212
describe("EntityBinaryField", () => {
@@ -25,18 +25,14 @@ describe("EntityBinaryField", () => {
2525
it("converts to the expected Redis Hash data", () => expect(field.toRedisHash()).toEqual(EXPECTED_NULL_HASH_DATA));
2626

2727
describe("when loaded from Redis JSON data", () => {
28-
beforeEach(() => field.fromRedisJson(A_BUFFER_BASE64));
28+
beforeEach(() => field.fromRedisJson(A_BUFFER));
2929
it("has the expected value", () => expect(field.value).toEqual(A_BUFFER));
3030
});
3131

32-
describe("when loaded from Redis JSON data containing a null", () => {
33-
beforeEach(() => field.fromRedisJson(null));
34-
it("has the expected value", () => expect(field.value).toBeNull());
35-
});
36-
37-
it("when loaded from Redis JSON data containing invalid base64", () => {
38-
beforeEach(() => field.fromRedisJson('^~$'));
39-
it("has has the null value", () => expect(field.value).toBeNull());
32+
it("complains when loaded from invalid Redis Json data", () => {
33+
// @ts-ignore: JavaScript trap
34+
expect(() => field.fromRedisJson('foo'))
35+
.toThrow("Non-binary value of 'foo' read from Redis for binary field.");
4036
});
4137

4238
describe("when loaded from Redis Hash data", () => {

spec/unit/schema/binary-hash-fields.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ describe("Schema", () => {
5555
}],
5656

5757
// NOTE: it makes little sense to do this, but maybe someone wants to turn off indexing
58-
// but keep the schema definition, so we'll assume that NOINDEX shoudl take precendence
58+
// but keep the schema definition, so we'll assume that NOINDEX should take precedence
5959
["that defines an unindexed FLAT vector for a HASH", {
6060
schemaDef: { aField: { type: 'binary', indexed: false, vector: { algorithm: 'FLAT', dim: 512, distance_metric: 'COSINE' } } } as SchemaDefinition,
6161
dataStructure: 'HASH',

spec/unit/schema/binary-json-fields.spec.ts

Lines changed: 118 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,34 +3,137 @@ import { Entity } from '$lib/entity/entity';
33
import { SchemaDefinition } from '$lib/schema/definition';
44
import { DataStructure } from '$lib/schema/options';
55

6-
const warnSpy = vi.spyOn(global.console, 'warn').mockImplementation(() => {})
7-
86
describe("Schema", () => {
97
describe.each([
108

11-
["that defines a binary for a JSON", {
9+
["that defines an unindexed binary", {
1210
schemaDef: { aField: { type: 'binary' } } as SchemaDefinition,
1311
dataStructure: 'JSON',
1412
expectedRedisSchema: [
1513
'$.aField', 'AS', 'aField', 'NOINDEX'
16-
],
17-
expectedWarning: null
14+
]
15+
}],
16+
17+
["that defines a FLAT / 512 / COSINE vector for a JSON", {
18+
schemaDef: { aField: { type: 'binary', vector: { algorithm: 'FLAT', dim: 512, distance_metric: 'COSINE' } } } as SchemaDefinition,
19+
dataStructure: 'JSON',
20+
expectedRedisSchema: [
21+
'$.aField', 'AS', 'aField', 'VECTOR', 'FLAT', '6', 'TYPE', 'FLOAT32', 'DIM', '512', 'DISTANCE_METRIC', 'COSINE'
22+
]
23+
}],
24+
25+
["that defines a FLAT / 256 / IP vector for a JSON", {
26+
schemaDef: { aField: { type: 'binary', vector: { algorithm: 'FLAT', dim: 256, distance_metric: 'IP' } } } as SchemaDefinition,
27+
dataStructure: 'JSON',
28+
expectedRedisSchema: [
29+
'$.aField', 'AS', 'aField', 'VECTOR', 'FLAT', '6', 'TYPE', 'FLOAT32', 'DIM', '256', 'DISTANCE_METRIC', 'IP'
30+
]
31+
}],
32+
33+
["that defines a FLAT / 1024 / L2 vector for a JSON", {
34+
schemaDef: { aField: { type: 'binary', vector: { algorithm: 'FLAT', dim: 1024, distance_metric: 'L2' } } } as SchemaDefinition,
35+
dataStructure: 'JSON',
36+
expectedRedisSchema: [
37+
'$.aField', 'AS', 'aField', 'VECTOR', 'FLAT', '6', 'TYPE', 'FLOAT32', 'DIM', '1024', 'DISTANCE_METRIC', 'L2'
38+
]
39+
}],
40+
41+
["that defines a FLAT / 512 / COSINE vector with block_size for a JSON", {
42+
schemaDef: { aField: { type: 'binary', vector: { algorithm: 'FLAT', dim: 512, distance_metric: 'COSINE', block_size: 512*512 } } } as SchemaDefinition,
43+
dataStructure: 'JSON',
44+
expectedRedisSchema: [
45+
'$.aField', 'AS', 'aField', 'VECTOR', 'FLAT', '8', 'TYPE', 'FLOAT32', 'DIM', '512', 'DISTANCE_METRIC', 'COSINE', 'BLOCK_SIZE', '262144'
46+
]
47+
}],
48+
49+
["that defines an aliased FLAT / 512 / COSINE vector for a JSON", {
50+
schemaDef: { aField: { type: 'binary', alias: 'anotherField', vector: { algorithm: 'FLAT', dim: 512, distance_metric: 'COSINE' } } } as SchemaDefinition,
51+
dataStructure: 'JSON',
52+
expectedRedisSchema: [
53+
'$.anotherField', 'AS', 'anotherField', 'VECTOR', 'FLAT', '6', 'TYPE', 'FLOAT32', 'DIM', '512', 'DISTANCE_METRIC', 'COSINE'
54+
]
55+
}],
56+
57+
// NOTE: it makes little sense to do this, but maybe someone wants to turn off indexing
58+
// but keep the schema definition, so we'll assume that NOINDEX should take precedence
59+
["that defines an unindexed FLAT vector for a JSON", {
60+
schemaDef: { aField: { type: 'binary', indexed: false, vector: { algorithm: 'FLAT', dim: 512, distance_metric: 'COSINE' } } } as SchemaDefinition,
61+
dataStructure: 'JSON',
62+
expectedRedisSchema: [
63+
'$.aField', 'AS', 'aField', 'NOINDEX'
64+
]
65+
}],
66+
67+
["that defines a HNSW / 512 / COSINE vector for a JSON", {
68+
schemaDef: { aField: { type: 'binary', vector: { algorithm: 'HNSW', dim: 512, distance_metric: 'COSINE' } } } as SchemaDefinition,
69+
dataStructure: 'JSON',
70+
expectedRedisSchema: [
71+
'$.aField', 'AS', 'aField', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', '512', 'DISTANCE_METRIC', 'COSINE'
72+
]
73+
}],
74+
75+
["that defines a HNSW / 256 / IP vector for a JSON", {
76+
schemaDef: { aField: { type: 'binary', vector: { algorithm: 'HNSW', dim: 256, distance_metric: 'IP' } } } as SchemaDefinition,
77+
dataStructure: 'JSON',
78+
expectedRedisSchema: [
79+
'$.aField', 'AS', 'aField', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', '256', 'DISTANCE_METRIC', 'IP'
80+
]
81+
}],
82+
83+
["that defines a HNSW / 1024 / L2 vector for a JSON", {
84+
schemaDef: { aField: { type: 'binary', vector: { algorithm: 'HNSW', dim: 1024, distance_metric: 'L2' } } } as SchemaDefinition,
85+
dataStructure: 'JSON',
86+
expectedRedisSchema: [
87+
'$.aField', 'AS', 'aField', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', '1024', 'DISTANCE_METRIC', 'L2'
88+
]
89+
}],
90+
91+
["that defines a HNSW / 512 / COSINE vector with M for a JSON", {
92+
schemaDef: { aField: { type: 'binary', vector: { algorithm: 'HNSW', dim: 512, distance_metric: 'COSINE', m: 8 } } } as SchemaDefinition,
93+
dataStructure: 'JSON',
94+
expectedRedisSchema: [
95+
'$.aField', 'AS', 'aField', 'VECTOR', 'HNSW', '8', 'TYPE', 'FLOAT32', 'DIM', '512', 'DISTANCE_METRIC', 'COSINE', 'M', '8'
96+
]
97+
}],
98+
99+
["that defines a HNSW / 512 / COSINE vector with EF_CONSTRUCTION for a JSON", {
100+
schemaDef: { aField: { type: 'binary', vector: { algorithm: 'HNSW', dim: 512, distance_metric: 'COSINE', ef_construction: 250 } } } as SchemaDefinition,
101+
dataStructure: 'JSON',
102+
expectedRedisSchema: [
103+
'$.aField', 'AS', 'aField', 'VECTOR', 'HNSW', '8', 'TYPE', 'FLOAT32', 'DIM', '512', 'DISTANCE_METRIC', 'COSINE', 'EF_CONSTRUCTION', '250'
104+
]
105+
}],
106+
107+
["that defines a HNSW / 512 / COSINE vector with EF_RUNTIME for a JSON", {
108+
schemaDef: { aField: { type: 'binary', vector: { algorithm: 'HNSW', dim: 512, distance_metric: 'COSINE', ef_runtime: 20 } } } as SchemaDefinition,
109+
dataStructure: 'JSON',
110+
expectedRedisSchema: [
111+
'$.aField', 'AS','aField', 'VECTOR', 'HNSW', '8', 'TYPE', 'FLOAT32', 'DIM', '512', 'DISTANCE_METRIC', 'COSINE', 'EF_RUNTIME', '20'
112+
]
113+
}],
114+
115+
["that defines an aliased HNSW / 512 / COSINE vector for a JSON", {
116+
schemaDef: { aField: { type: 'binary', alias: 'anotherField', vector: { algorithm: 'HNSW', dim: 512, distance_metric: 'COSINE' } } } as SchemaDefinition,
117+
dataStructure: 'JSON',
118+
expectedRedisSchema: [
119+
'$.anotherField', 'AS', 'anotherField', 'VECTOR', 'HNSW', '6', 'TYPE', 'FLOAT32', 'DIM', '512', 'DISTANCE_METRIC', 'COSINE'
120+
]
121+
}],
122+
123+
// NOTE: it makes little sense to do this, but maybe someone wants to turn off indexing
124+
// but keep the schema definition, so we'll assume that NOINDEX should take precedence
125+
["that defines an unindexed HNSW vector for a JSON", {
126+
schemaDef: { aField: { type: 'binary', indexed: false, vector: { algorithm: 'HNSW', dim: 512, distance_metric: 'COSINE' } } } as SchemaDefinition,
127+
dataStructure: 'JSON',
128+
expectedRedisSchema: [
129+
'$.aField', 'AS','aField', 'NOINDEX'
130+
]
18131
}],
19132

20133
])("%s", (_, data) => {
21134

22135
class TestEntity extends Entity {}
23136

24-
if (data.expectedWarning) {
25-
it("generates the expected warning", () => {
26-
expect(warnSpy).toHaveBeenCalledWith(data.expectedWarning);
27-
});
28-
} else {
29-
it("does not generate a warning", () => {
30-
expect(warnSpy).not.toHaveBeenCalled();
31-
});
32-
}
33-
34137
it("generates a Redis schema for the field", () => {
35138
let schemaDef = data.schemaDef;
36139
let dataStructure = data.dataStructure as DataStructure;

0 commit comments

Comments
 (0)