@@ -74,24 +74,8 @@ implementation 'ai.djl.huggingface:tokenizers:0.24.0'
74
74
75
75
Import the following classes in your source file:
76
76
77
- ``` java
78
- // Jedis client and query engine classes.
79
- import redis.clients.jedis.UnifiedJedis ;
80
- import redis.clients.jedis.search.* ;
81
- import redis.clients.jedis.search.schemafields.* ;
82
- import redis.clients.jedis.search.schemafields.VectorField.VectorAlgorithm ;
83
- import redis.clients.jedis.exceptions.JedisDataException ;
84
-
85
- // Data manipulation.
86
- import java.nio.ByteBuffer ;
87
- import java.nio.ByteOrder ;
88
- import java.util.Map ;
89
- import java.util.List ;
90
- import org.json.JSONObject ;
91
-
92
- // Tokenizer to generate the vector embeddings.
93
- import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer ;
94
- ```
77
+ {{< clients-example set="HomeQueryVec" step="import" lang_filter="Java-Sync" >}}
78
+ {{< /clients-example >}}
95
79
96
80
## Define a helper method
97
81
@@ -103,22 +87,8 @@ method `longsToFloatsByteString()` that takes the `long` array that the
103
87
embedding model returns, converts it to an array of ` float ` values, and
104
88
then encodes the ` float ` array as a ` byte ` string:
105
89
106
- ``` java
107
- public static byte [] longsToFloatsByteString(long [] input) {
108
- float [] floats = new float [input. length];
109
- for (int i = 0 ; i < input. length; i++ ) {
110
- floats[i] = input[i];
111
- }
112
-
113
- byte [] bytes = new byte [Float . BYTES * floats. length];
114
- ByteBuffer
115
- .wrap(bytes)
116
- .order(ByteOrder . LITTLE_ENDIAN )
117
- .asFloatBuffer()
118
- .put(floats);
119
- return bytes;
120
- }
121
- ```
90
+ {{< clients-example set="HomeQueryVec" step="helper_method" lang_filter="Java-Sync" >}}
91
+ {{< /clients-example >}}
122
92
123
93
## Create a tokenizer instance
124
94
@@ -128,12 +98,8 @@ tokenizer to generate the embeddings. The vectors that represent the
128
98
embeddings have 768 components, regardless of the length of the input
129
99
text.
130
100
131
- ``` java
132
- HuggingFaceTokenizer sentenceTokenizer = HuggingFaceTokenizer . newInstance(
133
- " sentence-transformers/all-mpnet-base-v2" ,
134
- Map . of(" maxLength" , " 768" , " modelMaxLength" , " 768" )
135
- );
136
- ```
101
+ {{< clients-example set="HomeQueryVec" step="tokenizer" lang_filter="Java-Sync" >}}
102
+ {{< /clients-example >}}
137
103
138
104
## Create the index
139
105
@@ -142,11 +108,8 @@ name `vector_idx`. (The `ftDropIndex()` call throws an exception if
142
108
the index doesn't already exist, which is why you need the
143
109
` try...catch ` block.)
144
110
145
- ``` java
146
- UnifiedJedis jedis = new UnifiedJedis (" redis://localhost:6379" );
147
-
148
- try {jedis. ftDropIndex(" vector_idx" );} catch (JedisDataException j){}
149
- ```
111
+ {{< clients-example set="HomeQueryVec" step="connect" lang_filter="Java-Sync" >}}
112
+ {{< /clients-example >}}
150
113
151
114
Next, we create the index.
152
115
The schema in the example below includes three fields: the text content to index, a
@@ -162,30 +125,8 @@ and 768 dimensions, as required by the `all-mpnet-base-v2` embedding model.
162
125
The ` FTCreateParams ` object specifies hash objects for storage and a
163
126
prefix ` doc: ` that identifies the hash objects we want to index.
164
127
165
- ``` java
166
- SchemaField [] schema = {
167
- TextField . of(" content" ),
168
- TagField . of(" genre" ),
169
- VectorField . builder()
170
- .fieldName(" embedding" )
171
- .algorithm(VectorAlgorithm . HNSW )
172
- .attributes(
173
- Map . of(
174
- " TYPE" , " FLOAT32" ,
175
- " DIM" , 768 ,
176
- " DISTANCE_METRIC" , " L2"
177
- )
178
- )
179
- .build()
180
- };
181
-
182
- jedis. ftCreate(" vector_idx" ,
183
- FTCreateParams . createParams()
184
- .addPrefix(" doc:" )
185
- .on(IndexDataType . HASH ),
186
- schema
187
- );
188
- ```
128
+ {{< clients-example set="HomeQueryVec" step="create_index" lang_filter="Java-Sync" >}}
129
+ {{< /clients-example >}}
189
130
190
131
## Add data
191
132
@@ -204,31 +145,8 @@ below). Note that when we set the `embedding` field, we must use an overload
204
145
of ` hset() ` that requires ` byte ` arrays for each of the key, the field name, and
205
146
the value, which is why we include the ` getBytes() ` calls on the strings.
206
147
207
- ``` java
208
- String sentence1 = " That is a very happy person" ;
209
- jedis. hset(" doc:1" , Map . of(" content" , sentence1, " genre" , " persons" ));
210
- jedis. hset(
211
- " doc:1" . getBytes(),
212
- " embedding" . getBytes(),
213
- longsToFloatsByteString(sentenceTokenizer. encode(sentence1). getIds())
214
- );
215
-
216
- String sentence2 = " That is a happy dog" ;
217
- jedis. hset(" doc:2" , Map . of(" content" , sentence2, " genre" , " pets" ));
218
- jedis. hset(
219
- " doc:2" . getBytes(),
220
- " embedding" . getBytes(),
221
- longsToFloatsByteString(sentenceTokenizer. encode(sentence2). getIds())
222
- );
223
-
224
- String sentence3 = " Today is a sunny day" ;
225
- jedis. hset(" doc:3" , Map . of(" content" , sentence3, " genre" , " weather" ));
226
- jedis. hset(
227
- " doc:3" . getBytes(),
228
- " embedding" . getBytes(),
229
- longsToFloatsByteString(sentenceTokenizer. encode(sentence3). getIds())
230
- );
231
- ```
148
+ {{< clients-example set="HomeQueryVec" step="add_data" lang_filter="Java-Sync" >}}
149
+ {{< /clients-example >}}
232
150
233
151
## Run a query
234
152
@@ -246,35 +164,8 @@ The query is a
246
164
[K nearest neighbors (KNN)]({{< relref "/develop/ai/search-and-query/vectors#knn-vector-search" >}})
247
165
search that sorts the results in order of vector distance from the query vector.
248
166
249
- ``` java
250
- String sentence = " That is a happy person" ;
251
-
252
- int K = 3 ;
253
- Query q = new Query (" *=>[KNN $K @embedding $BLOB AS distance]" )
254
- .returnFields(" content" , " distance" )
255
- .addParam(" K" , K )
256
- .addParam(
257
- " BLOB" ,
258
- longsToFloatsByteString(
259
- sentenceTokenizer. encode(sentence). getIds()
260
- )
261
- )
262
- .setSortBy(" distance" , true )
263
- .dialect(2 );
264
-
265
- List<Document > docs = jedis. ftSearch(" vector_idx" , q). getDocuments();
266
-
267
- for (Document doc: docs) {
268
- System . out. println(
269
- String . format(
270
- " ID: %s, Distance: %s, Content: %s" ,
271
- doc. getId(),
272
- doc. get(" distance" ),
273
- doc. get(" content" )
274
- )
275
- );
276
- }
277
- ```
167
+ {{< clients-example set="HomeQueryVec" step="query" lang_filter="Java-Sync" >}}
168
+ {{< /clients-example >}}
278
169
279
170
Assuming you have added the code from the steps above to your source file,
280
171
it is now ready to run, but note that it may take a while to complete when
@@ -307,94 +198,24 @@ every query. Also, you must specify `IndexDataType.JSON` when you create the ind
307
198
The code below shows these differences, but the index is otherwise very similar to
308
199
the one created previously for hashes:
309
200
310
- ``` java
311
- SchemaField [] jsonSchema = {
312
- TextField . of(" $.content" ). as(" content" ),
313
- TagField . of(" $.genre" ). as(" genre" ),
314
- VectorField . builder()
315
- .fieldName(" $.embedding" ). as(" embedding" )
316
- .algorithm(VectorAlgorithm . HNSW )
317
- .attributes(
318
- Map . of(
319
- " TYPE" , " FLOAT32" ,
320
- " DIM" , 768 ,
321
- " DISTANCE_METRIC" , " L2"
322
- )
323
- )
324
- .build()
325
- };
326
-
327
- jedis. ftCreate(" vector_json_idx" ,
328
- FTCreateParams . createParams()
329
- .addPrefix(" jdoc:" )
330
- .on(IndexDataType . JSON ),
331
- jsonSchema
332
- );
333
- ```
201
+ {{< clients-example set="HomeQueryVec" step="json_schema" lang_filter="Java-Sync" >}}
202
+ {{< /clients-example >}}
334
203
335
204
An important difference with JSON indexing is that the vectors are
336
205
specified using arrays of ` float ` instead of binary strings. This requires
337
206
a modified version of the ` longsToFloatsByteString() ` method
338
207
used previously:
339
208
340
- ``` java
341
- public static float [] longArrayToFloatArray(long [] input) {
342
- float [] floats = new float [input. length];
343
- for (int i = 0 ; i < input. length; i++ ) {
344
- floats[i] = input[i];
345
- }
346
- return floats;
347
- }
348
- ```
209
+ {{< clients-example set="HomeQueryVec" step="json_helper_method" lang_filter="Java-Sync" >}}
210
+ {{< /clients-example >}}
349
211
350
212
Use [`jsonSet()`]({{< relref "/commands/json.set" >}}) to add the data
351
213
instead of [`hset()`]({{< relref "/commands/hset" >}}). Use instances
352
214
of ` JSONObject ` to supply the data, rather than the ` Map ` you would use for
353
215
hash objects.
354
216
355
- ``` java
356
- String jSentence1 = " That is a very happy person" ;
357
-
358
- JSONObject jdoc1 = new JSONObject ()
359
- .put(" content" , jSentence1)
360
- .put(" genre" , " persons" )
361
- .put(
362
- " embedding" ,
363
- longArrayToFloatArray(
364
- sentenceTokenizer. encode(jSentence1). getIds()
365
- )
366
- );
367
-
368
- jedis. jsonSet(" jdoc:1" , Path2 . ROOT_PATH , jdoc1);
369
-
370
- String jSentence2 = " That is a happy dog" ;
371
-
372
- JSONObject jdoc2 = new JSONObject ()
373
- .put(" content" , jSentence2)
374
- .put(" genre" , " pets" )
375
- .put(
376
- " embedding" ,
377
- longArrayToFloatArray(
378
- sentenceTokenizer. encode(jSentence2). getIds()
379
- )
380
- );
381
-
382
- jedis. jsonSet(" jdoc:2" , Path2 . ROOT_PATH , jdoc2);
383
-
384
- String jSentence3 = " Today is a sunny day" ;
385
-
386
- JSONObject jdoc3 = new JSONObject ()
387
- .put(" content" , jSentence3)
388
- .put(" genre" , " weather" )
389
- .put(
390
- " embedding" ,
391
- longArrayToFloatArray(
392
- sentenceTokenizer. encode(jSentence3). getIds()
393
- )
394
- );
395
-
396
- jedis. jsonSet(" jdoc:3" , Path2 . ROOT_PATH , jdoc3);
397
- ```
217
+ {{< clients-example set="HomeQueryVec" step="json_data" lang_filter="Java-Sync" >}}
218
+ {{< /clients-example >}}
398
219
399
220
The query is almost identical to the one for the hash documents. This
400
221
demonstrates how the right choice of aliases for the JSON paths can
@@ -403,28 +224,8 @@ is that the vector parameter for the query is still specified as a
403
224
binary string (using the ` longsToFloatsByteString() ` method), even though
404
225
the data for the ` embedding ` field of the JSON was specified as an array.
405
226
406
- ``` java
407
- String jSentence = " That is a happy person" ;
408
-
409
- int jK = 3 ;
410
- Query jq = new Query (" *=>[KNN $K @embedding $BLOB AS distance]" ).
411
- returnFields(" content" , " distance" ).
412
- addParam(" K" , jK).
413
- addParam(
414
- " BLOB" ,
415
- longsToFloatsByteString(
416
- sentenceTokenizer. encode(jSentence). getIds()
417
- )
418
- )
419
- .setSortBy(" distance" , true )
420
- .dialect(2 );
421
-
422
- // Execute the query
423
- List<Document > jDocs = jedis
424
- .ftSearch(" vector_json_idx" , jq)
425
- .getDocuments();
426
-
427
- ```
227
+ {{< clients-example set="HomeQueryVec" step="json_query" lang_filter="Java-Sync" >}}
228
+ {{< /clients-example >}}
428
229
429
230
Apart from the ` jdoc: ` prefixes for the keys, the result from the JSON
430
231
query is the same as the result from the hash query:
0 commit comments