@@ -74,24 +74,8 @@ implementation 'ai.djl.huggingface:tokenizers:0.24.0'
7474
7575Import the following classes in your source file:
7676
77- ``` java
78- // Jedis client and query engine classes.
79- import redis.clients.jedis.UnifiedJedis ;
80- import redis.clients.jedis.search.* ;
81- import redis.clients.jedis.search.schemafields.* ;
82- import redis.clients.jedis.search.schemafields.VectorField.VectorAlgorithm ;
83- import redis.clients.jedis.exceptions.JedisDataException ;
84-
85- // Data manipulation.
86- import java.nio.ByteBuffer ;
87- import java.nio.ByteOrder ;
88- import java.util.Map ;
89- import java.util.List ;
90- import org.json.JSONObject ;
91-
92- // Tokenizer to generate the vector embeddings.
93- import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer ;
94- ```
77+ {{< clients-example set="HomeQueryVec" step="import" lang_filter="Java-Sync" >}}
78+ {{< /clients-example >}}
9579
9680## Define a helper method
9781
@@ -103,22 +87,8 @@ method `longsToFloatsByteString()` that takes the `long` array that the
10387embedding model returns, converts it to an array of ` float ` values, and
10488then encodes the ` float ` array as a ` byte ` string:
10589
106- ``` java
107- public static byte [] longsToFloatsByteString(long [] input) {
108- float [] floats = new float [input. length];
109- for (int i = 0 ; i < input. length; i++ ) {
110- floats[i] = input[i];
111- }
112-
113- byte [] bytes = new byte [Float . BYTES * floats. length];
114- ByteBuffer
115- .wrap(bytes)
116- .order(ByteOrder . LITTLE_ENDIAN )
117- .asFloatBuffer()
118- .put(floats);
119- return bytes;
120- }
121- ```
90+ {{< clients-example set="HomeQueryVec" step="helper_method" lang_filter="Java-Sync" >}}
91+ {{< /clients-example >}}
12292
12393## Create a tokenizer instance
12494
@@ -128,12 +98,8 @@ tokenizer to generate the embeddings. The vectors that represent the
12898embeddings have 768 components, regardless of the length of the input
12999text.
130100
131- ``` java
132- HuggingFaceTokenizer sentenceTokenizer = HuggingFaceTokenizer . newInstance(
133- " sentence-transformers/all-mpnet-base-v2" ,
134- Map . of(" maxLength" , " 768" , " modelMaxLength" , " 768" )
135- );
136- ```
101+ {{< clients-example set="HomeQueryVec" step="tokenizer" lang_filter="Java-Sync" >}}
102+ {{< /clients-example >}}
137103
138104## Create the index
139105
@@ -142,11 +108,8 @@ name `vector_idx`. (The `ftDropIndex()` call throws an exception if
142108the index doesn't already exist, which is why you need the
143109` try...catch ` block.)
144110
145- ``` java
146- UnifiedJedis jedis = new UnifiedJedis (" redis://localhost:6379" );
147-
148- try {jedis. ftDropIndex(" vector_idx" );} catch (JedisDataException j){}
149- ```
111+ {{< clients-example set="HomeQueryVec" step="connect" lang_filter="Java-Sync" >}}
112+ {{< /clients-example >}}
150113
151114Next, we create the index.
152115The schema in the example below includes three fields: the text content to index, a
@@ -162,30 +125,8 @@ and 768 dimensions, as required by the `all-mpnet-base-v2` embedding model.
162125The ` FTCreateParams ` object specifies hash objects for storage and a
163126prefix ` doc: ` that identifies the hash objects we want to index.
164127
165- ``` java
166- SchemaField [] schema = {
167- TextField . of(" content" ),
168- TagField . of(" genre" ),
169- VectorField . builder()
170- .fieldName(" embedding" )
171- .algorithm(VectorAlgorithm . HNSW )
172- .attributes(
173- Map . of(
174- " TYPE" , " FLOAT32" ,
175- " DIM" , 768 ,
176- " DISTANCE_METRIC" , " L2"
177- )
178- )
179- .build()
180- };
181-
182- jedis. ftCreate(" vector_idx" ,
183- FTCreateParams . createParams()
184- .addPrefix(" doc:" )
185- .on(IndexDataType . HASH ),
186- schema
187- );
188- ```
128+ {{< clients-example set="HomeQueryVec" step="create_index" lang_filter="Java-Sync" >}}
129+ {{< /clients-example >}}
189130
190131## Add data
191132
@@ -204,31 +145,8 @@ below). Note that when we set the `embedding` field, we must use an overload
204145of ` hset() ` that requires ` byte ` arrays for each of the key, the field name, and
205146the value, which is why we include the ` getBytes() ` calls on the strings.
206147
207- ``` java
208- String sentence1 = " That is a very happy person" ;
209- jedis. hset(" doc:1" , Map . of(" content" , sentence1, " genre" , " persons" ));
210- jedis. hset(
211- " doc:1" . getBytes(),
212- " embedding" . getBytes(),
213- longsToFloatsByteString(sentenceTokenizer. encode(sentence1). getIds())
214- );
215-
216- String sentence2 = " That is a happy dog" ;
217- jedis. hset(" doc:2" , Map . of(" content" , sentence2, " genre" , " pets" ));
218- jedis. hset(
219- " doc:2" . getBytes(),
220- " embedding" . getBytes(),
221- longsToFloatsByteString(sentenceTokenizer. encode(sentence2). getIds())
222- );
223-
224- String sentence3 = " Today is a sunny day" ;
225- jedis. hset(" doc:3" , Map . of(" content" , sentence3, " genre" , " weather" ));
226- jedis. hset(
227- " doc:3" . getBytes(),
228- " embedding" . getBytes(),
229- longsToFloatsByteString(sentenceTokenizer. encode(sentence3). getIds())
230- );
231- ```
148+ {{< clients-example set="HomeQueryVec" step="add_data" lang_filter="Java-Sync" >}}
149+ {{< /clients-example >}}
232150
233151## Run a query
234152
@@ -246,35 +164,8 @@ The query is a
246164[K nearest neighbors (KNN)]({{< relref "/develop/ai/search-and-query/vectors#knn-vector-search" >}})
247165search that sorts the results in order of vector distance from the query vector.
248166
249- ``` java
250- String sentence = " That is a happy person" ;
251-
252- int K = 3 ;
253- Query q = new Query (" *=>[KNN $K @embedding $BLOB AS distance]" )
254- .returnFields(" content" , " distance" )
255- .addParam(" K" , K )
256- .addParam(
257- " BLOB" ,
258- longsToFloatsByteString(
259- sentenceTokenizer. encode(sentence). getIds()
260- )
261- )
262- .setSortBy(" distance" , true )
263- .dialect(2 );
264-
265- List<Document > docs = jedis. ftSearch(" vector_idx" , q). getDocuments();
266-
267- for (Document doc: docs) {
268- System . out. println(
269- String . format(
270- " ID: %s, Distance: %s, Content: %s" ,
271- doc. getId(),
272- doc. get(" distance" ),
273- doc. get(" content" )
274- )
275- );
276- }
277- ```
167+ {{< clients-example set="HomeQueryVec" step="query" lang_filter="Java-Sync" >}}
168+ {{< /clients-example >}}
278169
279170Assuming you have added the code from the steps above to your source file,
280171it is now ready to run, but note that it may take a while to complete when
@@ -307,94 +198,24 @@ every query. Also, you must specify `IndexDataType.JSON` when you create the ind
307198The code below shows these differences, but the index is otherwise very similar to
308199the one created previously for hashes:
309200
310- ``` java
311- SchemaField [] jsonSchema = {
312- TextField . of(" $.content" ). as(" content" ),
313- TagField . of(" $.genre" ). as(" genre" ),
314- VectorField . builder()
315- .fieldName(" $.embedding" ). as(" embedding" )
316- .algorithm(VectorAlgorithm . HNSW )
317- .attributes(
318- Map . of(
319- " TYPE" , " FLOAT32" ,
320- " DIM" , 768 ,
321- " DISTANCE_METRIC" , " L2"
322- )
323- )
324- .build()
325- };
326-
327- jedis. ftCreate(" vector_json_idx" ,
328- FTCreateParams . createParams()
329- .addPrefix(" jdoc:" )
330- .on(IndexDataType . JSON ),
331- jsonSchema
332- );
333- ```
201+ {{< clients-example set="HomeQueryVec" step="json_schema" lang_filter="Java-Sync" >}}
202+ {{< /clients-example >}}
334203
335204An important difference with JSON indexing is that the vectors are
336205specified using arrays of ` float ` instead of binary strings. This requires
337206a modified version of the ` longsToFloatsByteString() ` method
338207used previously:
339208
340- ``` java
341- public static float [] longArrayToFloatArray(long [] input) {
342- float [] floats = new float [input. length];
343- for (int i = 0 ; i < input. length; i++ ) {
344- floats[i] = input[i];
345- }
346- return floats;
347- }
348- ```
209+ {{< clients-example set="HomeQueryVec" step="json_helper_method" lang_filter="Java-Sync" >}}
210+ {{< /clients-example >}}
349211
350212Use [`jsonSet()`]({{< relref "/commands/json.set" >}}) to add the data
351213instead of [`hset()`]({{< relref "/commands/hset" >}}). Use instances
352214of ` JSONObject ` to supply the data instead of ` Map ` , as you would for
353215hash objects.
354216
355- ``` java
356- String jSentence1 = " That is a very happy person" ;
357-
358- JSONObject jdoc1 = new JSONObject ()
359- .put(" content" , jSentence1)
360- .put(" genre" , " persons" )
361- .put(
362- " embedding" ,
363- longArrayToFloatArray(
364- sentenceTokenizer. encode(jSentence1). getIds()
365- )
366- );
367-
368- jedis. jsonSet(" jdoc:1" , Path2 . ROOT_PATH , jdoc1);
369-
370- String jSentence2 = " That is a happy dog" ;
371-
372- JSONObject jdoc2 = new JSONObject ()
373- .put(" content" , jSentence2)
374- .put(" genre" , " pets" )
375- .put(
376- " embedding" ,
377- longArrayToFloatArray(
378- sentenceTokenizer. encode(jSentence2). getIds()
379- )
380- );
381-
382- jedis. jsonSet(" jdoc:2" , Path2 . ROOT_PATH , jdoc2);
383-
384- String jSentence3 = " Today is a sunny day" ;
385-
386- JSONObject jdoc3 = new JSONObject ()
387- .put(" content" , jSentence3)
388- .put(" genre" , " weather" )
389- .put(
390- " embedding" ,
391- longArrayToFloatArray(
392- sentenceTokenizer. encode(jSentence3). getIds()
393- )
394- );
395-
396- jedis. jsonSet(" jdoc:3" , Path2 . ROOT_PATH , jdoc3);
397- ```
217+ {{< clients-example set="HomeQueryVec" step="json_data" lang_filter="Java-Sync" >}}
218+ {{< /clients-example >}}
398219
399220The query is almost identical to the one for the hash documents. This
400221demonstrates how the right choice of aliases for the JSON paths can
@@ -403,28 +224,8 @@ is that the vector parameter for the query is still specified as a
403224binary string (using the ` longsToFloatsByteString() ` method), even though
404225the data for the ` embedding ` field of the JSON was specified as an array.
405226
406- ``` java
407- String jSentence = " That is a happy person" ;
408-
409- int jK = 3 ;
410- Query jq = new Query (" *=>[KNN $K @embedding $BLOB AS distance]" ).
411- returnFields(" content" , " distance" ).
412- addParam(" K" , jK).
413- addParam(
414- " BLOB" ,
415- longsToFloatsByteString(
416- sentenceTokenizer. encode(jSentence). getIds()
417- )
418- )
419- .setSortBy(" distance" , true )
420- .dialect(2 );
421-
422- // Execute the query
423- List<Document > jDocs = jedis
424- .ftSearch(" vector_json_idx" , jq)
425- .getDocuments();
426-
427- ```
227+ {{< clients-example set="HomeQueryVec" step="json_query" lang_filter="Java-Sync" >}}
228+ {{< /clients-example >}}
428229
429230Apart from the ` jdoc: ` prefixes for the keys, the result from the JSON
430231query is the same as for hash:
0 commit comments