Skip to content

Commit 1d33067

Browse files
authored
Merge pull request #41 from TheovanKraay/nosql-vector-search
Nosql vector search java sample
2 parents 3332a1f + 91a8dca commit 1d33067

File tree

11 files changed

+811
-0
lines changed

11 files changed

+811
-0
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
## [project-title] Changelog
2+
3+
<a name="x.y.z"></a>
4+
# x.y.z (yyyy-mm-dd)
5+
6+
*Features*
7+
* ...
8+
9+
*Bug Fixes*
10+
* ...
11+
12+
*Breaking Changes*
13+
* ...
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
.gradle
2+
build/
3+
!gradle/wrapper/gradle-wrapper.jar
4+
!**/src/main/**/build/
5+
!**/src/test/**/build/
6+
7+
### IntelliJ IDEA ###
8+
.idea/modules.xml
9+
.idea/jarRepositories.xml
10+
.idea/compiler.xml
11+
.idea/libraries/
12+
*.iws
13+
*.iml
14+
*.ipr
15+
out/
16+
!**/src/main/**/out/
17+
!**/src/test/**/out/
18+
/target/
19+
20+
### Eclipse ###
21+
.apt_generated
22+
.classpath
23+
.factorypath
24+
.project
25+
.settings
26+
.springBeans
27+
.sts4-cache
28+
bin/
29+
!**/src/main/**/bin/
30+
!**/src/test/**/bin/
31+
32+
### NetBeans ###
33+
/nbproject/private/
34+
/nbbuild/
35+
/dist/
36+
/nbdist/
37+
/.nb-gradle/
38+
39+
### VS Code ###
40+
.vscode/
41+
42+
### Mac OS ###
43+
.DS_Store
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
6+
<modelVersion>4.0.0</modelVersion>
7+
8+
<groupId>com.azure.cosmos</groupId>
9+
<artifactId>cosmos-db-vector-search-sample</artifactId>
10+
<version>1.0-SNAPSHOT</version>
11+
12+
<properties>
13+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
14+
<maven.compiler.source>17</maven.compiler.source>
15+
<maven.compiler.target>17</maven.compiler.target>
16+
</properties>
17+
18+
<dependencies>
19+
<dependency>
20+
<groupId>com.azure</groupId>
21+
<artifactId>azure-ai-openai</artifactId>
22+
<version>1.0.0-beta.2</version>
23+
</dependency>
24+
<!-- NOTE(review): LATEST is a floating version, so the resolved azure-cosmos release can change
     between builds and the build is not reproducible. Consider pinning an explicit version that
     includes the vector search APIs this sample uses (TODO confirm the minimum version). -->
<dependency>
25+
<groupId>com.azure</groupId>
26+
<artifactId>azure-cosmos</artifactId>
27+
<version>LATEST</version>
28+
</dependency>
29+
<dependency>
30+
<groupId>org.projectlombok</groupId>
31+
<artifactId>lombok</artifactId>
32+
<version>1.18.20</version>
33+
<scope>provided</scope>
34+
</dependency>
35+
<dependency>
36+
<groupId>org.apache.logging.log4j</groupId>
37+
<artifactId>log4j-api</artifactId>
38+
<version>2.17.2</version>
39+
</dependency>
40+
<dependency>
41+
<groupId>org.apache.logging.log4j</groupId>
42+
<artifactId>log4j-core</artifactId>
43+
<version>2.17.2</version>
44+
</dependency>
45+
<dependency>
46+
<groupId>org.slf4j</groupId>
47+
<artifactId>slf4j-api</artifactId>
48+
<version>1.7.36</version>
49+
</dependency>
50+
<dependency>
51+
<groupId>org.apache.logging.log4j</groupId>
52+
<artifactId>log4j-slf4j-impl</artifactId>
53+
<version>2.17.2</version>
54+
</dependency>
55+
</dependencies>
56+
<build>
57+
<plugins>
58+
<plugin>
59+
<groupId>org.codehaus.mojo</groupId>
60+
<artifactId>exec-maven-plugin</artifactId>
61+
<version>3.0.0</version>
62+
<configuration>
63+
<mainClass>com.azure.recipe.Main</mainClass>
64+
</configuration>
65+
</plugin>
66+
<plugin>
67+
<artifactId>maven-compiler-plugin</artifactId>
68+
<version>3.8.1</version>
69+
<configuration>
70+
<source>17</source>
71+
<target>17</target>
72+
</configuration>
73+
</plugin>
74+
</plugins>
75+
</build>
76+
77+
</project>
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package com.azure.recipe;
2+
3+
import com.azure.cosmos.implementation.apachecommons.lang.StringUtils;
4+
5+
/**
 * Central configuration for the recipe vector-search sample.
 *
 * <p>Every setting is resolved once, when this class is initialized, with the same precedence:
 * <ol>
 *   <li>the JVM system property with the setting's name (used verbatim when present);</li>
 *   <li>the environment variable with the same name, trimmed and ignored when blank;</li>
 *   <li>the built-in default.</li>
 * </ol>
 *
 * <p>The angle-bracket defaults (for example {@code <COSMOS_URI>}) are placeholder text and are
 * expected to be overridden through one of the first two sources.
 *
 * <p>The fields are intentionally left public and non-final so existing code that reads or
 * reassigns them keeps compiling.
 */
public class AppConfig {
    public static String allowedHosts = setting("ALLOWED_HOSTS", "*");
    public static String cosmosUri = setting("COSMOS_URI", "<COSMOS_URI>");
    public static String cosmosKey = setting("COSMOS_KEY", "<COSMOS_KEY>");
    public static String cosmosDatabase = setting("COSMOS_DATABASE", "<COSMOS_DATABASE>");
    public static String cosmosContainer = setting("COSMOS_CONTAINER", "<COSMOS_CONTAINER>");
    public static String recipeLocalFolder = setting("RECIPE_LOCAL_FOLDER", "<RECIPE_LOCAL_FOLDER>");
    public static String openAIEndpoint = setting("OPENAI_ENDPOINT", "<OPENAI_ENDPOINT>");
    public static String openAIKey = setting("OPENAI_KEY", "<OPENAI_KEY>");
    public static String openAIEmbeddingDeployment =
            setting("OPENAI_EMBEDDING_DEPLOYMENT", "<OPENAI_EMBEDDING_DEPLOYMENT>");
    public static String openAICompletionsDeployment =
            setting("OPENAI_COMPLETIONS_DEPLOYMENT", "<OPENAI_COMPLETIONS_DEPLOYMENT>");
    public static int openAIMaxToken = intSetting("OPENAI_MAX_TOKEN", "1000");

    /**
     * Resolves one setting: system property first, then non-blank environment variable, then
     * {@code fallback}.
     *
     * @param name     name shared by the system property and the environment variable
     * @param fallback value used when neither source supplies one
     * @return the resolved value
     */
    private static String setting(String name, String fallback) {
        String fromEnv = System.getenv(name);
        String trimmedEnv = fromEnv == null ? "" : fromEnv.trim();
        String envOrFallback = trimmedEnv.isEmpty() ? fallback : trimmedEnv;
        return System.getProperty(name, envOrFallback);
    }

    /**
     * Resolves a setting like {@link #setting(String, String)} and parses it as an integer.
     *
     * @param name     name shared by the system property and the environment variable
     * @param fallback textual default used when neither source supplies a value
     * @return the parsed value
     * @throws IllegalArgumentException if the resolved text is not a valid integer; the message
     *                                  names the setting so a misconfiguration is easy to locate
     */
    private static int intSetting(String name, String fallback) {
        String raw = setting(name, fallback);
        try {
            // Surrounding whitespace is tolerated so a padded system property still parses.
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                    name + " must be an integer but was '" + raw + "'", e);
        }
    }
}
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
package com.azure.recipe;
2+
3+
import com.azure.recipe.model.Recipe;
4+
import com.azure.recipe.service.CosmosDbService;
5+
import com.azure.recipe.service.OpenAIService;
6+
import com.fasterxml.jackson.core.JsonProcessingException;
7+
import lombok.extern.slf4j.Slf4j;
8+
9+
import java.io.IOException;
10+
import java.util.*;
11+
12+
@Slf4j
13+
public class Main {
14+
public static CosmosDbService cosmosDbService = null;
15+
public static OpenAIService openAIEmbeddingService = null;
16+
17+
public static void main(String[] args) throws IOException {
18+
19+
Scanner scanner = new Scanner(System.in);
20+
21+
cosmosDbService = initCosmosDbService();
22+
23+
while (true) {
24+
System.out.println("\n");
25+
System.out.println("1.\tUpload and vectorize the recipe(s) and store it in Cosmos DB");
26+
System.out.println("2.\tAsk AI Assistant (search for a recipe by name or description, or ask a question)");
27+
System.out.println("3.\tExit this Application");
28+
System.out.print("Please select an option: ");
29+
int selectedOption = Integer.parseInt(scanner.nextLine());
30+
switch (selectedOption) {
31+
case 1 -> uploadRecipes();
32+
//case 2 -> generateEmbeddings();
33+
case 2 -> performSearch(scanner);
34+
default -> {
35+
return;
36+
}
37+
}
38+
39+
}
40+
}
41+
42+
private static CosmosDbService initCosmosDbService() {
43+
CosmosDbService cosmosDbService = new CosmosDbService(AppConfig.cosmosUri,
44+
AppConfig.cosmosKey,
45+
AppConfig.cosmosDatabase,
46+
AppConfig.cosmosContainer
47+
);
48+
int recipeWithEmbedding = cosmosDbService.getRecipeCount(true);
49+
int recipeWithNoEmbedding = cosmosDbService.getRecipeCount(false);
50+
51+
System.out.println("\n");
52+
System.out.printf("We have %d vectorized recipe(s) and %d non vectorized recipe(s).",
53+
recipeWithEmbedding, recipeWithNoEmbedding);
54+
55+
return cosmosDbService;
56+
}
57+
58+
private static OpenAIService initOpenAIService() {
59+
return new OpenAIService(AppConfig.openAIEndpoint,
60+
AppConfig.openAIKey,
61+
AppConfig.openAIEmbeddingDeployment,
62+
AppConfig.openAICompletionsDeployment,
63+
AppConfig.openAIMaxToken);
64+
}
65+
66+
public static void uploadRecipes() throws JsonProcessingException {
67+
List<Recipe> recipes = Utility.parseDocuments(AppConfig.recipeLocalFolder);
68+
uploadAndVectorizeDocs(recipes);
69+
}
70+
71+
public static void performSearch(Scanner scanner) throws JsonProcessingException {
72+
73+
if (openAIEmbeddingService == null) {
74+
log.info("Connecting to Open AI Service..");
75+
openAIEmbeddingService = initOpenAIService();
76+
}
77+
78+
79+
System.out.println("Type the recipe name or your question, hit enter when ready.");
80+
String userQuery = scanner.nextLine();
81+
82+
log.info("Converting User Query to Vector..");
83+
var embeddingVector = openAIEmbeddingService.getEmbeddings(userQuery);
84+
85+
log.info("Performing Vector Search in Cosmos DB NoSQL API..");
86+
Iterable<Recipe> filteredRecipes = cosmosDbService.vectorSearch(embeddingVector);
87+
88+
for (Recipe recipe : filteredRecipes) {
89+
log.info(String.format("Query result: Recipe with (/id, partition key) = (%s,%s)",recipe.getId(),recipe.getId()));
90+
}
91+
92+
log.info("Retrieving recipe(s) from Cosmos DB (RAG pattern)..");
93+
var retrivedDocs = filteredRecipes;
94+
95+
StringBuilder retrivedReceipeNames = new StringBuilder();
96+
97+
for (Recipe recipe : retrivedDocs) {
98+
recipe.embedding = null; //removing embedding to reduce tokens during chat completion
99+
retrivedReceipeNames.append(", ").append(recipe.name); //to dispay recipes submitted for Completion
100+
}
101+
102+
log.info("Processing '{}' to generate Completion using OpenAI Service..", retrivedReceipeNames);
103+
104+
String completion =
105+
openAIEmbeddingService
106+
.getChatCompletionAsync(userQuery, Utility.OBJECT_MAPPER.writeValueAsString(retrivedDocs));
107+
108+
String chatCompletion = completion;
109+
110+
log.info("AI Assistant Response:", chatCompletion);
111+
System.out.println(chatCompletion);
112+
}
113+
114+
private static void uploadAndVectorizeDocs(List<Recipe> recipes) throws JsonProcessingException {
115+
Map<String, List<Double>> dictEmbeddings = new HashMap<>();
116+
int recipeWithEmbedding = 0;
117+
int recipeWithNoEmbedding = 0;
118+
int recipeCount = 0;
119+
120+
if (openAIEmbeddingService == null) {
121+
openAIEmbeddingService = initOpenAIService();
122+
}
123+
124+
log.info("Getting recipe(s) to vectorize..");
125+
for (Recipe recipe : recipes) {
126+
recipe.setId(recipe.getName().replace(" ", ""));
127+
recipeCount++;
128+
log.info("Vectorizing Recipe# {}..", recipeCount);
129+
var embeddingVector = openAIEmbeddingService.getEmbeddings(Utility.OBJECT_MAPPER.writeValueAsString(recipe));
130+
recipe.embedding = embeddingVector;
131+
dictEmbeddings.put(recipe.id, embeddingVector);
132+
}
133+
134+
log.info("Updating {} recipe(s) in Cosmos DB for vectors..", recipes.size());
135+
136+
cosmosDbService.uploadRecipes(recipes);
137+
138+
log.info("Getting Updated Recipe Stats");
139+
recipeWithEmbedding = cosmosDbService.getRecipeCount(true);
140+
recipeWithNoEmbedding = cosmosDbService.getRecipeCount(false);
141+
142+
log.info("Vectorized {} recipe(s).", recipeCount);
143+
System.out.println("\n");
144+
System.out.printf("We have %d vectorized recipe(s) and %d non vectorized recipe(s).",
145+
recipeWithEmbedding, recipeWithNoEmbedding);
146+
}
147+
148+
149+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package com.azure.recipe;
2+
3+
import com.azure.recipe.model.Recipe;
4+
import com.fasterxml.jackson.databind.ObjectMapper;
5+
6+
import java.io.File;
7+
import java.io.IOException;
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
11+
public class Utility {
12+
13+
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
14+
15+
public static List<Recipe> parseDocuments(String directoryPath) {
16+
List<Recipe> recipes = new ArrayList<>();
17+
File directory = new File(directoryPath);
18+
if (directory.isDirectory()) {
19+
File[] files = directory.listFiles();
20+
for (File file : files) {
21+
if (file.isFile() && file.getName().endsWith(".json")) {
22+
try {
23+
Recipe recipe = OBJECT_MAPPER.readValue(file, Recipe.class);
24+
recipes.add(recipe);
25+
} catch (IOException e) {
26+
e.printStackTrace();
27+
}
28+
}
29+
}
30+
}
31+
32+
return recipes;
33+
}
34+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package com.azure.recipe.model;
2+
3+
import lombok.Data;
4+
5+
import java.util.List;
6+
7+
@Data
8+
public class Recipe {
9+
public String id;
10+
public String name;
11+
public String description;
12+
public List<Double> embedding;
13+
public String cuisine;
14+
public String difficulty;
15+
public String prepTime;
16+
public String cookTime;
17+
public String totalTime;
18+
public int servings;
19+
public List<String> ingredients;
20+
public List<String> instructions;
21+
22+
}

0 commit comments

Comments
 (0)