Skip to content

Commit 27647c2

Browse files
committed
Adding VoyageAI integration
1 parent 47867cd commit 27647c2

File tree

16 files changed

+2512
-0
lines changed

16 files changed

+2512
-0
lines changed

PACKAGES.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ A BOM is provided that can be used to define the versions of all Semantic Kernel
3939
`semantickernel-aiservices-openai`
4040
: Provides a connector that can be used to interact with the OpenAI API.
4141

42+
`semantickernel-aiservices-voyageai`
43+
: Provides connectors for VoyageAI's embedding and reranking services, including text embeddings, contextualized embeddings, multimodal embeddings, and document reranking.
44+
4245
## Example Configurations
4346

4447
### Example: OpenAI + SQLite
@@ -72,5 +75,36 @@ POM XML for a simple project that uses OpenAI.
7275
</project>
7376
```
7477

78+
### Example: VoyageAI Embeddings and Reranking
79+
80+
POM XML for a project that uses VoyageAI for embeddings and reranking.
81+
82+
```xml
83+
84+
<project>
85+
<dependencyManagement>
86+
<dependencies>
87+
<dependency>
88+
<groupId>com.microsoft.semantic-kernel</groupId>
89+
<artifactId>semantickernel-bom</artifactId>
90+
<version>${semantickernel.version}</version>
91+
<scope>import</scope>
92+
<type>pom</type>
93+
</dependency>
94+
</dependencies>
95+
</dependencyManagement>
96+
<dependencies>
97+
<dependency>
98+
<groupId>com.microsoft.semantic-kernel</groupId>
99+
<artifactId>semantickernel-api</artifactId>
100+
</dependency>
101+
<dependency>
102+
<groupId>com.microsoft.semantic-kernel</groupId>
103+
<artifactId>semantickernel-aiservices-voyageai</artifactId>
104+
</dependency>
105+
</dependencies>
106+
</project>
107+
```
108+
75109

76110

aiservices/voyageai/pom.xml

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven module for the Semantic Kernel VoyageAI connector (artifact semantickernel-aiservices-voyageai); inherits from semantickernel-parent via ../../pom.xml. -->
3+
<modelVersion>4.0.0</modelVersion>
4+
<parent>
5+
<groupId>com.microsoft.semantic-kernel</groupId>
6+
<artifactId>semantickernel-parent</artifactId>
7+
<version>1.4.4-RC3-SNAPSHOT</version>
8+
<relativePath>../../pom.xml</relativePath>
9+
</parent>
10+
11+
<artifactId>semantickernel-aiservices-voyageai</artifactId>
12+
<name>Semantic Kernel VoyageAI Services</name>
13+
<description>VoyageAI services for Semantic Kernel</description>
14+
15+
<dependencies>
<!-- Semantic Kernel API modules. No versions are declared here; presumably they are supplied by the parent's dependencyManagement (verify against ../../pom.xml). -->
16+
<dependency>
17+
<groupId>com.microsoft.semantic-kernel</groupId>
18+
<artifactId>semantickernel-api</artifactId>
19+
</dependency>
20+
<dependency>
21+
<groupId>com.microsoft.semantic-kernel</groupId>
22+
<artifactId>semantickernel-api-builders</artifactId>
23+
</dependency>
24+
<dependency>
25+
<groupId>com.microsoft.semantic-kernel</groupId>
26+
<artifactId>semantickernel-api-ai-services</artifactId>
27+
</dependency>
28+
<dependency>
29+
<groupId>com.microsoft.semantic-kernel</groupId>
30+
<artifactId>semantickernel-api-textembedding-services</artifactId>
31+
</dependency>
32+
<dependency>
33+
<groupId>com.microsoft.semantic-kernel</groupId>
34+
<artifactId>semantickernel-api-exceptions</artifactId>
35+
</dependency>
36+
<dependency>
37+
<groupId>com.microsoft.semantic-kernel</groupId>
38+
<artifactId>semantickernel-api-localization</artifactId>
39+
</dependency>
40+
41+
<!-- JSON serialization (Jackson). "compile" is Maven's default scope, so the explicit scope elements below are redundant but harmless. -->
<dependency>
42+
<groupId>com.fasterxml.jackson.core</groupId>
43+
<artifactId>jackson-databind</artifactId>
44+
<scope>compile</scope>
45+
</dependency>
46+
<dependency>
47+
<groupId>com.fasterxml.jackson.core</groupId>
48+
<artifactId>jackson-core</artifactId>
49+
<scope>compile</scope>
50+
</dependency>
51+
<dependency>
52+
<groupId>com.fasterxml.jackson.core</groupId>
53+
<artifactId>jackson-annotations</artifactId>
54+
<scope>compile</scope>
55+
</dependency>
56+
57+
<!-- HTTP Client -->
<!-- NOTE(review): okhttp is the only dependency in this POM with an inline version; confirm whether it should instead be managed in the parent's dependencyManagement like the others. -->
58+
<dependency>
59+
<groupId>com.squareup.okhttp3</groupId>
60+
<artifactId>okhttp</artifactId>
61+
<version>4.12.0</version>
62+
</dependency>
63+
64+
<!-- Reactive Streams -->
65+
<dependency>
66+
<groupId>io.projectreactor</groupId>
67+
<artifactId>reactor-core</artifactId>
68+
</dependency>
69+
70+
<!-- Logging -->
71+
<dependency>
72+
<groupId>org.slf4j</groupId>
73+
<artifactId>slf4j-api</artifactId>
74+
</dependency>
75+
76+
<!-- Test Dependencies -->
77+
<dependency>
78+
<groupId>org.junit.jupiter</groupId>
79+
<artifactId>junit-jupiter</artifactId>
80+
<scope>test</scope>
81+
</dependency>
82+
<dependency>
83+
<groupId>org.mockito</groupId>
84+
<artifactId>mockito-core</artifactId>
85+
<scope>test</scope>
86+
</dependency>
87+
</dependencies>
88+
89+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
package com.microsoft.semantickernel.aiservices.voyageai.contextualizedembedding;
3+
4+
import com.microsoft.semantickernel.aiservices.voyageai.core.VoyageAIClient;
5+
import com.microsoft.semantickernel.aiservices.voyageai.core.VoyageAIModels;
6+
import com.microsoft.semantickernel.orchestration.PromptExecutionSettings;
7+
import com.microsoft.semantickernel.services.textembedding.Embedding;
8+
import com.microsoft.semantickernel.services.textembedding.TextEmbeddingGenerationService;
9+
import org.slf4j.Logger;
10+
import org.slf4j.LoggerFactory;
11+
import reactor.core.publisher.Mono;
12+
13+
import javax.annotation.Nullable;
14+
import java.util.ArrayList;
15+
import java.util.Arrays;
16+
import java.util.Collections;
17+
import java.util.Comparator;
18+
import java.util.List;
19+
import java.util.stream.Collectors;
20+
21+
/**
22+
* VoyageAI contextualized embedding generation service.
23+
* Generates embeddings that capture both local chunk details and global document-level metadata.
24+
* Supports models like voyage-3.
25+
*/
26+
public class VoyageAIContextualizedEmbeddingGenerationService implements TextEmbeddingGenerationService {
27+
28+
private static final Logger LOGGER = LoggerFactory.getLogger(VoyageAIContextualizedEmbeddingGenerationService.class);
29+
30+
private final VoyageAIClient client;
31+
private final String modelId;
32+
private final String serviceId;
33+
34+
/**
35+
* Creates a new instance of VoyageAI contextualized embedding generation service.
36+
*
37+
* @param client VoyageAI client
38+
* @param modelId Model ID (e.g., "voyage-3")
39+
* @param serviceId Optional service ID
40+
*/
41+
public VoyageAIContextualizedEmbeddingGenerationService(
42+
VoyageAIClient client,
43+
String modelId,
44+
@Nullable String serviceId) {
45+
46+
if (client == null) {
47+
throw new IllegalArgumentException("Client cannot be null");
48+
}
49+
if (modelId == null || modelId.trim().isEmpty()) {
50+
throw new IllegalArgumentException("Model ID cannot be null or empty");
51+
}
52+
53+
this.client = client;
54+
this.modelId = modelId;
55+
this.serviceId = serviceId != null ? serviceId : PromptExecutionSettings.DEFAULT_SERVICE_ID;
56+
}
57+
58+
@Override
59+
public String getServiceId() {
60+
return serviceId;
61+
}
62+
63+
@Override
64+
public String getModelId() {
65+
return modelId;
66+
}
67+
68+
/**
69+
* Generates contextualized embeddings for document chunks.
70+
*
71+
* @param inputs List of lists where each inner list contains document chunks
72+
* @return A Mono containing a list of embeddings for all chunks across all documents
73+
*/
74+
public Mono<List<Embedding>> generateContextualizedEmbeddingsAsync(List<List<String>> inputs) {
75+
if (inputs == null || inputs.isEmpty()) {
76+
return Mono.just(Collections.emptyList());
77+
}
78+
79+
LOGGER.debug("Generating contextualized embeddings for {} document groups using model {}",
80+
inputs.size(), modelId);
81+
82+
VoyageAIModels.ContextualizedEmbeddingRequest request =
83+
new VoyageAIModels.ContextualizedEmbeddingRequest();
84+
request.setInputs(inputs);
85+
request.setModel(modelId);
86+
87+
return client.sendRequestAsync(
88+
"contextualizedembeddings",
89+
request,
90+
VoyageAIModels.ContextualizedEmbeddingResponse.class)
91+
.map(response -> {
92+
List<Embedding> embeddings = new ArrayList<>();
93+
// Parse nested data structure: {"data":[{"data":[{"embedding":[...]}]}]}
94+
for (VoyageAIModels.ContextualizedEmbeddingDataList dataList : response.getData()) {
95+
for (VoyageAIModels.EmbeddingDataItem item : dataList.getData()) {
96+
embeddings.add(new Embedding(item.getEmbedding()));
97+
}
98+
}
99+
100+
LOGGER.debug("Received {} contextualized embeddings from VoyageAI", embeddings.size());
101+
return embeddings;
102+
});
103+
}
104+
105+
/**
106+
* Generates embeddings for the given text.
107+
* For standard text embedding, wraps the data as a single input.
108+
*
109+
* @param data The text to generate embeddings for
110+
* @return A Mono that completes with the embedding
111+
*/
112+
@Override
113+
public Mono<Embedding> generateEmbeddingAsync(String data) {
114+
return generateEmbeddingsAsync(Arrays.asList(data))
115+
.flatMap(embeddings -> {
116+
if (embeddings.isEmpty()) {
117+
return Mono.empty();
118+
}
119+
return Mono.just(embeddings.get(0));
120+
});
121+
}
122+
123+
/**
124+
* Generates embeddings for the given texts.
125+
* Each text is treated as a separate document for contextualized embeddings.
126+
*
127+
* @param data The texts to generate embeddings for
128+
* @return A Mono that completes with the list of embeddings
129+
*/
130+
@Override
131+
public Mono<List<Embedding>> generateEmbeddingsAsync(List<String> data) {
132+
if (data == null || data.isEmpty()) {
133+
return Mono.just(Collections.emptyList());
134+
}
135+
136+
// Convert each string to a single-element list for contextualized embeddings
137+
List<List<String>> inputs = new ArrayList<>();
138+
for (String text : data) {
139+
inputs.add(Arrays.asList(text));
140+
}
141+
142+
return generateContextualizedEmbeddingsAsync(inputs);
143+
}
144+
145+
/**
146+
* Creates a builder for VoyageAI contextualized embedding generation service.
147+
*
148+
* @return A new builder instance
149+
*/
150+
public static Builder builder() {
151+
return new Builder();
152+
}
153+
154+
/**
155+
* Builder for {@link VoyageAIContextualizedEmbeddingGenerationService}.
156+
*/
157+
public static class Builder {
158+
private VoyageAIClient client;
159+
private String modelId;
160+
private String serviceId;
161+
162+
/**
163+
* Sets the VoyageAI client.
164+
*
165+
* @param client VoyageAI client
166+
* @return This builder
167+
*/
168+
public Builder withClient(VoyageAIClient client) {
169+
this.client = client;
170+
return this;
171+
}
172+
173+
/**
174+
* Sets the model ID.
175+
*
176+
* @param modelId Model ID (e.g., "voyage-3")
177+
* @return This builder
178+
*/
179+
public Builder withModelId(String modelId) {
180+
this.modelId = modelId;
181+
return this;
182+
}
183+
184+
/**
185+
* Sets the service ID.
186+
*
187+
* @param serviceId Service ID
188+
* @return This builder
189+
*/
190+
public Builder withServiceId(String serviceId) {
191+
this.serviceId = serviceId;
192+
return this;
193+
}
194+
195+
/**
196+
* Builds the VoyageAI contextualized embedding generation service.
197+
*
198+
* @return A new instance of VoyageAIContextualizedEmbeddingGenerationService
199+
*/
200+
public VoyageAIContextualizedEmbeddingGenerationService build() {
201+
return new VoyageAIContextualizedEmbeddingGenerationService(client, modelId, serviceId);
202+
}
203+
}
204+
}

0 commit comments

Comments
 (0)