|
| 1 | +// Copyright (c) Microsoft. All rights reserved. |
| 2 | +package com.microsoft.semantickernel.aiservices.voyageai.contextualizedembedding; |
| 3 | + |
| 4 | +import com.microsoft.semantickernel.aiservices.voyageai.core.VoyageAIClient; |
| 5 | +import com.microsoft.semantickernel.aiservices.voyageai.core.VoyageAIModels; |
| 6 | +import com.microsoft.semantickernel.orchestration.PromptExecutionSettings; |
| 7 | +import com.microsoft.semantickernel.services.textembedding.Embedding; |
| 8 | +import com.microsoft.semantickernel.services.textembedding.TextEmbeddingGenerationService; |
| 9 | +import org.slf4j.Logger; |
| 10 | +import org.slf4j.LoggerFactory; |
| 11 | +import reactor.core.publisher.Mono; |
| 12 | + |
| 13 | +import javax.annotation.Nullable; |
| 14 | +import java.util.ArrayList; |
| 15 | +import java.util.Arrays; |
| 16 | +import java.util.Collections; |
| 17 | +import java.util.Comparator; |
| 18 | +import java.util.List; |
| 19 | +import java.util.stream.Collectors; |
| 20 | + |
| 21 | +/** |
| 22 | + * VoyageAI contextualized embedding generation service. |
| 23 | + * Generates embeddings that capture both local chunk details and global document-level metadata. |
| 24 | + * Supports models like voyage-3. |
| 25 | + */ |
| 26 | +public class VoyageAIContextualizedEmbeddingGenerationService implements TextEmbeddingGenerationService { |
| 27 | + |
| 28 | + private static final Logger LOGGER = LoggerFactory.getLogger(VoyageAIContextualizedEmbeddingGenerationService.class); |
| 29 | + |
| 30 | + private final VoyageAIClient client; |
| 31 | + private final String modelId; |
| 32 | + private final String serviceId; |
| 33 | + |
| 34 | + /** |
| 35 | + * Creates a new instance of VoyageAI contextualized embedding generation service. |
| 36 | + * |
| 37 | + * @param client VoyageAI client |
| 38 | + * @param modelId Model ID (e.g., "voyage-3") |
| 39 | + * @param serviceId Optional service ID |
| 40 | + */ |
| 41 | + public VoyageAIContextualizedEmbeddingGenerationService( |
| 42 | + VoyageAIClient client, |
| 43 | + String modelId, |
| 44 | + @Nullable String serviceId) { |
| 45 | + |
| 46 | + if (client == null) { |
| 47 | + throw new IllegalArgumentException("Client cannot be null"); |
| 48 | + } |
| 49 | + if (modelId == null || modelId.trim().isEmpty()) { |
| 50 | + throw new IllegalArgumentException("Model ID cannot be null or empty"); |
| 51 | + } |
| 52 | + |
| 53 | + this.client = client; |
| 54 | + this.modelId = modelId; |
| 55 | + this.serviceId = serviceId != null ? serviceId : PromptExecutionSettings.DEFAULT_SERVICE_ID; |
| 56 | + } |
| 57 | + |
| 58 | + @Override |
| 59 | + public String getServiceId() { |
| 60 | + return serviceId; |
| 61 | + } |
| 62 | + |
| 63 | + @Override |
| 64 | + public String getModelId() { |
| 65 | + return modelId; |
| 66 | + } |
| 67 | + |
| 68 | + /** |
| 69 | + * Generates contextualized embeddings for document chunks. |
| 70 | + * |
| 71 | + * @param inputs List of lists where each inner list contains document chunks |
| 72 | + * @return A Mono containing a list of embeddings for all chunks across all documents |
| 73 | + */ |
| 74 | + public Mono<List<Embedding>> generateContextualizedEmbeddingsAsync(List<List<String>> inputs) { |
| 75 | + if (inputs == null || inputs.isEmpty()) { |
| 76 | + return Mono.just(Collections.emptyList()); |
| 77 | + } |
| 78 | + |
| 79 | + LOGGER.debug("Generating contextualized embeddings for {} document groups using model {}", |
| 80 | + inputs.size(), modelId); |
| 81 | + |
| 82 | + VoyageAIModels.ContextualizedEmbeddingRequest request = |
| 83 | + new VoyageAIModels.ContextualizedEmbeddingRequest(); |
| 84 | + request.setInputs(inputs); |
| 85 | + request.setModel(modelId); |
| 86 | + |
| 87 | + return client.sendRequestAsync( |
| 88 | + "contextualizedembeddings", |
| 89 | + request, |
| 90 | + VoyageAIModels.ContextualizedEmbeddingResponse.class) |
| 91 | + .map(response -> { |
| 92 | + List<Embedding> embeddings = new ArrayList<>(); |
| 93 | + // Parse nested data structure: {"data":[{"data":[{"embedding":[...]}]}]} |
| 94 | + for (VoyageAIModels.ContextualizedEmbeddingDataList dataList : response.getData()) { |
| 95 | + for (VoyageAIModels.EmbeddingDataItem item : dataList.getData()) { |
| 96 | + embeddings.add(new Embedding(item.getEmbedding())); |
| 97 | + } |
| 98 | + } |
| 99 | + |
| 100 | + LOGGER.debug("Received {} contextualized embeddings from VoyageAI", embeddings.size()); |
| 101 | + return embeddings; |
| 102 | + }); |
| 103 | + } |
| 104 | + |
| 105 | + /** |
| 106 | + * Generates embeddings for the given text. |
| 107 | + * For standard text embedding, wraps the data as a single input. |
| 108 | + * |
| 109 | + * @param data The text to generate embeddings for |
| 110 | + * @return A Mono that completes with the embedding |
| 111 | + */ |
| 112 | + @Override |
| 113 | + public Mono<Embedding> generateEmbeddingAsync(String data) { |
| 114 | + return generateEmbeddingsAsync(Arrays.asList(data)) |
| 115 | + .flatMap(embeddings -> { |
| 116 | + if (embeddings.isEmpty()) { |
| 117 | + return Mono.empty(); |
| 118 | + } |
| 119 | + return Mono.just(embeddings.get(0)); |
| 120 | + }); |
| 121 | + } |
| 122 | + |
| 123 | + /** |
| 124 | + * Generates embeddings for the given texts. |
| 125 | + * Each text is treated as a separate document for contextualized embeddings. |
| 126 | + * |
| 127 | + * @param data The texts to generate embeddings for |
| 128 | + * @return A Mono that completes with the list of embeddings |
| 129 | + */ |
| 130 | + @Override |
| 131 | + public Mono<List<Embedding>> generateEmbeddingsAsync(List<String> data) { |
| 132 | + if (data == null || data.isEmpty()) { |
| 133 | + return Mono.just(Collections.emptyList()); |
| 134 | + } |
| 135 | + |
| 136 | + // Convert each string to a single-element list for contextualized embeddings |
| 137 | + List<List<String>> inputs = new ArrayList<>(); |
| 138 | + for (String text : data) { |
| 139 | + inputs.add(Arrays.asList(text)); |
| 140 | + } |
| 141 | + |
| 142 | + return generateContextualizedEmbeddingsAsync(inputs); |
| 143 | + } |
| 144 | + |
| 145 | + /** |
| 146 | + * Creates a builder for VoyageAI contextualized embedding generation service. |
| 147 | + * |
| 148 | + * @return A new builder instance |
| 149 | + */ |
| 150 | + public static Builder builder() { |
| 151 | + return new Builder(); |
| 152 | + } |
| 153 | + |
| 154 | + /** |
| 155 | + * Builder for {@link VoyageAIContextualizedEmbeddingGenerationService}. |
| 156 | + */ |
| 157 | + public static class Builder { |
| 158 | + private VoyageAIClient client; |
| 159 | + private String modelId; |
| 160 | + private String serviceId; |
| 161 | + |
| 162 | + /** |
| 163 | + * Sets the VoyageAI client. |
| 164 | + * |
| 165 | + * @param client VoyageAI client |
| 166 | + * @return This builder |
| 167 | + */ |
| 168 | + public Builder withClient(VoyageAIClient client) { |
| 169 | + this.client = client; |
| 170 | + return this; |
| 171 | + } |
| 172 | + |
| 173 | + /** |
| 174 | + * Sets the model ID. |
| 175 | + * |
| 176 | + * @param modelId Model ID (e.g., "voyage-3") |
| 177 | + * @return This builder |
| 178 | + */ |
| 179 | + public Builder withModelId(String modelId) { |
| 180 | + this.modelId = modelId; |
| 181 | + return this; |
| 182 | + } |
| 183 | + |
| 184 | + /** |
| 185 | + * Sets the service ID. |
| 186 | + * |
| 187 | + * @param serviceId Service ID |
| 188 | + * @return This builder |
| 189 | + */ |
| 190 | + public Builder withServiceId(String serviceId) { |
| 191 | + this.serviceId = serviceId; |
| 192 | + return this; |
| 193 | + } |
| 194 | + |
| 195 | + /** |
| 196 | + * Builds the VoyageAI contextualized embedding generation service. |
| 197 | + * |
| 198 | + * @return A new instance of VoyageAIContextualizedEmbeddingGenerationService |
| 199 | + */ |
| 200 | + public VoyageAIContextualizedEmbeddingGenerationService build() { |
| 201 | + return new VoyageAIContextualizedEmbeddingGenerationService(client, modelId, serviceId); |
| 202 | + } |
| 203 | + } |
| 204 | +} |
0 commit comments