Skip to content

Commit 4cb0d53

Browse files
rpanackalnewtorkJonas-Isr
authored
feat: [Orchestration] Embedding Convenience (#562)
* Request convenience * Finishing up all major items - client method for high level objects - Only support 'float' encoding format - Add javadoc - create convenience embedding response class - Add unit/integration tests - Add json payloads for req and res - Add e2e * Add release notes and clean up * Remove enum and remove redundant test * spec update +maxRetries and +timeout * model name correction * Adjust since versions * Fix minor naming issues * mini fixes --------- Co-authored-by: Alexander Dümont <[email protected]> Co-authored-by: Jonas Israel <[email protected]>
1 parent 8aed9af commit 4cb0d53

File tree

13 files changed

+615
-173
lines changed

13 files changed

+615
-173
lines changed

docs/release_notes.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,10 @@
4343

4444
### ✨ New Functionality
4545

46-
-
47-
46+
- [Orchestration] Added embedding generation support with new `OrchestrationClient#embed()` methods.
47+
- Added `OrchestrationEmbeddingModel` with `TEXT_EMBEDDING_3_SMALL`, `TEXT_EMBEDDING_3_LARGE`, `AMAZON_TITAN_EMBED_TEXT` and `NVIDIA_LLAMA_32_NV_EMBEDQA_1B` embedding models.
48+
- Introduced `OrchestrationEmbeddingRequest` for building requests fluently and `OrchestrationEmbeddingResponse#getEmbeddingVectors()` to retrieve embeddings.
49+
4850
### 📈 Improvements
4951

5052
-

orchestration/src/main/java/com/sap/ai/sdk/orchestration/OrchestrationClient.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,15 +226,34 @@ public Stream<OrchestrationChatCompletionDelta> streamChatCompletionDeltas(
226226
}
227227

228228
/**
229-
* Generate embeddings for the given request.
229+
* Generate embeddings for a {@code OrchestrationEmbeddingRequest} request.
230230
*
231231
* @param request the request containing the input text and other parameters.
232232
* @return the response containing the embeddings.
233233
* @throws OrchestrationClientException if the request fails
234-
* @since 1.9.0
234+
* @since 1.12.0
235235
*/
236236
@Nonnull
237-
EmbeddingsPostResponse embed(@Nonnull final EmbeddingsPostRequest request)
237+
public OrchestrationEmbeddingResponse embed(@Nonnull final OrchestrationEmbeddingRequest request)
238+
throws OrchestrationClientException {
239+
final var response = embed(request.createEmbeddingsPostRequest());
240+
return new OrchestrationEmbeddingResponse(response);
241+
}
242+
243+
/**
244+
* Generates embeddings using the low-level API request.
245+
*
246+
* <p>This method provides direct access to the underlying API for advanced use cases. For most
247+
* scenarios, prefer {@link #embed(OrchestrationEmbeddingRequest)}.
248+
*
249+
* @param request the low-level API request
250+
* @return the low level response object
251+
* @throws OrchestrationClientException if the request fails
252+
* @since 1.12.0
253+
* @see #embed(OrchestrationEmbeddingRequest)
254+
*/
255+
@Nonnull
256+
public EmbeddingsPostResponse embed(@Nonnull final EmbeddingsPostRequest request)
238257
throws OrchestrationClientException {
239258
return executor.execute("/v2/embeddings", request, EmbeddingsPostResponse.class, customHeaders);
240259
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package com.sap.ai.sdk.orchestration;
2+
3+
import com.google.common.annotations.Beta;
4+
import com.sap.ai.sdk.core.AiModel;
5+
import com.sap.ai.sdk.orchestration.model.EmbeddingsModelDetails;
6+
import com.sap.ai.sdk.orchestration.model.EmbeddingsModelParams;
7+
import com.sap.ai.sdk.orchestration.model.EmbeddingsModelParams.EncodingFormatEnum;
8+
import javax.annotation.Nonnull;
9+
import javax.annotation.Nullable;
10+
import lombok.AccessLevel;
11+
import lombok.AllArgsConstructor;
12+
import lombok.Value;
13+
import lombok.With;
14+
import lombok.experimental.Accessors;
15+
16+
/**
17+
* Configuration for embedding models in the Orchestration service.
18+
*
19+
* @since 1.12.0
20+
*/
21+
@Beta
22+
@With
23+
@Value
24+
@Accessors(fluent = true)
25+
@AllArgsConstructor(access = AccessLevel.PRIVATE)
26+
public class OrchestrationEmbeddingModel implements AiModel {
27+
/** The name of the embedding model. */
28+
@Nonnull String name;
29+
30+
/** The version of the model, defaults to latest if not specified. */
31+
@Nullable String version;
32+
33+
/** The number of dimensions for the output embeddings. */
34+
@Nullable Integer dimensions;
35+
36+
/** Whether to normalize the embedding vectors. */
37+
@Nullable Boolean normalize;
38+
39+
/** Azure OpenAI Text Embedding 3 Small model */
40+
public static final OrchestrationEmbeddingModel TEXT_EMBEDDING_3_SMALL =
41+
new OrchestrationEmbeddingModel("text-embedding-3-small");
42+
43+
/** Azure OpenAI Text Embedding 3 Large model */
44+
public static final OrchestrationEmbeddingModel TEXT_EMBEDDING_3_LARGE =
45+
new OrchestrationEmbeddingModel("text-embedding-3-large");
46+
47+
/** Amazon Titan Embed Text model */
48+
public static final OrchestrationEmbeddingModel AMAZON_TITAN_EMBED_TEXT =
49+
new OrchestrationEmbeddingModel("amazon--titan-embed-text");
50+
51+
/** NVIDIA LLaMA 3.2 7B NV EmbedQA model */
52+
public static final OrchestrationEmbeddingModel NVIDIA_LLAMA_32_NV_EMBEDQA_1B =
53+
new OrchestrationEmbeddingModel("nvidia--llama-3.2-nv-embedqa-1b");
54+
55+
/**
56+
* Creates a new embedding model configuration with the specified name.
57+
*
58+
* @param name the model name
59+
*/
60+
public OrchestrationEmbeddingModel(@Nonnull final String name) {
61+
this(name, null, null, null);
62+
}
63+
64+
@Nonnull
65+
EmbeddingsModelDetails createEmbeddingsModelDetails() {
66+
final var params =
67+
EmbeddingsModelParams.create()
68+
.dimensions(dimensions)
69+
.normalize(normalize)
70+
.encodingFormat(EncodingFormatEnum.FLOAT);
71+
return EmbeddingsModelDetails.create().name(name).version(version).params(params);
72+
}
73+
}
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
package com.sap.ai.sdk.orchestration;
2+
3+
import static lombok.AccessLevel.NONE;
4+
import static lombok.AccessLevel.PRIVATE;
5+
6+
import com.google.common.annotations.Beta;
7+
import com.google.common.collect.Lists;
8+
import com.sap.ai.sdk.orchestration.model.EmbeddingsInput;
9+
import com.sap.ai.sdk.orchestration.model.EmbeddingsInputText;
10+
import com.sap.ai.sdk.orchestration.model.EmbeddingsModelConfig;
11+
import com.sap.ai.sdk.orchestration.model.EmbeddingsModuleConfigs;
12+
import com.sap.ai.sdk.orchestration.model.EmbeddingsOrchestrationConfig;
13+
import com.sap.ai.sdk.orchestration.model.EmbeddingsPostRequest;
14+
import com.sap.ai.sdk.orchestration.model.MaskingModuleConfigProviders;
15+
import java.util.List;
16+
import javax.annotation.Nonnull;
17+
import javax.annotation.Nullable;
18+
import lombok.AllArgsConstructor;
19+
import lombok.Getter;
20+
import lombok.Value;
21+
import lombok.With;
22+
import lombok.experimental.Tolerate;
23+
24+
/**
25+
* Represents a request for generating embeddings through the SAP AI Core Orchestration service.
26+
*
27+
* @since 1.12.0
28+
*/
29+
@Beta
30+
@Value
31+
@AllArgsConstructor(access = PRIVATE)
32+
public class OrchestrationEmbeddingRequest {
33+
34+
/** The embedding model to use for generating vector representations. */
35+
@Nonnull OrchestrationEmbeddingModel model;
36+
37+
/** The list of text inputs to be converted into embeddings. */
38+
@Nonnull List<String> inputs;
39+
40+
/** Optional masking providers for data privacy and security. */
41+
@With(value = PRIVATE)
42+
@Nullable
43+
List<MaskingProvider> masking;
44+
45+
/** Optional embedding input type classification to optimize embedding generation. */
46+
@With(value = PRIVATE)
47+
@Getter(NONE)
48+
@Nullable
49+
EmbeddingsInput.TypeEnum inputType;
50+
51+
/**
52+
* Create an embedding request using fluent API starting with model selection.
53+
*
54+
* <pre>{@code
55+
* OrchestrationEmbeddingRequest.forModel(myModel).forInputs("text to embed");
56+
* }</pre>
57+
*
58+
* @param model the embedding model to use
59+
* @return a step for specifying inputs
60+
*/
61+
@Nonnull
62+
public static InputStep forModel(@Nonnull final OrchestrationEmbeddingModel model) {
63+
return inputs -> new OrchestrationEmbeddingRequest(model, List.copyOf(inputs), null, null);
64+
}
65+
66+
/** Builder step for specifying text inputs to embed. */
67+
@FunctionalInterface
68+
public interface InputStep {
69+
70+
/**
71+
* Specifies text inputs to be embedded.
72+
*
73+
* @param inputs the text strings to embed
74+
* @return a new embedding request instance
75+
*/
76+
@Nonnull
77+
OrchestrationEmbeddingRequest forInputs(@Nonnull final List<String> inputs);
78+
79+
/**
80+
* Specifies multiple text inputs using variable arguments.
81+
*
82+
* @param firstInput string to embed
83+
* @param inputs optional additional strings to embed
84+
* @return a new embedding request instance
85+
*/
86+
@Nonnull
87+
default OrchestrationEmbeddingRequest forInputs(
88+
@Nonnull final String firstInput, @Nonnull final String... inputs) {
89+
return forInputs(Lists.asList(firstInput, inputs));
90+
}
91+
}
92+
93+
/**
94+
* Adds data masking providers to enable detection and masking of sensitive information.
95+
*
96+
* @param maskingProvider the primary masking provider
97+
* @param maskingProviders additional masking providers
98+
* @return a new request instance with the specified masking providers
99+
* @see MaskingProvider
100+
*/
101+
@Tolerate
102+
@Nonnull
103+
public OrchestrationEmbeddingRequest withMasking(
104+
@Nonnull final MaskingProvider maskingProvider,
105+
@Nonnull final MaskingProvider... maskingProviders) {
106+
return withMasking(Lists.asList(maskingProvider, maskingProviders));
107+
}
108+
109+
/**
110+
* Configures this request to optimize embeddings for document content.
111+
*
112+
* @return a new request instance configured for document embedding
113+
*/
114+
@Nonnull
115+
public OrchestrationEmbeddingRequest asDocument() {
116+
return withInputType(EmbeddingsInput.TypeEnum.DOCUMENT);
117+
}
118+
119+
/**
120+
* Configures this request to optimize embeddings for general text content.
121+
*
122+
* @return a new request instance configured for text embedding
123+
*/
124+
@Nonnull
125+
public OrchestrationEmbeddingRequest asText() {
126+
return withInputType(EmbeddingsInput.TypeEnum.TEXT);
127+
}
128+
129+
/**
130+
* Configures this request to optimize embeddings for query content.
131+
*
132+
* @return a new request instance configured for query embedding
133+
*/
134+
@Nonnull
135+
public OrchestrationEmbeddingRequest asQuery() {
136+
return withInputType(EmbeddingsInput.TypeEnum.QUERY);
137+
}
138+
139+
@Nonnull
140+
EmbeddingsPostRequest createEmbeddingsPostRequest() {
141+
142+
final var input =
143+
EmbeddingsInput.create().text(EmbeddingsInputText.create(inputs)).type(inputType);
144+
final var embeddingsModelConfig =
145+
EmbeddingsModelConfig.create().model(model.createEmbeddingsModelDetails());
146+
final var modules =
147+
EmbeddingsOrchestrationConfig.create()
148+
.modules(EmbeddingsModuleConfigs.create().embeddings(embeddingsModelConfig));
149+
150+
if (masking != null) {
151+
final var dpiConfigs = masking.stream().map(MaskingProvider::createConfig).toList();
152+
modules.getModules().setMasking(MaskingModuleConfigProviders.create().providers(dpiConfigs));
153+
}
154+
return EmbeddingsPostRequest.create().config(modules).input(input);
155+
}
156+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package com.sap.ai.sdk.orchestration;
2+
3+
import static lombok.AccessLevel.PACKAGE;
4+
5+
import com.google.common.annotations.Beta;
6+
import com.sap.ai.sdk.orchestration.model.Embedding;
7+
import com.sap.ai.sdk.orchestration.model.EmbeddingsPostResponse;
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import javax.annotation.Nonnull;
11+
import lombok.AllArgsConstructor;
12+
import lombok.Value;
13+
14+
/**
15+
* Response wrapper for orchestration embedding operations.
16+
*
17+
* <p>Wraps {@link EmbeddingsPostResponse} and provides convenient access to embedding vectors.
18+
*
19+
* @since 1.12.0
20+
*/
21+
@Beta
22+
@Value
23+
@AllArgsConstructor(access = PACKAGE)
24+
public class OrchestrationEmbeddingResponse {
25+
26+
/** The original embedding response from the orchestration API. */
27+
@Nonnull EmbeddingsPostResponse originalResponse;
28+
29+
/**
30+
* Extracts embedding vectors as float arrays.
31+
*
32+
* @return list of embedding vectors, never {@code null}
33+
*/
34+
@Nonnull
35+
public List<float[]> getEmbeddingVectors() {
36+
final var embeddings = new ArrayList<float[]>();
37+
for (final var container : originalResponse.getFinalResult().getData()) {
38+
final var bigDecimals = (Embedding.InnerBigDecimals) container.getEmbedding();
39+
final var values = bigDecimals.values();
40+
final float[] arr = new float[values.size()];
41+
for (int i = 0; i < values.size(); i++) {
42+
arr[i] = values.get(i).floatValue();
43+
}
44+
embeddings.add(arr);
45+
}
46+
return embeddings;
47+
}
48+
}

0 commit comments

Comments
 (0)