11package com .sap .ai .sdk .orchestration ;
22
3- import static com .sap .ai .sdk .orchestration .model . EmbeddingsInput . TypeEnum .DOCUMENT ;
4- import static com .sap .ai .sdk .orchestration .model . EmbeddingsInput . TypeEnum .QUERY ;
5- import static com .sap .ai .sdk .orchestration .model . EmbeddingsInput . TypeEnum .TEXT ;
3+ import static com .sap .ai .sdk .orchestration .OrchestrationEmbeddingRequest . TokenType .DOCUMENT ;
4+ import static com .sap .ai .sdk .orchestration .OrchestrationEmbeddingRequest . TokenType .QUERY ;
5+ import static com .sap .ai .sdk .orchestration .OrchestrationEmbeddingRequest . TokenType .TEXT ;
66import static lombok .AccessLevel .PRIVATE ;
77
88import com .google .common .annotations .Beta ;
1818import javax .annotation .Nonnull ;
1919import javax .annotation .Nullable ;
2020import lombok .AllArgsConstructor ;
21+ import lombok .Getter ;
22+ import lombok .RequiredArgsConstructor ;
2123import lombok .Value ;
2224import lombok .With ;
2325import lombok .experimental .Tolerate ;
2426
25- // Do we need staged input builder here?
26- // Do we need an enum for tokenType?
27+ /**
28+ * Represents a request for generating embeddings through the SAP AI Core Orchestration service.
29+ *
30+ * @since 1.11.0
31+ */
2732@ Beta
2833@ Value
2934@ AllArgsConstructor (access = PRIVATE )
3035public class OrchestrationEmbeddingRequest {
3136
37+ /** The embedding model to use for generating vector representations. */
3238 @ Nonnull OrchestrationEmbeddingModel model ;
39+
40+ /** The list of text inputs to be converted into embeddings. */
3341 @ Nonnull List <String > tokens ;
3442
43+ /** Optional masking providers for data privacy and security. */
3544 @ With (value = PRIVATE )
3645 @ Nullable
3746 List <MaskingProvider > masking ;
3847
48+ /** Optional token type classification to optimize embedding generation. */
3949 @ With (value = PRIVATE )
4050 @ Nullable
41- EmbeddingsInput .TypeEnum tokenType ;
51+ TokenType tokenType ;
52+
53+ /**
54+ * Create an embedding request using fluent API starting with model selection.
55+ *
56+ * <pre>{@code
57+ * OrchestrationEmbeddingRequest.forModel(myModel).forInputs("text to embed");
58+ * }</pre>
59+ *
60+ * @param model the embedding model to use
61+ * @return a step for specifying inputs
62+ */
63+ @ Nonnull
64+ public static InputStep forModel (@ Nonnull final OrchestrationEmbeddingModel model ) {
65+ return tokens -> new OrchestrationEmbeddingRequest (model , List .copyOf (tokens ), null , null );
66+ }
4267
43- public static OrchestrationEmbeddingRequest create (
44- OrchestrationEmbeddingModel model , List <String > tokens ) {
45- return new OrchestrationEmbeddingRequest (model , tokens , null , null );
68+ /** Builder step for specifying text inputs to embed. */
69+ @ FunctionalInterface
70+ public interface InputStep {
71+
72+ /**
73+ * Specifies text inputs to be embedded.
74+ *
75+ * @param tokens the text strings to embed
76+ * @return a new embedding request instance
77+ */
78+ @ Nonnull
79+ OrchestrationEmbeddingRequest forInputs (@ Nonnull final List <String > tokens );
80+
81+ /**
82+ * Specifies multiple text inputs using variable arguments.
83+ *
84+ * @param tokens one or more strings to embed
85+ * @return a new embedding request instance
86+ */
87+ @ Nonnull
88+ default OrchestrationEmbeddingRequest forInputs (@ Nonnull final String ... tokens ) {
89+ return forInputs (List .of (tokens ));
90+ }
4691 }
4792
93+ /**
94+ * Adds data masking providers to enable detection and masking of sensitive information.
95+ *
96+ * @param maskingProvider the primary masking provider
97+ * @param maskingProviders additional masking providers
98+ * @return a new request instance with the specified masking providers
99+ * @see MaskingProvider
100+ */
48101 @ Tolerate
49102 @ Nonnull
50103 public OrchestrationEmbeddingRequest withMasking (
@@ -53,36 +106,71 @@ public OrchestrationEmbeddingRequest withMasking(
53106 return withMasking (Lists .asList (maskingProvider , maskingProviders ));
54107 }
55108
109+ /**
110+ * Configures this request to optimize embeddings for document content.
111+ *
112+ * @return a new request instance configured for document embedding
113+ */
56114 @ Nonnull
57115 public OrchestrationEmbeddingRequest asDocument () {
58116 return withTokenType (DOCUMENT );
59117 }
60118
119+ /**
120+ * Configures this request to optimize embeddings for general text content.
121+ *
122+ * @return a new request instance configured for text embedding
123+ */
61124 @ Nonnull
62125 public OrchestrationEmbeddingRequest asText () {
63126 return withTokenType (TEXT );
64127 }
65128
129+ /**
130+ * Configures this request to optimize embeddings for query content.
131+ *
132+ * @return a new request instance configured for query embedding
133+ */
66134 @ Nonnull
67135 public OrchestrationEmbeddingRequest asQuery () {
68136 return withTokenType (QUERY );
69137 }
70138
139+ @ Nonnull
71140 EmbeddingsPostRequest createEmbeddingsPostRequest () {
72141
73- final var input =
74- EmbeddingsInput .create ().text (EmbeddingsInputText .create (tokens )).type (tokenType );
142+ final var input = EmbeddingsInput .create ().text (EmbeddingsInputText .create (tokens ));
75143 final var embeddingsModelConfig =
76- EmbeddingsModelConfig .create ().model (this . model .createEmbeddingsModelDetails ());
144+ EmbeddingsModelConfig .create ().model (model .createEmbeddingsModelDetails ());
77145 final var modules =
78146 EmbeddingsOrchestrationConfig .create ()
79147 .modules (EmbeddingsModuleConfigs .create ().embeddings (embeddingsModelConfig ));
80148
149+ if (tokenType != null ) {
150+ input .setType (EmbeddingsInput .TypeEnum .fromValue (tokenType .getValue ()));
151+ }
81152 if (masking != null ) {
82- final var dpiConfigs = this . masking .stream ().map (MaskingProvider ::createConfig ).toList ();
153+ final var dpiConfigs = masking .stream ().map (MaskingProvider ::createConfig ).toList ();
83154 modules .getModules ().setMasking (MaskingModuleConfigProviders .create ().providers (dpiConfigs ));
84155 }
85-
86156 return EmbeddingsPostRequest .create ().config (modules ).input (input );
87157 }
158+
159+ /**
160+ * Token type classification for optimizing embedding generation.
161+ *
162+ * <p>Token types may influence how the embedding model processes and represents the input text.
163+ */
164+ @ Getter
165+ @ RequiredArgsConstructor
166+ public enum TokenType {
167+ /** For document content. */
168+ DOCUMENT ("document" ),
169+ /** For general text (default). */
170+ TEXT ("text" ),
171+ /** For search queries. */
172+ QUERY ("query" );
173+
174+ private final String value ;
175+ }
88176}
0 commit comments