Skip to content
Open
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions foundation-models/openai/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,22 @@
</scm>
<properties>
<project.rootdir>${project.basedir}/../../</project.rootdir>
<coverage.complexity>81%</coverage.complexity>
<coverage.line>91%</coverage.line>
<coverage.instruction>88%</coverage.instruction>
<coverage.branch>78%</coverage.branch>
<coverage.method>90%</coverage.method>
<coverage.class>92%</coverage.class>
<coverage.complexity>77%</coverage.complexity>
<coverage.line>87%</coverage.line>
<coverage.instruction>85%</coverage.instruction>
<coverage.branch>75%</coverage.branch>
<coverage.method>87%</coverage.method>
<coverage.class>91%</coverage.class>
</properties>
<dependencies>
<dependency>
<groupId>com.sap.cloud.sdk.cloudplatform</groupId>
<artifactId>cloudplatform-connectivity</artifactId>
</dependency>
<dependency>
<groupId>com.sap.cloud.sdk.cloudplatform</groupId>
<artifactId>cloudplatform-core</artifactId>
</dependency>
<dependency>
<groupId>com.sap.cloud.sdk.cloudplatform</groupId>
<artifactId>connectivity-apache-httpclient5</artifactId>
Expand Down Expand Up @@ -117,6 +121,10 @@
<artifactId>reactor-core</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>com.openai</groupId>
<artifactId>openai-java-core</artifactId>
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Major)

I would argue either new module, or set this dependency as optional

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why? we are going to deprecate the 2024 generated API

</dependency>
<!-- scope "provided" -->
<dependency>
<groupId>org.projectlombok</groupId>
Expand Down Expand Up @@ -202,13 +210,13 @@

<!-- new exclude paths -->
<excludePaths>/deployments/{deployment-id}/completions
/deployments/{deployment-id}/audio/transcriptions
/deployments/{deployment-id}/audio/translations
/deployments/{deployment-id}/images/generations</excludePaths>
/deployments/{deployment-id}/audio/transcriptions
/deployments/{deployment-id}/audio/translations
/deployments/{deployment-id}/images/generations</excludePaths>

<!-- new exclude properties -->
<excludeProperties>chatCompletionResponseMessage.context
createChatCompletionRequest.data_sources</excludeProperties>
createChatCompletionRequest.data_sources</excludeProperties>

<!-- new: remove unassigned components -->
<removeUnusedComponents>true</removeUnusedComponents>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
package com.sap.ai.sdk.foundationmodels.openai;

import com.openai.client.OpenAIClient;
import com.openai.client.OpenAIClientImpl;
import com.openai.core.ClientOptions;
import com.openai.core.RequestOptions;
import com.openai.core.http.Headers;
import com.openai.core.http.HttpClient;
import com.openai.core.http.HttpRequest;
import com.openai.core.http.HttpResponse;
import com.openai.errors.OpenAIIoException;
import com.sap.ai.sdk.core.AiCoreService;
import com.sap.ai.sdk.core.AiModel;
import com.sap.ai.sdk.core.DeploymentResolutionException;
import com.sap.cloud.sdk.cloudplatform.connectivity.ApacheHttpClient5Accessor;
import com.sap.cloud.sdk.cloudplatform.connectivity.HttpDestination;
import com.sap.cloud.sdk.cloudplatform.thread.ThreadContextExecutors;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import javax.annotation.Nonnull;
import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.hc.core5.http.ClassicHttpRequest;
import org.apache.hc.core5.http.ClassicHttpResponse;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.io.entity.ByteArrayEntity;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.message.BasicClassicHttpRequest;
import org.apache.hc.core5.net.URIBuilder;

/**
* Factory for creating OpenAI SDK clients configured for SAP AI Core deployments.
*
* <p>This class provides factory methods that return fully configured OpenAI SDK clients using SAP
* Cloud SDK's Apache HttpClient with automatic OAuth token refresh.
*/
@Slf4j
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public final class AiCoreOpenAiClient {

// Resource group used by the single-argument factory overload when the caller does not
// specify one explicitly.
private static final String DEFAULT_RESOURCE_GROUP = "default";

/**
* Create an OpenAI client for a deployment serving the specified model using the default resource
* group.
*
* @param model The AI model to target.
* @return A configured OpenAI client instance.
* @throws DeploymentResolutionException If no running deployment is found for the model.
*/
@Nonnull
public static OpenAIClient forModel(@Nonnull final AiModel model) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could modify the client to not be instantiated but instead be created at the request level and cached.

No strong preference but it is better API

Copy link
Copy Markdown
Member Author

@rpanackal rpanackal Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes. This is one of the ways along with few other, each with important caveats

  1. Sniffing request: As Charles mentioned we can parse the body in HttpRequest back to JsonNode or request type to infer the model to fetch deployment for - at request time.

    • As you can imagine, means this deserializing already serialized response.
    • You will only find model in create() calls. But not in retrieve(), delete() or any other operation. Then how should we fetch a deployment ? just any deployment under foundation-model scenario?
    • At request time, we can't reliably infer version out of values like "gpt-5-nano", "gpt-5.2", "o3-2025-04-16". AiCore expects distinct fields for model name and model version to match with a deployment. We will have to rework our deployment resolution logic.
  2. Wrapper API: We draft our own wrapper instead of directly returning an object of com.openai.client.OpenAIClient

    // Our wrapper client
    AiCoreBoundOpenAiClient client = AiCoreOpenAiClient.forModel(OpenAiModel.GPT_41);
    
    ResponseCreateParams params = ResponseCreateParams.builder()
        .input("Hello")
        // .model(...) is optional. We inject or validate for match 
        .build();
    
    Response response = client.responses().create(params);
    public interface AiCoreBoundOpenAiClient {
       AiCoreResponsesService responses();
       AiCoreChatCompletionsService chatCompletions();
       OpenAIClient raw(); // escape hatch
     }
    

    Basically, inject model into params, or validate existing model for match with the one in deployment within in our wrapper api.

    • Maintenance burden is much higher, but we will be able to active choose UX.

return forModel(model, DEFAULT_RESOURCE_GROUP);
}

/**
 * Create an OpenAI client for a deployment serving the specified model in the given resource
 * group.
 *
 * @param model The AI model to target.
 * @param resourceGroup The resource group containing the deployment.
 * @return A configured OpenAI client instance.
 * @throws DeploymentResolutionException If no running deployment is found for the model.
 */
@Nonnull
public static OpenAIClient forModel(
    @Nonnull final AiModel model, @Nonnull final String resourceGroup) {
  // Resolve a running deployment for the model and turn it into an inference destination.
  final var inferenceDestination =
      new AiCoreService().getInferenceDestination(resourceGroup).forModel(model);
  return fromDestination(inferenceDestination);
}

/**
* Create an OpenAI client from a pre-resolved destination.
*
* @param destination The destination to use.
* @return A configured OpenAI client instance.
*/
@Nonnull
@SuppressWarnings("PMD.CloseResource")
static OpenAIClient fromDestination(@Nonnull final HttpDestination destination) {
final var baseUrl = destination.getUri().toString();
final var httpClient = new AiCoreHttpClientImpl(destination);

final ClientOptions clientOptions =
ClientOptions.builder().baseUrl(baseUrl).httpClient(httpClient).apiKey("unused").build();
Comment on lines +101 to +102
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found a way to propagate the model information to the request body.
But it's super ugly :( and you would need to find a way to pass on model information.

(View code suggestion)
Suggested change
final ClientOptions clientOptions =
ClientOptions.builder().baseUrl(baseUrl).httpClient(httpClient).apiKey("unused").build();
final var m = new SimpleModule() {{
setSerializerModifier(new BeanSerializerModifier() {
@Override
@SuppressWarnings("unchecked")
public JsonSerializer<?> modifySerializer(SerializationConfig config, BeanDescription desc, JsonSerializer<?> serializer) {
if (!ResponseCreateParams.Body.class.isAssignableFrom(desc.getBeanClass()))
return serializer;
final var typed = (JsonSerializer<ResponseCreateParams.Body>) serializer;
return new StdSerializer<>(ResponseCreateParams.Body.class) {
@Override
public void serialize(ResponseCreateParams.Body value, JsonGenerator gen, SerializerProvider provider)
throws IOException {
final var buf = new TokenBuffer(gen.getCodec(), false);
typed.serialize(value, buf, provider);
final ObjectNode node = gen.getCodec().readTree(buf.asParser());
if (!node.has("model")) node.put("model", "gpt-5");
gen.writeTree(node);
}
};
}
});
}};
final ClientOptions clientOptions =
ClientOptions.builder().baseUrl(baseUrl).httpClient(httpClient).apiKey("unused")
.jsonMapper((JsonMapper) jsonMapper().registerModule(m))
.build();

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will try this out and get back to you.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to make it work with mixin, without success.


return new OpenAIClientImpl(clientOptions);
}

/**
 * Internal implementation of OpenAI SDK's HttpClient interface using Apache HttpClient from SAP
 * Cloud SDK.
 */
@Slf4j
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
static final class AiCoreHttpClientImpl implements HttpClient {
// Destination providing the base URL and authentication for every outgoing request.
private final HttpDestination destination;

// Media type that marks a server-sent-events (streaming) response.
private static final String SSE_MEDIA_TYPE = "text/event-stream";
// Regex patterns of the only request paths this client accepts; all others are rejected
// by validateAllowedEndpoint.
private static final Set<String> ALLOWED_PATHS =
Set.of(
"/chat/completions",
"/responses",
"/responses/[^/]+",
"/responses/[^/]+/input_items",
"/responses/[^/]+/cancel");

/**
 * Executes the request synchronously via the Cloud SDK's Apache HttpClient.
 *
 * <p>Streaming (SSE) requests keep the connection open and return a live stream; all other
 * requests (and non-2xx streaming responses) are fully buffered into memory.
 *
 * @param request the OpenAI SDK request to execute
 * @param requestOptions per-request options (unused here; timeouts come from the destination)
 * @return the HTTP response, streaming or buffered
 * @throws OpenAIIoException if the request fails with an I/O error
 * @throws UnsupportedOperationException if the request targets a non-allow-listed endpoint
 */
@Override
@Nonnull
public HttpResponse execute(
    @Nonnull final HttpRequest request, @Nonnull final RequestOptions requestOptions) {
  validateAllowedEndpoint(request);
  final var apacheClient = ApacheHttpClient5Accessor.getHttpClient(destination);
  final var apacheRequest = toApacheRequest(request);

  try {
    if (isStreaming(request)) {
      final var apacheResponse = apacheClient.executeOpen(null, apacheRequest, null);
      try {
        final int statusCode = apacheResponse.getCode();
        // Only successful streaming responses stay open; error responses are buffered
        // (and closed) so the caller gets a complete error payload.
        return statusCode >= 200 && statusCode < 300
            ? createStreamingResponse(apacheResponse)
            : createBufferedResponse(apacheResponse);
      } catch (final Exception e) {
        // Fix: release the connection if wrapping the response fails; previously the open
        // response leaked. Closeable.close() is idempotent, so a double close is safe.
        try {
          apacheResponse.close();
        } catch (final IOException closeFailure) {
          e.addSuppressed(closeFailure);
        }
        throw e;
      }
    }
    // Non-streaming: the response handler buffers and closes the response itself.
    return apacheClient.execute(apacheRequest, this::createBufferedResponse);
  } catch (final IOException e) {
    throw new OpenAIIoException("HTTP request execution failed", e);
  }
}

/**
 * Executes the request asynchronously on a thread-context-preserving executor, so tenant and
 * security context propagate to the worker thread.
 */
@Override
@Nonnull
public CompletableFuture<HttpResponse> executeAsync(
    @Nonnull final HttpRequest request, @Nonnull final RequestOptions requestOptions) {
  final var contextAwareExecutor = ThreadContextExecutors.getExecutor();
  return CompletableFuture.supplyAsync(() -> execute(request, requestOptions), contextAwareExecutor);
}

// Intentionally a no-op: this client does not own the underlying Apache HttpClient.
@Override
public void close() {
// Apache HttpClient lifecycle is managed by Cloud SDK's ApacheHttpClient5Cache
}

// Rejects any request whose path does not match one of the allow-listed endpoint patterns.
private static void validateAllowedEndpoint(@Nonnull final HttpRequest request) {
  final var path = "/" + String.join("/", request.pathSegments());
  final boolean allowed = ALLOWED_PATHS.stream().anyMatch(path::matches);
  if (!allowed) {
    throw new UnsupportedOperationException(
        String.format("Only requests to the following endpoints are allowed: %s.", ALLOWED_PATHS));
  }
}

/**
 * Translates an OpenAI SDK request into an Apache HC5 classic request: URL with query
 * parameters, headers (minus Authorization), and a fully buffered body entity.
 */
@Nonnull
private ClassicHttpRequest toApacheRequest(@Nonnull final HttpRequest request) {
  final var uri = buildUrlWithQueryParams(request);
  final var apacheRequest = new BasicClassicHttpRequest(request.method().name(), uri.toString());
  applyRequestHeaders(request, apacheRequest);

  try (var requestBody = request.body()) {
    if (requestBody == null) {
      // No payload (e.g. GET/DELETE) — nothing to attach.
      return apacheRequest;
    }
    try (var buffer = new ByteArrayOutputStream()) {
      requestBody.writeTo(buffer);
      // Fall back to application/json when the SDK supplies no explicit content type.
      final var contentType =
          Optional.ofNullable(requestBody.contentType())
              .map(ContentType::parse)
              .orElse(ContentType.APPLICATION_JSON);
      apacheRequest.setEntity(new ByteArrayEntity(buffer.toByteArray(), contentType));
      return apacheRequest;
    } catch (final IOException e) {
      throw new OpenAIIoException("Failed to read request body", e);
    }
  }
}

// Combines the request URL with all query parameters from the SDK request into a single URI.
private static URI buildUrlWithQueryParams(@Nonnull final HttpRequest request) {
  final var queryParams = request.queryParams();
  try {
    final var uriBuilder = new URIBuilder(request.url());
    for (final var key : queryParams.keys()) {
      // A key may carry multiple values; add each one as a separate parameter.
      queryParams.values(key).forEach(value -> uriBuilder.addParameter(key, value));
    }
    return uriBuilder.build();
  } catch (final URISyntaxException e) {
    throw new OpenAIIoException("Failed to build URI with query parameters", e);
  }
}

// Copies all request headers onto the Apache request, except Authorization: authentication
// is provided by the destination, so the SDK's own placeholder credential must not leak.
private static void applyRequestHeaders(
    @Nonnull final HttpRequest request, @Nonnull final BasicClassicHttpRequest apacheRequest) {
  final var headers = request.headers();
  for (final var name : headers.names()) {
    final boolean isAuthorization = "Authorization".equalsIgnoreCase(name);
    if (!isAuthorization) {
      headers.values(name).forEach(value -> apacheRequest.addHeader(name, value));
    }
  }
}

// A request is treated as streaming when any Accept header value asks for text/event-stream.
private static boolean isStreaming(@Nonnull final HttpRequest request) {
  for (final var acceptValue : request.headers().values("Accept")) {
    final var normalized = Objects.toString(acceptValue, "").toLowerCase(Locale.ROOT);
    if (normalized.contains(SSE_MEDIA_TYPE)) {
      return true;
    }
  }
  return false;
}

// Converts Apache response headers into the OpenAI SDK's Headers representation.
@Nonnull
private static Headers extractResponseHeaders(
    @Nonnull final ClassicHttpResponse apacheResponse) {
  final var headerBuilder = Headers.builder();
  for (final var apacheHeader : apacheResponse.getHeaders()) {
    headerBuilder.put(apacheHeader.getName(), apacheHeader.getValue());
  }
  return headerBuilder.build();
}

/**
 * Wraps a successful SSE response without consuming it: the entity's live stream is handed to
 * the caller, who must close the returned response to release the connection.
 */
@Nonnull
private HttpResponse createStreamingResponse(@Nonnull final ClassicHttpResponse apacheResponse)
    throws IOException {
  final var entity = apacheResponse.getEntity();
  // Map a missing entity to an empty stream so callers never see null.
  final InputStream content =
      entity == null ? InputStream.nullInputStream() : entity.getContent();
  return new StreamingHttpResponse(
      apacheResponse.getCode(), extractResponseHeaders(apacheResponse), content, apacheResponse);
}

/**
 * Fully consumes the response into memory and releases the connection immediately; the
 * returned response needs no closing.
 */
@Nonnull
private HttpResponse createBufferedResponse(@Nonnull final ClassicHttpResponse apacheResponse)
    throws IOException {
  try (apacheResponse) {
    final var entity = apacheResponse.getEntity();
    final byte[] bodyBytes = entity == null ? new byte[0] : EntityUtils.toByteArray(entity);
    return new BufferedHttpResponse(
        apacheResponse.getCode(), extractResponseHeaders(apacheResponse), bodyBytes);
  }
}

/**
 * HTTP response for streaming requests. Keeps the connection open and provides a live stream.
 * The stream must be closed by calling {@link #close()}.
 */
record StreamingHttpResponse(
    int statusCode,
    @Nonnull Headers headers,
    @Nonnull InputStream body,
    @Nonnull ClassicHttpResponse apacheResponse)
    implements HttpResponse {

  @Override
  public void close() {
    // Fix: close via try-with-resources so the response (and its connection) is released even
    // when body.close() throws; previously a failing body close leaked the connection.
    // Resources close in reverse order: body first, then apacheResponse.
    try (apacheResponse;
        body) {
      // nothing to do — the resource clauses perform the closing
    } catch (final IOException e) {
      log.warn("Failed to close streaming response", e);
      return;
    }
    log.debug("Closed streaming response connection");
  }
}

/**
 * HTTP response for buffered requests. The entire response body is loaded into memory. No
 * resources need to be closed.
 */
record BufferedHttpResponse(int statusCode, @Nonnull Headers headers, @Nonnull byte[] bodyBytes)
    implements HttpResponse {

  @Nonnull
  @Override
  public InputStream body() {
    // Each call yields a fresh, independent stream over the buffered bytes.
    return new ByteArrayInputStream(bodyBytes);
  }

  @Override
  public void close() {
    // No-op: the body is already consumed and buffered, there is no connection to release.
  }
}
}
}
Loading
Loading