Merge branch 'main' into mask-grounding

Jonas-Isr · web-flow · commit 365e88f65d73 · 2025-03-04T16:38:58.000+01:00
diff --git a/docs/guides/ORCHESTRATION_CHAT_COMPLETION.md b/docs/guides/ORCHESTRATION_CHAT_COMPLETION.md
@@ -109,7 +109,8 @@ Use a prepared template and execute requests with by passing only the input para
 
 ```java
 var template = Message.user("Reply with 'Orchestration Service is working!' in {{?language}}");
-var templatingConfig = TemplatingModuleConfig.create().template(template);
+var templatingConfig =
+        TemplateConfig.create().withTemplate(List.of(template.createChatMessage()));
 var configWithTemplate = config.withTemplateConfig(templatingConfig);
 
 var inputParams = Map.of("language", "German");
diff --git a/docs/guides/SPRING_AI_INTEGRATION.md b/docs/guides/SPRING_AI_INTEGRATION.md
@@ -7,6 +7,7 @@
 - [Orchestration Masking](#orchestration-masking)
 - [Stream chat completion](#stream-chat-completion)
 - [Tool Calling](#tool-calling)
+- [Chat Memory](#chat-memory)
 
 ## Introduction
 
@@ -137,3 +138,26 @@ ChatResponse response = client.call(prompt);
 
 Please find [an example in our Spring Boot application](../../sample-code/spring-app/src/main/java/com/sap/ai/sdk/app/services/SpringAiOrchestrationService.java).
 
+## Chat Memory
+
+Create a Spring AI `ChatClient` from our `OrchestrationChatModel` and add a chat memory advisor like so:
+
+```java
+ChatModel client = new OrchestrationChatModel();
+OrchestrationModuleConfig config = new OrchestrationModuleConfig().withLlmConfig(GPT_35_TURBO);
+OrchestrationChatOptions opts = new OrchestrationChatOptions(config);
+
+var memory = new InMemoryChatMemory();
+var advisor = new MessageChatMemoryAdvisor(memory);
+var cl = ChatClient.builder(client).defaultAdvisors(advisor).build();
+
+Prompt prompt1 = new Prompt("What is the capital of France?", opts);
+String content1 = cl.prompt(prompt1).call().content();
+// content1 is "Paris"
+
+Prompt prompt2 = new Prompt("And what is the typical food there?", opts);
+String content2 = cl.prompt(prompt2).call().content();
+// chat memory will remember that the user is inquiring about France.
+```
+
+Please find [an example in our Spring Boot application](../../sample-code/spring-app/src/main/java/com/sap/ai/sdk/app/services/SpringAiOrchestrationService.java).
diff --git a/docs/release-notes/release_notes.md b/docs/release-notes/release_notes.md
@@ -12,6 +12,7 @@
 
 ### ✨ New Functionality
 
+- [Orchestration] [Add Spring AI Chat Memory support](https://github.com/SAP/ai-sdk-java/tree/main/docs/guides/SPRING_AI_INTEGRATION.md#chat-memory)
 - [Orchestration] [Prompt templates can be consumed from registry.](https://github.com/SAP/ai-sdk-java/tree/main/docs/guides/ORCHESTRATION_CHAT_COMPLETION.md#Chat-completion-with-Templates)
 - [Orchestration] [Masking is now available on grounding.](https://github.com/SAP/ai-sdk-java/tree/main/docs/guides/ORCHESTRATION_CHAT_COMPLETION.md#mask-grounding)
 
diff --git a/orchestration/src/main/java/com/sap/ai/sdk/orchestration/spring/OrchestrationChatModel.java b/orchestration/src/main/java/com/sap/ai/sdk/orchestration/spring/OrchestrationChatModel.java
@@ -129,7 +129,7 @@ private static com.sap.ai.sdk.orchestration.Message[] toOrchestrationMessages(
               case ASSISTANT:
                 val springToolCalls =
                     ((org.springframework.ai.chat.messages.AssistantMessage) msg).getToolCalls();
-                if (springToolCalls != null) {
+                if (springToolCalls != null && !springToolCalls.isEmpty()) {
                   final List<ResponseMessageToolCall> sdkToolCalls =
                       springToolCalls.stream()
                           .map(OrchestrationChatModel::toOrchestrationToolCall)
diff --git a/orchestration/src/test/java/com/sap/ai/sdk/orchestration/spring/OrchestrationChatModelTest.java b/orchestration/src/test/java/com/sap/ai/sdk/orchestration/spring/OrchestrationChatModelTest.java
@@ -39,6 +39,9 @@
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.mockito.Mockito;
+import org.springframework.ai.chat.client.ChatClient;
+import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor;
+import org.springframework.ai.chat.memory.InMemoryChatMemory;
 import org.springframework.ai.chat.messages.AssistantMessage.ToolCall;
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.prompt.Prompt;
@@ -213,4 +216,41 @@ void testToolCallsWithExecution() throws IOException {
       }
     }
   }
+
+  @Test
+  void testChatMemory() throws IOException {
+    stubFor(
+        post(urlPathEqualTo("/completion"))
+            .inScenario("Chat Memory")
+            .whenScenarioStateIs(STARTED)
+            .willReturn(
+                aResponse()
+                    .withBodyFile("templatingResponse.json") // The response is not important
+                    .withHeader("Content-Type", "application/json"))
+            .willSetStateTo("Second Call"));
+
+    stubFor(
+        post(urlPathEqualTo("/completion"))
+            .inScenario("Chat Memory")
+            .whenScenarioStateIs("Second Call")
+            .willReturn(
+                aResponse()
+                    .withBodyFile("templatingResponse.json") // The response is not important
+                    .withHeader("Content-Type", "application/json")));
+
+    val memory = new InMemoryChatMemory();
+    val advisor = new MessageChatMemoryAdvisor(memory);
+    val cl = ChatClient.builder(client).defaultAdvisors(advisor).build();
+    val prompt1 = new Prompt("What is the capital of France?", defaultOptions);
+    val prompt2 = new Prompt("And what is the typical food there?", defaultOptions);
+
+    cl.prompt(prompt1).call().content();
+    cl.prompt(prompt2).call().content();
+    // The response content itself is not asserted.
+    // Verify that a request replayed the first turn (user + assistant) before the second prompt.
+    try (var requestInputStream = fileLoader.apply("chatMemory.json")) {
+      final String request = new String(requestInputStream.readAllBytes());
+      verify(postRequestedFor(anyUrl()).withRequestBody(equalToJson(request)));
+    }
+  }
 }
diff --git a/orchestration/src/test/resources/chatMemory.json b/orchestration/src/test/resources/chatMemory.json
@@ -0,0 +1,32 @@
+{
+  "orchestration_config": {
+    "module_configurations": {
+      "llm_module_config": {
+        "model_name" : "gpt-4o",
+        "model_params": {},
+        "model_version": "latest"
+      },
+      "templating_module_config": {
+        "template": [
+          {
+            "role": "user",
+            "content": "What is the capital of France?"
+          },
+          {
+            "role": "assistant",
+            "content" : "Le service d'orchestration fonctionne!"
+          },
+          {
+            "role": "user",
+            "content": "And what is the typical food there?"
+          }
+        ],
+        "defaults": {},
+        "tools": []
+      }
+    },
+    "stream": false
+  },
+  "input_params": {},
+  "messages_history": []
+}
diff --git a/sample-code/spring-app/src/main/java/com/sap/ai/sdk/app/controllers/SpringAiOrchestrationController.java b/sample-code/spring-app/src/main/java/com/sap/ai/sdk/app/controllers/SpringAiOrchestrationController.java
@@ -79,4 +79,17 @@ Object toolCalling(
     final String text = message.getText();
     return text.isEmpty() ? message.getToolCalls().toString() : text;
   }
+
+  @GetMapping("/chatMemory")
+  Object chatMemory(
+      @Nullable @RequestParam(value = "format", required = false) final String format) {
+    val chatResponse = service.chatMemory();
+    if (!"json".equals(format)) {
+      return chatResponse.getResult().getOutput().getText();
+    }
+    // JSON requested: hand back the original orchestration response instead of the text.
+    return ((OrchestrationSpringChatResponse) chatResponse)
+        .getOrchestrationResponse()
+        .getOriginalResponse();
+  }
 }
diff --git a/sample-code/spring-app/src/main/java/com/sap/ai/sdk/app/services/OrchestrationService.java b/sample-code/spring-app/src/main/java/com/sap/ai/sdk/app/services/OrchestrationService.java
@@ -25,7 +25,6 @@
 import com.sap.ai.sdk.orchestration.model.DocumentGroundingFilter;
 import com.sap.ai.sdk.orchestration.model.GroundingFilterSearchConfiguration;
 import com.sap.ai.sdk.orchestration.model.LlamaGuard38b;
-import com.sap.ai.sdk.orchestration.model.ResponseFormatJsonObject;
 import com.sap.ai.sdk.orchestration.model.ResponseFormatText;
 import com.sap.ai.sdk.orchestration.model.SearchDocumentKeyValueListPair;
 import com.sap.ai.sdk.orchestration.model.SearchSelectOptionEnum;
@@ -112,7 +111,8 @@ public Stream<String> streamChatCompletion(@Nonnull final String topic) {
   @Nonnull
   public OrchestrationChatResponse template(@Nonnull final String language) {
     val template = Message.user("Reply with 'Orchestration Service is working!' in {{?language}}");
-    val templatingConfig = Template.create().template(List.of(template.createChatMessage()));
+    val templatingConfig =
+        TemplateConfig.create().withTemplate(List.of(template.createChatMessage()));
     val configWithTemplate = config.withTemplateConfig(templatingConfig);
 
     val inputParams = Map.of("language", language);
@@ -357,8 +357,6 @@ public OrchestrationChatResponse grounding(@Nonnull final String userMessage) {
    */
   @Nonnull
   public OrchestrationChatResponse responseFormatJsonSchema(@Nonnull final String word) {
-    val config = new OrchestrationModuleConfig().withLlmConfig(GPT_4O_MINI);
-
     //    Example class
     class Translation {
       @JsonProperty(required = true)
@@ -394,17 +392,12 @@ class Translation {
    */
   @Nonnull
   public OrchestrationChatResponse responseFormatJsonObject(@Nonnull final String word) {
-    final var llmWithImageSupportConfig =
-        new OrchestrationModuleConfig().withLlmConfig(GPT_4O_MINI);
-
     val template = Message.user("What is '%s' in German?".formatted(word));
     val templatingConfig =
-        Template.create()
-            .template(List.of(template.createChatMessage()))
-            .responseFormat(
-                ResponseFormatJsonObject.create()
-                    .type(ResponseFormatJsonObject.TypeEnum.JSON_OBJECT));
-    val configWithTemplate = llmWithImageSupportConfig.withTemplateConfig(templatingConfig);
+        TemplateConfig.create()
+            .withTemplate(List.of(template.createChatMessage()))
+            .withJsonResponse();
+    val configWithTemplate = config.withTemplateConfig(templatingConfig);
 
     val prompt =
         new OrchestrationPrompt(
@@ -425,15 +418,12 @@ public OrchestrationChatResponse responseFormatJsonObject(@Nonnull final String
    */
   @Nonnull
   public OrchestrationChatResponse responseFormatText(@Nonnull final String word) {
-    final var llmWithImageSupportConfig =
-        new OrchestrationModuleConfig().withLlmConfig(GPT_4O_MINI);
-
     val template = Message.user("Whats '%s' in German?".formatted(word));
     val templatingConfig =
         Template.create()
             .template(List.of(template.createChatMessage()))
             .responseFormat(ResponseFormatText.create().type(ResponseFormatText.TypeEnum.TEXT));
-    val configWithTemplate = llmWithImageSupportConfig.withTemplateConfig(templatingConfig);
+    val configWithTemplate = config.withTemplateConfig(templatingConfig);
 
     val prompt =
         new OrchestrationPrompt(
diff --git a/sample-code/spring-app/src/main/java/com/sap/ai/sdk/app/services/SpringAiOrchestrationService.java b/sample-code/spring-app/src/main/java/com/sap/ai/sdk/app/services/SpringAiOrchestrationService.java
@@ -9,8 +9,12 @@
 import com.sap.ai.sdk.orchestration.spring.OrchestrationChatOptions;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import javax.annotation.Nonnull;
 import lombok.val;
+import org.springframework.ai.chat.client.ChatClient;
+import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor;
+import org.springframework.ai.chat.memory.InMemoryChatMemory;
 import org.springframework.ai.chat.model.ChatModel;
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.prompt.Prompt;
@@ -106,4 +110,22 @@ public ChatResponse toolCalling(final boolean internalToolExecutionEnabled) {
     val prompt = new Prompt("What is the weather in Potsdam and in Toulouse?", options);
     return client.call(prompt);
   }
+
+  /**
+   * Runs a two-turn chat through a {@link ChatClient} backed by in-memory chat memory.
+   *
+   * @return the chat response to the second, follow-up prompt
+   */
+  @Nonnull
+  public ChatResponse chatMemory() {
+    val chatClient =
+        ChatClient.builder(client)
+            .defaultAdvisors(new MessageChatMemoryAdvisor(new InMemoryChatMemory()))
+            .build();
+    val capitalQuestion = new Prompt("What is the capital of France?", defaultOptions);
+    val foodQuestion = new Prompt("And what is the typical food there?", defaultOptions);
+    chatClient.prompt(capitalQuestion).call().content(); // first turn fills the memory
+    val followUp = chatClient.prompt(foodQuestion).call().chatResponse();
+    return Objects.requireNonNull(followUp, "Chat response is null");
+  }
 }
diff --git a/sample-code/spring-app/src/main/resources/static/index.html b/sample-code/spring-app/src/main/resources/static/index.html
@@ -596,6 +596,16 @@ <h5 class="mb-1">Orchestration Integration</h5>
                                     </div>
                                 </div>
                             </li>
+                            <li class="list-group-item">
+                                <div class="info-tooltip">
+                                    <button type="submit" formaction="/spring-ai-orchestration/chatMemory"
+                                            class="link-offset-2-hover link-underline link-underline-opacity-0 link-underline-opacity-75-hover endpoint">
+                                        <code>/spring-ai-orchestration/chatMemory</code>
+                                    </button>
+                                    <div class="tooltip-content">
+                                        The user first asks for the capital of France, then for the typical food there; chat memory will remember that the user is inquiring about France.
+                                    </div>
+                                </div>
                         </ul>
                     </div>
                 </div>
diff --git a/sample-code/spring-app/src/test/java/com/sap/ai/sdk/app/controllers/SpringAiOrchestrationTest.java b/sample-code/spring-app/src/test/java/com/sap/ai/sdk/app/controllers/SpringAiOrchestrationTest.java
@@ -82,4 +82,15 @@ void testToolCallingWithExecution() {
         .isExactlyInstanceOf(OrchestrationClientException.class)
         .hasMessageContaining("Request failed with status 400 Bad Request");
   }
+
+  @Test
+  void testChatMemory() {
+    final ChatResponse chatResponse = service.chatMemory();
+    assertThat(chatResponse).isNotNull();
+    final String answer = chatResponse.getResult().getOutput().getText();
+    log.info(answer);
+    assertThat(answer)
+        .containsAnyOf(
+            "French", "onion", "pastries", "cheese", "baguette", "coq au vin", "foie gras");
+  }
 }