Skip to content

Commit a01c7c0

Browse files
committed
Add streaming capability
1 parent fc733e6 commit a01c7c0

File tree

16 files changed

+261
-75
lines changed

16 files changed

+261
-75
lines changed

app/backend/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@
5454
<groupId>org.springframework.boot</groupId>
5555
<artifactId>spring-boot-starter-web</artifactId>
5656
</dependency>
57+
<dependency>
58+
<groupId>org.springframework.boot</groupId>
59+
<artifactId>spring-boot-starter-webflux</artifactId>
60+
</dependency>
5761

5862
<dependency>
5963
<groupId>org.springframework.boot</groupId>

app/backend/src/main/java/com/microsoft/openai/samples/rag/Application.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,4 @@ public static void main(String[] args) {
1414
LOG.info("Application profile from system property is [{}]", System.getProperty("spring.profiles.active"));
1515
new SpringApplication(Application.class).run(args);
1616
}
17-
1817
}
Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
package com.microsoft.openai.samples.rag.approaches;
22

3+
import reactor.core.publisher.Flux;
4+
35
public interface RAGApproach<I, O> {
46

57
O run(I questionOrConversation, RAGOptions options);
6-
7-
8-
9-
10-
8+
Flux<O> runStreaming(I questionOrConversation, RAGOptions options);
119
}

app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/PlainJavaAskApproach.java

Lines changed: 68 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,27 @@
11
package com.microsoft.openai.samples.rag.ask.approaches;
22

33
import com.azure.ai.openai.models.ChatCompletions;
4+
import com.azure.ai.openai.models.ChatCompletionsOptions;
5+
import com.azure.ai.openai.models.ChatMessage;
46
import com.microsoft.openai.samples.rag.approaches.ContentSource;
57
import com.microsoft.openai.samples.rag.approaches.RAGApproach;
68
import com.microsoft.openai.samples.rag.approaches.RAGOptions;
79
import com.microsoft.openai.samples.rag.approaches.RAGResponse;
810
import com.microsoft.openai.samples.rag.common.ChatGPTUtils;
9-
import com.microsoft.openai.samples.rag.retrieval.FactsRetrieverProvider;
1011
import com.microsoft.openai.samples.rag.proxy.OpenAIProxy;
12+
import com.microsoft.openai.samples.rag.retrieval.FactsRetrieverProvider;
1113
import com.microsoft.openai.samples.rag.retrieval.Retriever;
1214
import org.slf4j.Logger;
1315
import org.slf4j.LoggerFactory;
1416
import org.springframework.stereotype.Component;
17+
import reactor.core.publisher.Flux;
1518

1619
import java.util.List;
1720

1821
/**
1922
* Simple retrieve-then-read java implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
20-
* top documents from search, then constructs a prompt with them, and then uses OpenAI to generate a completion
21-
* (answer) with that prompt.
23+
* top documents from search, then constructs a prompt with them, and then uses OpenAI to generate a completion
24+
* (answer) with that prompt.
2225
*/
2326
@Component
2427
public class PlainJavaAskApproach implements RAGApproach<String, RAGResponse> {
@@ -39,8 +42,65 @@ public PlainJavaAskApproach(FactsRetrieverProvider factsRetrieverProvider, OpenA
3942
*/
4043
@Override
4144
public RAGResponse run(String question, RAGOptions options) {
42-
//TODO exception handling
45+
return formChatCompletionArguments(
46+
question,
47+
options,
48+
(chatCompletionsOptions, groundedChatMessages, sources) -> {
49+
// STEP 3: Generate a contextual and content specific answer using the retrieve facts
50+
ChatCompletions chatCompletions = openAIProxy.getChatCompletions(chatCompletionsOptions);
51+
52+
LOGGER.info("Chat completion generated with Prompt Tokens[{}], Completions Tokens[{}], Total Tokens[{}]",
53+
chatCompletions.getUsage().getPromptTokens(),
54+
chatCompletions.getUsage().getCompletionTokens(),
55+
chatCompletions.getUsage().getTotalTokens());
56+
57+
return new RAGResponse.Builder()
58+
.question(question)
59+
.prompt(ChatGPTUtils.formatAsChatML(groundedChatMessages))
60+
.answer(chatCompletions.getChoices().get(0).getMessage().getContent())
61+
.sources(sources)
62+
.build();
63+
});
64+
}
65+
66+
@Override
67+
public Flux<RAGResponse> runStreaming(String question, RAGOptions options) {
68+
return formChatCompletionArguments(
69+
question,
70+
options,
71+
(chatCompletionsOptions, groundedChatMessages, sources) -> {
72+
Flux<ChatCompletions> completions = Flux.fromIterable(openAIProxy.getChatCompletionsStream(chatCompletionsOptions));
73+
return completions
74+
.flatMap(completion -> {
75+
LOGGER.info("Chat completion generated with Prompt Tokens[{}], Completions Tokens[{}], Total Tokens[{}]",
76+
completion.getUsage().getPromptTokens(),
77+
completion.getUsage().getCompletionTokens(),
78+
completion.getUsage().getTotalTokens());
79+
80+
return Flux.fromIterable(completion.getChoices())
81+
.map(choice -> new RAGResponse.Builder()
82+
.question(question)
83+
.prompt(ChatGPTUtils.formatAsChatML(groundedChatMessages))
84+
.answer(choice.getMessage().getContent())
85+
.sources(sources)
86+
.build());
87+
});
88+
});
89+
}
4390

91+
private interface CompletionFunction<T> {
92+
T apply(
93+
ChatCompletionsOptions chatCompletionsOptions,
94+
List<ChatMessage> groundedChatMessages,
95+
List<ContentSource> sources
96+
);
97+
}
98+
99+
private <T> T formChatCompletionArguments(
100+
String question,
101+
RAGOptions options,
102+
CompletionFunction<T> completionFunction
103+
) {
44104
//Get instance of retriever based on the retrieval mode: hybrid, text, vectors.
45105
Retriever factsRetriever = factsRetrieverProvider.getFactsRetriever(options);
46106
List<ContentSource> sources = factsRetriever.retrieveFromQuestion(question, options);
@@ -51,31 +111,17 @@ public RAGResponse run(String question, RAGOptions options) {
51111
var customPromptEmpty = (customPrompt == null) || (customPrompt != null && customPrompt.isEmpty());
52112

53113
//True will replace the default prompt; false will append the custom prompt as a suffix to the default prompt.
54-
var replacePrompt = !customPromptEmpty && !customPrompt.startsWith("|");
55-
if(!replacePrompt && !customPromptEmpty){
114+
var replacePrompt = !customPromptEmpty && !customPrompt.startsWith("|");
115+
if (!replacePrompt && !customPromptEmpty) {
56116
customPrompt = customPrompt.substring(1);
57117
}
58118

59119
var answerQuestionChatTemplate = new AnswerQuestionChatTemplate(customPrompt, replacePrompt);
60120

61-
var groundedChatMessages = answerQuestionChatTemplate.getMessages(question,sources);
121+
var groundedChatMessages = answerQuestionChatTemplate.getMessages(question, sources);
62122
var chatCompletionsOptions = ChatGPTUtils.buildDefaultChatCompletionsOptions(groundedChatMessages);
63123

64-
// STEP 3: Generate a contextual and content specific answer using the retrieve facts
65-
ChatCompletions chatCompletions = openAIProxy.getChatCompletions(chatCompletionsOptions);
66-
67-
LOGGER.info("Chat completion generated with Prompt Tokens[{}], Completions Tokens[{}], Total Tokens[{}]",
68-
chatCompletions.getUsage().getPromptTokens(),
69-
chatCompletions.getUsage().getCompletionTokens(),
70-
chatCompletions.getUsage().getTotalTokens());
71-
72-
return new RAGResponse.Builder()
73-
.question(question)
74-
.prompt(ChatGPTUtils.formatAsChatML(groundedChatMessages))
75-
.answer(chatCompletions.getChoices().get(0).getMessage().getContent())
76-
.sources(sources)
77-
.build();
78-
124+
return completionFunction.apply(chatCompletionsOptions, groundedChatMessages, sources);
79125
}
80126

81127

app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.slf4j.LoggerFactory;
1515
import org.springframework.beans.factory.annotation.Value;
1616
import org.springframework.stereotype.Component;
17+
import reactor.core.publisher.Flux;
1718

1819
import java.util.Arrays;
1920
import java.util.Collections;
@@ -81,6 +82,11 @@ public RAGResponse run(String question, RAGOptions options) {
8182

8283
}
8384

85+
@Override
86+
public Flux<RAGResponse> runStreaming(String questionOrConversation, RAGOptions options) {
87+
return Flux.error(new IllegalStateException("Streaming not supported for this approach"));
88+
}
89+
8490
private List<ContentSource> formSourcesList(String result) {
8591
if (result == null) {
8692
return Collections.emptyList();

app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelPlannerApproach.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.slf4j.LoggerFactory;
1616
import org.springframework.beans.factory.annotation.Value;
1717
import org.springframework.stereotype.Component;
18+
import reactor.core.publisher.Flux;
1819

1920
import java.util.Objects;
2021
import java.util.Set;
@@ -80,6 +81,11 @@ public RAGResponse run(String question, RAGOptions options) {
8081

8182
}
8283

84+
@Override
85+
public Flux<RAGResponse> runStreaming(String questionOrConversation, RAGOptions options) {
86+
return Flux.error(new IllegalStateException("Streaming not supported for this approach"));
87+
}
88+
8389
private Kernel buildSemanticKernel( RAGOptions options) {
8490
Kernel kernel = SKBuilders.kernel()
8591
.withDefaultAIService(SKBuilders.chatCompletion()

app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelWithMemoryApproach.java

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,17 @@
1919
import org.slf4j.LoggerFactory;
2020
import org.springframework.beans.factory.annotation.Value;
2121
import org.springframework.stereotype.Component;
22+
import reactor.core.publisher.Flux;
2223
import reactor.core.publisher.Mono;
2324

2425
import java.util.List;
2526
import java.util.function.Function;
2627
import java.util.stream.Collectors;
2728

2829
/**
29-
* Accomplish the same task as in the PlainJavaAskApproach approach but using Semantic Kernel framework:
30-
* 1. Memory abstraction is used for vector search capability. It uses Azure Cognitive Search as memory store.
31-
* 2. Semantic function has been defined to ask question using sources from memory search results
30+
* Accomplish the same task as in the PlainJavaAskApproach approach but using Semantic Kernel framework:
31+
* 1. Memory abstraction is used for vector search capability. It uses Azure Cognitive Search as memory store.
32+
* 2. Semantic function has been defined to ask question using sources from memory search results
3233
*/
3334
@Component
3435
public class JavaSemanticKernelWithMemoryApproach implements RAGApproach<String, RAGResponse> {
@@ -40,8 +41,10 @@ public class JavaSemanticKernelWithMemoryApproach implements RAGApproach<String,
4041

4142
private final String EMBEDDING_FIELD_NAME = "embedding";
4243

43-
@Value("${cognitive.search.service}") String searchServiceName ;
44-
@Value("${cognitive.search.index}") String indexName;
44+
@Value("${cognitive.search.service}")
45+
String searchServiceName;
46+
@Value("${cognitive.search.index}")
47+
String indexName;
4548
@Value("${openai.chatgpt.deployment}")
4649
private String gptChatDeploymentModelId;
4750

@@ -70,11 +73,11 @@ public RAGResponse run(String question, RAGOptions options) {
7073
* Question embeddings are provided to cognitive search via search options.
7174
*/
7275
List<MemoryQueryResult> memoryResult = semanticKernel.getMemory().searchAsync(
73-
indexName,
74-
question,
75-
options.getTop(),
76-
0.5f,
77-
false)
76+
indexName,
77+
question,
78+
options.getTop(),
79+
0.5f,
80+
false)
7881
.block();
7982

8083
LOGGER.info("Total {} sources found in cognitive vector store for search query[{}]", memoryResult.size(), question);
@@ -90,14 +93,19 @@ public RAGResponse run(String question, RAGOptions options) {
9093
Mono<SKContext> result = semanticKernel.getFunction("RAG", "AnswerQuestion").invokeAsync(skcontext);
9194

9295
return new RAGResponse.Builder()
93-
//.prompt(plan.toPlanString())
94-
.prompt("placeholders for prompt")
95-
.answer(result.block().getResult())
96-
.sources(sourcesList)
97-
.sourcesAsText(sources)
98-
.question(question)
99-
.build();
96+
//.prompt(plan.toPlanString())
97+
.prompt("placeholders for prompt")
98+
.answer(result.block().getResult())
99+
.sources(sourcesList)
100+
.sourcesAsText(sources)
101+
.question(question)
102+
.build();
103+
104+
}
100105

106+
@Override
107+
public Flux<RAGResponse> runStreaming(String questionOrConversation, RAGOptions options) {
108+
return Flux.error(new IllegalStateException("Streaming not supported for this approach"));
101109
}
102110

103111
private List<ContentSource> buildSources(List<MemoryQueryResult> memoryResult) {
@@ -123,15 +131,14 @@ private String buildSourcesText(List<MemoryQueryResult> memoryResult) {
123131
return sourcesContentBuffer.toString();
124132
}
125133

126-
private Kernel buildSemanticKernel( RAGOptions options) {
127-
134+
private Kernel buildSemanticKernel(RAGOptions options) {
128135
var kernelWithACS = SKBuilders.kernel()
129136
.withMemoryStorage(
130137
new CustomAzureCognitiveSearchMemoryStore("https://%s.search.windows.net".formatted(searchServiceName),
131-
tokenCredential,
132-
this.searchAsyncClient,
133-
this.EMBEDDING_FIELD_NAME,
134-
buildCustomMemoryMapper()))
138+
tokenCredential,
139+
this.searchAsyncClient,
140+
this.EMBEDDING_FIELD_NAME,
141+
buildCustomMemoryMapper()))
135142
.withDefaultAIService(SKBuilders.textEmbeddingGeneration()
136143
.withOpenAIClient(openAIAsyncClient)
137144
.withModelId(embeddingDeploymentModelId)
@@ -142,14 +149,13 @@ private Kernel buildSemanticKernel( RAGOptions options) {
142149
.build())
143150
.build();
144151

145-
kernelWithACS.importSkillFromResources("semantickernel/Plugins","RAG","AnswerQuestion",null);
146-
return kernelWithACS;
152+
kernelWithACS.importSkillFromResources("semantickernel/Plugins", "RAG", "AnswerQuestion", null);
153+
return kernelWithACS;
147154
}
148155

149-
150-
private Function<SearchDocument, MemoryRecord> buildCustomMemoryMapper(){
156+
private Function<SearchDocument, MemoryRecord> buildCustomMemoryMapper() {
151157
return searchDocument -> {
152-
return MemoryRecord.localRecord(
158+
return MemoryRecord.localRecord(
153159
(String) searchDocument.get("sourcepage"),
154160
(String) searchDocument.get("content"),
155161
"chunked text from original source",

app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/controller/AskController.java

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,6 @@
77
import com.microsoft.openai.samples.rag.approaches.RAGType;
88
import com.microsoft.openai.samples.rag.controller.ChatAppRequest;
99
import com.microsoft.openai.samples.rag.controller.ChatResponse;
10-
import com.microsoft.openai.samples.rag.controller.ResponseChoice;
11-
import com.microsoft.openai.samples.rag.controller.ResponseContext;
12-
import com.microsoft.openai.samples.rag.controller.ResponseMessage;
13-
import com.microsoft.openai.samples.rag.common.ChatGPTMessage;
1410
import org.slf4j.Logger;
1511
import org.slf4j.LoggerFactory;
1612
import org.springframework.http.HttpStatus;
@@ -20,9 +16,6 @@
2016
import org.springframework.web.bind.annotation.RequestBody;
2117
import org.springframework.web.bind.annotation.RestController;
2218

23-
import java.util.Collections;
24-
import java.util.List;
25-
2619
@RestController
2720
public class AskController {
2821

@@ -60,8 +53,7 @@ public ResponseEntity<ChatResponse> openAIAsk(@RequestBody ChatAppRequest askReq
6053

6154
RAGApproach<String, RAGResponse> ragApproach = ragApproachFactory.createApproach(askRequest.approach(), RAGType.ASK, ragOptions);
6255

56+
6357
return ResponseEntity.ok(ChatResponse.buildChatResponse(ragApproach.run(question, ragOptions)));
6458
}
65-
66-
6759
}

0 commit comments

Comments
 (0)