
Commit a544c39

improving code documentation
1 parent 4c32dd6 commit a544c39


7 files changed: +79 -23 lines changed


app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/PlainJavaAskApproach.java

Lines changed: 10 additions & 4 deletions
@@ -22,9 +22,11 @@
 import java.util.List;
 
 /**
- * Simple retrieve-then-read java implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
- * top documents from search, then constructs a prompt with them, and then uses OpenAI to generate a completion
- * (answer) with that prompt.
+ * Uses the Cognitive Search and OpenAI Java APIs directly.
+ * It first retrieves the top documents from search and uses them to build a prompt.
+ * Then it uses OpenAI to generate an answer for the user question.
+ * Several Cognitive Search retrieval options are available: Text, Vector, Hybrid.
+ * When Hybrid or Vector is selected, an additional call to OpenAI is required to generate the embeddings vector for the question.
 */
 @Component
 public class PlainJavaAskApproach implements RAGApproach<String, RAGResponse> {
@@ -49,6 +51,8 @@ public PlainJavaAskApproach(FactsRetrieverProvider factsRetrieverProvider, OpenA
     public RAGResponse run(String question, RAGOptions options) {
         //Get instance of retriever based on the retrieval mode: hybryd, text, vectors.
         Retriever factsRetriever = factsRetrieverProvider.getFactsRetriever(options);
+
+        //STEP 1: Retrieve relevant documents using the user question as the query
         List<ContentSource> sources = factsRetriever.retrieveFromQuestion(question, options);
         LOGGER.info("Total {} sources found in cognitive search for keyword search query[{}]", sources.size(),
                 question);
@@ -62,12 +66,14 @@ public RAGResponse run(String question, RAGOptions options) {
             customPrompt = customPrompt.substring(1);
         }
 
+        //STEP 2: Build a prompt, using the RAG options to decide whether the default prompt should be replaced or extended.
         var answerQuestionChatTemplate = new AnswerQuestionChatTemplate(customPrompt, replacePrompt);
 
+        //STEP 3: Build the chat conversation with grounded messages using the retrieved facts
         var groundedChatMessages = answerQuestionChatTemplate.getMessages(question, sources);
         var chatCompletionsOptions = ChatGPTUtils.buildDefaultChatCompletionsOptions(groundedChatMessages);
 
-        // STEP 3: Generate a contextual and content specific answer using the retrieve facts
+        // STEP 4: Generate a contextual and content-specific answer
         ChatCompletions chatCompletions = openAIProxy.getChatCompletions(chatCompletionsOptions);
 
         LOGGER.info("Chat completion generated with Prompt Tokens[{}], Completions Tokens[{}], Total Tokens[{}]",
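Read end to end, the four STEP comments describe one call chain. A minimal sketch of that retrieve-then-read flow, reusing only collaborators that already appear in this diff (Retriever, AnswerQuestionChatTemplate, ChatGPTUtils, OpenAIProxy); logging, error handling and the response building are omitted:

    // Sketch only: the four annotated steps, condensed
    Retriever factsRetriever = factsRetrieverProvider.getFactsRetriever(options);          // text, vectors or hybrid
    List<ContentSource> sources = factsRetriever.retrieveFromQuestion(question, options);  // STEP 1: retrieve facts
    var template = new AnswerQuestionChatTemplate(customPrompt, replacePrompt);            // STEP 2: build or extend the prompt
    var groundedMessages = template.getMessages(question, sources);                        // STEP 3: ground the conversation
    ChatCompletions chatCompletions = openAIProxy.getChatCompletions(
            ChatGPTUtils.buildDefaultChatCompletionsOptions(groundedMessages));            // STEP 4: generate the answer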

app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelChainsApproach.java

Lines changed: 21 additions & 1 deletion
@@ -1,6 +1,7 @@
 package com.microsoft.openai.samples.rag.ask.approaches.semantickernel;
 
 import com.azure.ai.openai.OpenAIAsyncClient;
+import com.azure.core.annotation.Get;
 import com.microsoft.openai.samples.rag.approaches.ContentSource;
 import com.microsoft.openai.samples.rag.approaches.RAGApproach;
 import com.microsoft.openai.samples.rag.approaches.RAGOptions;
@@ -26,6 +27,7 @@
 /**
  * Use Java Semantic Kernel framework with semantic and native functions chaining. It uses an imperative style for AI orchestration through semantic kernel functions chaining.
  * InformationFinder.Search native function and RAG.AnswerQuestion semantic function are called sequentially.
+ * Several Cognitive Search retrieval options are available: Text, Vector, Hybrid.
 */
 @Component
 public class JavaSemanticKernelChainsApproach implements RAGApproach<String, RAGResponse> {
@@ -57,20 +59,30 @@ public JavaSemanticKernelChainsApproach(CognitiveSearchProxy cognitiveSearchProx
     @Override
     public RAGResponse run(String question, RAGOptions options) {
 
+        //Build the semantic kernel context
         Kernel semanticKernel = buildSemanticKernel(options);
+
+
+        //STEP 1: Retrieve relevant documents using the user question. It reuses the CognitiveSearchRetriever approach through the CognitiveSearchPlugin native function.
         SKContext searchContext =
                 semanticKernel.runAsync(
                         question,
                         semanticKernel.getSkill("InformationFinder").getFunction("Search", null)).block();
 
         var sources = formSourcesList(searchContext.getResult());
 
+        //STEP 2: Build an SK context with the retrieved sources and the user question.
         var answerVariables = SKBuilders.variables()
                 .withVariable("sources", searchContext.getResult())
                 .withVariable("input", question)
                 .build();
 
-        SKContext answerExecutionContext =
+        /**
+         * STEP 3:
+         * Get a reference to the semantic function [AnswerQuestion] of the [RAG] plugin (a.k.a. skill) from the SK skills registry and provide it with the pre-built context.
+         * Triggering OpenAI to get the answer.
+         */
+        SKContext answerExecutionContext =
                 semanticKernel.runAsync(answerVariables,
                         semanticKernel.getSkill("RAG").getFunction("AnswerQuestion", null)).block();
         return new RAGResponse.Builder()
@@ -108,6 +120,14 @@ private List<ContentSource> formSourcesList(String result) {
                 .collect(Collectors.toList());
     }
 
+    /**
+     * Build the semantic kernel context with the AnswerQuestion semantic function and the InformationFinder.Search native function.
+     * AnswerQuestion is imported from src/main/resources/semantickernel/Plugins.
+     * InformationFinder.Search is implemented in a traditional Java class method: CognitiveSearchPlugin.search
+     *
+     * @param options the RAG options used to configure the kernel (retrieval mode, etc.)
+     * @return the configured semantic kernel
+     */
     private Kernel buildSemanticKernel( RAGOptions options) {
         Kernel kernel = SKBuilders.kernel()
                 .withDefaultAIService(SKBuilders.chatCompletion()
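For orientation, the two chained calls above can be read as a single end-to-end flow. A condensed sketch assembled only from lines already shown in this diff (not the full method, which also wraps the result in a RAGResponse):

    // Sketch only: native function first, semantic function second
    SKContext searchContext = semanticKernel.runAsync(
            question,
            semanticKernel.getSkill("InformationFinder").getFunction("Search", null)).block();   // Cognitive Search via the CognitiveSearchPlugin
    var answerVariables = SKBuilders.variables()
            .withVariable("sources", searchContext.getResult())   // ground the prompt with the retrieved facts
            .withVariable("input", question)
            .build();
    String answer = semanticKernel.runAsync(
            answerVariables,
            semanticKernel.getSkill("RAG").getFunction("AnswerQuestion", null)).block().getResult();   // OpenAI completion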

app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelPlannerApproach.java

Lines changed: 16 additions & 4 deletions
@@ -22,14 +22,15 @@
 import java.util.Set;
 
 /**
- * Use Java Semantic Kernel framework with built-in Planner for functions orchestration. It uses a declarative style for AI orchestration through the built-in SequentialPlanner.
- * SequentialPlanner call OpenAI to generate a plan for answering a question using available skills/plugins: InformationFinder and RAG
+ * Use Java Semantic Kernel framework with built-in Planner for functions orchestration.
+ * It uses a declarative style for AI orchestration through the built-in SequentialPlanner.
+ * SequentialPlanner calls OpenAI to generate a plan for answering a question using the available plugins: InformationFinder and RAG.
 */
 @Component
 public class JavaSemanticKernelPlannerApproach implements RAGApproach<String, RAGResponse> {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(JavaSemanticKernelPlannerApproach.class);
-    private static final String PLAN_PROMPT = """
+    private static final String GOAL_PROMPT = """
             Take the input as a question and answer it finding any information needed
             """;
     private final CognitiveSearchProxy cognitiveSearchProxy;
@@ -55,6 +56,7 @@ public JavaSemanticKernelPlannerApproach(CognitiveSearchPro
     @Override
     public RAGResponse run(String question, RAGOptions options) {
 
+        //Build the semantic kernel context
         Kernel semanticKernel = buildSemanticKernel(options);
 
         SequentialPlanner sequentialPlanner = new SequentialPlanner(semanticKernel, new SequentialPlannerRequestSettings(
@@ -66,10 +68,12 @@ public RAGResponse run(String question, RAGOptions options) {
                 1024
         ), null);
 
-        var plan = Objects.requireNonNull(sequentialPlanner.createPlanAsync(PLAN_PROMPT).block());
+        //STEP 1: ask OpenAI to generate an execution plan for the goal contained in GOAL_PROMPT.
+        var plan = Objects.requireNonNull(sequentialPlanner.createPlanAsync(GOAL_PROMPT).block());
 
         LOGGER.debug("Semantic kernel plan calculated is [{}]", plan.toPlanString());
 
+        //STEP 2: execute the plan calculated by the planner using OpenAI
         SKContext planContext = Objects.requireNonNull(plan.invokeAsync(question).block());
 
         return new RAGResponse.Builder()
@@ -87,6 +91,14 @@ public void runStreaming(String questionOrConversation, RAGOptions options, Outp
         throw new IllegalStateException("Streaming not supported for this approach");
     }
 
+    /**
+     * Build the semantic kernel context with the AnswerQuestion semantic function and the InformationFinder.Search native function.
+     * AnswerQuestion is imported from src/main/resources/semantickernel/Plugins.
+     * InformationFinder.Search is implemented in a traditional Java class method: CognitiveSearchPlugin.search
+     *
+     * @param options the RAG options used to configure the kernel (retrieval mode, etc.)
+     * @return the configured semantic kernel
+     */
     private Kernel buildSemanticKernel( RAGOptions options) {
         Kernel kernel = SKBuilders.kernel()
                 .withDefaultAIService(SKBuilders.chatCompletion()
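Once the kernel is built, the planner flow has only two moving parts. A condensed sketch of those two steps, taken from the lines above (sequentialPlanner is the instance constructed in this diff); the comment about the plan's contents is illustrative, since the actual plan is generated at runtime by OpenAI:

    // Sketch only: goal in, plan out, then execute
    var plan = Objects.requireNonNull(sequentialPlanner.createPlanAsync(GOAL_PROMPT).block());   // STEP 1: OpenAI drafts a plan over the registered plugins
    LOGGER.debug("Semantic kernel plan calculated is [{}]", plan.toPlanString());                // typically InformationFinder.Search followed by RAG.AnswerQuestion
    SKContext planContext = Objects.requireNonNull(plan.invokeAsync(question).block());          // STEP 2: run the plan with the user question as input
    String answer = planContext.getResult();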

app/backend/src/main/java/com/microsoft/openai/samples/rag/ask/approaches/semantickernel/JavaSemanticKernelWithMemoryApproach.java

Lines changed: 9 additions & 6 deletions
@@ -28,9 +28,9 @@
 import java.util.stream.Collectors;
 
 /**
- * Accomplish the same task as in the PlainJavaAskApproach approach but using Semantic Kernel framework:
- * 1. Memory abstraction is used for vector search capability. It uses Azure Cognitive Search as memory store.
- * 2. Semantic function has been defined to ask question using sources from memory search results
+ * Use Java Semantic Kernel framework with the built-in MemoryStore for embeddings similarity search.
+ * A semantic function defined in RAG.AnswerQuestion (src/main/resources/semantickernel/Plugins) builds the prompt template, which is grounded using results from the Memory Store.
+ * A customized version of the SK built-in CognitiveSearchMemoryStore is used to map the index fields populated by the documents ingestion process.
 */
 @Component
 public class JavaSemanticKernelWithMemoryApproach implements RAGApproach<String, RAGResponse> {
@@ -66,10 +66,11 @@ public JavaSemanticKernelWithMemoryApproach(TokenCredential tokenCredential, Ope
     @Override
     public RAGResponse run(String question, RAGOptions options) {
 
-        //Build semantic kernel with Azure Cognitive Search as memory store. AnswerQuestion skill is imported from resources.
+        //Build the semantic kernel context with Azure Cognitive Search as the memory store. The AnswerQuestion skill is imported from src/main/resources/semantickernel/Plugins.
         Kernel semanticKernel = buildSemanticKernel(options);
 
         /**
+         * STEP 1: Retrieve relevant documents using the user question.
          * Use semantic kernel built-in memory.searchAsync. It uses OpenAI to generate embeddings for the provided question.
          * Question embeddings are provided to cognitive search via search options.
         */
@@ -86,16 +87,18 @@ public RAGResponse run(String question, RAGOptions options) {
         String sources = buildSourcesText(memoryResult);
         List<ContentSource> sourcesList = buildSources(memoryResult);
 
+        //STEP 2: Build an SK context with the sources retrieved from the memory store and the user question.
         SKContext skcontext = SKBuilders.context().build()
                 .setVariable("sources", sources)
                 .setVariable("input", question);
 
-
+        //STEP 3: Get a reference to the semantic function [AnswerQuestion] of the [RAG] plugin (a.k.a. skill) from the SK skills registry and provide it with the pre-built context.
         Mono<SKContext> result = semanticKernel.getFunction("RAG", "AnswerQuestion").invokeAsync(skcontext);
 
         return new RAGResponse.Builder()
                 //.prompt(plan.toPlanString())
-                .prompt("placeholders for prompt")
+                .prompt("Prompt is managed by SK and can't be displayed here. See App logs for prompt")
+                //STEP 4: triggering OpenAI to get an answer
                 .answer(result.block().getResult())
                 .sources(sourcesList)
                 .sourcesAsText(sources)
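Only the end of the memory lookup appears in this hunk. A rough sketch of how the STEP 1 search described in the comment might look, assuming an SK memory API of the shape searchAsync(collection, query, limit, minRelevance, withEmbeddings), a MemoryQueryResult result type, and a hypothetical index name; none of these details are shown in the diff, so treat the signature and values as approximations:

    // Sketch only: STEP 1 as described in the comment (signature, result type and values are assumptions)
    List<MemoryQueryResult> memoryResult = semanticKernel.getMemory()
            .searchAsync("documents-index", question, 5, 0.5, false)   // embeds the question via OpenAI, then queries Cognitive Search
            .block();
    String sources = buildSourcesText(memoryResult);                   // flatten the hits into one grounding string
    List<ContentSource> sourcesList = buildSources(memoryResult);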

app/backend/src/main/java/com/microsoft/openai/samples/rag/chat/approaches/PlainJavaChatApproach.java

Lines changed: 7 additions & 5 deletions
@@ -25,9 +25,9 @@
 
 /**
  * Simple chat-read-retrieve-read java implementation, using the Cognitive Search and OpenAI APIs directly.
- * It uses the ChatGPT API to turn the user question into a good search query.
- * It queries Azure Cognitive Search for search results for that query (optionally using the vector embeddings for that query).
- * It then combines the search results and original user question, and asks ChatGPT API to answer the question based on the sources. It includes the last 4K of message history as well (or however many tokens are allowed by the deployed model).
+ * It first calls OpenAI to generate search keywords from the chat history, and then to answer the last chat question.
+ * Several Cognitive Search retrieval options are available: Text, Vector, Hybrid.
+ * When Hybrid or Vector is selected, an additional call to OpenAI is required to generate the embeddings vector for the keywords extracted from the chat.
 */
 @Component
 public class PlainJavaChatApproach implements RAGApproach<ChatGPTConversation, RAGResponse> {
@@ -54,13 +54,15 @@ public PlainJavaChatApproach(
     */
     @Override
     public RAGResponse run(ChatGPTConversation questionOrConversation, RAGOptions options) {
-
+        //Get instance of retriever based on the retrieval mode: hybrid, text, vectors.
         Retriever factsRetriever = factsRetrieverProvider.getFactsRetriever(options);
+
+        //STEP 1: Retrieve relevant documents using keywords extracted from the chat history. An additional call to OpenAI is required to generate the keywords.
         List<ContentSource> sources = factsRetriever.retrieveFromConversation(questionOrConversation, options);
         LOGGER.info("Total {} sources retrieved", sources.size());
 
 
-        // Replace whole prompt is not supported yet
+        //STEP 2: Build a grounded prompt using the retrieved documents. The RAG options are used to configure prompt extensions like the 'suggest follow-up questions' option.
         var semanticSearchChat = new SemanticSearchChat(questionOrConversation, sources, options.getPromptTemplate(), false, options.isSuggestFollowupQuestions());
         var chatCompletionsOptions = ChatGPTUtils.buildDefaultChatCompletionsOptions(semanticSearchChat.getMessages());
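The chat flow mirrors the ask flow with one extra OpenAI call up front. A minimal sketch assembled from the lines above; the final completion call is not part of this hunk and is assumed to mirror PlainJavaAskApproach (openAIProxy.getChatCompletions):

    // Sketch only: chat-read-retrieve-read, condensed
    Retriever factsRetriever = factsRetrieverProvider.getFactsRetriever(options);                    // text, vectors or hybrid
    List<ContentSource> sources = factsRetriever.retrieveFromConversation(questionOrConversation, options);   // STEP 1: keywords from chat history, then search
    var semanticSearchChat = new SemanticSearchChat(questionOrConversation, sources,
            options.getPromptTemplate(), false, options.isSuggestFollowupQuestions());               // STEP 2: grounded chat prompt
    ChatCompletions chatCompletions = openAIProxy.getChatCompletions(
            ChatGPTUtils.buildDefaultChatCompletionsOptions(semanticSearchChat.getMessages()));      // assumed final step, as in PlainJavaAskApproach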

app/backend/src/main/java/com/microsoft/openai/samples/rag/retrieval/CognitiveSearchRetriever.java

Lines changed: 15 additions & 2 deletions
@@ -23,7 +23,8 @@
 
 /**
  * Cognitive Search retriever implementation that uses the Cognitive Search API to retrieve documents from the search
- * index. If retrieval mode is set to vectors or hybrid, it will use the OpenAI API to convert the user's query text to an embedding vector
+ * index.
+ * If the retrieval mode is set to vectors or hybrid, it will use the OpenAI embeddings API to convert the user's query text to an embedding vector.
  * The hybrid search is specific to cognitive search feature which fuses the best of text search and vector search.
 */
 @Component
@@ -36,6 +37,13 @@ public CognitiveSearchRetriever(CognitiveSearchProxy cognitiveSearchProxy, OpenA
         this.cognitiveSearchProxy = cognitiveSearchProxy;
         this.openAIProxy = openAIProxy;
     }
+
+    /**
+     * Retrieve the top documents for a single user question.
+     * @param question the user question used as the search query text
+     * @param ragOptions the RAG options, including the retrieval mode (text, vectors, hybrid)
+     * @return the top documents retrieved from the search index based on the user's query text
+     */
     @Override
     public List<ContentSource> retrieveFromQuestion(String question, RAGOptions ragOptions) {
         // step 1. Convert the user's query text to an embedding
@@ -63,7 +71,12 @@ public List<ContentSource> retrieveFromQuestion(String question, RAGOptions ragO
 
     }
 
-
+    /**
+     * Retrieve facts relevant to the whole chat conversation.
+     * @param conversation the chat history of the current conversation
+     * @param ragOptions the RAG options, including the retrieval mode (text, vectors, hybrid)
+     * @return facts retrieved from the search index based on GPT-optimized search keywords extracted from the chat history
+     */
     @Override
     public List<ContentSource> retrieveFromConversation(ChatGPTConversation conversation, RAGOptions ragOptions) {
 
app/backend/src/main/java/com/microsoft/openai/samples/rag/retrieval/FactsRetrieverProvider.java

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ public class FactsRetrieverProvider implements ApplicationContextAware {
     * @return retriever implementation
     */
    public Retriever getFactsRetriever(RAGOptions options) {
-       //default to Cognitive Semantic Search for MVP. More useful in the future to support multiple retrivial systems (RedisSearch.Pinecone, etc)
+       //default to Cognitive Semantic Search for MVP. More useful in the future to support multiple retrieval systems (RedisSearch.Pinecone, etc)
        switch (options.getRetrievalMode()){
            case vectors,hybrid,text:
                return this.applicationContext.getBean(CognitiveSearchRetriever.class);
