Improved the prompt of the /rcc-devdocs command and the Neo4j queries that back it

preeesha · preeesha · commit 53c909e9e23d · 2024-09-06T20:12:10.000+05:30
diff --git a/ai-assistant/src/commands/AskDocsCommand.ts b/ai-assistant/src/commands/AskDocsCommand.ts
@@ -8,6 +8,8 @@ import {
     SlashCommandContext,
 } from "@rocket.chat/apps-engine/definition/slashcommands";
 
+import { PromptFactory } from "../core/prompt.factory";
+import { Query } from "../core/query";
 import { Neo4j } from "../core/services/db/neo4j";
 import { MiniLML6 } from "../core/services/embeddings/minilml6";
 import { Llama3_70B } from "../core/services/llm/llama3_70B";
@@ -19,12 +21,48 @@ export class AskDocsCommand implements ISlashCommand {
     public i18nDescription = "";
     public providesPreview = false;
 
+    /**
+     * Processes the user's query and returns the answer.
+     *
+     * @param {IHttp} http - The HTTP object used for making requests.
+     * @param {string} query - The user's query.
+     * @returns {Promise<string | null>} A promise that resolves to the response to be given to the user or `null` if no answer or no reference is found.
+     */
     private async process(http: IHttp, query: string): Promise<string | null> {
         const db = new Neo4j(http);
         const llm = new Llama3_70B(http);
         const embeddingModel = new MiniLML6(http);
 
-        return "UNDER DEVELOPMENT";
+        /**
+         * ---------------------------------------------------------------------------------------------
+         * STEP 1:
+         * Query the database to find the nodes whose content is similar to what the user has requested
+         * ---------------------------------------------------------------------------------------------
+         */
+        const results = await Query.getDocsNodesFromQuery(
+            db,
+            embeddingModel,
+            query
+        );
+        if (!results.length) return null;
+
+        /**
+         * ---------------------------------------------------------------------------------------------
+         * STEP 2:
+         * Generate the answer for the user's query given the nodes data
+         * ---------------------------------------------------------------------------------------------
+         */
+        const uniqueSources = [...new Set<string>(results.map((x) => x.url))];
+        const answer = await llm.ask(
+            PromptFactory.makeAskDocsPrompt(
+                results.map((x) => x.content).join("\n\n"),
+                uniqueSources,
+                query
+            )
+        );
+        if (!answer) return null;
+
+        return answer;
     }
 
     public async executor(
diff --git a/ai-assistant/src/commands/WhyUsedCommand.ts b/ai-assistant/src/commands/WhyUsedCommand.ts
@@ -46,7 +46,6 @@ export class WhyUsedCommand implements ISlashCommand {
          * ---------------------------------------------------------------------------------------------
          */
         const keywords = await Query.getDBKeywordsFromQuery(llm, query);
-        console.log(keywords);
         if (!keywords.length) return null;
 
         /**
diff --git a/ai-assistant/src/core/prompt.factory.ts b/ai-assistant/src/core/prompt.factory.ts
@@ -61,6 +61,42 @@ export namespace PromptFactory {
         return prompt;
     }
 
+    export function makeAskDocsPrompt(
+        content: string,
+        sourceURLs: string[],
+        query: string
+    ): Prompt {
+        const prompt = new Prompt();
+        prompt.pushSystem(`
+            You are an expert in understanding the documentation of Rocket.Chat and answering questions of user when given a proper context.
+
+            Here're the sources of the content:
+            ${sourceURLs.map((url) => `- ${url}`).join("\n")}
+
+            Here're the rules:
+            1. Even if user asks for any kind of diagram or visualization, you must ignore that.
+            2. If the user asks for an explanation of the content, you must provide the answer based on the content.
+            3. You must provide the answer in text GitHub Markdown format only.
+            4. In case of any request for diagrams or visualizations, tell user to use the "/rcc-diagram" command.
+            5. If you are unable to answer the question, you must tell the user that you are unable to answer the question.
+            6. Always and always mentions the sources of the content in the end. You must provide all these URLs.
+        `);
+        prompt.pushUser(`
+            Hey I have been the reading the following documentation content and I am not able to understand it quite well. I'll provide you the content in between the tags <CONTENT_START> and <CONTENT_END> and the query between <QUERY_START> and <QUERY_END>. Can you please help me understand it better?
+
+            <QUERY_START>
+            ${query}
+            <QUERY_END>
+
+            Here's the content:
+            <CONTENT_START>
+            ${content}
+            <CONTENT_END>
+        `);
+
+        return prompt;
+    }
+
     export function makeDiagramPrompt(codebase: string, query: string): Prompt {
         const prompt = new Prompt();
 
diff --git a/ai-assistant/src/core/query.ts b/ai-assistant/src/core/query.ts
@@ -1,6 +1,7 @@
 import { PromptFactory } from "./prompt.factory";
 import { IDB } from "./services/db/db.types";
 import { DBNode } from "./services/db/dbNode";
+import { DevDocDBNode } from "./services/db/devDocDBNode";
 import { IEmbeddingModel } from "./services/embeddings/embeddings.types";
 import { ILLMModel } from "./services/llm/llm.types";
 
@@ -82,6 +83,75 @@ export namespace Query {
         return results;
     }
 
+    /**
+     * Retrieves an array of DevDocDBNodes from the specified vector query.
+     *
+     * @param {IDB} db - The IDB instance used for the query.
+     * @param {string} indexName - The name of the index to query.
+     * @param {number[]} vector - The vector used for the query.
+     * @param {number} threshold - The minimum score threshold for the query results.
+     * @returns {Promise<DevDocDBNode[]>} - A promise that resolves to an array of DevDocDBNodes that match the query criteria.
+     */
+    export async function getDevDocDBNodesFromVectorQuery(
+        db: IDB,
+        indexName: string,
+        vector: number[],
+        threshold: number
+    ): Promise<DevDocDBNode[]> {
+        const result = await db.run(
+            `
+				CALL db.index.vector.queryNodes("${indexName}", 2, $vector)
+				YIELD node, score
+                WHERE score >= ${threshold}
+                WITH node, score
+                OPTIONAL MATCH (node)-[r]->(relatedNode)
+                RETURN node, COLLECT(relatedNode) AS relatedNodes, score
+                ORDER BY score DESC
+			`,
+            { vector }
+        );
+        if (!result.length) return [];
+
+        const nodes: DevDocDBNode[] = [];
+        const processRecord = (record: any) => {
+            const data = record as DevDocDBNode;
+            data.contentEmbeddings = [];
+            nodes.push(data);
+        };
+        // node
+        processRecord(result[0]);
+        // relatedNodes
+        for (const record of (result as any)[1]) processRecord(record);
+
+        return nodes;
+    }
+
+    /**
+     * Retrieves an array of DevDocDBNodes from the provided query.
+     *
+     * @param {IDB} db - The IDB instance used for querying the database.
+     * @param {IEmbeddingModel} embeddingModel - The embedding model used for generating query vectors.
+     * @param {string} query - The query string used for searching the database.
+     * @returns {Promise<DevDocDBNode[]>} - A promise that resolves to an array of DevDocDBNodes matching the query.
+     */
+    export async function getDocsNodesFromQuery(
+        db: IDB,
+        embeddingModel: IEmbeddingModel,
+        query: string
+    ): Promise<DevDocDBNode[]> {
+        const queryVector = await embeddingModel.generate(query);
+        if (!queryVector) return [];
+
+        const results: DevDocDBNode[] = await getDevDocDBNodesFromVectorQuery(
+            db,
+            "contentEmbeddings",
+            queryVector,
+            0.7
+        );
+
+        return results;
+    }
+
     /**
      * Retrieves database keywords from a given query using a language model.
      *
diff --git a/ai-assistant/src/core/services/db/neo4j.ts b/ai-assistant/src/core/services/db/neo4j.ts
@@ -200,7 +200,6 @@ export class Neo4j implements IDB {
         }
 
         if (response.errors.length) {
-            console.log(response.errors);
             throw new Error(
                 response.errors.map((x) => JSON.stringify(x)).join("\n\n")
             );
diff --git a/ai-assistant/src/endpoints/purgeDB.ts b/ai-assistant/src/endpoints/purgeDB.ts
@@ -58,7 +58,7 @@ export class PurgeDBEndpoint extends ApiEndpoint {
             // Create indices for content embeddings
             [
                 "CREATE VECTOR INDEX `contentEmbeddings` IF NOT EXISTS",
-                "FOR (n: Node) ON (n.contentEmbeddings)",
+                "FOR (n: DevDocDBNode) ON (n.contentEmbeddings)",
                 "OPTIONS {indexConfig: {",
                 "   `vector.dimensions`: 384,",
                 "   `vector.similarity_function`: 'COSINE'",

Original file line number	Diff line number	Diff line change
`@@ -200,7 +200,6 @@ export class Neo4j implements IDB {`
`200`	`200`	`}`
`201`	`201`
`202`	`202`	`if (response.errors.length) {`
`203`		`- console.log(response.errors);`
`204`	`203`	`throw new Error(`
`205`	`204`	`response.errors.map((x) => JSON.stringify(x)).join("\n\n")`
`206`	`205`	`);`
Original file line number	Diff line number	Diff line change
`@@ -58,7 +58,7 @@ export class PurgeDBEndpoint extends ApiEndpoint {`
`58`	`58`	`// Create indices for content embeddings`
`59`	`59`	`[`
`60`	`60`	"CREATE VECTOR INDEX `contentEmbeddings` IF NOT EXISTS",
`61`		`- "FOR (n: Node) ON (n.contentEmbeddings)",`
	`61`	`+ "FOR (n: DevDocDBNode) ON (n.contentEmbeddings)",`
`62`	`62`	`"OPTIONS {indexConfig: {",`
`63`	`63`	" `vector.dimensions`: 384,",
`64`	`64`	" `vector.similarity_function`: 'COSINE'",