feat(component): use cheap llm for table selection by default, with option to use smart llm

akshatdubeysf · akshatdubeysf · commit d56da5cd69d3 · 2025-11-07T13:24:47.000+05:30
diff --git a/src/__tests__/db-query/unit/nodes/get-tables.node.unit.ts b/src/__tests__/db-query/unit/nodes/get-tables.node.unit.ts
@@ -25,14 +25,16 @@ import {IAuthUserWithPermissions} from 'loopback4-authorization';
 
 describe('GetTablesNode Unit', function () {
   let node: GetTablesNode;
-  let llmStub: sinon.SinonStub;
+  let smartllmStub: sinon.SinonStub;
+  let dumbllmStub: sinon.SinonStub;
   let schemaHelper: DbSchemaHelperService;
   let schemaStore: SchemaStore;
   let tableSearchStub: StubbedInstanceWithSinonAccessor<TableSearchService>;
 
   beforeEach(async () => {
-    llmStub = sinon.stub();
-    const llm = llmStub as unknown as LLMProvider;
+    smartllmStub = sinon.stub();
+    dumbllmStub = sinon.stub();
+    const llm = dumbllmStub as unknown as LLMProvider;
 
     schemaHelper = new DbSchemaHelperService(
       new SqliteConnector(
@@ -48,9 +50,17 @@ describe('GetTablesNode Unit', function () {
     );
     schemaStore = new SchemaStore();
     tableSearchStub = createStubInstance(TableSearchService);
-    node = new GetTablesNode(llm, schemaHelper, schemaStore, tableSearchStub, [
-      'test context',
-    ]);
+    node = new GetTablesNode(
+      llm, // cheap LLM (dumbllmStub)
+      smartllmStub as unknown as LLMProvider, // distinct stub: a wrong default pick of the smart LLM now fails the tests
+      {
+        models: [], // no nodes.getTablesNode config, so useSmartLLM is unset
+      },
+      schemaHelper,
+      schemaStore,
+      tableSearchStub,
+      ['test context'],
+    );
   });
 
   it('should return state with minimal schema based on prompt and table search', async () => {
@@ -69,13 +79,99 @@ describe('GetTablesNode Unit', function () {
       schema: originalSchema,
     } as unknown as DbQueryState;
 
-    llmStub.resolves({
+    dumbllmStub.resolves({
+      content: 'employees',
+    });
+
+    const result = await node.execute(state, {});
+
+    expect(dumbllmStub.getCalls()[0].args[0].value.trim()).equal(
+      `<instructions>
+You are an AI assistant that extracts table names that are relevant to the users query that will be used to generate an SQL query later.
+- Consider not just the user query but also the context and the table descriptions while selecting the tables.
+- Carefully consider each and every table before including or excluding it.
+- If doubtful about a table's relevance, include it anyway to give the SQL generation step more options to choose from.
+- Assume that the table would have appropriate columns for relating them to any other table even if the description does not mention it.
+- If you are not sure about the tables to select from the given schema, just return your doubt asking the user for more details or to rephrase the question in the following format -
+failed attempt: reason for failure
+</instructions>
+
+<tables-with-description>
+employees: ${Employee.definition.settings.description}
+
+exchange_rates: ${ExchangeRate.definition.settings.description}
+</tables-with-description>
+
+<user-question>
+Get me the employee with name Akshat
+</user-question>
+
+<must-follow-rules>
+- test context
+- employee salary must be converted to USD, using the currency_id column and the exchange rate table
+</must-follow-rules>
+
+
+
+<output-format>
+The output should be just a comma separated list of table names with no other text, comments or formatting.
+Ensure that table names are exact and match the names in the input including schema if given.
+<example-output>
+public.employees, public.departments
+</example-output>
+In case of failure, return the failure message in the format -
+failed attempt: <reason for failure>
+<example-failure>
+failed attempt: reason for failure
+</example-failure>
+</output-format>`,
+    );
+
+    expect(result.schema).to.deepEqual(
+      schemaStore.filteredSchema(['employees']),
+    );
+  });
+
+  it('should return state with minimal schema based on prompt and table search with smart llm', async () => {
+    node = new GetTablesNode(
+      dumbllmStub as unknown as LLMProvider,
+      smartllmStub as unknown as LLMProvider,
+      {
+        models: [],
+        nodes: {
+          // config to use smart llm for this node
+          getTablesNode: {
+            useSmartLLM: true,
+          },
+        },
+      },
+      schemaHelper,
+      schemaStore,
+      tableSearchStub,
+      ['test context'],
+    );
+    tableSearchStub.stubs.getTables.resolves(['employees', 'exchange_rates']);
+    const originalSchema = schemaHelper.modelToSchema('', [
+      Employee,
+      ExchangeRate,
+      Currency,
+      Skill,
+      EmployeeSkill,
+    ]);
+    await schemaStore.save(originalSchema);
+
+    const state = {
+      prompt: 'Get me the employee with name Akshat',
+      schema: originalSchema,
+    } as unknown as DbQueryState;
+
+    smartllmStub.resolves({
       content: 'employees',
     });
 
     const result = await node.execute(state, {});
 
-    expect(llmStub.getCalls()[0].args[0].value.trim()).equal(
+    expect(smartllmStub.getCalls()[0].args[0].value.trim()).equal(
       `<instructions>
 You are an AI assistant that extracts table names that are relevant to the users query that will be used to generate an SQL query later.
 - Consider not just the user query but also the context and the table descriptions while selecting the tables.
@@ -138,7 +234,7 @@ failed attempt: reason for failure
       schema: originalSchema,
     } as unknown as DbQueryState;
 
-    llmStub.resolves({
+    dumbllmStub.resolves({
       content: 'employees',
     });
 
@@ -163,16 +259,16 @@ failed attempt: reason for failure
       schema: originalSchema,
     } as unknown as DbQueryState;
 
-    llmStub.onFirstCall().resolves({
+    dumbllmStub.onFirstCall().resolves({
       content: 'non_existing_table',
     });
-    llmStub.onSecondCall().resolves({
+    dumbllmStub.onSecondCall().resolves({
       content: 'employees',
     });
 
     const result = await node.execute(state, {});
 
-    expect(llmStub.callCount).to.equal(2);
+    expect(dumbllmStub.callCount).to.equal(2);
     expect(result.schema).to.deepEqual(
       schemaStore.filteredSchema(['employees']),
     );
diff --git a/src/components/db-query/nodes/get-tables.node.ts b/src/components/db-query/nodes/get-tables.node.ts
@@ -13,13 +13,17 @@ import {DbSchemaHelperService} from '../services';
 import {SchemaStore} from '../services/schema.store';
 import {TableSearchService} from '../services/search/table-search.service';
 import {DbQueryState} from '../state';
-import {DatabaseSchema, GenerationError} from '../types';
+import {DatabaseSchema, DbQueryConfig, GenerationError} from '../types';
 
 @graphNode(DbQueryNodes.GetTables)
 export class GetTablesNode implements IGraphNode<DbQueryState> {
   constructor(
+    @inject(AiIntegrationBindings.CheapLLM)
+    private readonly llmCheap: LLMProvider,
     @inject(AiIntegrationBindings.SmartLLM)
-    private readonly llm: LLMProvider,
+    private readonly llmSmart: LLMProvider,
+    @inject(DbQueryAIExtensionBindings.Config)
+    private readonly config: DbQueryConfig,
     @service(DbSchemaHelperService)
     private readonly schemaHelper: DbSchemaHelperService,
     @service(SchemaStore)
@@ -95,7 +99,10 @@ Use these if they are relevant to the table selection, otherwise ignore them, th
       );
     }
 
-    const chain = RunnableSequence.from([this.prompt, this.llm]);
+    const useSmartLLM = this.config.nodes?.getTablesNode?.useSmartLLM ?? false;
+    const llm = useSmartLLM ? this.llmSmart : this.llmCheap;
+
+    const chain = RunnableSequence.from([this.prompt, llm]);
     config.writer?.({
       type: LLMStreamEventType.ToolStatus,
       data: {
diff --git a/src/components/db-query/nodes/sql-generation.node.ts b/src/components/db-query/nodes/sql-generation.node.ts
@@ -114,7 +114,7 @@ In the last attempt, you generated this SQL query -
     });
 
     const generateDesc =
-      this.config.nodes?.sqlGenerationWithDescription !== false;
+      this.config.nodes?.sqlGenerationNode?.generateDescription !== false;
 
     const output = await chain.invoke({
       dialect: this.config.db?.dialect ?? SupportedDBs.PostgreSQL,
diff --git a/src/components/db-query/types.ts b/src/components/db-query/types.ts
@@ -113,8 +113,12 @@ export type DbQueryConfig = {
     maxClusterSize?: number;
   };
   nodes?: {
-    sqlGenerationWithDescription?: boolean;
-    renderNode?: boolean;
+    sqlGenerationNode?: {
+      generateDescription?: boolean;
+    };
+    getTablesNode?: {
+      useSmartLLM?: boolean;
+    };
   };
   columnSelection?: boolean;
 };