Skip to content

Commit 2acd83e

Browse files
committed
A/better rag (#2852)
* Enhance Azure OpenAI integration: update environment variables, improve client initialization with connectivity validation, and refactor embedder usage. * Refactor code structure for improved readability and maintainability * Refactor Azure OpenAI client initialization: streamline connection testing by using a dedicated utility function and remove redundant code. * Add test utility for Azure OpenAI client: implement connection testing with timeout handling and model type support * Update ingest.config.ts * lodash-es * fix ts * restore system prompt
1 parent 01f3921 commit 2acd83e

File tree

11 files changed

+130
-29
lines changed

11 files changed

+130
-29
lines changed

apps/chatbot-server/.env.example

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@ MONGODB_APP_NAME=LeafyGreenAI
99
VECTOR_SEARCH_INDEX_NAME="vector_index" # or whatever your index name is
1010
MONGODB_DATABASE_NAME="mongodb-chatbot-framework-chatbot" # or whatever your database name is. must contain vector search index.
1111

12-
# OpenAI config
13-
OPENAI_API_KEY=<OpenAI API key>
14-
OPENAI_EMBEDDING_MODEL="text-embedding-ada-002" # or other model
15-
OPENAI_CHAT_COMPLETION_MODEL="gpt-3.5-turbo" # or other model
16-
1712
# Azure OpenAI config
1813
AZURE_OPENAI_API_KEY=<YOUR_AZURE_API_KEY1>
1914
AZURE_OPENAI_API_KEY=<YOUR_AZURE_API_KEY2>
2015
AZURE_OPENAI_ENDPOINT=https://<your-instance>.openai.azure.com/
16+
2117
AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
22-
AZURE_OPENAI_CHAT_COMPLETION_MODEL=gpt-3.5-turbo
18+
AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME=text-embedding-ada-002
19+
AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_URL=https://<your-instance>.openai.azure.com/openai/deployments/text-embedding-ada-002/embeddings?api-version=2023-05-15
20+
21+
AZURE_OPENAI_CHAT_COMPLETION_MODEL=gpt-4.1
22+
AZURE_OPENAI_API_CHAT_COMPLETION_DEPLOYMENT_NAME=gpt-4.1
23+
AZURE_OPENAI_API_CHAT_COMPLETION_DEPLOYMENT_URL=https://<your-instance>.openai.azure.com/openai/deployments/gpt-4.1/chat/completions?api-version=2025-01-01-preview

apps/chatbot-server/package.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
{
2-
"name": "@leafygreen-ui/chatbot-server",
2+
"name": "lg-chatbot-server",
33
"version": "0.0.1",
44
"description": "",
55
"type": "module",
6-
"main": "index.js",
6+
"main": "dist/index.js",
7+
"module": "dist/esm/index.js",
78
"publishConfig": {
89
"access": "restricted"
910
},
@@ -22,12 +23,14 @@
2223
"dotenv": "^16.5.0",
2324
"express": "^4.18.2",
2425
"jsdom": "^26.1.0",
26+
"lodash-es": "^4.17.21",
2527
"mongodb-chatbot-server": "^0.11.0",
2628
"mongodb-rag-core": "^0.7.0"
2729
},
2830
"devDependencies": {
2931
"mongodb-rag-ingest": "^0.3.1",
3032
"nodemon": "^3.0.1",
31-
"tsx": "^4.19.4"
33+
"tsx": "^4.19.4",
34+
"typescript": "^5.8.0"
3235
}
3336
}

apps/chatbot-server/src/index.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,12 @@ import { initChatBot } from './init';
1010
// System prompt for chatbot
1111
const systemPrompt: SystemPrompt = {
1212
role: 'system',
13-
content: `You are an assistant to users of the MongoDB Chatbot Framework.
13+
content: `You are an assistant to engineers and product designers using the LeafyGreen design system.
1414
Answer their questions about the framework in a friendly conversational tone.
15+
16+
For questions regarding engineering, and react components, provide code examples.
17+
For questions regarding design and UX guidelines, provide sources.
18+
1519
Format your answers in Markdown.
1620
Be concise in your answers.
1721
`,

apps/chatbot-server/src/ingest/ingest.config.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@ import { loadEnvVars } from '../utils/loadEnv';
88
import { makeEmbedder } from '../utils/makeEmbedder';
99

1010
import { leafygreenGithubSourceConstructor } from './sources/github-leafygreen-ui';
11-
import { mongoDbChatbotFrameworkDocsDataSourceConstructor } from './sources/github-mdb-chatbot-framework';
1211
import { webSourceConstructor } from './utils/webSourceConstructor';
1312

1413
// Load project environment variables
1514
const {
1615
MONGODB_CONNECTION_URI,
1716
MONGODB_DATABASE_NAME,
18-
AZURE_OPENAI_DEPLOYMENT,
17+
AZURE_OPENAI_EMBEDDING_MODEL,
1918
} = loadEnvVars();
2019

2120
export default {
@@ -25,7 +24,7 @@ export default {
2524
connectionUri: MONGODB_CONNECTION_URI,
2625
databaseName: MONGODB_DATABASE_NAME,
2726
searchIndex: {
28-
embeddingName: AZURE_OPENAI_DEPLOYMENT,
27+
embeddingName: AZURE_OPENAI_EMBEDDING_MODEL,
2928
},
3029
}),
3130
pageStore: () =>
@@ -39,6 +38,11 @@ export default {
3938
databaseName: MONGODB_DATABASE_NAME,
4039
entryId: 'all',
4140
}),
41+
chunkOptions: () => ({
42+
minChunkSize: 15,
43+
maxChunkSize: 1000,
44+
overlap: 100,
45+
}),
4246
// Add data sources here
4347
dataSources: async () => {
4448
return Promise.all([
@@ -51,7 +55,6 @@ export default {
5155
'https://www.w3.org/WAI/standards-guidelines/wcag',
5256
'https://atomicdesign.bradfrost.com/table-of-contents',
5357
].map(source => webSourceConstructor(source, {})),
54-
mongoDbChatbotFrameworkDocsDataSourceConstructor(),
5558
leafygreenGithubSourceConstructor(),
5659
]);
5760
},

apps/chatbot-server/src/ingest/utils/webSourceConstructor.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/* eslint-disable no-console */
22
import { recursiveCrawlFromBaseURL } from '@lg-tools/crawler';
3+
import { trimEnd } from 'lodash-es';
34
import { Page } from 'mongodb-rag-core';
45
import { type DataSource } from 'mongodb-rag-core/dataSources';
56

@@ -16,6 +17,7 @@ export async function webSourceConstructor(
1617
source: string,
1718
options?: WebSourceConstructorOptions,
1819
): Promise<DataSource> {
20+
source = trimEnd(source, '/');
1921
const { maxDepth = 3, verbose = false } = {
2022
maxDepth: 3,
2123
verbose: false,

apps/chatbot-server/src/init.ts

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { AzureOpenAI } from 'mongodb-rag-core/openai';
1818

1919
import { loadEnvVars } from './utils/loadEnv';
2020
import { makeEmbedder } from './utils/makeEmbedder';
21+
import { testOpenAIClient } from './utils/testOpenAIClient';
2122

2223
export async function initChatBot(): Promise<{
2324
llm: ChatLlm;
@@ -34,20 +35,27 @@ export async function initChatBot(): Promise<{
3435
AZURE_OPENAI_ENDPOINT,
3536
AZURE_OPENAI_API_KEY,
3637
AZURE_OPENAI_EMBEDDING_MODEL,
38+
AZURE_OPENAI_API_CHAT_COMPLETION_DEPLOYMENT_NAME,
3739
AZURE_OPENAI_CHAT_COMPLETION_MODEL,
3840
} = loadEnvVars();
3941

4042
const azureOpenAIChatClient = new AzureOpenAI({
4143
endpoint: AZURE_OPENAI_ENDPOINT,
4244
apiKey: AZURE_OPENAI_API_KEY,
4345
apiVersion: '2024-04-01-preview',
44-
deployment: AZURE_OPENAI_CHAT_COMPLETION_MODEL,
46+
deployment: AZURE_OPENAI_API_CHAT_COMPLETION_DEPLOYMENT_NAME,
47+
});
48+
49+
testOpenAIClient({
50+
client: azureOpenAIChatClient,
51+
model: AZURE_OPENAI_CHAT_COMPLETION_MODEL,
52+
type: 'chat',
4553
});
4654

4755
// Chatbot LLM for responding to the user's query.
4856
const llm = makeOpenAiChatLlm({
4957
openAiClient: azureOpenAIChatClient,
50-
deployment: AZURE_OPENAI_CHAT_COMPLETION_MODEL,
58+
deployment: AZURE_OPENAI_API_CHAT_COMPLETION_DEPLOYMENT_NAME,
5159
openAiLmmConfigOptions: {
5260
temperature: 0.5,
5361
},
@@ -100,6 +108,7 @@ export async function initChatBot(): Promise<{
100108
101109
102110
User query: ${originalUserMessage}`;
111+
103112
return { role: 'user', content: contentForLlm };
104113
};
105114

@@ -114,6 +123,10 @@ export async function initChatBot(): Promise<{
114123
const mongodbClient = new MongoClient(MONGODB_CONNECTION_URI);
115124
const conversations = makeMongoDbConversationsService(
116125
mongodbClient.db(MONGODB_DATABASE_NAME),
126+
{
127+
LLM_NOT_WORKING: 'LLM_NOT_WORKING',
128+
NO_RELEVANT_CONTENT: 'NO_RELEVANT_CONTENT',
129+
},
117130
);
118131

119132
return {

apps/chatbot-server/src/utils/loadEnv.ts

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@ export function loadEnvVars() {
1515
MONGODB_APP_NAME,
1616
MONGODB_DATABASE_NAME,
1717
VECTOR_SEARCH_INDEX_NAME,
18-
OPENAI_API_KEY,
19-
OPENAI_EMBEDDING_MODEL,
2018
AZURE_OPENAI_ENDPOINT,
2119
AZURE_OPENAI_API_KEY,
2220
AZURE_OPENAI_EMBEDDING_MODEL,
21+
AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME,
22+
AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_URL,
2323
AZURE_OPENAI_CHAT_COMPLETION_MODEL,
24+
AZURE_OPENAI_API_CHAT_COMPLETION_DEPLOYMENT_NAME,
25+
AZURE_OPENAI_API_CHAT_COMPLETION_DEPLOYMENT_URL,
2426
} = process.env;
2527
const requiredEnvVars = {
2628
MONGODB_USER,
@@ -29,12 +31,14 @@ export function loadEnvVars() {
2931
MONGODB_APP_NAME,
3032
MONGODB_DATABASE_NAME,
3133
VECTOR_SEARCH_INDEX_NAME,
32-
OPENAI_API_KEY,
33-
OPENAI_EMBEDDING_MODEL,
3434
AZURE_OPENAI_ENDPOINT,
3535
AZURE_OPENAI_API_KEY,
3636
AZURE_OPENAI_EMBEDDING_MODEL,
37+
AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME,
38+
AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_URL,
3739
AZURE_OPENAI_CHAT_COMPLETION_MODEL,
40+
AZURE_OPENAI_API_CHAT_COMPLETION_DEPLOYMENT_NAME,
41+
AZURE_OPENAI_API_CHAT_COMPLETION_DEPLOYMENT_URL,
3842
} as const;
3943

4044
for (const [name, value] of Object.entries(requiredEnvVars)) {
@@ -43,8 +47,10 @@ export function loadEnvVars() {
4347

4448
const MONGODB_CONNECTION_URI = `mongodb+srv://${MONGODB_USER}:${MONGODB_PASSWORD}@${MONGODB_PROJECT_URL}/?retryWrites=true&w=majority&appName=${MONGODB_APP_NAME}`;
4549

46-
return { ...requiredEnvVars, MONGODB_CONNECTION_URI } as Record<
47-
string,
48-
string
49-
>;
50+
return {
51+
...requiredEnvVars,
52+
MONGODB_CONNECTION_URI,
53+
} as Record<keyof typeof requiredEnvVars, string> & {
54+
MONGODB_CONNECTION_URI: string;
55+
};
5056
}

apps/chatbot-server/src/utils/makeEmbedder.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@ import { makeEmbeddingClient } from './makeEmbeddingClient';
99
* and in the chatbot server.
1010
*/
1111
export const makeEmbedder = () => {
12-
const { AZURE_OPENAI_DEPLOYMENT } = loadEnvVars();
12+
const { AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME } = loadEnvVars();
1313

1414
const azureClient = makeEmbeddingClient();
1515

16-
return makeOpenAiEmbedder({
16+
const embedder = makeOpenAiEmbedder({
1717
openAiClient: azureClient,
18-
deployment: AZURE_OPENAI_DEPLOYMENT,
18+
deployment: AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME,
1919
backoffOptions: {},
2020
});
21+
22+
return embedder;
2123
};

apps/chatbot-server/src/utils/makeEmbeddingClient.ts

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { AzureOpenAI } from 'mongodb-rag-core/openai';
22

33
import { loadEnvVars } from './loadEnv';
4+
import { testOpenAIClient } from './testOpenAIClient';
45

56
/**
67
 * Returns a consistent Azure OpenAI client
@@ -14,15 +15,30 @@ export const makeEmbeddingClient = () => {
1415
const {
1516
AZURE_OPENAI_API_KEY,
1617
AZURE_OPENAI_ENDPOINT,
17-
AZURE_OPENAI_DEPLOYMENT,
18+
AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME,
1819
} = loadEnvVars();
1920

2021
const azureClient = new AzureOpenAI({
2122
endpoint: AZURE_OPENAI_ENDPOINT,
2223
apiKey: AZURE_OPENAI_API_KEY,
2324
apiVersion: '2024-04-01-preview',
24-
deployment: AZURE_OPENAI_DEPLOYMENT,
25+
deployment: AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME,
2526
});
2627

28+
// Validate the Azure client instance
29+
if (!azureClient) {
30+
throw new Error('Failed to initialize Azure OpenAI client.');
31+
}
32+
33+
testOpenAIClient({
34+
client: azureClient,
35+
model: AZURE_OPENAI_API_EMBEDDING_DEPLOYMENT_NAME,
36+
type: 'embedding',
37+
});
38+
39+
// eslint-disable-next-line no-console
40+
console.log('✅ Successfully connected to Azure OpenAI client');
41+
2742
return azureClient;
43+
// This is a minimal request to validate connectivity
2844
};
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import { AzureOpenAI } from 'mongodb-rag-core/openai';
2+
3+
/** Options accepted by {@link testOpenAIClient}. */
interface TestOpenAIClientParams {
  /** Client instance whose connectivity is probed. */
  client: AzureOpenAI;
  /** Value sent as `model` in the probe request. */
  model: string;
  /** Which API to probe: embeddings or chat completions. Defaults to `'embedding'`. */
  type?: 'embedding' | 'chat';
  /** Milliseconds to wait for the probe before failing. Defaults to `5000`. */
  timeoutMs?: number;
}

/**
 * Sends one minimal request through the given client to verify that it can
 * reach the service, failing if no response arrives within `timeoutMs`.
 *
 * @returns The response of the probe request.
 * @throws Error prefixed with "Failed to connect to Azure OpenAI: " when the
 *   request rejects or the timeout elapses first.
 */
export const testOpenAIClient = async ({
  client,
  model,
  type = 'embedding',
  timeoutMs = 5000,
}: TestOpenAIClientParams): Promise<unknown> => {
  // Handle of the watchdog timer; kept so it can be cancelled once the race settles.
  let timeoutId: ReturnType<typeof setTimeout> | undefined;

  try {
    // eslint-disable-next-line no-console
    console.log(`✨ Testing Azure OpenAI client with ${type} model...`);

    // Define the test request based on model type
    const testPromise =
      type === 'embedding'
        ? client.embeddings.create({
            input: 'test connection',
            model,
          })
        : client.chat.completions.create({
            messages: [{ role: 'user', content: 'Hello, test connection' }],
            model,
          });

    // Set a timeout to catch if the request hangs
    const timeoutPromise = new Promise((_, reject) => {
      timeoutId = setTimeout(
        () => reject(new Error('Azure OpenAI client connection timed out')),
        timeoutMs,
      );
    });

    // Race the request against the timeout
    return await Promise.race([testPromise, timeoutPromise]);
  } catch (error) {
    throw new Error(
      `Failed to connect to Azure OpenAI: ${
        error instanceof Error ? error.message : String(error)
      }`,
    );
  } finally {
    // Without this the pending timer keeps the event loop alive for up to
    // `timeoutMs` after the probe has already succeeded or failed.
    clearTimeout(timeoutId);
  }
};

0 commit comments

Comments
 (0)