Skip to content

Commit a56ffad

Browse files
committed
Improves vectorization
1 parent dc3025c commit a56ffad

File tree

6 files changed

+113
-51
lines changed

6 files changed

+113
-51
lines changed

package-lock.json

Lines changed: 12 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,5 +23,8 @@
2323
"lerna": "^8.1.2",
2424
"lint-staged": "^15.2.2",
2525
"prettier": "^3.2.5"
26+
},
27+
"dependencies": {
28+
"echomd": "^0.2.3"
2629
}
2730
}

packages/ethernaut-ai/src/internal/assistants/utils/build-hub-docs.js

Lines changed: 76 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
const fs = require('fs')
22
const path = require('path')
33
const openai = require('../../openai')
4-
5-
const USE_VECTOR_STORE = false // my flag to test both solutions
4+
const USE_VECTOR_STORE = true // my flag to test both solutions
5+
const VECTOR_STORE_ID = 'OP_5'
66

77
// Keywords associated with each document
8-
const documentKeywords = { 'extract from local docs': [] } // FIXME - remove
8+
const documentKeywords = require('../docs/kb-files/output/keywords.json')
99

1010
function extractKeywords(query) {
1111
// TBD DEPRECATED
@@ -46,10 +46,10 @@ function calculateRelevance(query, keywords) {
4646
}
4747

4848
// Original keyword-based implementation
49+
// DEPRECATED
4950
function buildHubDocsWithKeywords(query) {
5051
const docs = []
51-
const docsDir = path.join(__dirname, '../docs/op-community-hub')
52-
52+
const docsDir = path.join(__dirname, '../docs/kb-files/output/chapters')
5353
// Calculate relevance for each document
5454
const relevanceScores = Object.entries(documentKeywords).map(
5555
([file, keywords]) => ({
@@ -76,53 +76,91 @@ function buildHubDocsWithKeywords(query) {
7676
// New vector store implementation
7777
async function buildHubDocsWithVector(query) {
7878
try {
79-
// Get or create vector store
80-
let vectorStore
81-
const vectorStores = await openai().beta.vectorStores.list()
82-
const existingStore = vectorStores.data.find(
83-
(store) => store.name === 'Optimism Documentation',
84-
)
85-
86-
if (existingStore) {
87-
vectorStore = existingStore
88-
} else {
89-
// Create new vector store with all documentation files
90-
const docsDir = path.join(__dirname, '../docs/op-community-hub')
91-
const files = Object.keys(documentKeywords)
79+
let vectorStoreId
9280

93-
// Upload files to OpenAI
94-
const fileIds = await Promise.all(
95-
files.map(async (file) => {
96-
const content = fs.readFileSync(path.join(docsDir, file), 'utf8')
97-
const response = await openai().files.create({
98-
file: Buffer.from(content),
99-
purpose: 'vector-store',
100-
})
101-
return response.id
102-
}),
81+
try {
82+
const vectorStores = await openai().vectorStores.list()
83+
const matchingVectorStore = vectorStores.data.find(
84+
(store) => store.name === VECTOR_STORE_ID,
10385
)
10486

105-
vectorStore = await openai().beta.vectorStores.create({
106-
name: 'Optimism Documentation',
107-
file_ids: fileIds,
87+
if (matchingVectorStore) {
88+
vectorStoreId = matchingVectorStore.id
89+
} else {
90+
throw new Error('No knowledge base found, building...')
91+
}
92+
} catch (error) {
93+
console.log('No knowledge base found, building...')
94+
95+
const docsDir = path.join(__dirname, '../docs/kb-files/output/chapters')
96+
const files = Object.keys(documentKeywords)
97+
const batchSize = 5 // Upload files in batches to avoid rate limits
98+
const allFileIds = []
99+
100+
for (let i = 0; i < files.length; i += batchSize) {
101+
const batch = files.slice(i, i + batchSize)
102+
console.log(
103+
`Processing batch ${i / batchSize + 1} of ${Math.ceil(files.length / batchSize)}`,
104+
)
105+
106+
const fileIds = await Promise.all(
107+
batch.map(async (file) => {
108+
const filePath = path.join(docsDir, file)
109+
console.log(`Uploading file: ${file}`)
110+
try {
111+
const fileStream = fs.createReadStream(filePath)
112+
const response = await openai().files.create({
113+
file: fileStream,
114+
purpose: 'assistants',
115+
})
116+
return response.id
117+
} catch (error) {
118+
console.error(`Error uploading file ${file}:`, error)
119+
return null
120+
}
121+
}),
122+
)
123+
124+
// Filter out any failed uploads and add to all file IDs
125+
const validFileIds = fileIds.filter((id) => id !== null)
126+
allFileIds.push(...validFileIds)
127+
console.log(`Valid file IDs in this batch: ${validFileIds.length}`)
128+
}
129+
130+
// Create the vector store
131+
const vectorStore = await openai().vectorStores.create({
132+
name: VECTOR_STORE_ID,
108133
})
134+
vectorStoreId = vectorStore.id
135+
136+
// Add file IDs
137+
if (allFileIds.length > 0) {
138+
await openai().vectorStores.fileBatches.createAndPoll(vectorStoreId, {
139+
file_ids: allFileIds,
140+
})
141+
}
109142
}
110143

111-
const results = await openai().beta.vectorStores.search({
112-
vector_store_id: vectorStore.id,
113-
query: query,
114-
limit: 5,
144+
// do the search
145+
const results = await openai().vectorStores.search(vectorStoreId, {
146+
query,
147+
max_num_results: 5,
115148
})
149+
const textSources = results.data.map((result) =>
150+
result.content.map((c) => c.text),
151+
)
116152

117-
return results.documents.map((doc) => doc.content)
153+
return textSources
118154
} catch (error) {
119-
console.error('Error using vector store:', error)
155+
console.error(
156+
'Error using vector store, falling back to keyword-based approach:',
157+
error,
158+
)
120159
// Fallback to keyword-based approach if vector store fails
121160
return buildHubDocsWithKeywords(query)
122161
}
123162
}
124163

125-
// Main function that chooses between implementations
126164
async function buildHubDocs(query) {
127165
if (USE_VECTOR_STORE) {
128166
return await buildHubDocsWithVector(query)

packages/ethernaut-ai/src/tasks/hubs.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ require('../scopes/ai')
5252
spinner.success('Assistant done', 'ai')
5353

5454
if (response) {
55-
return output.resultBox(response, 'Response from documentation')
55+
return output.resultBox(response, 'Response', true)
5656
} else {
5757
throw new EthernautCliError(
5858
'ethernaut-ai',

packages/ethernaut-cli/hardhat.config.js

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,11 @@ const OPTIMISM_TIMESTAMP =
3636
'https://github.com/raiseerco/ethernaut-app-kb/releases/download/daily/last-update.json'
3737
const ZIP_URL_OPTIMISM =
3838
'https://github.com/raiseerco/ethernaut-app-kb/releases/download/daily/kb.zip'
39-
const FILES_DIR = path.join(__dirname, 'kb-files')
39+
const FILES_DIR = path.join(
40+
__dirname,
41+
'../../packages/ethernaut-ai/src/internal/assistants/docs/kb-files',
42+
)
43+
4044
const TIMESTAMP_FILE = path.join(FILES_DIR, 'last-update.json')
4145

4246
async function downloadFile(url) {
@@ -80,7 +84,6 @@ async function checkKB() {
8084

8185
async function downloadKB() {
8286
try {
83-
// Download and extract the KB
8487
if (fs.existsSync(FILES_DIR)) {
8588
// spinner.success(' Updating...')
8689
} else {
@@ -105,7 +108,6 @@ async function downloadKB() {
105108
const timestampData = await downloadFile(OPTIMISM_TIMESTAMP)
106109
const parsedData = JSON.parse(timestampData)
107110
fs.writeFileSync(TIMESTAMP_FILE, JSON.stringify(parsedData, null, 2))
108-
// spinner.success('--- Updated ')
109111
} catch (error) {
110112
console.error('Error downloading or parsing timestamp file:', error)
111113
}

packages/ethernaut-common/src/ui/output.js

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,17 +3,25 @@ const boxen = require('boxen')
33
const debug = require('./debug')
44
const spinner = require('./spinner')
55
const telemetry = require('../error/telemetry')
6-
6+
const echomd = require('echomd')
77
let _muted = false
88
let _errorVerbose = false
99

10-
function resultBox(msg, title = 'Result') {
11-
box(msg, {
12-
title,
13-
padding: 1,
14-
borderStyle: 'round',
15-
borderColor: 'blue',
16-
})
10+
function resultBox(msg, title = 'Result', markdown = false) {
11+
if (markdown) {
12+
const width = process.stdout.columns || 80
13+
const line = '─'.repeat(width)
14+
console.log(`\n${line}\n`)
15+
echomd(msg)
16+
console.log(`\n${line}\n`)
17+
} else {
18+
box(msg, {
19+
title,
20+
padding: 1,
21+
borderStyle: 'round',
22+
borderColor: 'blue',
23+
})
24+
}
1725

1826
return msg
1927
}

0 commit comments

Comments
 (0)