11const fs = require ( 'fs' )
22const path = require ( 'path' )
33const openai = require ( '../../openai' )
4-
// Feature flag: when true, buildHubDocs routes queries through the vector
// store implementation instead of the keyword-based one.
const USE_VECTOR_STORE = true
// Looked up against (and assigned to) the vector store's `name` field.
const VECTOR_STORE_ID = 'OP_5'

// Keywords associated with each document; the keys are also used as the list
// of document file names to upload when the vector store has to be built.
const documentKeywords = require('../docs/kb-files/output/keywords.json')
99
1010function extractKeywords ( query ) {
1111 // TBD DEPRECATED
@@ -46,10 +46,10 @@ function calculateRelevance(query, keywords) {
4646}
4747
4848// Original keyword-based implementation
49+ // DEPRECATED
4950function buildHubDocsWithKeywords ( query ) {
5051 const docs = [ ]
51- const docsDir = path . join ( __dirname , '../docs/op-community-hub' )
52-
52+ const docsDir = path . join ( __dirname , '../docs/kb-files/output/chapters' )
5353 // Calculate relevance for each document
5454 const relevanceScores = Object . entries ( documentKeywords ) . map (
5555 ( [ file , keywords ] ) => ( {
@@ -76,53 +76,91 @@ function buildHubDocsWithKeywords(query) {
/**
 * Finds the ID of the vector store whose name equals `name`.
 *
 * Only inspects the `data` array of the first `list()` response.
 * Errors from the API are propagated to the caller; a failed lookup is NOT
 * treated as "store does not exist".
 *
 * @param {string} name - Vector store name to look for.
 * @returns {Promise<string|undefined>} The store ID, or undefined if no store matches.
 */
async function findVectorStoreIdByName(name) {
  const vectorStores = await openai().vectorStores.list()
  const match = vectorStores.data.find((store) => store.name === name)
  return match ? match.id : undefined
}

/**
 * Uploads documentation files in fixed-size batches to avoid rate limits.
 * A file that fails to upload is logged and skipped; it does not abort the run.
 *
 * @param {string} docsDir - Directory containing the files.
 * @param {string[]} files - File names relative to `docsDir`.
 * @param {number} [batchSize=5] - Number of concurrent uploads per batch.
 * @returns {Promise<string[]>} IDs of the files that uploaded successfully.
 */
async function uploadDocsInBatches(docsDir, files, batchSize = 5) {
  const uploadedIds = []
  const totalBatches = Math.ceil(files.length / batchSize)

  for (let i = 0; i < files.length; i += batchSize) {
    const batch = files.slice(i, i + batchSize)
    console.log(`Processing batch ${i / batchSize + 1} of ${totalBatches}`)

    const fileIds = await Promise.all(
      batch.map(async (file) => {
        console.log(`Uploading file: ${file}`)
        try {
          const response = await openai().files.create({
            file: fs.createReadStream(path.join(docsDir, file)),
            purpose: 'assistants',
          })
          return response.id
        } catch (error) {
          console.error(`Error uploading file ${file}:`, error)
          return null
        }
      }),
    )

    // Keep only the uploads that succeeded
    const validFileIds = fileIds.filter((id) => id !== null)
    uploadedIds.push(...validFileIds)
    console.log(`Valid file IDs in this batch: ${validFileIds.length}`)
  }

  return uploadedIds
}

/**
 * Builds the knowledge base: uploads every file listed in `documentKeywords`
 * and attaches them to a new vector store named VECTOR_STORE_ID.
 *
 * @returns {Promise<string>} ID of the newly created vector store.
 * @throws {Error} If no file could be uploaded. The store is deliberately not
 *   created in that case, so an empty store is never left behind to be found
 *   (and searched) by later calls.
 */
async function createVectorStoreFromDocs() {
  const docsDir = path.join(__dirname, '../docs/kb-files/output/chapters')
  const files = Object.keys(documentKeywords)
  const fileIds = await uploadDocsInBatches(docsDir, files)

  if (fileIds.length === 0) {
    throw new Error(
      'No documentation files could be uploaded; vector store not created',
    )
  }

  const vectorStore = await openai().vectorStores.create({
    name: VECTOR_STORE_ID,
  })
  await openai().vectorStores.fileBatches.createAndPoll(vectorStore.id, {
    file_ids: fileIds,
  })
  return vectorStore.id
}

/**
 * Vector store implementation: searches the knowledge base for `query`,
 * building the knowledge base first if no store named VECTOR_STORE_ID exists.
 *
 * Any error (lookup, build or search) is logged and the keyword-based
 * implementation is used instead, so this function does not reject for those
 * failures.
 *
 * @param {string} query - Search text passed to the vector store.
 * @returns {Promise<Array>} On success, one inner array of text chunks per
 *   search hit (at most 5 hits). On failure, whatever
 *   buildHubDocsWithKeywords(query) returns.
 */
async function buildHubDocsWithVector(query) {
  try {
    let vectorStoreId = await findVectorStoreIdByName(VECTOR_STORE_ID)

    if (vectorStoreId === undefined) {
      console.log('No knowledge base found, building...')
      vectorStoreId = await createVectorStoreFromDocs()
    }

    // do the search
    const results = await openai().vectorStores.search(vectorStoreId, {
      query,
      max_num_results: 5,
    })

    return results.data.map((result) => result.content.map((c) => c.text))
  } catch (error) {
    console.error(
      'Error using vector store, falling back to keyword-based approach:',
      error,
    )
    // Fallback to keyword-based approach if vector store fails
    return buildHubDocsWithKeywords(query)
  }
}
124163
125- // Main function that chooses between implementations
126164async function buildHubDocs ( query ) {
127165 if ( USE_VECTOR_STORE ) {
128166 return await buildHubDocsWithVector ( query )
0 commit comments