@@ -5,6 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files");
55const { v4 : uuidv4 } = require ( "uuid" ) ;
66const { toChunks, getEmbeddingEngineSelection } = require ( "../../helpers" ) ;
77const { sourceIdentifier } = require ( "../../chats" ) ;
8+ const { VectorDatabase } = require ( "../base" ) ;
89
910const sanitizeNamespace = ( namespace ) => {
1011 // If namespace already starts with ns_, don't add it again
@@ -22,14 +23,21 @@ const collectionExists = async function (client, namespace) {
2223 return collections . includes ( namespace ) ;
2324 }
2425 } catch ( error ) {
25- console . log ( "Astra:: collectionExists check error", error ?. message || error ) ;
26+ this . logger ( " collectionExists check error", error ?. message || error ) ;
2627 return false ; // Return false for any error to allow creation attempt
2728 }
2829} ;
2930
30- const AstraDB = {
31- name : "AstraDB" ,
32- connect : async function ( ) {
31+ class AstraDB extends VectorDatabase {
32+ constructor ( ) {
33+ super ( ) ;
34+ }
35+
36+ get name ( ) {
37+ return "AstraDB" ;
38+ }
39+
40+ async connect ( ) {
3341 if ( process . env . VECTOR_DB !== "astra" )
3442 throw new Error ( "AstraDB::Invalid ENV settings" ) ;
3543
@@ -38,21 +46,24 @@ const AstraDB = {
3846 process ?. env ?. ASTRA_DB_ENDPOINT
3947 ) ;
4048 return { client } ;
41- } ,
42- heartbeat : async function ( ) {
49+ }
50+
51+ async heartbeat ( ) {
4352 return { heartbeat : Number ( new Date ( ) ) } ;
44- } ,
53+ }
54+
4555 // Astra interface will return a valid collection object even if the collection
4656 // does not actually exist. So we run a simple check which will always throw
4757 // when the table truly does not exist. Faster than iterating all collections.
48- isRealCollection : async function ( astraCollection = null ) {
58+ async isRealCollection ( astraCollection = null ) {
4959 if ( ! astraCollection ) return false ;
5060 return await astraCollection
5161 . countDocuments ( )
5262 . then ( ( ) => true )
5363 . catch ( ( ) => false ) ;
54- } ,
55- totalVectors : async function ( ) {
64+ }
65+
66+ async totalVectors ( ) {
5667 const { client } = await this . connect ( ) ;
5768 const collectionNames = await this . allNamespaces ( client ) ;
5869 var totalVectors = 0 ;
@@ -62,13 +73,15 @@ const AstraDB = {
6273 totalVectors += count ? count : 0 ;
6374 }
6475 return totalVectors ;
65- } ,
66- namespaceCount : async function ( _namespace = null ) {
76+ }
77+
78+ async namespaceCount ( _namespace = null ) {
6779 const { client } = await this . connect ( ) ;
6880 const namespace = await this . namespace ( client , _namespace ) ;
6981 return namespace ?. vectorCount || 0 ;
70- } ,
71- namespace : async function ( client , namespace = null ) {
82+ }
83+
84+ async namespace ( client , namespace = null ) {
7285 if ( ! namespace ) throw new Error ( "No namespace value provided." ) ;
7386 const sanitizedNamespace = sanitizeNamespace ( namespace ) ;
7487 const collection = await client
@@ -77,7 +90,7 @@ const AstraDB = {
7790 if ( ! ( await this . isRealCollection ( collection ) ) ) return null ;
7891
7992 const count = await collection . countDocuments ( ) . catch ( ( e ) => {
80- console . error ( "Astra:: namespaceExists", e . message ) ;
93+ this . logger ( " namespaceExists", e . message ) ;
8194 return null ;
8295 } ) ;
8396
@@ -86,27 +99,31 @@ const AstraDB = {
8699 ...collection ,
87100 vectorCount : typeof count === "number" ? count : 0 ,
88101 } ;
89- } ,
90- hasNamespace : async function ( namespace = null ) {
102+ }
103+
104+ async hasNamespace ( namespace = null ) {
91105 if ( ! namespace ) return false ;
92106 const { client } = await this . connect ( ) ;
93107 return await this . namespaceExists ( client , namespace ) ;
94- } ,
95- namespaceExists : async function ( client , namespace = null ) {
108+ }
109+
110+ async namespaceExists ( client , namespace = null ) {
96111 if ( ! namespace ) throw new Error ( "No namespace value provided." ) ;
97112 const sanitizedNamespace = sanitizeNamespace ( namespace ) ;
98113 const collection = await client . collection ( sanitizedNamespace ) ;
99114 return await this . isRealCollection ( collection ) ;
100- } ,
101- deleteVectorsInNamespace : async function ( client , namespace = null ) {
115+ }
116+
117+ async deleteVectorsInNamespace ( client , namespace = null ) {
102118 const sanitizedNamespace = sanitizeNamespace ( namespace ) ;
103119 await client . dropCollection ( sanitizedNamespace ) ;
104120 return true ;
105- } ,
121+ }
122+
106123 // AstraDB requires a dimension aspect for collection creation
107124 // we pass this in from the first chunk to infer the dimensions like other
108125 // providers do.
109- getOrCreateCollection : async function ( client , namespace , dimensions = null ) {
126+ async getOrCreateCollection ( client , namespace , dimensions = null ) {
110127 const sanitizedNamespace = sanitizeNamespace ( namespace ) ;
111128 try {
112129 const exists = await collectionExists ( client , sanitizedNamespace ) ;
@@ -132,14 +149,12 @@ const AstraDB = {
132149
133150 return await client . collection ( sanitizedNamespace ) ;
134151 } catch ( error ) {
135- console . error (
136- "Astra::getOrCreateCollection error" ,
137- error ?. message || error
138- ) ;
152+ this . logger ( "getOrCreateCollection" , error ?. message || error ) ;
139153 throw error ;
140154 }
141- } ,
142- addDocumentToNamespace : async function (
155+ }
156+
157+ async addDocumentToNamespace (
143158 namespace ,
144159 documentData = { } ,
145160 fullFilePath = null ,
@@ -151,7 +166,7 @@ const AstraDB = {
151166 const { pageContent, docId, ...metadata } = documentData ;
152167 if ( ! pageContent || pageContent . length == 0 ) return false ;
153168
154- console . log ( "Adding new vectorized document into namespace" , namespace ) ;
169+ this . logger ( "Adding new vectorized document into namespace" , namespace ) ;
155170 if ( ! skipCache ) {
156171 const cacheResult = await cachedVectorInformation ( fullFilePath ) ;
157172 if ( cacheResult . exists ) {
@@ -210,7 +225,7 @@ const AstraDB = {
210225 } ) ;
211226 const textChunks = await textSplitter . splitText ( pageContent ) ;
212227
213- console . log ( "Snippets created from document:" , textChunks . length ) ;
228+ this . logger ( "Snippets created from document:" , textChunks . length ) ;
214229 const documentVectors = [ ] ;
215230 const vectors = [ ] ;
216231 const vectorValues = await EmbedderEngine . embedChunks ( textChunks ) ;
@@ -246,7 +261,7 @@ const AstraDB = {
246261 if ( vectors . length > 0 ) {
247262 const chunks = [ ] ;
248263
249- console . log ( "Inserting vectorized chunks into Astra DB." ) ;
264+ this . logger ( "Inserting vectorized chunks into Astra DB." ) ;
250265
251266 // AstraDB has maximum upsert size of 20 records per-request so we have to use a lower chunk size here
252267 // in order to do the queries - this takes a lot more time than other providers but there
@@ -266,11 +281,12 @@ const AstraDB = {
266281 await DocumentVectors . bulkInsert ( documentVectors ) ;
267282 return { vectorized : true , error : null } ;
268283 } catch ( e ) {
269- console . error ( "addDocumentToNamespace" , e . message ) ;
284+ this . logger ( "addDocumentToNamespace" , e . message ) ;
270285 return { vectorized : false , error : e . message } ;
271286 }
272- } ,
273- deleteDocumentFromNamespace : async function ( namespace , docId ) {
287+ }
288+
289+ async deleteDocumentFromNamespace ( namespace , docId ) {
274290 const { DocumentVectors } = require ( "../../../models/vectors" ) ;
275291 const { client } = await this . connect ( ) ;
276292 namespace = sanitizeNamespace ( namespace ) ;
@@ -293,8 +309,9 @@ const AstraDB = {
293309 const indexes = knownDocuments . map ( ( doc ) => doc . id ) ;
294310 await DocumentVectors . deleteIds ( indexes ) ;
295311 return true ;
296- } ,
297- performSimilaritySearch : async function ( {
312+ }
313+
314+ async performSimilaritySearch ( {
298315 namespace = null ,
299316 input = "" ,
300317 LLMConnector = null ,
@@ -336,8 +353,9 @@ const AstraDB = {
336353 sources : this . curateSources ( sources ) ,
337354 message : false ,
338355 } ;
339- } ,
340- similarityResponse : async function ( {
356+ }
357+
358+ async similarityResponse ( {
341359 client,
342360 namespace,
343361 queryVector,
@@ -367,8 +385,8 @@ const AstraDB = {
367385 responses . forEach ( ( response ) => {
368386 if ( response . $similarity < similarityThreshold ) return ;
369387 if ( filterIdentifiers . includes ( sourceIdentifier ( response . metadata ) ) ) {
370- console . log (
371- "AstraDB: A source was filtered from context as it's parent document is pinned."
388+ this . logger (
389+ "A source was filtered from context as it's parent document is pinned."
372390 ) ;
373391 return ;
374392 }
@@ -380,8 +398,9 @@ const AstraDB = {
380398 result . scores . push ( response . $similarity ) ;
381399 } ) ;
382400 return result ;
383- } ,
384- allNamespaces : async function ( client ) {
401+ }
402+
403+ async allNamespaces ( client ) {
385404 try {
386405 let header = new Headers ( ) ;
387406 header . append ( "Token" , client ?. httpClient ?. applicationToken ) ;
@@ -403,11 +422,12 @@ const AstraDB = {
403422 const collections = resp ? JSON . parse ( resp ) ?. status ?. collections : [ ] ;
404423 return collections ;
405424 } catch ( e ) {
406- console . error ( "Astra:: AllNamespace", e ) ;
425+ this . logger ( " AllNamespace", e ) ;
407426 return [ ] ;
408427 }
409- } ,
410- "namespace-stats" : async function ( reqBody = { } ) {
428+ }
429+
430+ async "namespace-stats" ( reqBody = { } ) {
411431 const { namespace = null } = reqBody ;
412432 if ( ! namespace ) throw new Error ( "namespace required" ) ;
413433 const { client } = await this . connect ( ) ;
@@ -417,8 +437,9 @@ const AstraDB = {
417437 return stats
418438 ? stats
419439 : { message : "No stats were able to be fetched from DB for namespace" } ;
420- } ,
421- "delete-namespace" : async function ( reqBody = { } ) {
440+ }
441+
442+ async "delete-namespace" ( reqBody = { } ) {
422443 const { namespace = null } = reqBody ;
423444 const { client } = await this . connect ( ) ;
424445 if ( ! ( await this . namespaceExists ( client , namespace ) ) )
@@ -431,8 +452,9 @@ const AstraDB = {
431452 details ?. vectorCount || "all"
432453 } vectors.`,
433454 } ;
434- } ,
435- curateSources : function ( sources = [ ] ) {
455+ }
456+
457+ curateSources ( sources = [ ] ) {
436458 const documents = [ ] ;
437459 for ( const source of sources ) {
438460 if ( Object . keys ( source ) . length > 0 ) {
@@ -446,7 +468,7 @@ const AstraDB = {
446468 }
447469
448470 return documents ;
449- } ,
450- } ;
471+ }
472+ }
451473
452474module . exports . AstraDB = AstraDB ;
0 commit comments