@@ -99,75 +99,16 @@ public actor VecturaKit {
9999 /// - ids: Optional unique identifiers for the documents.
100100 /// - Returns: The IDs of the added documents.
101101 public func addDocuments( texts: [ String ] , ids: [ UUID ] ? = nil ) async throws -> [ UUID ] {
102- // Validate input
103- guard !texts. isEmpty else {
104- throw VecturaError . invalidInput ( " Cannot add empty array of documents " )
105- }
106-
107- // Validate that no text is empty
108- for (index, text) in texts. enumerated ( ) {
109- guard !text. trimmingCharacters ( in: . whitespacesAndNewlines) . isEmpty else {
110- throw VecturaError . invalidInput ( " Document at index \( index) cannot be empty or whitespace-only " )
111- }
112- }
102+ try validateAddDocumentsInput ( texts: texts, ids: ids)
113103
114- if let ids = ids, ids. count != texts. count {
115- throw VecturaError . invalidInput ( " Number of IDs must match number of texts " )
116- }
117-
118- // Get embeddings from the embedder
119104 let embeddings = try await embedder. embed ( texts: texts)
105+ try validateEmbeddings ( embeddings: embeddings, expectedCount: texts. count)
120106
121- guard embeddings. count == texts. count else {
122- throw VecturaError . invalidInput (
123- " Embedder returned \( embeddings. count) embedding(s) for \( texts. count) text(s) "
124- )
125- }
126-
127- // Validate embeddings
128- for embedding in embeddings {
129- try validateDimension ( embedding)
130- }
131-
132- var documentIds = [ UUID] ( )
133- var documentsToSave = [ VecturaDocument] ( )
134-
135- for i in 0 ..< texts. count {
136- let docId = ids ? [ i] ?? UUID ( )
137-
138- // Pre-normalize embedding at storage time to avoid per-search normalization
139- let normalizedEmbedding = try VectorMath . normalizeEmbedding ( embeddings [ i] )
140-
141- let doc = VecturaDocument (
142- id: docId,
143- text: texts [ i] ,
144- embedding: normalizedEmbedding
145- )
146- documentsToSave. append ( doc)
147- documentIds. append ( docId)
148- }
107+ let ( documentIds, documentsToSave) = try prepareDocuments ( texts: texts, ids: ids, embeddings: embeddings)
108+ let existingDocumentsById = try await loadExistingDocumentsForRollback ( documentIds: documentIds, ids: ids)
149109
150- let existingDocumentsById : [ UUID : VecturaDocument ]
151- let idsToRestore = Set ( documentIds)
152- if idsToRestore. isEmpty {
153- existingDocumentsById = [ : ]
154- } else if let indexedStorage = storageProvider as? IndexedVecturaStorage {
155- existingDocumentsById = try await indexedStorage. loadDocuments ( ids: Array ( idsToRestore) )
156- } else if ids != nil {
157- let existingDocs = try await storageProvider. loadDocuments ( )
158- existingDocumentsById = existingDocs. reduce ( into: [ : ] ) { dict, doc in
159- if idsToRestore. contains ( doc. id) {
160- dict [ doc. id] = doc
161- }
162- }
163- } else {
164- existingDocumentsById = [ : ]
165- }
166-
167- // Save documents to storage (storage provider handles batch concurrency)
168110 try await storageProvider. saveDocuments ( documentsToSave)
169111
170- // Notify search engine to index documents
171112 var indexedDocumentIDs : [ UUID ] = [ ]
172113 indexedDocumentIDs. reserveCapacity ( documentsToSave. count)
173114
@@ -178,50 +119,11 @@ public actor VecturaKit {
178119 }
179120 } catch {
180121 Self . logger. error ( " Indexing failed after saving documents: \( error. localizedDescription) " )
181-
182- for id in indexedDocumentIDs {
183- do {
184- try await searchEngine. removeDocument ( id: id)
185- } catch {
186- Self . logger. warning (
187- " Failed to rollback search index for \( id) : \( error. localizedDescription) "
188- )
189- }
190- }
191-
192- for doc in documentsToSave {
193- if let existingDoc = existingDocumentsById [ doc. id] {
194- do {
195- try await storageProvider. updateDocument ( existingDoc)
196- } catch {
197- Self . logger. warning (
198- " Failed to restore stored document \( doc. id) : \( error. localizedDescription) "
199- )
200- }
201- } else {
202- do {
203- try await storageProvider. deleteDocument ( withID: doc. id)
204- } catch {
205- Self . logger. warning (
206- " Failed to rollback stored document \( doc. id) : \( error. localizedDescription) "
207- )
208- }
209- }
210- }
211-
212- for id in indexedDocumentIDs {
213- guard let existingDoc = existingDocumentsById [ id] else {
214- continue
215- }
216- do {
217- try await searchEngine. indexDocument ( existingDoc)
218- } catch {
219- Self . logger. warning (
220- " Failed to restore search index for \( id) : \( error. localizedDescription) "
221- )
222- }
223- }
224-
122+ try await rollbackIndexingFailure (
123+ documentsToSave: documentsToSave,
124+ indexedDocumentIDs: indexedDocumentIDs,
125+ existingDocumentsById: existingDocumentsById
126+ )
225127 throw error
226128 }
227129
@@ -434,4 +336,133 @@ public actor VecturaKit {
434336 )
435337 }
436338 }
339+
/// Checks the caller-supplied arguments of `addDocuments`.
///
/// The checks run in a fixed order, so the first failing condition decides which error is thrown:
/// empty `texts`, then the first blank text, then an ID-count mismatch.
///
/// - Parameters:
///   - texts: The document texts; must contain at least one element, none of which may be
///     empty or consist solely of whitespace/newlines.
///   - ids: Optional identifiers; when present there must be exactly one per text.
/// - Throws: `VecturaError.invalidInput` describing the first violated rule.
private func validateAddDocumentsInput(texts: [String], ids: [UUID]?) throws {
  if texts.isEmpty {
    throw VecturaError.invalidInput("Cannot add empty array of documents")
  }

  // Report the position of the first text that is blank after trimming.
  let firstBlankIndex = texts.firstIndex { candidate in
    candidate.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
  }
  if let blankIndex = firstBlankIndex {
    throw VecturaError.invalidInput("Document at index \(blankIndex) cannot be empty or whitespace-only")
  }

  if let suppliedIDs = ids {
    guard suppliedIDs.count == texts.count else {
      throw VecturaError.invalidInput("Number of IDs must match number of texts")
    }
  }
}
356+
/// Verifies that the embedder produced one embedding per text and that each one passes
/// `validateDimension`.
///
/// - Parameters:
///   - embeddings: The vectors returned by the embedder.
///   - expectedCount: The number of texts that were embedded.
/// - Throws: `VecturaError.invalidInput` when the counts differ; otherwise rethrows whatever
///   `validateDimension` throws for the first offending embedding.
private func validateEmbeddings(embeddings: [[Float]], expectedCount: Int) throws {
  let receivedCount = embeddings.count
  if receivedCount != expectedCount {
    throw VecturaError.invalidInput(
      "Embedder returned \(receivedCount) embedding(s) for \(expectedCount) text(s)"
    )
  }

  try embeddings.forEach { vector in
    try validateDimension(vector)
  }
}
369+
/// Builds the documents to persist, pairing each text with its ID and normalized embedding.
///
/// Embeddings are normalized here, at storage time, so that searches do not have to
/// normalize them again.
///
/// - Parameters:
///   - texts: The document texts.
///   - ids: Optional caller-supplied IDs, indexed in parallel with `texts`; a fresh `UUID`
///     is generated for every document when this is `nil`.
///   - embeddings: One embedding per text, indexed in parallel with `texts`.
/// - Returns: The document IDs and the documents, both in the order of `texts`.
/// - Throws: Rethrows any error from `VectorMath.normalizeEmbedding`.
/// - Note: `ids` and `embeddings` are subscripted by text position without bounds checks;
///   callers are expected to have validated the counts beforehand.
private func prepareDocuments(
  texts: [String],
  ids: [UUID]?,
  embeddings: [[Float]]
) throws -> ([UUID], [VecturaDocument]) {
  var assignedIDs: [UUID] = []
  var preparedDocuments: [VecturaDocument] = []

  for (position, text) in texts.enumerated() {
    let identifier = ids?[position] ?? UUID()
    let normalized = try VectorMath.normalizeEmbedding(embeddings[position])

    preparedDocuments.append(
      VecturaDocument(id: identifier, text: text, embedding: normalized)
    )
    assignedIDs.append(identifier)
  }

  return (assignedIDs, preparedDocuments)
}
394+
/// Snapshots the stored documents that the pending save may overwrite, so they can be
/// restored if indexing fails afterwards.
///
/// Only caller-supplied IDs can refer to documents that are already stored. When `ids` is
/// `nil`, the entries of `documentIds` were freshly generated (see `prepareDocuments`), so
/// there is nothing to snapshot and storage is not queried at all. This applies to every
/// storage provider, indexed or not.
///
/// - Parameters:
///   - documentIds: The IDs of the documents about to be saved.
///   - ids: The IDs originally passed by the caller, or `nil` if they were generated.
/// - Returns: The currently stored documents keyed by ID, restricted to `documentIds`;
///   empty when no pre-existing document can be affected.
/// - Throws: Rethrows any error raised by the storage provider while loading.
private func loadExistingDocumentsForRollback(
  documentIds: [UUID],
  ids: [UUID]?
) async throws -> [UUID: VecturaDocument] {
  // Freshly generated UUIDs cannot match stored documents; skip the storage round-trip.
  guard ids != nil else {
    return [:]
  }

  let idsToRestore = Set(documentIds)
  guard !idsToRestore.isEmpty else {
    return [:]
  }

  // Prefer a targeted lookup when the provider supports loading by ID.
  if let indexedStorage = storageProvider as? IndexedVecturaStorage {
    return try await indexedStorage.loadDocuments(ids: Array(idsToRestore))
  }

  // Fallback: load everything and keep only the documents that would be overwritten.
  let existingDocs = try await storageProvider.loadDocuments()
  return existingDocs.reduce(into: [:]) { dict, doc in
    if idsToRestore.contains(doc.id) {
      dict[doc.id] = doc
    }
  }
}
418+
/// Undoes a partially applied `addDocuments` call after indexing has failed.
///
/// The rollback runs in three ordered phases:
/// 1. Remove every document that was already indexed in this batch from the search engine.
/// 2. For every saved document, put back the previously stored version if one exists,
///    otherwise delete the newly stored document.
/// 3. Re-index the previous versions of the documents removed in phase 1.
///
/// Although declared `throws`, every failure inside is caught and logged as a warning, so
/// the rollback is best-effort and one failing step does not stop the remaining ones.
///
/// - Parameters:
///   - documentsToSave: The documents that were written to storage by the failed call.
///   - indexedDocumentIDs: IDs of the documents that had been indexed before the failure.
///   - existingDocumentsById: Snapshot of the documents that existed before the save.
private func rollbackIndexingFailure(
  documentsToSave: [VecturaDocument],
  indexedDocumentIDs: [UUID],
  existingDocumentsById: [UUID: VecturaDocument]
) async throws {
  // Phase 1: drop the index entries created by the failed batch.
  for indexedID in indexedDocumentIDs {
    do {
      try await searchEngine.removeDocument(id: indexedID)
    } catch {
      Self.logger.warning(
        "Failed to rollback search index for \(indexedID): \(error.localizedDescription)"
      )
    }
  }

  // Phase 2: return storage to its previous state.
  for savedDocument in documentsToSave {
    guard let previousVersion = existingDocumentsById[savedDocument.id] else {
      // Nothing existed under this ID before the save, so remove the new document.
      do {
        try await storageProvider.deleteDocument(withID: savedDocument.id)
      } catch {
        Self.logger.warning(
          "Failed to rollback stored document \(savedDocument.id): \(error.localizedDescription)"
        )
      }
      continue
    }

    do {
      try await storageProvider.updateDocument(previousVersion)
    } catch {
      Self.logger.warning(
        "Failed to restore stored document \(savedDocument.id): \(error.localizedDescription)"
      )
    }
  }

  // Phase 3: re-index the previous versions of documents whose index entries were removed.
  for indexedID in indexedDocumentIDs {
    if let previousVersion = existingDocumentsById[indexedID] {
      do {
        try await searchEngine.indexDocument(previousVersion)
      } catch {
        Self.logger.warning(
          "Failed to restore search index for \(indexedID): \(error.localizedDescription)"
        )
      }
    }
  }
}
437468}
0 commit comments