Skip to content

Commit d6b4a83

Browse files
committed
Refactor addDocuments to fix SwiftLint violations
Extract helper methods to reduce cyclomatic complexity and function body length:
- validateAddDocumentsInput: Input validation
- validateEmbeddings: Embedding validation
- prepareDocuments: Document preparation
- loadExistingDocumentsForRollback: Load existing docs for rollback
- rollbackIndexingFailure: Handle rollback on indexing failure
1 parent a2cf070 commit d6b4a83

File tree

1 file changed

+138
-107
lines changed

1 file changed

+138
-107
lines changed

Sources/VecturaKit/Core/VecturaKit.swift

Lines changed: 138 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -99,75 +99,16 @@ public actor VecturaKit {
9999
/// - ids: Optional unique identifiers for the documents.
100100
/// - Returns: The IDs of the added documents.
101101
public func addDocuments(texts: [String], ids: [UUID]? = nil) async throws -> [UUID] {
102-
// Validate input
103-
guard !texts.isEmpty else {
104-
throw VecturaError.invalidInput("Cannot add empty array of documents")
105-
}
106-
107-
// Validate that no text is empty
108-
for (index, text) in texts.enumerated() {
109-
guard !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
110-
throw VecturaError.invalidInput("Document at index \(index) cannot be empty or whitespace-only")
111-
}
112-
}
102+
try validateAddDocumentsInput(texts: texts, ids: ids)
113103

114-
if let ids = ids, ids.count != texts.count {
115-
throw VecturaError.invalidInput("Number of IDs must match number of texts")
116-
}
117-
118-
// Get embeddings from the embedder
119104
let embeddings = try await embedder.embed(texts: texts)
105+
try validateEmbeddings(embeddings: embeddings, expectedCount: texts.count)
120106

121-
guard embeddings.count == texts.count else {
122-
throw VecturaError.invalidInput(
123-
"Embedder returned \(embeddings.count) embedding(s) for \(texts.count) text(s)"
124-
)
125-
}
126-
127-
// Validate embeddings
128-
for embedding in embeddings {
129-
try validateDimension(embedding)
130-
}
131-
132-
var documentIds = [UUID]()
133-
var documentsToSave = [VecturaDocument]()
134-
135-
for i in 0..<texts.count {
136-
let docId = ids?[i] ?? UUID()
137-
138-
// Pre-normalize embedding at storage time to avoid per-search normalization
139-
let normalizedEmbedding = try VectorMath.normalizeEmbedding(embeddings[i])
140-
141-
let doc = VecturaDocument(
142-
id: docId,
143-
text: texts[i],
144-
embedding: normalizedEmbedding
145-
)
146-
documentsToSave.append(doc)
147-
documentIds.append(docId)
148-
}
107+
let (documentIds, documentsToSave) = try prepareDocuments(texts: texts, ids: ids, embeddings: embeddings)
108+
let existingDocumentsById = try await loadExistingDocumentsForRollback(documentIds: documentIds, ids: ids)
149109

150-
let existingDocumentsById: [UUID: VecturaDocument]
151-
let idsToRestore = Set(documentIds)
152-
if idsToRestore.isEmpty {
153-
existingDocumentsById = [:]
154-
} else if let indexedStorage = storageProvider as? IndexedVecturaStorage {
155-
existingDocumentsById = try await indexedStorage.loadDocuments(ids: Array(idsToRestore))
156-
} else if ids != nil {
157-
let existingDocs = try await storageProvider.loadDocuments()
158-
existingDocumentsById = existingDocs.reduce(into: [:]) { dict, doc in
159-
if idsToRestore.contains(doc.id) {
160-
dict[doc.id] = doc
161-
}
162-
}
163-
} else {
164-
existingDocumentsById = [:]
165-
}
166-
167-
// Save documents to storage (storage provider handles batch concurrency)
168110
try await storageProvider.saveDocuments(documentsToSave)
169111

170-
// Notify search engine to index documents
171112
var indexedDocumentIDs: [UUID] = []
172113
indexedDocumentIDs.reserveCapacity(documentsToSave.count)
173114

@@ -178,50 +119,11 @@ public actor VecturaKit {
178119
}
179120
} catch {
180121
Self.logger.error("Indexing failed after saving documents: \(error.localizedDescription)")
181-
182-
for id in indexedDocumentIDs {
183-
do {
184-
try await searchEngine.removeDocument(id: id)
185-
} catch {
186-
Self.logger.warning(
187-
"Failed to rollback search index for \(id): \(error.localizedDescription)"
188-
)
189-
}
190-
}
191-
192-
for doc in documentsToSave {
193-
if let existingDoc = existingDocumentsById[doc.id] {
194-
do {
195-
try await storageProvider.updateDocument(existingDoc)
196-
} catch {
197-
Self.logger.warning(
198-
"Failed to restore stored document \(doc.id): \(error.localizedDescription)"
199-
)
200-
}
201-
} else {
202-
do {
203-
try await storageProvider.deleteDocument(withID: doc.id)
204-
} catch {
205-
Self.logger.warning(
206-
"Failed to rollback stored document \(doc.id): \(error.localizedDescription)"
207-
)
208-
}
209-
}
210-
}
211-
212-
for id in indexedDocumentIDs {
213-
guard let existingDoc = existingDocumentsById[id] else {
214-
continue
215-
}
216-
do {
217-
try await searchEngine.indexDocument(existingDoc)
218-
} catch {
219-
Self.logger.warning(
220-
"Failed to restore search index for \(id): \(error.localizedDescription)"
221-
)
222-
}
223-
}
224-
122+
try await rollbackIndexingFailure(
123+
documentsToSave: documentsToSave,
124+
indexedDocumentIDs: indexedDocumentIDs,
125+
existingDocumentsById: existingDocumentsById
126+
)
225127
throw error
226128
}
227129

@@ -434,4 +336,133 @@ public actor VecturaKit {
434336
)
435337
}
436338
}
339+
340+
/// Validates the caller-supplied arguments of `addDocuments`.
///
/// - Parameters:
///   - texts: The document texts to add. Must be non-empty, and no entry may be empty
///     or consist only of whitespace/newlines.
///   - ids: Optional identifiers. When provided, there must be exactly one per text.
/// - Throws: `VecturaError.invalidInput` when `texts` is empty, when any text is blank
///   after trimming whitespace and newlines, or when `ids.count != texts.count`.
341+
private func validateAddDocumentsInput(texts: [String], ids: [UUID]?) throws {
342+
// An empty batch is rejected rather than treated as a no-op.
guard !texts.isEmpty else {
343+
throw VecturaError.invalidInput("Cannot add empty array of documents")
344+
}
345+
346+
// Check every text so the error message can report the offending index.
for (index, text) in texts.enumerated() {
347+
guard !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
348+
throw VecturaError.invalidInput("Document at index \(index) cannot be empty or whitespace-only")
349+
}
350+
}
351+
352+
// IDs are optional, but if given they must pair one-to-one with the texts.
if let ids = ids, ids.count != texts.count {
353+
throw VecturaError.invalidInput("Number of IDs must match number of texts")
354+
}
355+
}
356+
357+
/// Validates that the embedder produced one embedding per text and that each embedding
/// passes the dimension check.
///
/// - Parameters:
///   - embeddings: The embeddings returned by the embedder.
///   - expectedCount: The number of embeddings that should have been returned
///     (the number of input texts).
/// - Throws: `VecturaError.invalidInput` when the counts differ; otherwise rethrows
///   whatever `validateDimension` throws for an individual embedding.
358+
private func validateEmbeddings(embeddings: [[Float]], expectedCount: Int) throws {
359+
// Count mismatch is checked first so later positional pairing with texts is safe.
guard embeddings.count == expectedCount else {
360+
throw VecturaError.invalidInput(
361+
"Embedder returned \(embeddings.count) embedding(s) for \(expectedCount) text(s)"
362+
)
363+
}
364+
365+
// Per-embedding check is delegated to `validateDimension` (defined elsewhere in this type).
for embedding in embeddings {
366+
try validateDimension(embedding)
367+
}
368+
}
369+
370+
/// Builds the `VecturaDocument` values to persist, pairing each text with its ID and
/// its normalized embedding.
///
/// `ids` and `embeddings` are indexed by position in `texts`, so `embeddings` (and
/// `ids`, when non-nil) must contain at least `texts.count` elements; an out-of-range
/// index would trap. Callers are expected to have validated the counts beforehand.
///
/// - Parameters:
///   - texts: The document texts.
///   - ids: Optional identifiers; when `nil`, a fresh `UUID()` is generated per document.
///   - embeddings: The raw embeddings, one per text.
/// - Returns: A tuple of the document IDs and the documents, both in the order of `texts`.
/// - Throws: Rethrows any error from `VectorMath.normalizeEmbedding`.
371+
private func prepareDocuments(
372+
texts: [String],
373+
ids: [UUID]?,
374+
embeddings: [[Float]]
375+
) throws -> ([UUID], [VecturaDocument]) {
376+
var documentIds = [UUID]()
377+
var documentsToSave = [VecturaDocument]()
378+
379+
for i in 0..<texts.count {
380+
// Use the caller's ID for this position when supplied, otherwise mint a new one.
let docId = ids?[i] ?? UUID()
381+
// Pre-normalize embedding at storage time to avoid per-search normalization.
let normalizedEmbedding = try VectorMath.normalizeEmbedding(embeddings[i])
382+
383+
let doc = VecturaDocument(
384+
id: docId,
385+
text: texts[i],
386+
embedding: normalizedEmbedding
387+
)
388+
documentsToSave.append(doc)
389+
documentIds.append(docId)
390+
}
391+
392+
return (documentIds, documentsToSave)
393+
}
394+
395+
/// Loads the documents currently stored under the given IDs so they can be restored
/// if a later step fails.
///
/// - Parameters:
///   - documentIds: The IDs of the documents about to be written.
///   - ids: The IDs originally supplied by the caller of `addDocuments`, or `nil` if
///     the IDs were generated. Only its nil-ness is inspected here.
/// - Returns: A dictionary from ID to the stored document for every requested ID found
///   in storage; empty when there is nothing to look up or the lookup is skipped.
/// - Throws: Rethrows any error from the storage provider's load calls.
396+
private func loadExistingDocumentsForRollback(
397+
documentIds: [UUID],
398+
ids: [UUID]?
399+
) async throws -> [UUID: VecturaDocument] {
400+
// De-duplicate the IDs; also gives O(1) membership checks for the filter below.
let idsToRestore = Set(documentIds)
401+
guard !idsToRestore.isEmpty else {
402+
return [:]
403+
}
404+
405+
// Storage that supports lookup by ID is queried for just the affected documents.
if let indexedStorage = storageProvider as? IndexedVecturaStorage {
406+
return try await indexedStorage.loadDocuments(ids: Array(idsToRestore))
407+
} else if ids != nil {
408+
// Fallback: load everything and keep only the documents whose IDs are affected.
let existingDocs = try await storageProvider.loadDocuments()
409+
return existingDocs.reduce(into: [:]) { dict, doc in
410+
if idsToRestore.contains(doc.id) {
411+
dict[doc.id] = doc
412+
}
413+
}
414+
} else {
415+
// No caller-supplied IDs: the full load is skipped.
// NOTE(review): presumably because freshly generated UUIDs cannot match an
// existing document — confirm this is the intended rationale.
return [:]
416+
}
417+
}
418+
419+
/// Best-effort rollback after indexing fails for documents that were already saved.
///
/// Runs three phases in order:
/// 1. Removes from the search engine every document that had been indexed.
/// 2. For each saved document, restores the previously stored version if one exists,
///    otherwise deletes the newly saved document from storage.
/// 3. Re-indexes the previous version of each document removed in phase 1, when a
///    previous version exists.
///
/// Each individual failure is logged as a warning and does not stop the remaining work.
///
/// NOTE(review): the function is declared `throws`, but every `try` in the body is
/// wrapped in its own `do/catch`, so no error is propagated from here as written.
///
/// - Parameters:
///   - documentsToSave: The documents that were written to storage in this operation.
///   - indexedDocumentIDs: IDs of the documents that were indexed before the failure.
///   - existingDocumentsById: The pre-existing stored documents keyed by ID, used to
///     restore the prior state.
420+
private func rollbackIndexingFailure(
421+
documentsToSave: [VecturaDocument],
422+
indexedDocumentIDs: [UUID],
423+
existingDocumentsById: [UUID: VecturaDocument]
424+
) async throws {
425+
// Phase 1: drop the newly indexed entries from the search engine.
for id in indexedDocumentIDs {
426+
do {
427+
try await searchEngine.removeDocument(id: id)
428+
} catch {
429+
Self.logger.warning(
430+
"Failed to rollback search index for \(id): \(error.localizedDescription)"
431+
)
432+
}
433+
}
434+
435+
// Phase 2: return storage to its prior state — restore overwritten documents,
// delete documents that did not exist before.
for doc in documentsToSave {
436+
if let existingDoc = existingDocumentsById[doc.id] {
437+
do {
438+
try await storageProvider.updateDocument(existingDoc)
439+
} catch {
440+
Self.logger.warning(
441+
"Failed to restore stored document \(doc.id): \(error.localizedDescription)"
442+
)
443+
}
444+
} else {
445+
do {
446+
try await storageProvider.deleteDocument(withID: doc.id)
447+
} catch {
448+
Self.logger.warning(
449+
"Failed to rollback stored document \(doc.id): \(error.localizedDescription)"
450+
)
451+
}
452+
}
453+
}
454+
455+
// Phase 3: re-index the previous versions of documents whose index entries were
// removed in phase 1; IDs with no previous version are skipped.
for id in indexedDocumentIDs {
456+
guard let existingDoc = existingDocumentsById[id] else {
457+
continue
458+
}
459+
do {
460+
try await searchEngine.indexDocument(existingDoc)
461+
} catch {
462+
Self.logger.warning(
463+
"Failed to restore search index for \(id): \(error.localizedDescription)"
464+
)
465+
}
466+
}
467+
}
437468
}

0 commit comments

Comments
 (0)