Skip to content

Commit 5538504

Browse files
committed
fix format
1 parent 4bbf5ae commit 5538504

File tree

1 file changed

+179
-177
lines changed

1 file changed

+179
-177
lines changed

Examples/embeddings/Sources/Embeddings.swift

Lines changed: 179 additions & 177 deletions
Original file line numberDiff line numberDiff line change
@@ -27,204 +27,206 @@ import Foundation
27 27
/// related different pieces of text are to each other.
28 28
@main
29 29
struct Embeddings {
30-
/// The Bedrock service client for making API calls
31-
let bedrock: BedrockService
32-
33-
/// The embedding model - Titan Text Embeddings V2 converts text into 1024-dimensional vectors
34-
let model = BedrockModel.titan_embed_text_v2
35-
36-
/// Main entry point that runs all embedding examples
37-
static func main() async throws {
38-
// Initialize the Bedrock service in US East 1 region
39-
let b = try await BedrockService(region: .useast1)
40-
let e = Embeddings(bedrock: b)
41-
42-
// Run each example in sequence
43-
try await e.simple()
44-
try await e.batch()
45-
try await e.doc()
46-
}
47-
48-
/// Demonstrates basic text similarity using embeddings
49-
///
50-
/// This example shows how to convert text into embedding vectors and calculate
51-
/// similarity between different texts. The first two sentences should be more
52-
/// similar than the first and third, even though they use different words.
53-
func simple() async throws {
54-
55-
// Three test sentences: two similar, one different
56-
let text1 = "The cat sat on the mat"
57-
let text2 = "A feline rested on the rug" // Similar meaning, different words
58-
let text3 = "Quantum computing uses qubits" // Completely different topic
59-
60-
// Convert each text into an embedding vector (array of numbers)
61-
let embedding1 = try await bedrock.embed(text1, with: model)
62-
let embedding2 = try await bedrock.embed(text2, with: model)
63-
let embedding3 = try await bedrock.embed(text3, with: model)
64-
65-
// Calculate how similar the texts are using cosine similarity
66-
// Values range from -1 (opposite) to 1 (identical)
67-
let similarity12 = cosineSimilarity(embedding1, embedding2)
68-
let similarity13 = cosineSimilarity(embedding1, embedding3)
69-
70-
print("Similarity between text1 and text2: \(similarity12)")
71-
print("Similarity between text1 and text3: \(similarity13)")
72-
}
73-
74-
/// Calculates cosine similarity between two embedding vectors
75-
///
76-
/// Cosine similarity measures the angle between two vectors, focusing on direction
77-
/// rather than magnitude. Perfect for comparing embeddings.
78-
///
79-
/// - Parameters:
80-
/// - a: First embedding vector
81-
/// - b: Second embedding vector
82-
/// - Returns: Similarity score from -1 (opposite) to 1 (identical)
83-
func cosineSimilarity(_ a: [Double], _ b: [Double]) -> Double {
84-
// Calculate dot product (multiply corresponding elements and sum)
85-
let dotProduct = zip(a, b).map { $0 * $1 }.reduce(0, +)
86-
// Calculate the magnitude (length) of each vector
87-
let magnitudeA = sqrt(a.map { $0 * $0 }.reduce(0, +))
88-
let magnitudeB = sqrt(b.map { $0 * $0 }.reduce(0, +))
89-
// Cosine similarity = dot product / (magnitude A × magnitude B)
90-
return dotProduct / (magnitudeA * magnitudeB)
91-
}
92-
93-
/// Demonstrates batch processing and finding similar texts in a collection
94-
///
95-
/// This example shows how to process multiple texts and find the most similar
96-
/// text to a given query. Useful for grouping similar content.
97-
func batch() async throws {
98-
// Sample texts: mix of tech companies and fruits
99-
let texts = [
100-
"Apple is a technology company",
101-
"Bananas are yellow fruits",
102-
"Microsoft develops software",
103-
"Oranges are citrus fruits",
104-
"Google creates search engines",
105-
]
106-
107-
var embeddings: [[Double]] = []
108-
109-
// Convert each text to its embedding vector
110-
for text in texts {
111-
let embedding = try await bedrock.embed(text, with: model)
112-
embeddings.append(embedding)
30+
/// The Bedrock service client for making API calls
31+
let bedrock: BedrockService
32+
33+
/// The embedding model - Titan Text Embeddings V2 converts text into 1024-dimensional vectors
34+
let model = BedrockModel.titan_embed_text_v2
35+
36+
/// Main entry point that runs all embedding examples
37+
static func main() async throws {
38+
// Initialize the Bedrock service in US East 1 region
39+
let b = try await BedrockService(region: .useast1)
40+
let e = Embeddings(bedrock: b)
41+
42+
// Run each example in sequence
43+
try await e.simple()
44+
try await e.batch()
45+
try await e.doc()
113 46
}
114 47

115-
/// Finds the most similar text to a given query text
116-
func findMostSimilar(to queryIndex: Int, in embeddings: [[Double]]) -> Int {
117-
var maxSimilarity = -1.0 // Start with lowest possible similarity
118-
var mostSimilarIndex = 0
48+
/// Demonstrates basic text similarity using embeddings
49+
///
50+
/// This example shows how to convert text into embedding vectors and calculate
51+
/// similarity between different texts. The first two sentences should be more
52+
/// similar than the first and third, even though they use different words.
53+
func simple() async throws {
54+
55+
// Three test sentences: two similar, one different
56+
let text1 = "The cat sat on the mat"
57+
let text2 = "A feline rested on the rug" // Similar meaning, different words
58+
let text3 = "Quantum computing uses qubits" // Completely different topic
59+
60+
// Convert each text into an embedding vector (array of numbers)
61+
let embedding1 = try await bedrock.embed(text1, with: model)
62+
let embedding2 = try await bedrock.embed(text2, with: model)
63+
let embedding3 = try await bedrock.embed(text3, with: model)
64+
65+
// Calculate how similar the texts are using cosine similarity
66+
// Values range from -1 (opposite) to 1 (identical)
67+
let similarity12 = cosineSimilarity(embedding1, embedding2)
68+
let similarity13 = cosineSimilarity(embedding1, embedding3)
69+
70+
print("Similarity between text1 and text2: \(similarity12)")
71+
print("Similarity between text1 and text3: \(similarity13)")
72+
}
119 73

120-
// Compare query text with all other texts
121-
for (index, embedding) in embeddings.enumerated() {
122-
guard index != queryIndex else { continue } // Skip comparing with itself
74+
/// Calculates cosine similarity between two embedding vectors
75+
///
76+
/// Cosine similarity measures the angle between two vectors, focusing on direction
77+
/// rather than magnitude. Perfect for comparing embeddings.
78+
///
79+
/// - Parameters:
80+
/// - a: First embedding vector
81+
/// - b: Second embedding vector
82+
/// - Returns: Similarity score from -1 (opposite) to 1 (identical)
83+
func cosineSimilarity(_ a: [Double], _ b: [Double]) -> Double {
84+
// Calculate dot product (multiply corresponding elements and sum)
85+
let dotProduct = zip(a, b).map { $0 * $1 }.reduce(0, +)
86+
// Calculate the magnitude (length) of each vector
87+
let magnitudeA = sqrt(a.map { $0 * $0 }.reduce(0, +))
88+
let magnitudeB = sqrt(b.map { $0 * $0 }.reduce(0, +))
89+
// Cosine similarity = dot product / (magnitude A × magnitude B)
90+
return dotProduct / (magnitudeA * magnitudeB)
91+
}
123 92

124-
let similarity = cosineSimilarity(embeddings[queryIndex], embedding)
125-
if similarity > maxSimilarity {
126-
maxSimilarity = similarity
127-
mostSimilarIndex = index
93+
/// Demonstrates batch processing and finding similar texts in a collection
94+
///
95+
/// This example shows how to process multiple texts and find the most similar
96+
/// text to a given query. Useful for grouping similar content.
97+
func batch() async throws {
98+
// Sample texts: mix of tech companies and fruits
99+
let texts = [
100+
"Apple is a technology company",
101+
"Bananas are yellow fruits",
102+
"Microsoft develops software",
103+
"Oranges are citrus fruits",
104+
"Google creates search engines",
105+
]
106+
107+
var embeddings: [[Double]] = []
108+
109+
// Convert each text to its embedding vector
110+
for text in texts {
111+
let embedding = try await bedrock.embed(text, with: model)
112+
embeddings.append(embedding)
128 113
}
129-
}
130 114

131-
return mostSimilarIndex
132-
}
115+
/// Finds the most similar text to a given query text
116+
func findMostSimilar(to queryIndex: Int, in embeddings: [[Double]]) -> Int {
117+
var maxSimilarity = -1.0 // Start with lowest possible similarity
118+
var mostSimilarIndex = 0
119+
120+
// Compare query text with all other texts
121+
for (index, embedding) in embeddings.enumerated() {
122+
guard index != queryIndex else { continue } // Skip comparing with itself
123+
124+
let similarity = cosineSimilarity(embeddings[queryIndex], embedding)
125+
if similarity > maxSimilarity {
126+
maxSimilarity = similarity
127+
mostSimilarIndex = index
128+
}
129+
}
130+
131+
return mostSimilarIndex
132+
}
133+
134+
// Find what's most similar to "Apple is a technology company"
135+
let queryIndex = 0
136+
let similarIndex = findMostSimilar(to: queryIndex, in: embeddings)
137+
print("Most similar to '\(texts[queryIndex])': '\(texts[similarIndex])'")
133 138

134-
// Find what's most similar to "Apple is a technology company"
135-
let queryIndex = 0
136-
let similarIndex = findMostSimilar(to: queryIndex, in: embeddings)
137-
print("Most similar to '\(texts[queryIndex])': '\(texts[similarIndex])'")
138-
139-
}
140-
/// Demonstrates document storage and semantic search
141-
///
142-
/// This example shows how to build a simple document database with embeddings
143-
/// and perform semantic search (find by meaning, not exact words).
144-
func doc() async throws {
145-
146-
// Create a document store that can search by meaning
147-
let store = DocumentStore(bedrock: bedrock, model: model, similaryFn: cosineSimilarity)
148-
149-
// Add some programming-related documents
150-
try await store.addDocument("Swift is a programming language developed by Apple", id: "doc1")
151-
try await store.addDocument(
152-
"Python is popular for data science and machine learning", id: "doc2")
153-
try await store.addDocument("JavaScript runs in web browsers and Node.js", id: "doc3")
154-
155-
// Search using natural language - notice we don't use exact words
156-
let results = try await store.search("Programming language for iOS", topK: 1)
157-
for doc in results {
158-
print("Found: \(doc.content)")
159 139
}
160-
}
140+
/// Demonstrates document storage and semantic search
141+
///
142+
/// This example shows how to build a simple document database with embeddings
143+
/// and perform semantic search (find by meaning, not exact words).
144+
func doc() async throws {
145+
146+
// Create a document store that can search by meaning
147+
let store = DocumentStore(bedrock: bedrock, model: model, similaryFn: cosineSimilarity)
148+
149+
// Add some programming-related documents
150+
try await store.addDocument("Swift is a programming language developed by Apple", id: "doc1")
151+
try await store.addDocument(
152+
"Python is popular for data science and machine learning",
153+
id: "doc2"
154+
)
155+
try await store.addDocument("JavaScript runs in web browsers and Node.js", id: "doc3")
156+
157+
// Search using natural language - notice we don't use exact words
158+
let results = try await store.search("Programming language for iOS", topK: 1)
159+
for doc in results {
160+
print("Found: \(doc.content)")
161+
}
162+
}
161 163
}
162 164

163 165
/// Represents a document with its content and embedding vector
164 166
///
165 167
/// Stores both the original text and its numerical representation for fast searches.
166 168
struct Document {
167-
/// Unique identifier for the document
168-
let id: String
169-
/// The original text content
170-
let content: String
171-
/// The embedding vector representing the document's meaning
172-
let embedding: [Double]
169+
/// Unique identifier for the document
170+
let id: String
171+
/// The original text content
172+
let content: String
173+
/// The embedding vector representing the document's meaning
174+
let embedding: [Double]
173 175
}
174 176

175 177
/// A simple in-memory document store with semantic search capabilities
176 178
///
177 179
/// Demonstrates how to build a basic vector database that stores documents
178 180
/// with their embeddings and searches by converting queries to embeddings.
179 181
class DocumentStore {
180-
/// Function type for calculating similarity between embeddings
181-
typealias DistanceFn = ([Double], [Double]) -> Double
182-
183-
private var documents: [Document] = []
184-
private let bedrock: BedrockService
185-
private let model: BedrockModel
186-
private let distanceFn: DistanceFn
187-
188-
/// Initialize the document store with a Bedrock service and similarity function
189-
init(bedrock: BedrockService, model: BedrockModel, similaryFn: @escaping DistanceFn) {
190-
self.bedrock = bedrock
191-
self.model = model
192-
self.distanceFn = similaryFn
193-
}
194-
195-
/// Adds a new document to the store
196-
///
197-
/// The document's text is converted to an embedding and stored for future searches.
198-
func addDocument(_ content: String, id: String) async throws {
199-
// Convert the document text to an embedding vector
200-
let embedding = try await bedrock.embed(content, with: model)
201-
let document = Document(id: id, content: content, embedding: embedding)
202-
documents.append(document)
203-
}
204-
205-
/// Searches for documents similar to the query
206-
///
207-
/// Performs semantic search by converting the query to an embedding and
208-
/// comparing it with all stored document embeddings.
209-
///
210-
/// - Parameters:
211-
/// - query: The search query (natural language)
212-
/// - topK: Maximum number of results to return
213-
/// - Returns: Array of most similar documents, sorted by relevance
214-
func search(_ query: String, topK: Int = 3) async throws -> [Document] {
215-
// Convert the search query to an embedding
216-
let queryEmbedding = try await bedrock.embed(query, with: model)
217-
218-
// Calculate similarity between query and each document
219-
let similarities = documents.map { doc in
220-
(doc, distanceFn(queryEmbedding, doc.embedding))
182+
/// Function type for calculating similarity between embeddings
183+
typealias DistanceFn = ([Double], [Double]) -> Double
184+
185+
private var documents: [Document] = []
186+
private let bedrock: BedrockService
187+
private let model: BedrockModel
188+
private let distanceFn: DistanceFn
189+
190+
/// Initialize the document store with a Bedrock service and similarity function
191+
init(bedrock: BedrockService, model: BedrockModel, similaryFn: @escaping DistanceFn) {
192+
self.bedrock = bedrock
193+
self.model = model
194+
self.distanceFn = similaryFn
195+
}
196+
197+
/// Adds a new document to the store
198+
///
199+
/// The document's text is converted to an embedding and stored for future searches.
200+
func addDocument(_ content: String, id: String) async throws {
201+
// Convert the document text to an embedding vector
202+
let embedding = try await bedrock.embed(content, with: model)
203+
let document = Document(id: id, content: content, embedding: embedding)
204+
documents.append(document)
221 205
}
222 206

223-
// Sort by similarity (highest first) and return top results
224-
return
225-
similarities
226-
.sorted { $0.1 > $1.1 } // Sort by similarity score descending
227-
.prefix(topK) // Take only the top K results
228-
.map { $0.0 } // Extract just the documents
229-
}
207+
/// Searches for documents similar to the query
208+
///
209+
/// Performs semantic search by converting the query to an embedding and
210+
/// comparing it with all stored document embeddings.
211+
///
212+
/// - Parameters:
213+
/// - query: The search query (natural language)
214+
/// - topK: Maximum number of results to return
215+
/// - Returns: Array of most similar documents, sorted by relevance
216+
func search(_ query: String, topK: Int = 3) async throws -> [Document] {
217+
// Convert the search query to an embedding
218+
let queryEmbedding = try await bedrock.embed(query, with: model)
219+
220+
// Calculate similarity between query and each document
221+
let similarities = documents.map { doc in
222+
(doc, distanceFn(queryEmbedding, doc.embedding))
223+
}
224+
225+
// Sort by similarity (highest first) and return top results
226+
return
227+
similarities
228+
.sorted { $0.1 > $1.1 } // Sort by similarity score descending
229+
.prefix(topK) // Take only the top K results
230+
.map { $0.0 } // Extract just the documents
231+
}
230 232
}

0 commit comments

Comments
 (0)