@@ -27,204 +27,206 @@ import Foundation
2727/// related different pieces of text are to each other.
@main
struct Embeddings {
    /// The Bedrock service client used for every embedding request.
    let bedrock: BedrockService

    /// The embedding model handed to each `embed` call (Titan Text Embeddings V2).
    let model = BedrockModel.titan_embed_text_v2

    /// Entry point: creates the service client and runs every example in order.
    ///
    /// - Throws: Any error raised while creating the client or by one of the examples.
    static func main() async throws {
        // Create the Bedrock client for the `useast1` region.
        let service = try await BedrockService(region: .useast1)
        let examples = Embeddings(bedrock: service)

        // The examples run one after another; the first failure aborts the run.
        try await examples.simple()
        try await examples.batch()
        try await examples.doc()
    }

    /// Embeds three sentences and prints two pairwise similarity scores.
    ///
    /// The first two sentences are paraphrases of each other and the third is
    /// about an unrelated topic, so the first printed score is expected to be
    /// the higher one.
    ///
    /// - Throws: Any error thrown by `bedrock.embed`.
    func simple() async throws {
        // Two sentences with similar meaning and one unrelated sentence.
        let text1 = " The cat sat on the mat "
        let text2 = " A feline rested on the rug "  // Similar meaning, different words
        let text3 = " Quantum computing uses qubits "  // Completely different topic

        // One embedding request per sentence.
        let embedding1 = try await bedrock.embed(text1, with: model)
        let embedding2 = try await bedrock.embed(text2, with: model)
        let embedding3 = try await bedrock.embed(text3, with: model)

        // Compare the first sentence against each of the other two.
        let similarity12 = cosineSimilarity(embedding1, embedding2)
        let similarity13 = cosineSimilarity(embedding1, embedding3)

        print(" Similarity between text1 and text2: \(similarity12) ")
        print(" Similarity between text1 and text3: \(similarity13) ")
    }

    /// Computes the cosine similarity of two vectors.
    ///
    /// The result is the dot product divided by the product of the two
    /// Euclidean magnitudes, so it depends on direction rather than length.
    ///
    /// The dot product is taken over `zip(a, b)`, i.e. over the shorter of the
    /// two inputs, while each magnitude uses its full vector; callers should
    /// pass vectors of equal length for a meaningful score.
    ///
    /// - Parameters:
    ///   - a: First vector.
    ///   - b: Second vector.
    /// - Returns: A score from -1 (opposite) to 1 (identical direction), or `0`
    ///   when the magnitude product is not a positive number (for example when
    ///   either vector is empty or all zeros), instead of the NaN that a
    ///   division by zero would produce.
    func cosineSimilarity(_ a: [Double], _ b: [Double]) -> Double {
        // Sum of element-wise products, accumulated left to right.
        let dotProduct = zip(a, b).reduce(0.0) { $0 + $1.0 * $1.1 }

        // Euclidean length of each vector.
        let magnitudeA = sqrt(a.reduce(0.0) { $0 + $1 * $1 })
        let magnitudeB = sqrt(b.reduce(0.0) { $0 + $1 * $1 })

        // A zero (or NaN) denominator would make the quotient NaN, which breaks
        // every later `>` comparison and sort; report "no similarity" instead.
        let denominator = magnitudeA * magnitudeB
        guard denominator > 0 else { return 0 }

        return dotProduct / denominator
    }

    /// Embeds a small list of texts and prints the one closest to the first.
    ///
    /// - Throws: Any error thrown by `bedrock.embed`.
    func batch() async throws {
        // Sample texts: a mix of tech companies and fruits.
        let texts = [
            " Apple is a technology company ",
            " Bananas are yellow fruits ",
            " Microsoft develops software ",
            " Oranges are citrus fruits ",
            " Google creates search engines ",
        ]

        var embeddings: [[Double]] = []
        embeddings.reserveCapacity(texts.count)

        // One embedding per text, in the same order as `texts`.
        for text in texts {
            let embedding = try await bedrock.embed(text, with: model)
            embeddings.append(embedding)
        }

        /// Returns the index of the embedding most similar to the one at `queryIndex`.
        ///
        /// The query's own index is skipped. If no other embedding scores above
        /// -1, index 0 is returned.
        func findMostSimilar(to queryIndex: Int, in embeddings: [[Double]]) -> Int {
            var bestScore = -1.0  // Lowest possible cosine similarity
            var bestIndex = 0

            for (candidateIndex, candidate) in embeddings.enumerated() where candidateIndex != queryIndex {
                let score = cosineSimilarity(embeddings[queryIndex], candidate)
                if score > bestScore {
                    bestScore = score
                    bestIndex = candidateIndex
                }
            }

            return bestIndex
        }

        // Find what is most similar to the first text.
        let queryIndex = 0
        let similarIndex = findMostSimilar(to: queryIndex, in: embeddings)
        print(" Most similar to ' \(texts[queryIndex]) ': ' \(texts[similarIndex]) ' ")
    }

    /// Builds a small `DocumentStore`, adds three documents, and runs one search.
    ///
    /// The query shares meaning, not exact wording, with the stored documents;
    /// the single best match (`topK: 1`) is printed.
    ///
    /// - Throws: Any error thrown while adding documents or searching.
    func doc() async throws {
        // The store ranks documents with this type's cosine similarity.
        let store = DocumentStore(bedrock: bedrock, model: model, similaryFn: cosineSimilarity)

        // Add some programming-related documents.
        try await store.addDocument(" Swift is a programming language developed by Apple ", id: " doc1 ")
        try await store.addDocument(
            " Python is popular for data science and machine learning ",
            id: " doc2 "
        )
        try await store.addDocument(" JavaScript runs in web browsers and Node.js ", id: " doc3 ")

        // Search with a natural-language query and print each hit.
        let results = try await store.search(" Programming language for iOS ", topK: 1)
        for match in results {
            print(" Found: \(match.content) ")
        }
    }
}
162164
/// A piece of text paired with its embedding vector.
///
/// Keeps the original content next to its numerical representation so a
/// search can compare vectors and still return the readable text.
struct Document {
    /// Identifier supplied by the caller when the document is added.
    let id: String
    /// The original text content.
    let content: String
    /// The embedding vector computed for `content`.
    let embedding: [Double]
}
174176
/// A simple in-memory document store with semantic search.
///
/// Each added document is embedded once and kept in an array. A search embeds
/// the query and scores it against every stored document with the similarity
/// function supplied at initialization. No synchronization is performed here,
/// so callers are responsible for not mutating the store concurrently.
class DocumentStore {
    /// Signature of the scoring function: takes two embedding vectors and
    /// returns a score where a larger value means "more similar".
    typealias DistanceFn = ([Double], [Double]) -> Double

    private var documents: [Document] = []
    private let bedrock: BedrockService
    private let model: BedrockModel
    private let distanceFn: DistanceFn

    /// Creates an empty store.
    ///
    /// - Parameters:
    ///   - bedrock: Service client used to compute embeddings.
    ///   - model: Embedding model passed to every `embed` call.
    ///   - similaryFn: Scoring function used to rank documents against a query.
    ///     (The label's spelling is kept as-is because callers depend on it.)
    init(bedrock: BedrockService, model: BedrockModel, similaryFn: @escaping DistanceFn) {
        self.bedrock = bedrock
        self.model = model
        self.distanceFn = similaryFn
    }

    /// Embeds `content` and appends it to the store.
    ///
    /// - Parameters:
    ///   - content: The document text.
    ///   - id: Identifier stored with the document; uniqueness is not checked.
    /// - Throws: Any error thrown by `bedrock.embed`; nothing is stored in that case.
    func addDocument(_ content: String, id: String) async throws {
        let embedding = try await bedrock.embed(content, with: model)
        documents.append(Document(id: id, content: content, embedding: embedding))
    }

    /// Returns the stored documents that score highest against `query`.
    ///
    /// The query is embedded, scored against every stored document, and the
    /// documents are returned in descending score order.
    ///
    /// - Parameters:
    ///   - query: The search query (natural language).
    ///   - topK: Maximum number of results to return. Values below zero are
    ///     treated as zero.
    /// - Returns: At most `topK` documents, best match first.
    /// - Throws: Any error thrown by `bedrock.embed`.
    func search(_ query: String, topK: Int = 3) async throws -> [Document] {
        let queryEmbedding = try await bedrock.embed(query, with: model)

        // Pair every document with its score against the query.
        let scored = documents.map { document in
            (document: document, score: distanceFn(queryEmbedding, document.embedding))
        }

        // Highest score first.
        let ranked = scored.sorted { $0.score > $1.score }

        // `prefix(_:)` traps on a negative length, so clamp before slicing.
        let limit = max(0, topK)
        return ranked.prefix(limit).map { $0.document }
    }
}
0 commit comments