|
14 | 14 | //===----------------------------------------------------------------------===// |
15 | 15 |
|
16 | 16 | import BedrockService |
17 | | -import Logging |
| 17 | +import Foundation |
18 | 18 |
|
| 19 | +/// # Text Embeddings Example |
| 20 | +/// |
| 21 | +/// This example demonstrates how to use Amazon Bedrock's text embedding capabilities |
| 22 | +/// to convert text into numerical vectors and perform similarity comparisons. |
| 23 | +/// |
| 24 | +/// ## What are embeddings? |
| 25 | +/// Text embeddings are numerical representations of text that capture semantic meaning. |
| 26 | +/// Similar texts will have similar embedding vectors, allowing us to measure how |
| 27 | +/// related different pieces of text are to each other. |
19 | 28 | @main |
20 | | -struct Main { |
| 29 | +struct Embeddings { |
| 30 | + /// The Bedrock service client for making API calls |
| 31 | + let bedrock: BedrockService |
| 32 | + |
| 33 | + /// The embedding model - Titan Text Embeddings V2 converts text into 1024-dimensional vectors |
| 34 | + let model = BedrockModel.titan_embed_text_v2 |
| 35 | + |
/// Program entry point: builds the Bedrock client, then runs every example.
///
/// The examples run one after another in the order `simple`, `batch`, `doc`.
/// An error thrown while creating the client or by any example propagates
/// out of `main`.
static func main() async throws {
    // Create the client for the US East 1 region and hand it to the example runner.
    let examples = try await Embeddings(bedrock: BedrockService(region: .useast1))

    try await examples.simple()
    try await examples.batch()
    try await examples.doc()
}
| 47 | + |
/// Compares three sentences by embedding them and measuring cosine similarity.
///
/// Two of the sentences share a meaning but use different words; the third is
/// about an unrelated topic. The score printed for the first pair is expected
/// to be higher than the score for the first and third sentences.
func simple() async throws {
    // A paraphrase pair plus one off-topic sentence.
    let catSentence = "The cat sat on the mat"
    let felineSentence = "A feline rested on the rug"
    let quantumSentence = "Quantum computing uses qubits"

    // One embedding request per sentence, issued one after another.
    let catVector = try await bedrock.embed(catSentence, with: model)
    let felineVector = try await bedrock.embed(felineSentence, with: model)
    let quantumVector = try await bedrock.embed(quantumSentence, with: model)

    // Score the first sentence against each of the other two.
    let paraphraseScore = cosineSimilarity(catVector, felineVector)
    let unrelatedScore = cosineSimilarity(catVector, quantumVector)

    print("Similarity between text1 and text2: \(paraphraseScore)")
    print("Similarity between text1 and text3: \(unrelatedScore)")
}
| 73 | + |
/// Calculates the cosine similarity between two embedding vectors.
///
/// Cosine similarity is the dot product of the vectors divided by the product
/// of their magnitudes, so it compares direction and ignores length.
///
/// If the vectors differ in length, only the overlapping prefix contributes to
/// the dot product while each magnitude still covers its whole vector. This is
/// the same result as padding the shorter vector with zeros.
///
/// - Parameters:
///   - a: First embedding vector
///   - b: Second embedding vector
/// - Returns: The similarity score, nominally from -1 (opposite direction) to
///   1 (same direction). Returns `0` when the product of the magnitudes is not
///   a positive number (for example an empty or all-zero vector), because the
///   cosine is undefined there and the division would otherwise yield `NaN`.
func cosineSimilarity(_ a: [Double], _ b: [Double]) -> Double {
    // Dot product: multiply corresponding elements and sum, without building
    // an intermediate array.
    let dotProduct = zip(a, b).reduce(0.0) { sum, pair in sum + pair.0 * pair.1 }

    // Magnitude (Euclidean length) of each vector.
    let magnitudeA = a.reduce(0.0) { sum, value in sum + value * value }.squareRoot()
    let magnitudeB = b.reduce(0.0) { sum, value in sum + value * value }.squareRoot()

    // Guard the division: a zero (or NaN) denominator has no meaningful cosine,
    // and a NaN score would also break any ordering built on top of it.
    let denominator = magnitudeA * magnitudeB
    guard denominator > 0 else { return 0 }

    return dotProduct / denominator
}
| 92 | + |
/// Embeds a small set of sentences and reports the closest match to one of them.
///
/// Every sentence is converted to an embedding, then the first sentence is
/// compared against all the others and the best-scoring one is printed.
func batch() async throws {
    // Technology-company sentences interleaved with fruit sentences.
    let texts = [
        "Apple is a technology company",
        "Bananas are yellow fruits",
        "Microsoft develops software",
        "Oranges are citrus fruits",
        "Google creates search engines",
    ]

    // Embed the sentences one at a time, keeping the same order as `texts`.
    var embeddings: [[Double]] = []
    for sentence in texts {
        let vector = try await bedrock.embed(sentence, with: model)
        embeddings.append(vector)
    }

    /// Returns the index of the embedding that scores highest against the one
    /// at `queryIndex`, never comparing the query with itself.
    func findMostSimilar(to queryIndex: Int, in vectors: [[Double]]) -> Int {
        // Fold over every index, carrying the best (index, score) pair so far.
        // The fold starts at index 0 with score -1.0, and a candidate takes over
        // only when its score is strictly greater than the current best.
        let winner = vectors.indices.reduce((index: 0, score: -1.0)) { leader, candidate in
            guard candidate != queryIndex else { return leader }
            let score = cosineSimilarity(vectors[queryIndex], vectors[candidate])
            return score > leader.score ? (index: candidate, score: score) : leader
        }
        return winner.index
    }

    // Look up the nearest neighbour of the first sentence.
    let queryIndex = 0
    let similarIndex = findMostSimilar(to: queryIndex, in: embeddings)
    print("Most similar to '\(texts[queryIndex])': '\(texts[similarIndex])'")
}
28 | | - static func embed() async throws { |
29 | | - var logger = Logger(label: "Embeddings") |
30 | | - logger.logLevel = .debug |
/// Builds a small document store and runs one semantic search against it.
///
/// Three programming-related documents are added, then the store is queried
/// with a phrase that does not repeat their exact wording, and the content of
/// each returned document is printed.
func doc() async throws {
    // The store embeds text through Bedrock and ranks with cosine similarity.
    let store = DocumentStore(bedrock: bedrock, model: model, similarityFn: cosineSimilarity)

    // Documents to index, added in this order.
    let corpus = [
        (id: "doc1", text: "Swift is a programming language developed by Apple"),
        (id: "doc2", text: "Python is popular for data science and machine learning"),
        (id: "doc3", text: "JavaScript runs in web browsers and Node.js"),
    ]
    for entry in corpus {
        try await store.addDocument(entry.text, id: entry.id)
    }

    // Ask for the single best match to a natural-language query.
    let matches = try await store.search("Programming language for iOS", topK: 1)
    matches.forEach { match in
        print("Found: \(match.content)")
    }
}
| 163 | +} |
44 | 164 |
|
45 | | - // send the request |
46 | | - let reply = try await bedrock.embed("Hello, Vector World", with: model) |
/// A piece of text paired with the embedding vector computed for it.
///
/// Keeping the vector next to the text lets a search compare embeddings
/// without embedding the document again.
struct Document {
    /// Identifier given to the document.
    let id: String

    /// The document's original text.
    let content: String

    /// Embedding vector associated with `content`.
    let embedding: [Double]
}
| 176 | + |
/// A minimal in-memory document store that supports semantic search.
///
/// Each added document is embedded through Bedrock and kept in an array. A
/// search embeds the query, scores it against every stored document with the
/// injected similarity function, and returns the highest-scoring documents.
class DocumentStore {
    /// Scores how alike two embeddings are; a higher value means a closer match.
    typealias SimilarityFn = ([Double], [Double]) -> Double

    /// Earlier name of `SimilarityFn`, kept so existing references still compile.
    /// The function is used as a similarity (results are ranked highest first),
    /// not as a distance.
    typealias DistanceFn = SimilarityFn

    /// Documents added so far, in insertion order.
    private var documents: [Document] = []
    /// Client used to request embeddings.
    private let bedrock: BedrockService
    /// Embedding model passed to every `embed` request.
    private let model: BedrockModel
    /// Scoring function applied to (query embedding, document embedding).
    private let similarityFn: SimilarityFn

    /// Creates an empty store.
    ///
    /// - Parameters:
    ///   - bedrock: Client used to request embeddings.
    ///   - model: Embedding model used for both documents and queries.
    ///   - similarityFn: Scoring function; higher results rank earlier in `search`.
    init(bedrock: BedrockService, model: BedrockModel, similarityFn: @escaping DistanceFn) {
        self.bedrock = bedrock
        self.model = model
        self.similarityFn = similarityFn
    }

    /// Embeds `content` and appends it to the store.
    ///
    /// - Parameters:
    ///   - content: The document text to embed and keep.
    ///   - id: Identifier stored with the document. The store does not check it
    ///     for uniqueness.
    /// - Throws: Rethrows any error from the embedding request; nothing is
    ///   stored in that case.
    func addDocument(_ content: String, id: String) async throws {
        let embedding = try await bedrock.embed(content, with: model)
        documents.append(Document(id: id, content: content, embedding: embedding))
    }

    /// Returns the stored documents that score highest against `query`.
    ///
    /// The query is embedded with the same model as the documents, scored
    /// against every stored embedding, and the best `topK` are returned.
    ///
    /// - Parameters:
    ///   - query: The search query (natural language).
    ///   - topK: Maximum number of results to return. Zero or a negative value
    ///     yields an empty result.
    /// - Returns: Up to `topK` documents ordered from highest to lowest score;
    ///   empty when the store holds no documents.
    /// - Throws: Rethrows any error from the embedding request.
    func search(_ query: String, topK: Int = 3) async throws -> [Document] {
        // `prefix(_:)` traps on a negative length, and an empty store can never
        // produce a match, so answer both cases up front without spending an
        // embedding request.
        guard topK > 0, !documents.isEmpty else { return [] }

        let queryEmbedding = try await bedrock.embed(query, with: model)

        // Pair each document with its score against the query.
        let scored = documents.map { document in
            (document: document, score: similarityFn(queryEmbedding, document.embedding))
        }

        // Highest score first, then keep only the requested number of documents.
        return scored
            .sorted { $0.score > $1.score }
            .prefix(topK)
            .map { $0.document }
    }
}
0 commit comments