Skip to content

Commit e07b33d

Browse files
authored
New Embedding Example (#57)
* more details in the embedding example * fix license * fix yaml * fix format * fix unacceptable language * fix typo * ignore SPI doc builder
1 parent d7f700f commit e07b33d

File tree

3 files changed

+208
-30
lines changed

3 files changed

+208
-30
lines changed

.github/workflows/doc.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,16 @@ jobs:
3232
uses: actions/configure-pages@v5
3333
- name: Install Swift SDK dependencies
3434
run: |
35-
apt-get -y update
35+
apt-get -y update
3636
apt-get install -y libssl-dev make
3737
- name: Build DocC
3838
run: |
3939
make generate-docs
4040
echo "<script>window.location.href += \"/documentation/bedrockservice\"</script>" > docs/index.html;
41-
- name: Sanity Check
41+
- name: Check docc rendered files are present
4242
run: |
4343
echo "Checking for required static assets..."
44-
find ./docs -name "css" -o -name "js" -o -name "favicon*" | head -10
44+
find ./docs -name "css" -o -name "js" -o -name "favicon*"
4545
ls -la ./docs/
4646
- name: Upload artifact
4747
uses: actions/upload-pages-artifact@v4

.spi.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
version: 1
2-
builder:
3-
configs:
4-
- documentation_targets: [BedrockService]
2+
# builder:
3+
# configs:
4+
# - documentation_targets: [BedrockService]
55
external_links:
6-
documentation: "https://build-on-aws.github.io/swift-bedrock-library/documentation/bedrockservice/"
6+
documentation: "https://build-on-aws.github.io/swift-bedrock-library/documentation/bedrockservice/"

Examples/embeddings/Sources/Embeddings.swift

Lines changed: 201 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,41 +14,219 @@
1414
//===----------------------------------------------------------------------===//
1515

1616
import BedrockService
17-
import Logging
17+
import Foundation
1818

19+
/// # Text Embeddings Example
20+
///
21+
/// This example demonstrates how to use Amazon Bedrock's text embedding capabilities
22+
/// to convert text into numerical vectors and perform similarity comparisons.
23+
///
24+
/// ## What are embeddings?
25+
/// Text embeddings are numerical representations of text that capture semantic meaning.
26+
/// Similar texts will have similar embedding vectors, allowing us to measure how
27+
/// related different pieces of text are to each other.
1928
@main
20-
struct Main {
29+
struct Embeddings {
30+
/// The Bedrock service client for making API calls
31+
let bedrock: BedrockService
32+
33+
/// The embedding model - Titan Text Embeddings V2 converts text into 1024-dimensional vectors
34+
let model = BedrockModel.titan_embed_text_v2
35+
36+
/// Main entry point that runs all embedding examples
2137
static func main() async throws {
22-
do {
23-
try await Main.embed()
24-
} catch {
25-
print("Error:\n\(error)")
38+
// Initialize the Bedrock service in US East 1 region
39+
let b = try await BedrockService(region: .useast1)
40+
let e = Embeddings(bedrock: b)
41+
42+
// Run each example in sequence
43+
try await e.simple()
44+
try await e.batch()
45+
try await e.doc()
46+
}
47+
48+
/// Compares three sentences to illustrate semantic similarity.
///
/// Each sentence is embedded with `model`, then the first sentence is scored
/// against the other two with `cosineSimilarity(_:_:)`. The paraphrase is
/// expected to score higher than the unrelated sentence, although the two
/// share almost no words. Both scores are printed to standard output.
///
/// - Throws: Any error raised by `bedrock.embed(_:with:)`.
func simple() async throws {
    // A reference sentence, a paraphrase of it, and a sentence on another topic
    let reference = "The cat sat on the mat"
    let paraphrase = "A feline rested on the rug"
    let unrelated = "Quantum computing uses qubits"

    // Embed the sentences one after another, in the order declared above
    let referenceVector = try await bedrock.embed(reference, with: model)
    let paraphraseVector = try await bedrock.embed(paraphrase, with: model)
    let unrelatedVector = try await bedrock.embed(unrelated, with: model)

    // Score the reference against each of the other two sentences
    let paraphraseScore = cosineSimilarity(referenceVector, paraphraseVector)
    let unrelatedScore = cosineSimilarity(referenceVector, unrelatedVector)

    print("Similarity between text1 and text2: \(paraphraseScore)")
    print("Similarity between text1 and text3: \(unrelatedScore)")
}
73+
74+
/// Calculates cosine similarity between two embedding vectors
75+
///
76+
/// Cosine similarity measures the angle between two vectors, focusing on direction
77+
/// rather than magnitude. Perfect for comparing embeddings.
78+
///
79+
/// - Parameters:
80+
/// - a: First embedding vector
81+
/// - b: Second embedding vector
82+
/// - Returns: Similarity score from -1 (opposite) to 1 (identical)
83+
func cosineSimilarity(_ a: [Double], _ b: [Double]) -> Double {
84+
// Calculate dot product (multiply corresponding elements and sum)
85+
let dotProduct = zip(a, b).map { $0 * $1 }.reduce(0, +)
86+
// Calculate the magnitude (length) of each vector
87+
let magnitudeA = sqrt(a.map { $0 * $0 }.reduce(0, +))
88+
let magnitudeB = sqrt(b.map { $0 * $0 }.reduce(0, +))
89+
// Cosine similarity = dot product / (magnitude A × magnitude B)
90+
return dotProduct / (magnitudeA * magnitudeB)
91+
}
92+
93+
/// Embeds a small collection of sentences and reports the closest match to the first.
///
/// The sentences are embedded one at a time, then every other sentence is
/// scored against the first with `cosineSimilarity(_:_:)`. The best match is
/// printed to standard output.
///
/// - Throws: Any error raised by `bedrock.embed(_:with:)`.
func batch() async throws {
    // Sample sentences: a mix of technology companies and fruits
    let texts = [
        "Apple is a technology company",
        "Bananas are yellow fruits",
        "Microsoft develops software",
        "Oranges are citrus fruits",
        "Google creates search engines",
    ]

    // Embed sequentially so that `vectors[i]` corresponds to `texts[i]`
    var vectors: [[Double]] = []
    for sentence in texts {
        try await vectors.append(bedrock.embed(sentence, with: model))
    }

    /// Returns the index of the vector that scores highest against the vector at `queryIndex`.
    ///
    /// The query's own position is skipped. Index 0 is returned when no
    /// candidate scores strictly above -1.
    func findMostSimilar(to queryIndex: Int, in embeddings: [[Double]]) -> Int {
        var best = (index: 0, score: -1.0)

        for candidate in embeddings.indices where candidate != queryIndex {
            let score = cosineSimilarity(embeddings[queryIndex], embeddings[candidate])
            // Strict comparison: on a tie the earlier candidate is kept
            if score > best.score {
                best = (index: candidate, score: score)
            }
        }

        return best.index
    }

    // Look up the closest match to "Apple is a technology company"
    let queryIndex = 0
    let similarIndex = findMostSimilar(to: queryIndex, in: vectors)
    print("Most similar to '\(texts[queryIndex])': '\(texts[similarIndex])'")
}
28-
static func embed() async throws {
29-
var logger = Logger(label: "Embeddings")
30-
logger.logLevel = .debug
140+
/// Demonstrates document storage and semantic search
141+
///
142+
/// This example shows how to build a simple document database with embeddings
143+
/// and perform semantic search (find by meaning, not exact words).
144+
func doc() async throws {
31145

32-
let bedrock = try await BedrockService(
33-
region: .useast1,
34-
logger: logger,
35-
)
146+
// Create a document store that can search by meaning
147+
let store = DocumentStore(bedrock: bedrock, model: model, similarityFn: cosineSimilarity)
36148

37-
// select a model that supports the embeddings modality
38-
// models must be enabled in your AWS account
39-
let model: BedrockModel = .titan_embed_text_v2
149+
// Add some programming-related documents
150+
try await store.addDocument("Swift is a programming language developed by Apple", id: "doc1")
151+
try await store.addDocument(
152+
"Python is popular for data science and machine learning",
153+
id: "doc2"
154+
)
155+
try await store.addDocument("JavaScript runs in web browsers and Node.js", id: "doc3")
40156

41-
guard model.hasEmbeddingsModality() else {
42-
throw MyError.incorrectModality("\(model.name) does not support embeddings")
157+
// Search using natural language - notice we don't use exact words
158+
let results = try await store.search("Programming language for iOS", topK: 1)
159+
for doc in results {
160+
print("Found: \(doc.content)")
43161
}
162+
}
163+
}
44164

45-
// send the request
46-
let reply = try await bedrock.embed("Hello, Vector World", with: model)
165+
/// A piece of text paired with its embedding vector.
///
/// Keeping the vector next to the text means a stored document can be scored
/// against a query without being embedded again.
struct Document {
    /// Identifier supplied by the caller when the document is created
    let id: String

    /// The text exactly as it was supplied
    let content: String

    /// The embedding vector computed for `content`
    let embedding: [Double]
}
176+
177+
/// A simple in-memory document store with semantic search capabilities
178+
///
179+
/// Demonstrates how to build a basic vector database that stores documents
180+
/// with their embeddings and searches by converting queries to embeddings.
181+
class DocumentStore {
182+
/// Function type for calculating similarity between embeddings
183+
typealias DistanceFn = ([Double], [Double]) -> Double
184+
185+
private var documents: [Document] = []
186+
private let bedrock: BedrockService
187+
private let model: BedrockModel
188+
private let distanceFn: DistanceFn
189+
190+
/// Creates a store that embeds text with `model` through `bedrock`.
///
/// - Parameters:
///   - bedrock: Service used to turn text into embedding vectors
///   - model: Embedding model passed to each `embed` call
///   - similarityFn: Scoring function for a pair of embeddings; higher results
///     are treated as more similar when searching
init(bedrock: BedrockService, model: BedrockModel, similarityFn: @escaping DistanceFn) {
    distanceFn = similarityFn
    self.model = model
    self.bedrock = bedrock
}
47196

48-
print(reply)
197+
/// Embeds `content` and appends the resulting document to the store.
///
/// No uniqueness check is made on `id`; adding the same identifier twice
/// stores two documents.
///
/// - Parameters:
///   - content: Text to embed and store
///   - id: Identifier recorded on the stored document
/// - Throws: Any error raised by `bedrock.embed(_:with:)`; nothing is stored in that case.
func addDocument(_ content: String, id: String) async throws {
    let vector = try await bedrock.embed(content, with: model)
    documents.append(Document(id: id, content: content, embedding: vector))
}
50206

51-
enum MyError: Error {
52-
case incorrectModality(String)
207+
/// Returns the stored documents that score highest against `query`.
///
/// The query is embedded with the same model as the documents, every stored
/// document is scored against it with the store's similarity function, and the
/// highest-scoring documents are returned.
///
/// - Parameters:
///   - query: The search query (natural language)
///   - topK: Maximum number of results to return. Zero or a negative value
///     yields an empty result without embedding the query.
/// - Returns: At most `topK` documents, ordered from highest to lowest score
/// - Throws: Any error raised by `bedrock.embed(_:with:)`.
func search(_ query: String, topK: Int = 3) async throws -> [Document] {
    // `prefix(_:)` traps on a negative length, so reject non-positive requests up front
    guard topK > 0 else { return [] }

    // Convert the search query to an embedding
    let queryEmbedding = try await bedrock.embed(query, with: model)

    // Score every stored document against the query, best score first
    let ranked = documents
        .map { (document: $0, score: distanceFn(queryEmbedding, $0.embedding)) }
        .sorted { $0.score > $1.score }

    // Keep the top K entries and drop the scores
    return ranked.prefix(topK).map { $0.document }
}
54232
}

0 commit comments

Comments
 (0)