Skip to content

Commit 8f21ea4

Browse files
authored
Merge pull request #76 from rryam/perf/realistic-benchmarks
Add realistic workload benchmark profile and optional CI stage
2 parents b2f0bb2 + 73f87f0 commit 8f21ea4

File tree

5 files changed

+550
-2
lines changed

5 files changed

+550
-2
lines changed

Tests/PerformanceTests/Helpers/PerformanceTestConfig.swift

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,28 @@ import Foundation
22
@testable import VecturaKit
33

44
enum PerformanceTestConfig {
5+
// Process environment, read through a single static constant so every flag
// below consults the same dictionary.
private static let environment = ProcessInfo.processInfo.environment
6+
57
// True only when VECTURA_PERF_USE_SWIFT_EMBEDDER is exactly "1".
static let useSwiftEmbedder: Bool =
6-
ProcessInfo.processInfo.environment["VECTURA_PERF_USE_SWIFT_EMBEDDER"] == "1"
8+
environment["VECTURA_PERF_USE_SWIFT_EMBEDDER"] == "1"
9+
10+
// Lowercased value of VECTURA_PERF_PROFILE; "default" when the variable is unset.
static let performanceProfile: String =
11+
(environment["VECTURA_PERF_PROFILE"] ?? "default").lowercased()
12+
13+
// True when the profile is "realistic" or VECTURA_PERF_REALISTIC is exactly "1".
static let runRealisticBenchmarks: Bool =
14+
performanceProfile == "realistic" || environment["VECTURA_PERF_REALISTIC"] == "1"
715

816
static let defaultDimension = 384
17+
// Realistic-profile tunables. Each one takes its environment variable when that
// holds a positive integer; otherwise it uses the first default when
// `useSwiftEmbedder` is true and the second (larger) default when it is false.

// Override: VECTURA_PERF_REALISTIC_DOCS.
static let realisticDocumentCount =
18+
intEnv("VECTURA_PERF_REALISTIC_DOCS", default: useSwiftEmbedder ? 2_500 : 12_000)
19+
// Override: VECTURA_PERF_REALISTIC_QUERIES.
static let realisticQueryCount =
20+
intEnv("VECTURA_PERF_REALISTIC_QUERIES", default: useSwiftEmbedder ? 200 : 600)
21+
// Override: VECTURA_PERF_REALISTIC_MIXED_OPS.
static let realisticMixedOperationCount =
22+
intEnv("VECTURA_PERF_REALISTIC_MIXED_OPS", default: useSwiftEmbedder ? 300 : 1_200)
23+
// Override: VECTURA_PERF_REALISTIC_CLIENTS.
static let realisticConcurrentClients =
24+
intEnv("VECTURA_PERF_REALISTIC_CLIENTS", default: useSwiftEmbedder ? 4 : 12)
25+
// Override: VECTURA_PERF_REALISTIC_COLD_RUNS.
static let realisticColdRuns =
26+
intEnv("VECTURA_PERF_REALISTIC_COLD_RUNS", default: useSwiftEmbedder ? 8 : 24)
927

1028
@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, watchOS 11.0, *)
1129
static func makeEmbedder(modelSource: VecturaModelSource = .default) -> any VecturaEmbedder {
@@ -15,6 +33,13 @@ enum PerformanceTestConfig {
1533
return DeterministicEmbedder(dimensionValue: defaultDimension)
1634
}
1735

36+
/// Reads a positive integer override from the captured environment.
///
/// - Parameters:
///   - key: Name of the environment variable to look up.
///   - defaultValue: Value returned when the variable is missing, does not
///     parse as an `Int`, or is zero or negative.
/// - Returns: The parsed value when it is greater than zero, otherwise `defaultValue`.
private static func intEnv(_ key: String, default defaultValue: Int) -> Int {
  if let text = environment[key], let parsed = Int(text), parsed > 0 {
    return parsed
  }
  return defaultValue
}
42+
1843
private struct DeterministicEmbedder: VecturaEmbedder {
1944
let dimensionValue: Int
2045

Tests/PerformanceTests/Helpers/TestDataGenerator.swift

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,25 @@ public struct TestDataGenerator: Sendable {
5555
"financial services"
5656
]
5757

58+
/// Extra tokens (metric names, infrastructure abbreviations, hyphenated terms)
/// that `generateRealisticCorpus` adds to its vocabulary and
/// `generateRealisticQueries` draws query terms from.
private static let noisyTokens = [
59+
"latency",
60+
"throughput",
61+
"p95",
62+
"p99",
63+
"cache-hit",
64+
"cache-miss",
65+
"retry",
66+
"timeout",
67+
"batch",
68+
"vector-db",
69+
"k8s",
70+
"grpc",
71+
"ssd",
72+
"cold-start",
73+
"hot-path",
74+
"rollback"
75+
]
76+
5877
public init() {}
5978

6079
/// Generate a collection of test documents.
@@ -116,6 +135,115 @@ public struct TestDataGenerator: Sendable {
116135
return "Doc\(index): " + docWords.joined(separator: " ")
117136
}
118137
}
138+
139+
/// Build a synthetic corpus whose documents vary in length and carry lexical noise.
///
/// Every document is one of two kinds:
/// - a fresh record: a `Document <index>:` header with a topic, context, and domain
///   phrase, then randomly drawn vocabulary words, one punctuation mark, and an
///   `err=<n>` suffix;
/// - a near-duplicate: an earlier document (fresh or duplicate) with
///   `Variant-<index>`, `incident=<n>`, and `status=<ok|warn|error>` appended.
///
/// In a fresh body, the word at position 0 and every 37th position after it gets a
/// numeric `-<n>` suffix. The first document is always fresh because there is
/// nothing to duplicate yet.
///
/// - Parameters:
///   - count: Number of documents to generate; `0` or less yields an empty array
///   - minWords: Minimum body words in a fresh document (raised to 5 when smaller)
///   - maxWords: Maximum body words in a fresh document (raised to the effective
///     minimum when smaller)
///   - duplicateRate: Fraction of documents that are near-duplicates, clamped to [0, 1]
///   - seed: Seed for reproducibility; `424242` is used when `nil`
/// - Returns: Generated realistic corpus
public func generateRealisticCorpus(
  count: Int,
  minWords: Int = 20,
  maxWords: Int = 260,
  duplicateRate: Double = 0.08,
  seed: UInt64? = nil
) -> [String] {
  if count <= 0 { return [] }

  let lowerWordBound = max(5, minWords)
  let upperWordBound = max(lowerWordBound, maxWords)
  let duplicateProbability = min(max(duplicateRate, 0), 1)
  let wordSpan = upperWordBound - lowerWordBound + 1

  var rng = SeededRandomGenerator(seed: seed ?? 424242)

  // Vocabulary order: topic words, context words, domain words, then noisy tokens.
  let phraseSources = Self.topics + Self.contexts + Self.domains
  let phraseWords = phraseSources.flatMap { $0.split(separator: " ").map(String.init) }
  let vocabulary = phraseWords + Self.noisyTokens

  var documents: [String] = []
  documents.reserveCapacity(count)

  for docIndex in 0..<count {
    // The emptiness check comes first so no random draw is consumed while the
    // corpus is still empty.
    if !documents.isEmpty && rng.nextDouble() < duplicateProbability {
      let source = documents[rng.nextInt(upperBound: documents.count)]
      let incident = 1000 + rng.nextInt(upperBound: 9000)
      let status = rng.pick(from: ["ok", "warn", "error"])
      documents.append("\(source) Variant-\(docIndex) incident=\(incident) status=\(status)")
    } else {
      let topic = rng.pick(from: Self.topics)
      let context = rng.pick(from: Self.contexts)
      let domain = rng.pick(from: Self.domains)
      let bodyLength = lowerWordBound + rng.nextInt(upperBound: wordSpan)

      let bodyWords: [String] = (0..<bodyLength).map { position in
        let word = vocabulary[rng.nextInt(upperBound: vocabulary.count)]
        return position % 37 == 0 ? "\(word)-\(rng.nextInt(upperBound: 500))" : word
      }

      let terminator = rng.pick(from: [".", ".", ".", ";", "!", "?"])
      let body = bodyWords.joined(separator: " ")
      let errorCode = rng.nextInt(upperBound: 12)
      let header = "Document \(docIndex): \(topic) \(context) \(domain)."
      documents.append("\(header) \(body)\(terminator) err=\(errorCode)")
    }
  }

  return documents
}
210+
211+
/// Produce query traffic that mixes short, medium, and long queries.
///
/// For each query a draw from `nextInt(upperBound: 100)` selects the shape:
/// - below 40: a single topic phrase
/// - 40 through 84: three entries picked from topics, domains, and noisy tokens
/// - 85 and above: a `how to optimize … for … with …` question
///
/// - Parameters:
///   - count: Number of queries; `0` or less yields an empty array
///   - seed: Seed for reproducibility; `898989` is used when `nil`
/// - Returns: Array of query strings
public func generateRealisticQueries(
  count: Int,
  seed: UInt64? = nil
) -> [String] {
  if count <= 0 { return [] }

  var rng = SeededRandomGenerator(seed: seed ?? 898989)
  let mixedPool = Self.topics + Self.domains + Self.noisyTokens

  return (0..<count).map { _ -> String in
    switch rng.nextInt(upperBound: 100) {
    case ..<40:
      return rng.pick(from: Self.topics)
    case ..<85:
      let head = rng.pick(from: mixedPool)
      let middle = rng.pick(from: mixedPool)
      let tail = rng.pick(from: mixedPool)
      return "\(head) \(middle) \(tail)"
    default:
      let subject = rng.pick(from: Self.topics)
      let field = rng.pick(from: Self.domains)
      let constraint = rng.pick(from: Self.noisyTokens)
      return "how to optimize \(subject) for \(field) with \(constraint)"
    }
  }
}
119247
}
120248

121249
/// Simple seeded random number generator for reproducible tests.
@@ -132,4 +260,18 @@ private struct SeededRandomGenerator {
132260
state = state &* 6364136223846793005 &+ 1442695040888963407
133261
return Int(state >> 32)
134262
}
263+
264+
/// Draws the next value and reduces it modulo `upperBound`.
///
/// - Parameter upperBound: Exclusive limit for the result. When it is zero or
///   negative, `0` is returned without calling `next()`.
/// - Returns: `next() % upperBound`, or `0` for a non-positive bound.
mutating func nextInt(upperBound: Int) -> Int {
  if upperBound <= 0 { return 0 }
  let raw = next()
  return raw % upperBound
}
268+
269+
/// Draws a pseudo-random `Double` by scaling an integer draw bounded by `UInt32.max`.
///
/// NOTE(review): the result lies in 0..<1 provided `next()` never returns a
/// negative value — confirm against `next()`.
///
/// - Returns: The integer draw divided by `UInt32.max`.
mutating func nextDouble() -> Double {
  let scale = UInt32.max
  let draw = nextInt(upperBound: Int(scale))
  return Double(UInt64(draw)) / Double(scale)
}
273+
274+
/// Picks one element of `values` using the next pseudo-random index.
///
/// - Parameter values: Candidates to choose from; must not be empty.
/// - Returns: The element at index `nextInt(upperBound: values.count)`.
mutating func pick<T>(from values: [T]) -> T {
  // `nextInt` returns 0 for an empty array, which would otherwise surface as an
  // opaque out-of-range subscript trap; state the contract explicitly instead.
  precondition(!values.isEmpty, "pick(from:) requires a non-empty array")
  return values[nextInt(upperBound: values.count)]
}
135277
}

Tests/PerformanceTests/README.md

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ Tests/PerformanceTests/
5757
├── MemoryProfilerSuite.swift # Memory profiling (5 tests)
5858
├── ParameterTuningSuite.swift # Parameter optimization (5 tests)
5959
├── AccuracyTests.swift # Search quality (4 tests)
60+
├── RealisticWorkloadSuite.swift # Harder real-world profile (2 opt-in tests)
6061
6162
├── Helpers/
6263
│ ├── PerformanceMetrics.swift # Metrics collection
@@ -67,7 +68,7 @@ Tests/PerformanceTests/
6768
└── README.md # Custom data guide
6869
```
6970

70-
**Total: 26 performance tests**
71+
**Total: 28 performance tests (2 realistic tests are opt-in)**
7172

7273
### Embedder Selection (Speed vs Realism)
7374

@@ -78,6 +79,24 @@ downloading CoreML models. To run with real embeddings, set:
7879
VECTURA_PERF_USE_SWIFT_EMBEDDER=1
7980
```
8081

82+
### Realistic Profile (Harder Benchmark Mode)
83+
84+
Enable the realistic suite to simulate heavier production-like workloads:
85+
86+
```bash
87+
VECTURA_PERF_PROFILE=realistic swift test --filter RealisticWorkloadSuite
88+
```
89+
90+
Optional knobs:
91+
92+
```bash
93+
export VECTURA_PERF_REALISTIC_DOCS=12000
94+
export VECTURA_PERF_REALISTIC_QUERIES=600
95+
export VECTURA_PERF_REALISTIC_MIXED_OPS=1200
96+
export VECTURA_PERF_REALISTIC_CLIENTS=12
97+
export VECTURA_PERF_REALISTIC_COLD_RUNS=24
98+
```
99+
81100
---
82101

83102
## Running Tests
@@ -105,6 +124,9 @@ swift test --filter ParameterTuningSuite
105124
# Validates search quality with 1K documents
106125
swift test --filter AccuracyTests
107126

127+
# Realistic workload profile (opt-in, heavier)
128+
VECTURA_PERF_PROFILE=realistic swift test --filter RealisticWorkloadSuite
129+
108130
# All performance tests (~30 min)
109131
# Note: May fail on memory-constrained systems
110132
swift test --filter PerformanceTests
@@ -136,6 +158,10 @@ swift test --filter MemoryProfilerSuite.strategyMemoryComparison
136158

137159
# Accuracy testing
138160
swift test --filter AccuracyTests.basicAccuracyTest
161+
162+
# Realistic profile
163+
VECTURA_PERF_PROFILE=realistic swift test --filter RealisticWorkloadSuite.realisticFullMemoryProfile
164+
VECTURA_PERF_PROFILE=realistic swift test --filter RealisticWorkloadSuite.realisticIndexedProfile
139165
```
140166

141167
---
@@ -329,6 +355,35 @@ Reduced from 2K to 1K documents and reduced query counts to prevent memory issue
329355

330356
---
331357

358+
### 6. RealisticWorkloadSuite (2 tests, opt-in)
359+
360+
**Purpose:** Stress the system with harder workload characteristics closer to production traffic.
361+
362+
**Key Tests:**
363+
- `realisticFullMemoryProfile` - cold vs warm behavior, multi-client queueing, mixed read/write traffic
364+
- `realisticIndexedProfile` - same workload in indexed mode with cold/warm and tail latency focus
365+
366+
**What makes it harder:**
367+
- Larger document/query defaults than other suites
368+
- Variable document lengths with lexical noise and near-duplicates
369+
- Cold-start measurements (new instance + first search)
370+
- Multi-client pressure (concurrent callers)
371+
- Mixed workload phase (70% search / 30% writes)
372+
- Tail metrics emphasized (P99 and P99.9)
373+
374+
**Enable:**
375+
376+
```bash
377+
VECTURA_PERF_PROFILE=realistic swift test --filter RealisticWorkloadSuite
378+
```
379+
380+
**Use Cases:**
381+
- Catching regressions hidden by small synthetic datasets
382+
- Evaluating cold-start impact separately from warm steady-state
383+
- Measuring tail-latency behavior under heavier pressure
384+
385+
---
386+
332387
## Customizing Tests
333388

334389
### Modify Dataset Sizes

0 commit comments

Comments
 (0)