rryam
diff --git a/‎Tests/PerformanceTests/Helpers/PerformanceTestConfig.swift‎
Lines changed: 26 additions & 1 deletion b/‎Tests/PerformanceTests/Helpers/PerformanceTestConfig.swift‎
Lines changed: 26 additions & 1 deletion
diff --git a/‎Tests/PerformanceTests/Helpers/TestDataGenerator.swift‎
Lines changed: 142 additions & 0 deletions b/‎Tests/PerformanceTests/Helpers/TestDataGenerator.swift‎
Lines changed: 142 additions & 0 deletions
diff --git a/‎Tests/PerformanceTests/README.md‎
Lines changed: 56 additions & 1 deletion b/‎Tests/PerformanceTests/README.md‎
Lines changed: 56 additions & 1 deletion
@@ -2,10 +2,28 @@ import Foundation
 @testable import VecturaKit
 
 enum PerformanceTestConfig {
+  /// Process environment, captured once; every setting below is read from it.
+  private static let environment = ProcessInfo.processInfo.environment
+
+  /// `true` only when `VECTURA_PERF_USE_SWIFT_EMBEDDER` is exactly `"1"`.
   static let useSwiftEmbedder: Bool =
-    ProcessInfo.processInfo.environment["VECTURA_PERF_USE_SWIFT_EMBEDDER"] == "1"
+    environment["VECTURA_PERF_USE_SWIFT_EMBEDDER"] == "1"
+
+  /// Lower-cased value of `VECTURA_PERF_PROFILE`, or `"default"` when the variable is unset.
+  static let performanceProfile: String =
+    (environment["VECTURA_PERF_PROFILE"] ?? "default").lowercased()
+
+  /// `true` when the profile is `"realistic"` or `VECTURA_PERF_REALISTIC` is exactly `"1"`.
+  static let runRealisticBenchmarks: Bool =
+    performanceProfile == "realistic" || environment["VECTURA_PERF_REALISTIC"] == "1"
 
+  /// Dimension handed to the deterministic embedder.
   static let defaultDimension = 384
+  // Realistic-profile sizes. Each one is read through `intEnv`, so an unset, non-integer,
+  // or non-positive value yields the default; the defaults are smaller when the Swift
+  // embedder is enabled.
+  /// `VECTURA_PERF_REALISTIC_DOCS`; default 2_500 with the Swift embedder, 12_000 otherwise.
+  static let realisticDocumentCount =
+    intEnv("VECTURA_PERF_REALISTIC_DOCS", default: useSwiftEmbedder ? 2_500 : 12_000)
+  /// `VECTURA_PERF_REALISTIC_QUERIES`; default 200 with the Swift embedder, 600 otherwise.
+  static let realisticQueryCount =
+    intEnv("VECTURA_PERF_REALISTIC_QUERIES", default: useSwiftEmbedder ? 200 : 600)
+  /// `VECTURA_PERF_REALISTIC_MIXED_OPS`; default 300 with the Swift embedder, 1_200 otherwise.
+  static let realisticMixedOperationCount =
+    intEnv("VECTURA_PERF_REALISTIC_MIXED_OPS", default: useSwiftEmbedder ? 300 : 1_200)
+  /// `VECTURA_PERF_REALISTIC_CLIENTS`; default 4 with the Swift embedder, 12 otherwise.
+  static let realisticConcurrentClients =
+    intEnv("VECTURA_PERF_REALISTIC_CLIENTS", default: useSwiftEmbedder ? 4 : 12)
+  /// `VECTURA_PERF_REALISTIC_COLD_RUNS`; default 8 with the Swift embedder, 24 otherwise.
+  static let realisticColdRuns =
+    intEnv("VECTURA_PERF_REALISTIC_COLD_RUNS", default: useSwiftEmbedder ? 8 : 24)
 
   @available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, watchOS 11.0, *)
   static func makeEmbedder(modelSource: VecturaModelSource = .default) -> any VecturaEmbedder {
@@ -15,6 +33,13 @@ enum PerformanceTestConfig {
     return DeterministicEmbedder(dimensionValue: defaultDimension)
   }
 
+  /// Reads a positive integer setting from the captured process environment.
+  ///
+  /// Surrounding whitespace and newlines are ignored, so a value such as `" 600"`
+  /// (easy to produce from shell scripts or CI configuration) is still honoured
+  /// instead of silently falling back to the default.
+  ///
+  /// - Parameters:
+  ///   - key: Name of the environment variable to read.
+  ///   - defaultValue: Value returned when the variable is unset, is not an integer,
+  ///     or is not greater than zero.
+  /// - Returns: The parsed value when it is greater than zero, otherwise `defaultValue`.
+  private static func intEnv(_ key: String, default defaultValue: Int) -> Int {
+    guard
+      let raw = environment[key]?.trimmingCharacters(in: .whitespacesAndNewlines),
+      let value = Int(raw),
+      value > 0
+    else {
+      return defaultValue
+    }
+    return value
+  }
+
   private struct DeterministicEmbedder: VecturaEmbedder {
     let dimensionValue: Int
 
 
@@ -55,6 +55,25 @@ public struct TestDataGenerator: Sendable {
     "financial services"
   ]
 
+  /// Operations-flavoured tokens (metrics names, infrastructure abbreviations, hyphenated
+  /// terms) that the realistic generators add to their vocabularies as lexical noise.
+  private static let noisyTokens = [
+    "latency",
+    "throughput",
+    "p95",
+    "p99",
+    "cache-hit",
+    "cache-miss",
+    "retry",
+    "timeout",
+    "batch",
+    "vector-db",
+    "k8s",
+    "grpc",
+    "ssd",
+    "cold-start",
+    "hot-path",
+    "rollback"
+  ]
+
   public init() {}
 
   /// Generate a collection of test documents.
@@ -116,6 +135,115 @@ public struct TestDataGenerator: Sendable {
       return "Doc\(index): " + docWords.joined(separator: " ")
     }
   }
+
+  /// Generate a corpus with realistic variance in document length and lexical noise.
+  ///
+  /// The output mixes:
+  /// - short and long technical documents
+  /// - repeated near-duplicates
+  /// - numeric/error-code style tokens and punctuation
+  ///
+  /// Fresh documents have the shape
+  /// `Document <index>: <topic> <context> <domain>. <body><punctuation> err=<n>`.
+  /// Near-duplicates copy a randomly chosen earlier entry (which may itself be a
+  /// near-duplicate) and append `Variant-<index> incident=<n> status=<ok|warn|error>`.
+  ///
+  /// - Parameters:
+  ///   - count: Number of documents to generate; zero or less yields an empty array
+  ///   - minWords: Minimum words in a document body; values below 5 are raised to 5
+  ///   - maxWords: Maximum words in a document body; raised to the effective minimum if smaller
+  ///   - duplicateRate: Chance that each document after the first is a near-duplicate,
+  ///     clamped to [0, 1]
+  ///   - seed: Seed for reproducibility; a fixed built-in seed is used when `nil`
+  /// - Returns: Generated realistic corpus containing `count` entries
+  public func generateRealisticCorpus(
+    count: Int,
+    minWords: Int = 20,
+    maxWords: Int = 260,
+    duplicateRate: Double = 0.08,
+    seed: UInt64? = nil
+  ) -> [String] {
+    guard count > 0 else { return [] }
+
+    // Sanitise the arguments so the word-count range below is never empty or negative.
+    let clampedMinWords = max(5, minWords)
+    let clampedMaxWords = max(clampedMinWords, maxWords)
+    let clampedDuplicateRate = min(max(duplicateRate, 0), 1)
+
+    // Unseeded calls fall back to a fixed seed rather than a random one.
+    var generator = seed.map { SeededRandomGenerator(seed: $0) } ?? SeededRandomGenerator(seed: 424242)
+    // Topic/context/domain phrases are split on spaces so the body vocabulary holds
+    // single words; the noisy tokens are added as they are.
+    let topicWords = Self.topics.flatMap { $0.split(separator: " ").map(String.init) }
+    let contextWords = Self.contexts.flatMap { $0.split(separator: " ").map(String.init) }
+    let domainWords = Self.domains.flatMap { $0.split(separator: " ").map(String.init) }
+    let vocabulary = topicWords + contextWords + domainWords + Self.noisyTokens
+
+    var corpus: [String] = []
+    corpus.reserveCapacity(count)
+
+    for index in 0..<count {
+      // The first document can never be a duplicate. The emptiness check short-circuits,
+      // so no random draw is consumed for it.
+      let shouldDuplicate = !corpus.isEmpty && generator.nextDouble() < clampedDuplicateRate
+      if shouldDuplicate {
+        // Copy an earlier entry and tack on a unique trailer so the text is close to,
+        // but not identical with, its base.
+        let base = corpus[generator.nextInt(upperBound: corpus.count)]
+        let variant = base
+          + " Variant-\(index) incident=\(1000 + generator.nextInt(upperBound: 9000))"
+          + " status=\(generator.pick(from: ["ok", "warn", "error"]))"
+        corpus.append(variant)
+        continue
+      }
+
+      // Header phrases are drawn before the body; the draw order determines the output
+      // for a given seed, so it must not be rearranged.
+      let topic = generator.pick(from: Self.topics)
+      let context = generator.pick(from: Self.contexts)
+      let domain = generator.pick(from: Self.domains)
+      let wordCount = clampedMinWords + generator.nextInt(upperBound: clampedMaxWords - clampedMinWords + 1)
+
+      var words: [String] = []
+      words.reserveCapacity(wordCount)
+      for position in 0..<wordCount {
+        var token = vocabulary[generator.nextInt(upperBound: vocabulary.count)]
+        // Every 37th word, starting with the first, gets a numeric suffix to mimic
+        // identifier-like tokens.
+        if position % 37 == 0 {
+          token += "-\(generator.nextInt(upperBound: 500))"
+        }
+        words.append(token)
+      }
+
+      // A period is listed three times, making it the most likely terminator.
+      let punctuation = generator.pick(from: [".", ".", ".", ";", "!", "?"])
+      let body = words.joined(separator: " ")
+      let record =
+        "Document \(index): \(topic) \(context) \(domain). \(body)\(punctuation) err=\(generator.nextInt(upperBound: 12))"
+      corpus.append(record)
+    }
+
+    return corpus
+  }
+
+  /// Build a stream of queries that imitates mixed real-world search traffic.
+  ///
+  /// Each query is one of three shapes, chosen by a draw in `0..<100`:
+  /// - below 40: a single topic phrase
+  /// - 40 up to 84: three entries from the combined topic/domain/noisy-token pool,
+  ///   separated by spaces
+  /// - 85 and above: a natural-language question built from a topic, a domain,
+  ///   and a noisy token
+  ///
+  /// - Parameters:
+  ///   - count: How many queries to produce; zero or less yields an empty array
+  ///   - seed: Seed for reproducibility; a fixed built-in seed is used when `nil`
+  /// - Returns: The generated query strings, `count` of them
+  public func generateRealisticQueries(
+    count: Int,
+    seed: UInt64? = nil
+  ) -> [String] {
+    if count <= 0 {
+      return []
+    }
+
+    var rng: SeededRandomGenerator
+    if let seed = seed {
+      rng = SeededRandomGenerator(seed: seed)
+    } else {
+      rng = SeededRandomGenerator(seed: 898989)
+    }
+
+    let mixedPool = Self.topics + Self.domains + Self.noisyTokens
+    var result: [String] = []
+    result.reserveCapacity(count)
+
+    var produced = 0
+    while produced < count {
+      produced += 1
+
+      let query: String
+      switch rng.nextInt(upperBound: 100) {
+      case ..<40:
+        // Short query: one topic phrase on its own.
+        query = rng.pick(from: Self.topics)
+      case ..<85:
+        // Medium query: array elements are evaluated left to right, so the three
+        // draws happen in the same order as they appear in the result.
+        let parts = [
+          rng.pick(from: mixedPool),
+          rng.pick(from: mixedPool),
+          rng.pick(from: mixedPool)
+        ]
+        query = parts.joined(separator: " ")
+      default:
+        // Long query: question-style sentence.
+        let subject = rng.pick(from: Self.topics)
+        let area = rng.pick(from: Self.domains)
+        let extra = rng.pick(from: Self.noisyTokens)
+        query = "how to optimize \(subject) for \(area) with \(extra)"
+      }
+      result.append(query)
+    }
+
+    return result
+  }
 }
 
 /// Simple seeded random number generator for reproducible tests.
@@ -132,4 +260,18 @@ private struct SeededRandomGenerator {
     state = state &* 6364136223846793005 &+ 1442695040888963407
     return Int(state >> 32)
   }
+
+  /// Draws the next value and reduces it modulo `upperBound`.
+  ///
+  /// - Parameter upperBound: Exclusive upper limit for the result.
+  /// - Returns: `next() % upperBound`, or `0` when `upperBound` is zero or negative
+  ///   (in which case the generator state is left untouched).
+  mutating func nextInt(upperBound: Int) -> Int {
+    upperBound > 0 ? next() % upperBound : 0
+  }
+
+  /// Draws a fraction by scaling an integer draw below `UInt32.max` by `UInt32.max`.
+  ///
+  /// - Returns: The drawn integer divided by `UInt32.max`; because the draw is
+  ///   strictly below the divisor, the result is below 1.
+  mutating func nextDouble() -> Double {
+    let scale = UInt32.max
+    let draw = nextInt(upperBound: Int(scale))
+    let numerator = Double(UInt64(draw))
+    return numerator / Double(scale)
+  }
+
+  /// Returns a pseudo-randomly chosen element of `values`.
+  ///
+  /// - Parameter values: Candidates to choose from; must not be empty.
+  /// - Returns: The element at an index drawn with `nextInt(upperBound:)`.
+  mutating func pick<T>(from values: [T]) -> T {
+    // Without this check an empty array would trap on `values[0]` with an opaque
+    // index-out-of-range error; fail with a message that names the real problem.
+    precondition(!values.isEmpty, "pick(from:) requires a non-empty array")
+    return values[nextInt(upperBound: values.count)]
+  }
 }
@@ -57,6 +57,7 @@ Tests/PerformanceTests/
 ├── MemoryProfilerSuite.swift      # Memory profiling (5 tests)
 ├── ParameterTuningSuite.swift     # Parameter optimization (5 tests)
 ├── AccuracyTests.swift            # Search quality (4 tests)
+├── RealisticWorkloadSuite.swift   # Harder real-world profile (2 opt-in tests)
 │
 ├── Helpers/
 │   ├── PerformanceMetrics.swift   # Metrics collection
@@ -67,7 +68,7 @@ Tests/PerformanceTests/
     └── README.md                  # Custom data guide
 ```
 
-**Total: 26 performance tests**
+**Total: 28 performance tests (2 realistic tests are opt-in)**
 
 ### Embedder Selection (Speed vs Realism)
 
@@ -78,6 +79,24 @@ downloading CoreML models. To run with real embeddings, set:
 VECTURA_PERF_USE_SWIFT_EMBEDDER=1
 ```
 
+### Realistic Profile (Harder Benchmark Mode)
+
+Enable the realistic suite to simulate heavier production-like workloads:
+
+```bash
+VECTURA_PERF_PROFILE=realistic swift test --filter RealisticWorkloadSuite
+```
+
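+Optional knobs (the values shown are the defaults for the default deterministic embedder; the defaults are lower when `VECTURA_PERF_USE_SWIFT_EMBEDDER=1`, and unset, non-numeric, or non-positive values fall back to the default):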
+
+```bash
+export VECTURA_PERF_REALISTIC_DOCS=12000
+export VECTURA_PERF_REALISTIC_QUERIES=600
+export VECTURA_PERF_REALISTIC_MIXED_OPS=1200
+export VECTURA_PERF_REALISTIC_CLIENTS=12
+export VECTURA_PERF_REALISTIC_COLD_RUNS=24
+```
+
 ---
 
 ## Running Tests
@@ -105,6 +124,9 @@ swift test --filter ParameterTuningSuite
 # Validates search quality with 1K documents
 swift test --filter AccuracyTests
 
+# Realistic workload profile (opt-in, heavier)
+VECTURA_PERF_PROFILE=realistic swift test --filter RealisticWorkloadSuite
+
 # All performance tests (~30 min)
 # Note: May fail on memory-constrained systems
 swift test --filter PerformanceTests
@@ -136,6 +158,10 @@ swift test --filter MemoryProfilerSuite.strategyMemoryComparison
 
 # Accuracy testing
 swift test --filter AccuracyTests.basicAccuracyTest
+
+# Realistic profile
+VECTURA_PERF_PROFILE=realistic swift test --filter RealisticWorkloadSuite.realisticFullMemoryProfile
+VECTURA_PERF_PROFILE=realistic swift test --filter RealisticWorkloadSuite.realisticIndexedProfile
 ```
 
 ---
@@ -329,6 +355,35 @@ Reduced from 2K to 1K documents and reduced query counts to prevent memory issue
 
 ---
 
+### 6. RealisticWorkloadSuite (2 tests, opt-in)
+
+**Purpose:** Stress the system with harder workload characteristics closer to production traffic.
+
+**Key Tests:**
+- `realisticFullMemoryProfile` - cold vs warm behavior, multi-client queueing, mixed read/write traffic
+- `realisticIndexedProfile` - same workload in indexed mode with cold/warm and tail latency focus
+
+**What makes it harder:**
+- Larger document/query defaults than other suites
+- Variable document lengths with lexical noise and near-duplicates
+- Cold-start measurements (new instance + first search)
+- Multi-client pressure (concurrent callers)
+- Mixed workload phase (70% search / 30% writes)
+- Tail metrics emphasized (P99 and P99.9)
+
+**Enable:**
+
+```bash
+VECTURA_PERF_PROFILE=realistic swift test --filter RealisticWorkloadSuite
+```
+
+**Use Cases:**
+- Catching regressions hidden by small synthetic datasets
+- Evaluating cold-start impact separately from warm steady-state
+- Measuring tail-latency behavior under heavier pressure
+
+---
+
 ## Customizing Tests
 
 ### Modify Dataset Sizes