Add download cache hit benchmarks

DePasqualeOrg · DePasqualeOrg · commit 0667c1845abc · 2026-03-06T22:23:23.000+01:00
diff --git a/Libraries/BenchmarkHelpers/BenchmarkHelpers.swift b/Libraries/BenchmarkHelpers/BenchmarkHelpers.swift
@@ -1,4 +1,4 @@
-// Shared benchmark logic for measuring model loading performance.
+// Shared benchmark logic for measuring model loading and download performance.
 // Integration packages inject their own Downloader and TokenizerLoader.
 
 import Foundation
@@ -8,6 +8,37 @@ import MLXLLM
 import MLXLMCommon
 import MLXVLM
 
+// MARK: - No-Op Tokenizer
+
+/// Stand-in `TokenizerLoader` whose `load(from:)` always yields a stub tokenizer.
+/// Lets downloader integration packages that ship no real tokenizer still run
+/// the model loading benchmarks.
+public struct NoOpTokenizerLoader: TokenizerLoader {
+    public init() {}
+    public func load(from directory: URL) async throws -> any Tokenizer {
+        // `directory` is ignored; the stub is constructed without touching disk.
+        return NoOpTokenizer()
+    }
+}
+
+private struct NoOpTokenizer: Tokenizer {
+    // Inert stub: lookups give nil, encode gives [], decode gives "", templating throws.
+    var bosToken: String? { return nil }
+    var eosToken: String? { return nil }
+    var unknownToken: String? { return nil }
+    func convertTokenToId(_ token: String) -> Int? { return nil }
+    func convertIdToToken(_ id: Int) -> String? { return nil }
+    func encode(text: String, addSpecialTokens: Bool) -> [Int] { return [] }
+    func decode(tokenIds: [Int], skipSpecialTokens: Bool) -> String { return "" }
+    func applyChatTemplate(
+        messages: [[String: any Sendable]],
+        tools: [[String: any Sendable]]?,
+        additionalContext: [String: any Sendable]?
+    ) throws -> [Int] {
+        throw MLXLMCommon.TokenizerError.missingChatTemplate
+    }
+}
+
 // MARK: - Stats
 
 public struct BenchmarkStats: Sendable {
@@ -32,10 +63,10 @@ public struct BenchmarkStats: Sendable {
 
     public func printSummary(label: String) {
         print("\(label) results:")
-        print("  Mean:   \(String(format: "%.0f", mean))ms")
-        print("  Median: \(String(format: "%.0f", median))ms")
+        print("  Mean:   \(String(format: "%.1f", mean))ms")
+        print("  Median: \(String(format: "%.1f", median))ms")
         print("  StdDev: \(String(format: "%.1f", stdDev))ms")
-        print("  Range:  \(String(format: "%.0f", min))-\(String(format: "%.0f", max))ms")
+        print("  Range:  \(String(format: "%.1f", min))-\(String(format: "%.1f", max))ms")
     }
 }
 
@@ -63,7 +94,7 @@ public func benchmarkLLMLoading(
         ) { _ in }
         let elapsed = (CFAbsoluteTimeGetCurrent() - start) * 1000
         times.append(elapsed)
-        print("LLM load run \(i): \(String(format: "%.0f", elapsed))ms")
+        print("LLM load run \(i): \(String(format: "%.1f", elapsed))ms")
         Memory.clearCache()
     }
 
@@ -92,7 +123,7 @@ public func benchmarkVLMLoading(
         ) { _ in }
         let elapsed = (CFAbsoluteTimeGetCurrent() - start) * 1000
         times.append(elapsed)
-        print("VLM load run \(i): \(String(format: "%.0f", elapsed))ms")
+        print("VLM load run \(i): \(String(format: "%.1f", elapsed))ms")
         Memory.clearCache()
     }
 
@@ -119,9 +150,39 @@ public func benchmarkEmbeddingLoading(
         ) { _ in }
         let elapsed = (CFAbsoluteTimeGetCurrent() - start) * 1000
         times.append(elapsed)
-        print("Embedding load run \(i): \(String(format: "%.0f", elapsed))ms")
+        print("Embedding load run \(i): \(String(format: "%.1f", elapsed))ms")
         Memory.clearCache()
     }
 
     return BenchmarkStats(times: times)
 }
+
+// MARK: - Download Benchmarks
+
+/// Benchmarks cache-hit latency: one untimed warm-up download populates the cache,
+/// then `runs` identical downloads are timed. Traps if `runs < 1`.
+/// - Returns: Statistics over the per-run wall-clock times in milliseconds.
+/// - Throws: Any error from `downloader.download`.
+public func benchmarkDownloadCacheHit(
+    from downloader: any Downloader,
+    modelId: String = "mlx-community/Qwen3-0.6B-4bit",
+    runs: Int = 7
+) async throws -> BenchmarkStats {
+    precondition(runs >= 1, "benchmarkDownloadCacheHit requires runs >= 1, got \(runs)")
+    let patterns = ["*.safetensors", "*.json", "*.jinja"]
+    // Untimed warm-up: ensure the model is cached before any run is measured.
+    _ = try await downloader.download(
+        id: modelId, revision: "main", matching: patterns,
+        useLatest: false, progressHandler: { _ in })
+    var times: [Double] = []
+    for i in 1 ... runs {
+        let start = CFAbsoluteTimeGetCurrent()
+        _ = try await downloader.download(
+            id: modelId, revision: "main", matching: patterns,
+            useLatest: false, progressHandler: { _ in })
+        let elapsed = (CFAbsoluteTimeGetCurrent() - start) * 1000
+        times.append(elapsed)
+        print("Download cache hit run \(i): \(String(format: "%.1f", elapsed))ms")
+    }
+    return BenchmarkStats(times: times)
+}