PreTrainedTokenizer: use thread-safe cache for compiled Jinja templates

DePasqualeOrg · DePasqualeOrg · commit 042e7e91e655 · 2026-01-05T12:10:43.000+01:00
diff --git a/Sources/Tokenizers/Tokenizer.swift b/Sources/Tokenizers/Tokenizer.swift
@@ -8,6 +8,7 @@
 import Foundation
 import Hub
 import Jinja
+import os
 
 /// A type alias for chat messages, represented as key-value pairs.
 public typealias Message = [String: any Sendable]
@@ -609,11 +610,8 @@ public class PreTrainedTokenizer: @unchecked Sendable, Tokenizer {
 
     private let cleanUpTokenizationSpaces: Bool
 
-    /// Cache for compiled Jinja templates keyed by their literal template string
-    private var compiledChatTemplateCache: [String: Template] = [:]
-
-    /// Lock to protect the compiled chat template cache from concurrent access
-    private let cacheLock = NSLock()
+    /// Thread-safe cache for compiled Jinja templates keyed by their literal template string
+    private let templateCache = OSAllocatedUnfairLock(initialState: [String: Template]())
 
     /// Initializes a tokenizer from Hugging Face configuration files.
     ///
@@ -775,28 +773,22 @@ public class PreTrainedTokenizer: @unchecked Sendable, Tokenizer {
     }
 
     private func compiledTemplate(for templateString: String) throws -> Template {
-        // Fast path: check cache under lock
-        cacheLock.lock()
-        if let cached = compiledChatTemplateCache[templateString] {
-            cacheLock.unlock()
+        // Fast path: check cache
+        if let cached = templateCache.withLock({ $0[templateString] }) {
             return cached
         }
-        cacheLock.unlock()
 
         // Compile template outside of lock to avoid holding lock during expensive operation
         let compiled = try Template(templateString)
 
-        // Insert into cache under lock (using double-checked locking pattern)
-        cacheLock.lock()
-        defer { cacheLock.unlock() }
-
-        // Check again in case another thread compiled the same template
-        if let cached = compiledChatTemplateCache[templateString] {
-            return cached
+        // Insert into cache (double-checked in case another thread compiled the same template)
+        return templateCache.withLock { cache in
+            if let cached = cache[templateString] {
+                return cached
+            }
+            cache[templateString] = compiled
+            return compiled
         }
-
-        compiledChatTemplateCache[templateString] = compiled
-        return compiled
     }
 
     func preTokenize(_ text: String, options: PreTokenizerOptions) -> [String] {