Parallelize loading of weights, tokenizer, and processor config

DePasqualeOrg · DePasqualeOrg · commit afb9decd1148 · 2025-12-29T09:43:56.000+01:00
diff --git a/Libraries/Embedders/Configuration.swift b/Libraries/Embedders/Configuration.swift
@@ -33,60 +33,42 @@ private class ModelTypeRegistry: @unchecked Sendable {
     // to remain synchronous.
     private let lock = NSLock()
 
-    private var creators: [String: @Sendable (URL) throws -> EmbeddingModel] = [
-        "bert": {
-            url in
-            let configuration = try JSONDecoder().decode(
-                BertConfiguration.self, from: Data(contentsOf: url))
-            let model = BertModel(configuration)
-            return model
+    private var creators: [String: @Sendable (Data) throws -> EmbeddingModel] = [
+        "bert": { data in
+            let configuration = try JSONDecoder().decode(BertConfiguration.self, from: data)
+            return BertModel(configuration)
         },
-        "roberta": {
-            url in
-            let configuration = try JSONDecoder().decode(
-                BertConfiguration.self, from: Data(contentsOf: url))
-            let model = BertModel(configuration)
-            return model
+        "roberta": { data in
+            let configuration = try JSONDecoder().decode(BertConfiguration.self, from: data)
+            return BertModel(configuration)
         },
-        "xlm-roberta": {
-            url in
-            let configuration = try JSONDecoder().decode(
-                BertConfiguration.self, from: Data(contentsOf: url))
-            let model = BertModel(configuration)
-            return model
+        "xlm-roberta": { data in
+            let configuration = try JSONDecoder().decode(BertConfiguration.self, from: data)
+            return BertModel(configuration)
         },
-        "distilbert": {
-            url in
-            let configuration = try JSONDecoder().decode(
-                BertConfiguration.self, from: Data(contentsOf: url))
-            let model = BertModel(configuration)
-            return model
+        "distilbert": { data in
+            let configuration = try JSONDecoder().decode(BertConfiguration.self, from: data)
+            return BertModel(configuration)
         },
-        "nomic_bert": {
-            url in
-            let configuration = try JSONDecoder().decode(
-                NomicBertConfiguration.self, from: Data(contentsOf: url))
-            let model = NomicBertModel(configuration, pooler: false)
-            return model
+        "nomic_bert": { data in
+            let configuration = try JSONDecoder().decode(NomicBertConfiguration.self, from: data)
+            return NomicBertModel(configuration, pooler: false)
         },
-        "qwen3": {
-            url in
-            let configuration = try JSONDecoder().decode(
-                Qwen3Configuration.self, from: Data(contentsOf: url))
-            let model = Qwen3Model(configuration)
-            return model
+        "qwen3": { data in
+            let configuration = try JSONDecoder().decode(Qwen3Configuration.self, from: data)
+            return Qwen3Model(configuration)
         },
     ]
 
     public func registerModelType(
-        _ type: String, creator: @Sendable @escaping (URL) throws -> EmbeddingModel
+        _ type: String, creator: @Sendable @escaping (Data) throws -> EmbeddingModel
     ) {
         lock.withLock {
             creators[type] = creator
         }
     }
 
-    public func createModel(configuration: URL, rawValue: String) throws -> EmbeddingModel {
+    public func createModel(configuration: Data, rawValue: String) throws -> EmbeddingModel {
         let creator = lock.withLock {
             creators[rawValue]
         }
@@ -108,12 +90,12 @@ public struct ModelType: RawRepresentable, Codable, Sendable {
     }
 
     public static func registerModelType(
-        _ type: String, creator: @Sendable @escaping (URL) throws -> EmbeddingModel
+        _ type: String, creator: @Sendable @escaping (Data) throws -> EmbeddingModel
     ) {
         modelTypeRegistry.registerModelType(type, creator: creator)
     }
 
-    public func createModel(configuration: URL) throws -> EmbeddingModel {
+    public func createModel(configuration: Data) throws -> EmbeddingModel {
         try modelTypeRegistry.createModel(configuration: configuration, rawValue: rawValue)
     }
 }
diff --git a/Libraries/Embedders/EmbeddingModel.swift b/Libraries/Embedders/EmbeddingModel.swift
@@ -46,13 +46,15 @@ public actor ModelContainer {
     public init(
         hub: HubApi, modelDirectory: URL, configuration: ModelConfiguration
     ) async throws {
+        async let tokenizerConfigTask = loadTokenizerConfig(
+            configuration: configuration, hub: hub)
+
         self.model = try loadSynchronous(modelDirectory: modelDirectory)
+        self.pooler = loadPooling(modelDirectory: modelDirectory)
 
-        let (tokenizerConfig, tokenizerData) = try await loadTokenizerConfig(
-            configuration: configuration, hub: hub)
+        let (tokenizerConfig, tokenizerData) = try await tokenizerConfigTask
         self.tokenizer = try PreTrainedTokenizer(
             tokenizerConfig: tokenizerConfig, tokenizerData: tokenizerData)
-        self.pooler = loadPooling(modelDirectory: modelDirectory)  //?? Pooling(strategy: .none)
     }
 
     /// Perform an action on the model and/or tokenizer. Callers _must_ eval any `MLXArray` before returning as
diff --git a/Libraries/Embedders/Load.swift b/Libraries/Embedders/Load.swift
@@ -49,20 +49,22 @@ public func load(
 ) async throws -> (EmbeddingModel, Tokenizer) {
     let modelDirectory = try await prepareModelDirectory(
         hub: hub, configuration: configuration, progressHandler: progressHandler)
+
+    async let tokenizerTask = loadTokenizer(configuration: configuration, hub: hub)
     let model = try loadSynchronous(modelDirectory: modelDirectory)
-    let tokenizer = try await loadTokenizer(configuration: configuration, hub: hub)
+    let tokenizer = try await tokenizerTask
 
     return (model, tokenizer)
 }
 
 func loadSynchronous(modelDirectory: URL) throws -> EmbeddingModel {
-    // create the model (no weights loaded)
+    // Load config.json once and decode for both base config and model-specific config
     let configurationURL = modelDirectory.appending(component: "config.json")
-    let baseConfig = try JSONDecoder().decode(
-        BaseConfiguration.self, from: Data(contentsOf: configurationURL))
+    let configData = try Data(contentsOf: configurationURL)
+    let baseConfig = try JSONDecoder().decode(BaseConfiguration.self, from: configData)
 
     let modelType = ModelType(rawValue: baseConfig.modelType)
-    let model = try modelType.createModel(configuration: configurationURL)
+    let model = try modelType.createModel(configuration: configData)
 
     // load the weights
     var weights = [String: MLXArray]()
diff --git a/Libraries/MLXLLM/LLMModelFactory.swift b/Libraries/MLXLLM/LLMModelFactory.swift
@@ -6,13 +6,12 @@ import MLX
 import MLXLMCommon
 import Tokenizers
 
-/// Creates a function that loads a configuration file and instantiates a model with the proper configuration
+/// Creates a function that decodes configuration data and instantiates a model with the proper configuration
 private func create<C: Codable, M>(
     _ configurationType: C.Type, _ modelInit: @escaping (C) -> M
-) -> (URL) throws -> M {
-    { url in
-        let configuration = try JSONDecoder().decode(
-            C.self, from: Data(contentsOf: url))
+) -> (Data) throws -> M {
+    { data in
+        let configuration = try JSONDecoder().decode(C.self, from: data)
         return modelInit(configuration)
     }
 }
@@ -478,13 +477,13 @@ public final class LLMModelFactory: ModelFactory {
         let modelDirectory = try await downloadModel(
             hub: hub, configuration: configuration, progressHandler: progressHandler)
 
-        // Load the generic config to understand which model and how to load the weights
+        // Load config.json once and decode for both base config and model-specific config
         let configurationURL = modelDirectory.appending(component: "config.json")
-
+        let configData: Data
         let baseConfig: BaseConfiguration
         do {
-            baseConfig = try JSONDecoder().decode(
-                BaseConfiguration.self, from: Data(contentsOf: configurationURL))
+            configData = try Data(contentsOf: configurationURL)
+            baseConfig = try JSONDecoder().decode(BaseConfiguration.self, from: configData)
         } catch let error as DecodingError {
             throw ModelFactoryError.configurationDecodingError(
                 configurationURL.lastPathComponent, configuration.name, error)
@@ -493,18 +492,20 @@ public final class LLMModelFactory: ModelFactory {
         let model: LanguageModel
         do {
             model = try await typeRegistry.createModel(
-                configuration: configurationURL, modelType: baseConfig.modelType)
+                configuration: configData, modelType: baseConfig.modelType)
         } catch let error as DecodingError {
             throw ModelFactoryError.configurationDecodingError(
                 configurationURL.lastPathComponent, configuration.name, error)
         }
 
-        // apply the weights to the bare model
+        // Load weights and tokenizer in parallel
+        async let tokenizerTask = loadTokenizer(configuration: configuration, hub: hub)
+
         try loadWeights(
             modelDirectory: modelDirectory, model: model,
             perLayerQuantization: baseConfig.perLayerQuantization)
 
-        let tokenizer = try await loadTokenizer(configuration: configuration, hub: hub)
+        let tokenizer = try await tokenizerTask
 
         let messageGenerator =
             if let model = model as? LLMModel {
diff --git a/Libraries/MLXLMCommon/Registries/ModelTypeRegistry.swift b/Libraries/MLXLMCommon/Registries/ModelTypeRegistry.swift
@@ -10,21 +10,22 @@ public actor ModelTypeRegistry {
     }
 
     /// Creates a registry with given creators.
-    public init(creators: [String: (URL) throws -> any LanguageModel]) {
+    public init(creators: [String: (Data) throws -> any LanguageModel]) {
         self.creators = creators
     }
 
-    private var creators: [String: (URL) throws -> any LanguageModel]
+    private var creators: [String: (Data) throws -> any LanguageModel]
 
     /// Add a new model to the type registry.
     public func registerModelType(
-        _ type: String, creator: @escaping (URL) throws -> any LanguageModel
+        _ type: String, creator: @escaping (Data) throws -> any LanguageModel
     ) {
         creators[type] = creator
     }
 
-    /// Given a `modelType` and configuration file instantiate a new `LanguageModel`.
-    public func createModel(configuration: URL, modelType: String) throws -> sending LanguageModel {
+    /// Given a `modelType` and configuration data instantiate a new `LanguageModel`.
+    public func createModel(configuration: Data, modelType: String) throws -> sending LanguageModel
+    {
         guard let creator = creators[modelType] else {
             throw ModelFactoryError.unsupportedModelType(modelType)
         }
diff --git a/Libraries/MLXLMCommon/Registries/ProcessorTypeRegistry.swift b/Libraries/MLXLMCommon/Registries/ProcessorTypeRegistry.swift
@@ -11,26 +11,26 @@ public actor ProcessorTypeRegistry {
     }
 
     /// Creates a registry with given creators.
-    public init(creators: [String: (URL, any Tokenizer) throws -> any UserInputProcessor]) {
+    public init(creators: [String: (Data, any Tokenizer) throws -> any UserInputProcessor]) {
         self.creators = creators
     }
 
-    private var creators: [String: (URL, any Tokenizer) throws -> any UserInputProcessor]
+    private var creators: [String: (Data, any Tokenizer) throws -> any UserInputProcessor]
 
     /// Add a new model to the type registry.
     public func registerProcessorType(
         _ type: String,
         creator:
             @escaping (
-                URL,
+                Data,
                 any Tokenizer
             ) throws -> any UserInputProcessor
     ) {
         creators[type] = creator
     }
 
-    /// Given a `processorType` and configuration file instantiate a new `UserInputProcessor`.
-    public func createModel(configuration: URL, processorType: String, tokenizer: any Tokenizer)
+    /// Given a `processorType` and configuration data instantiate a new `UserInputProcessor`.
+    public func createModel(configuration: Data, processorType: String, tokenizer: any Tokenizer)
         throws -> sending any UserInputProcessor
     {
         guard let creator = creators[processorType] else {
diff --git a/Libraries/MLXVLM/VLMModelFactory.swift b/Libraries/MLXVLM/VLMModelFactory.swift
@@ -48,10 +48,9 @@ public struct BaseProcessorConfiguration: Codable, Sendable {
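-/// Creates a function that loads a configuration file and instantiates a model with the proper configuration
+/// Creates a function that decodes configuration data and instantiates a model with the proper configuration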
 private func create<C: Codable, M>(
     _ configurationType: C.Type, _ modelInit: @escaping (C) -> M
-) -> (URL) throws -> M {
-    { url in
-        let configuration = try JSONDecoder().decode(
-            C.self, from: Data(contentsOf: url))
+) -> (Data) throws -> M {
+    { data in
+        let configuration = try JSONDecoder().decode(C.self, from: data)
         return modelInit(configuration)
     }
 }
@@ -63,10 +62,9 @@ private func create<C: Codable, P>(
             C,
             any Tokenizer
         ) -> P
-) -> (URL, any Tokenizer) throws -> P {
-    { url, tokenizer in
-        let configuration = try JSONDecoder().decode(
-            C.self, from: Data(contentsOf: url))
+) -> (Data, any Tokenizer) throws -> P {
+    { data, tokenizer in
+        let configuration = try JSONDecoder().decode(C.self, from: data)
         return processorInit(configuration, tokenizer)
     }
 }
@@ -247,15 +245,13 @@ public final class VLMModelFactory: ModelFactory {
         let modelDirectory = try await downloadModel(
             hub: hub, configuration: configuration, progressHandler: progressHandler)
 
-        // load the generic config to understand which model and how to load the weights
-        let configurationURL = modelDirectory.appending(
-            component: "config.json"
-        )
-
+        // Load config.json once and decode for both base config and model-specific config
+        let configurationURL = modelDirectory.appending(component: "config.json")
+        let configData: Data
         let baseConfig: BaseConfiguration
         do {
-            baseConfig = try JSONDecoder().decode(
-                BaseConfiguration.self, from: Data(contentsOf: configurationURL))
+            configData = try Data(contentsOf: configurationURL)
+            baseConfig = try JSONDecoder().decode(BaseConfiguration.self, from: configData)
         } catch let error as DecodingError {
             throw ModelFactoryError.configurationDecodingError(
                 configurationURL.lastPathComponent, configuration.name, error)
@@ -264,39 +260,41 @@ public final class VLMModelFactory: ModelFactory {
         let model: LanguageModel
         do {
             model = try await typeRegistry.createModel(
-                configuration: configurationURL, modelType: baseConfig.modelType)
+                configuration: configData, modelType: baseConfig.modelType)
         } catch let error as DecodingError {
             throw ModelFactoryError.configurationDecodingError(
                 configurationURL.lastPathComponent, configuration.name, error)
         }
 
-        // apply the weights to the bare model
+        // Load weights, tokenizer, and processor config in parallel
+        let processorConfigurationURL = modelDirectory.appending(
+            component: "preprocessor_config.json")
+        async let tokenizerTask = loadTokenizer(configuration: configuration, hub: hub)
+        async let processorConfigTask: (Data, BaseProcessorConfiguration) = {
+            let data = try Data(contentsOf: processorConfigurationURL)
+            let config = try JSONDecoder().decode(BaseProcessorConfiguration.self, from: data)
+            return (data, config)
+        }()
+
         try loadWeights(
             modelDirectory: modelDirectory, model: model,
             perLayerQuantization: baseConfig.perLayerQuantization)
 
-        let tokenizer = try await loadTokenizer(
-            configuration: configuration,
-            hub: hub
-        )
-
-        let processorConfigurationURL = modelDirectory.appending(
-            component: "preprocessor_config.json"
-        )
-
-        let baseProcessorConfig: BaseProcessorConfiguration
-        do {
-            baseProcessorConfig = try JSONDecoder().decode(
-                BaseProcessorConfiguration.self,
-                from: Data(contentsOf: processorConfigurationURL)
-            )
-        } catch let error as DecodingError {
-            throw ModelFactoryError.configurationDecodingError(
-                processorConfigurationURL.lastPathComponent, configuration.name, error)
-        }
+        let tokenizer = try await tokenizerTask
+
+        // Decoding failures surface when the child task is awaited; keep wrapping them in
+        // ModelFactoryError.configurationDecodingError (file name + model name) as before.
+        let processorConfigData: Data
+        let baseProcessorConfig: BaseProcessorConfiguration
+        do {
+            (processorConfigData, baseProcessorConfig) = try await processorConfigTask
+        } catch let error as DecodingError {
+            throw ModelFactoryError.configurationDecodingError(
+                processorConfigurationURL.lastPathComponent, configuration.name, error)
+        }
 
         let processor = try await processorRegistry.createModel(
-            configuration: processorConfigurationURL,
+            configuration: processorConfigData,
             processorType: baseProcessorConfig.processorClass, tokenizer: tokenizer)
 
         return .init(

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -10,21 +10,22 @@ public actor ModelTypeRegistry {` |
| 10 | 10 | `}` |
| 11 | 11 |  |
| 12 | 12 | `/// Creates a registry with given creators.` |
| 13 |  | `- public init(creators: [String: (URL) throws -> any LanguageModel]) {` |
|  | 13 | `+ public init(creators: [String: (Data) throws -> any LanguageModel]) {` |
| 14 | 14 | `self.creators = creators` |
| 15 | 15 | `}` |
| 16 | 16 |  |
| 17 |  | `- private var creators: [String: (URL) throws -> any LanguageModel]` |
|  | 17 | `+ private var creators: [String: (Data) throws -> any LanguageModel]` |
| 18 | 18 |  |
| 19 | 19 | `/// Add a new model to the type registry.` |
| 20 | 20 | `public func registerModelType(` |
| 21 |  | `- _ type: String, creator: @escaping (URL) throws -> any LanguageModel` |
|  | 21 | `+ _ type: String, creator: @escaping (Data) throws -> any LanguageModel` |
| 22 | 22 | `) {` |
| 23 | 23 | `creators[type] = creator` |
| 24 | 24 | `}` |
| 25 | 25 |  |
| 26 |  | ``- /// Given a `modelType` and configuration file instantiate a new `LanguageModel`.`` |
| 27 |  | `- public func createModel(configuration: URL, modelType: String) throws -> sending LanguageModel {` |
|  | 26 | ``+ /// Given a `modelType` and configuration data instantiate a new `LanguageModel`.`` |
|  | 27 | `+ public func createModel(configuration: Data, modelType: String) throws -> sending LanguageModel` |
|  | 28 | `+ {` |
| 28 | 29 | `guard let creator = creators[modelType] else {` |
| 29 | 30 | `throw ModelFactoryError.unsupportedModelType(modelType)` |
| 30 | 31 | `}` |