Parallelize loading of weights, tokenizer, and processor config

DePasqualeOrg · DePasqualeOrg · commit 0417a58fdedd · 2025-12-28T21:26:46.000+01:00
diff --git a/Libraries/MLXLLM/LLMModelFactory.swift b/Libraries/MLXLLM/LLMModelFactory.swift
@@ -6,13 +6,12 @@ import MLX
 import MLXLMCommon
 import Tokenizers
 
-/// Creates a function that loads a configuration file and instantiates a model with the proper configuration
+/// Creates a function that decodes configuration data and instantiates a model with the proper configuration
 private func create<C: Codable, M>(
     _ configurationType: C.Type, _ modelInit: @escaping (C) -> M
-) -> (URL) throws -> M {
-    { url in
-        let configuration = try JSONDecoder().decode(
-            C.self, from: Data(contentsOf: url))
+) -> (Data) throws -> M {
+    { data in
+        let configuration = try JSONDecoder().decode(C.self, from: data)
         return modelInit(configuration)
     }
 }
@@ -478,13 +477,13 @@ public final class LLMModelFactory: ModelFactory {
         let modelDirectory = try await downloadModel(
             hub: hub, configuration: configuration, progressHandler: progressHandler)
 
-        // Load the generic config to understand which model and how to load the weights
+        // Load config.json once and decode for both base config and model-specific config
         let configurationURL = modelDirectory.appending(component: "config.json")
-
+        let configData: Data
         let baseConfig: BaseConfiguration
         do {
-            baseConfig = try JSONDecoder().decode(
-                BaseConfiguration.self, from: Data(contentsOf: configurationURL))
+            configData = try Data(contentsOf: configurationURL)
+            baseConfig = try JSONDecoder().decode(BaseConfiguration.self, from: configData)
         } catch let error as DecodingError {
             throw ModelFactoryError.configurationDecodingError(
                 configurationURL.lastPathComponent, configuration.name, error)
@@ -493,18 +492,20 @@ public final class LLMModelFactory: ModelFactory {
         let model: LanguageModel
         do {
             model = try await typeRegistry.createModel(
-                configuration: configurationURL, modelType: baseConfig.modelType)
+                configuration: configData, modelType: baseConfig.modelType)
         } catch let error as DecodingError {
             throw ModelFactoryError.configurationDecodingError(
                 configurationURL.lastPathComponent, configuration.name, error)
         }
 
-        // apply the weights to the bare model
+        // Load weights and tokenizer in parallel
+        async let tokenizerTask = loadTokenizer(configuration: configuration, hub: hub)
+
         try loadWeights(
             modelDirectory: modelDirectory, model: model,
             perLayerQuantization: baseConfig.perLayerQuantization)
 
-        let tokenizer = try await loadTokenizer(configuration: configuration, hub: hub)
+        let tokenizer = try await tokenizerTask
 
         let messageGenerator =
             if let model = model as? LLMModel {
diff --git a/Libraries/MLXLMCommon/Registries/ModelTypeRegistry.swift b/Libraries/MLXLMCommon/Registries/ModelTypeRegistry.swift
@@ -10,21 +10,22 @@ public actor ModelTypeRegistry {
     }
 
     /// Creates a registry with given creators.
-    public init(creators: [String: (URL) throws -> any LanguageModel]) {
+    public init(creators: [String: (Data) throws -> any LanguageModel]) {
         self.creators = creators
     }
 
-    private var creators: [String: (URL) throws -> any LanguageModel]
+    private var creators: [String: (Data) throws -> any LanguageModel]
 
     /// Add a new model to the type registry.
     public func registerModelType(
-        _ type: String, creator: @escaping (URL) throws -> any LanguageModel
+        _ type: String, creator: @escaping (Data) throws -> any LanguageModel
     ) {
         creators[type] = creator
     }
 
-    /// Given a `modelType` and configuration file instantiate a new `LanguageModel`.
-    public func createModel(configuration: URL, modelType: String) throws -> sending LanguageModel {
+    /// Given a `modelType` and configuration data instantiate a new `LanguageModel`.
+    public func createModel(configuration: Data, modelType: String) throws -> sending LanguageModel
+    {
         guard let creator = creators[modelType] else {
             throw ModelFactoryError.unsupportedModelType(modelType)
         }
diff --git a/Libraries/MLXLMCommon/Registries/ProcessorTypeRegistry.swift b/Libraries/MLXLMCommon/Registries/ProcessorTypeRegistry.swift
@@ -11,26 +11,26 @@ public actor ProcessorTypeRegistry {
     }
 
     /// Creates a registry with given creators.
-    public init(creators: [String: (URL, any Tokenizer) throws -> any UserInputProcessor]) {
+    public init(creators: [String: (Data, any Tokenizer) throws -> any UserInputProcessor]) {
         self.creators = creators
     }
 
-    private var creators: [String: (URL, any Tokenizer) throws -> any UserInputProcessor]
+    private var creators: [String: (Data, any Tokenizer) throws -> any UserInputProcessor]
 
     /// Add a new model to the type registry.
     public func registerProcessorType(
         _ type: String,
         creator:
             @escaping (
-                URL,
+                Data,
                 any Tokenizer
             ) throws -> any UserInputProcessor
     ) {
         creators[type] = creator
     }
 
-    /// Given a `processorType` and configuration file instantiate a new `UserInputProcessor`.
-    public func createModel(configuration: URL, processorType: String, tokenizer: any Tokenizer)
+    /// Given a `processorType` and configuration data instantiate a new `UserInputProcessor`.
+    public func createModel(configuration: Data, processorType: String, tokenizer: any Tokenizer)
         throws -> sending any UserInputProcessor
     {
         guard let creator = creators[processorType] else {
diff --git a/Libraries/MLXVLM/VLMModelFactory.swift b/Libraries/MLXVLM/VLMModelFactory.swift
@@ -48,10 +48,9 @@ public struct BaseProcessorConfiguration: Codable, Sendable {
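-/// Creates a function that loads a configuration file and instantiates a model with the proper configuration
+/// Creates a function that decodes configuration data and instantiates a model with the proper configuration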
 private func create<C: Codable, M>(
     _ configurationType: C.Type, _ modelInit: @escaping (C) -> M
-) -> (URL) throws -> M {
-    { url in
-        let configuration = try JSONDecoder().decode(
-            C.self, from: Data(contentsOf: url))
+) -> (Data) throws -> M {
+    { data in
+        let configuration = try JSONDecoder().decode(C.self, from: data)
         return modelInit(configuration)
     }
 }
@@ -63,10 +62,9 @@ private func create<C: Codable, P>(
             C,
             any Tokenizer
         ) -> P
-) -> (URL, any Tokenizer) throws -> P {
-    { url, tokenizer in
-        let configuration = try JSONDecoder().decode(
-            C.self, from: Data(contentsOf: url))
+) -> (Data, any Tokenizer) throws -> P {
+    { data, tokenizer in
+        let configuration = try JSONDecoder().decode(C.self, from: data)
         return processorInit(configuration, tokenizer)
     }
 }
@@ -247,15 +245,13 @@ public final class VLMModelFactory: ModelFactory {
         let modelDirectory = try await downloadModel(
             hub: hub, configuration: configuration, progressHandler: progressHandler)
 
-        // load the generic config to understand which model and how to load the weights
-        let configurationURL = modelDirectory.appending(
-            component: "config.json"
-        )
-
+        // Load config.json once and decode for both base config and model-specific config
+        let configurationURL = modelDirectory.appending(component: "config.json")
+        let configData: Data
         let baseConfig: BaseConfiguration
         do {
-            baseConfig = try JSONDecoder().decode(
-                BaseConfiguration.self, from: Data(contentsOf: configurationURL))
+            configData = try Data(contentsOf: configurationURL)
+            baseConfig = try JSONDecoder().decode(BaseConfiguration.self, from: configData)
         } catch let error as DecodingError {
             throw ModelFactoryError.configurationDecodingError(
                 configurationURL.lastPathComponent, configuration.name, error)
@@ -264,39 +260,30 @@ public final class VLMModelFactory: ModelFactory {
         let model: LanguageModel
         do {
             model = try await typeRegistry.createModel(
-                configuration: configurationURL, modelType: baseConfig.modelType)
+                configuration: configData, modelType: baseConfig.modelType)
         } catch let error as DecodingError {
             throw ModelFactoryError.configurationDecodingError(
                 configurationURL.lastPathComponent, configuration.name, error)
         }
 
-        // apply the weights to the bare model
+        // Load weights, tokenizer, and processor config in parallel
+        async let tokenizerTask = loadTokenizer(configuration: configuration, hub: hub)
+        async let processorConfigTask: (Data, BaseProcessorConfiguration) = {
+            let url = modelDirectory.appending(component: "preprocessor_config.json")
+            let data = try Data(contentsOf: url)
+            let config = try JSONDecoder().decode(BaseProcessorConfiguration.self, from: data)
+            return (data, config)
+        }()
+
         try loadWeights(
             modelDirectory: modelDirectory, model: model,
             perLayerQuantization: baseConfig.perLayerQuantization)
 
-        let tokenizer = try await loadTokenizer(
-            configuration: configuration,
-            hub: hub
-        )
-
-        let processorConfigurationURL = modelDirectory.appending(
-            component: "preprocessor_config.json"
-        )
-
-        let baseProcessorConfig: BaseProcessorConfiguration
-        do {
-            baseProcessorConfig = try JSONDecoder().decode(
-                BaseProcessorConfiguration.self,
-                from: Data(contentsOf: processorConfigurationURL)
-            )
-        } catch let error as DecodingError {
-            throw ModelFactoryError.configurationDecodingError(
-                processorConfigurationURL.lastPathComponent, configuration.name, error)
-        }
+        let tokenizer = try await tokenizerTask
+        let (processorConfigData, baseProcessorConfig) = try await processorConfigTask
 
         let processor = try await processorRegistry.createModel(
-            configuration: processorConfigurationURL,
+            configuration: processorConfigData,
             processorType: baseProcessorConfig.processorClass, tokenizer: tokenizer)
 
         return .init(

Original file line number	Diff line number	Diff line change
`@@ -10,21 +10,22 @@ public actor ModelTypeRegistry {`
`10`	`10`	`}`
`11`	`11`
`12`	`12`	`/// Creates a registry with given creators.`
`13`		`- public init(creators: [String: (URL) throws -> any LanguageModel]) {`
	`13`	`+ public init(creators: [String: (Data) throws -> any LanguageModel]) {`
`14`	`14`	`self.creators = creators`
`15`	`15`	`}`
`16`	`16`
`17`		`- private var creators: [String: (URL) throws -> any LanguageModel]`
	`17`	`+ private var creators: [String: (Data) throws -> any LanguageModel]`
`18`	`18`
`19`	`19`	`/// Add a new model to the type registry.`
`20`	`20`	`public func registerModelType(`
`21`		`- _ type: String, creator: @escaping (URL) throws -> any LanguageModel`
	`21`	`+ _ type: String, creator: @escaping (Data) throws -> any LanguageModel`
`22`	`22`	`) {`
`23`	`23`	`creators[type] = creator`
`24`	`24`	`}`
`25`	`25`
`26`		- /// Given a `modelType` and configuration file instantiate a new `LanguageModel`.
`27`		`- public func createModel(configuration: URL, modelType: String) throws -> sending LanguageModel {`
	`26`	+ /// Given a `modelType` and configuration data instantiate a new `LanguageModel`.
	`27`	`+ public func createModel(configuration: Data, modelType: String) throws -> sending LanguageModel`
	`28`	`+ {`
`28`	`29`	`guard let creator = creators[modelType] else {`
`29`	`30`	`throw ModelFactoryError.unsupportedModelType(modelType)`
`30`	`31`	`}`