ml-explore · davidkoski · May 14, 2025 · Feb 12, 2026 · Mar 4, 2026 · Mar 4, 2026
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
@@ -83,13 +83,14 @@ jobs:
         run: xcodebuild -showComponent MetalToolchain
 
       - name: Build (Xcode, macOS)
+        # Note: -skipMacroValidation makes CI trust the Codable macros without manual approval.
         shell: sh
         run: |
           xcodebuild -version
           xcrun --show-sdk-build-version
           swift --version
           rm -rf ~/Library/Developer/Xcode/DerivedData/*
-          xcodebuild build-for-testing -scheme mlx-swift-lm-Package -destination 'platform=macOS'
+          xcodebuild build-for-testing -scheme mlx-swift-lm-Package -destination 'platform=macOS' -skipMacroValidation
 
       - name: Run Tests (Xcode, macOS)
         shell: sh

diff --git a/Libraries/MLXLLM/Codable+Support.swift b/Libraries/MLXLLM/Codable+Support.swift
@@ -0,0 +1,5 @@
+import Foundation
+
+/// `swift-transformers` also declares a public `Decoder`, which conflicts with `Swift.Decoder` in the
+/// `Codable` implementations; this alias makes the unqualified name resolve to `Swift.Decoder`.
+public typealias Decoder = Swift.Decoder
diff --git a/Libraries/MLXLLM/Documentation.docc/adding-model.md b/Libraries/MLXLLM/Documentation.docc/adding-model.md
@@ -14,17 +14,12 @@ and create a `.swift` file for your new model:
 Create a configuration struct to match the `config.json` (any parameters needed).
 
 ```swift
-public struct YourModelConfiguration: Codable, Sendable {
-    public let hiddenSize: Int
-
-    // use this pattern for values that need defaults
-    public let _layerNormEps: Float?
-    public var layerNormEps: Float { _layerNormEps ?? 1e-6 }
-
-    enum CodingKeys: String, CodingKey {
-        case hiddenSize = "hidden_size"
-        case _layerNormEps = "layer_norm_eps"
-    }
+import ReerCodable
+
+@Codable
+public struct YourModelConfiguration: Sendable {
+    @CodingKey("hidden_size") public var hiddenSize: Int
+    @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-6
 }
 ```
 

diff --git a/Libraries/MLXLLM/Models/Cohere.swift b/Libraries/MLXLLM/Models/Cohere.swift
@@ -2,6 +2,7 @@ import Foundation
 import MLX
 import MLXLMCommon
 import MLXNN
+import ReerCodable
 
 // port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/cohere.py
 
@@ -172,63 +173,21 @@ public class CohereModel: Module, LLMModel, KVCacheDimensionProvider {
     }
 }
 
-public struct CohereConfiguration: Codable, Sendable {
-
-    var hiddenSize: Int
-    var hiddenLayers: Int
-    var intermediateSize: Int
-    var attentionHeads: Int
-    var layerNormEps: Float
-    var vocabularySize: Int
-    var kvHeads: Int
-    var ropeTheta: Float = 8000000.0
-    var ropeTraditional: Bool = true
-    var ropeScaling: [String: StringOrNumber]? = nil
-    var logitScale: Float
-
-    enum CodingKeys: String, CodingKey {
-        case hiddenSize = "hidden_size"
-        case hiddenLayers = "num_hidden_layers"
-        case intermediateSize = "intermediate_size"
-        case attentionHeads = "num_attention_heads"
-        case kvHeads = "num_key_value_heads"
-        case ropeTheta = "rope_theta"
-        case vocabularySize = "vocab_size"
-        case layerNormEps = "layer_norm_eps"
-        case logitScale = "logit_scale"
-        case ropeTraditional = "rope_traditional"
-        case ropeScaling = "rope_scaling"
-    }
+@Codable
+public struct CohereConfiguration: Sendable {
+
+    @CodingKey("hidden_size") public var hiddenSize: Int = 8192
+    @CodingKey("num_hidden_layers") public var hiddenLayers: Int = 40
+    @CodingKey("intermediate_size") public var intermediateSize: Int = 22528
+    @CodingKey("num_attention_heads") public var attentionHeads: Int = 64
+    @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-5
+    @CodingKey("vocab_size") public var vocabularySize: Int = 256000
+    @CodingKey("num_key_value_heads") public var kvHeads: Int = 64
+    @CodingKey("rope_theta") public var ropeTheta: Float = 8000000.0
+    @CodingKey("rope_traditional") public var ropeTraditional: Bool = true
+    @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil
+    @CodingKey("logit_scale") public var logitScale: Float = 0.0625
 
-    public init(from decoder: Decoder) throws {
-        // custom implementation to handle optional keys with required values
-        let container: KeyedDecodingContainer<CohereConfiguration.CodingKeys> =
-            try decoder.container(
-                keyedBy: CohereConfiguration.CodingKeys.self)
-
-        self.hiddenSize = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.hiddenSize)
-        self.hiddenLayers = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.hiddenLayers)
-        self.intermediateSize = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.intermediateSize)
-        self.attentionHeads = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.attentionHeads)
-        self.layerNormEps = try container.decode(
-            Float.self, forKey: CohereConfiguration.CodingKeys.layerNormEps)
-        self.vocabularySize = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.vocabularySize)
-        self.kvHeads = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.kvHeads)
-        self.ropeTheta =
-            try container.decodeIfPresent(
-                Float.self, forKey: CohereConfiguration.CodingKeys.ropeTheta)
-            ?? 8000000.0
-        self.ropeScaling = try container.decodeIfPresent(
-            [String: StringOrNumber].self, forKey: CohereConfiguration.CodingKeys.ropeScaling)
-        self.logitScale = try container.decode(
-            Float.self, forKey: CohereConfiguration.CodingKeys.logitScale)
-    }
 }
 
 // MARK: - LoRA

diff --git a/Libraries/MLXLLM/Models/Gemma2.swift b/Libraries/MLXLLM/Models/Gemma2.swift
@@ -4,6 +4,7 @@ import Foundation
 import MLX
 import MLXLMCommon
 import MLXNN
+import ReerCodable
 import Tokenizers
 
 // Port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/gemma2.py
@@ -204,70 +205,21 @@ public class Gemma2Model: Module, LLMModel, KVCacheDimensionProvider {
     }
 }
 
-public struct Gemma2Configuration: Codable {
-    var hiddenSize: Int
-    var hiddenLayers: Int
-    var intermediateSize: Int
-    var attentionHeads: Int
-    var headDimensions: Int
-    var rmsNormEps: Float
-    var vocabularySize: Int
-    var kvHeads: Int
-    var ropeTheta: Float = 10_000
-    var ropeTraditional: Bool = false
-    var attnLogitSoftcapping: Float = 50.0
-    var finalLogitSoftcapping: Float = 30.0
-    var queryPreAttnScalar: Float = 144.0
-
-    enum CodingKeys: String, CodingKey {
-        case hiddenSize = "hidden_size"
-        case hiddenLayers = "num_hidden_layers"
-        case intermediateSize = "intermediate_size"
-        case attentionHeads = "num_attention_heads"
-        case headDimensions = "head_dim"
-        case rmsNormEps = "rms_norm_eps"
-        case vocabularySize = "vocab_size"
-        case kvHeads = "num_key_value_heads"
-        case ropeTheta = "rope_theta"
-        case ropeTraditional = "rope_traditional"
-        case attnLogitSoftcapping = "attn_logit_softcapping"
-        case finalLogitSoftcapping = "final_logit_softcapping"
-        case queryPreAttnScalar = "query_pre_attn_scalar"
-    }
-
-    public init(from decoder: Swift.Decoder) throws {
-        // Custom implementation to handle optional keys with required values
-        let container: KeyedDecodingContainer<CodingKeys> = try decoder.container(
-            keyedBy: CodingKeys.self)
-
-        self.hiddenSize = try container.decode(
-            Int.self, forKey: CodingKeys.hiddenSize)
-        self.hiddenLayers = try container.decode(
-            Int.self, forKey: CodingKeys.hiddenLayers)
-        self.intermediateSize = try container.decode(
-            Int.self, forKey: CodingKeys.intermediateSize)
-        self.attentionHeads = try container.decode(
-            Int.self, forKey: CodingKeys.attentionHeads)
-        self.headDimensions = try container.decode(
-            Int.self, forKey: CodingKeys.headDimensions)
-        self.rmsNormEps = try container.decode(
-            Float.self, forKey: CodingKeys.rmsNormEps)
-        self.vocabularySize = try container.decode(
-            Int.self, forKey: CodingKeys.vocabularySize)
-        self.kvHeads = try container.decode(Int.self, forKey: CodingKeys.kvHeads)
-        self.ropeTheta =
-            try container.decodeIfPresent(Float.self, forKey: CodingKeys.ropeTheta)
-            ?? 10_000
-        self.ropeTraditional =
-            try container.decodeIfPresent(
-                Bool.self, forKey: CodingKeys.ropeTraditional) ?? false
-        self.attnLogitSoftcapping = try container.decode(
-            Float.self, forKey: CodingKeys.attnLogitSoftcapping)
-        self.finalLogitSoftcapping = try container.decode(
-            Float.self, forKey: CodingKeys.finalLogitSoftcapping)
-        self.queryPreAttnScalar = try container.decode(
-            Float.self, forKey: CodingKeys.queryPreAttnScalar)
-    }
+@Codable
+public struct Gemma2Configuration: Sendable {
+    @CodingKey("hidden_size") public var hiddenSize: Int
+    @CodingKey("num_hidden_layers") public var hiddenLayers: Int
+    @CodingKey("intermediate_size") public var intermediateSize: Int
+    @CodingKey("num_attention_heads") public var attentionHeads: Int
+    @CodingKey("head_dim") public var headDimensions: Int
+    @CodingKey("rms_norm_eps") public var rmsNormEps: Float
+    @CodingKey("vocab_size") public var vocabularySize: Int
+    @CodingKey("num_key_value_heads") public var kvHeads: Int
+    @CodingKey("rope_theta") public var ropeTheta: Float = 10_000
+    @CodingKey("rope_traditional") public var ropeTraditional: Bool = false
+    @CodingKey("attn_logit_softcapping") public var attnLogitSoftcapping: Float = 50.0
+    @CodingKey("final_logit_softcapping") public var finalLogitSoftcapping: Float = 30.0
+    @CodingKey("query_pre_attn_scalar") public var queryPreAttnScalar: Float = 144.0
 }
 
 // MARK: - LoRA

diff --git a/Libraries/MLXLLM/Models/Llama.swift b/Libraries/MLXLLM/Models/Llama.swift
@@ -4,6 +4,7 @@ import Foundation
 import MLX
 import MLXLMCommon
 import MLXNN
+import ReerCodable
 import Tokenizers
 
 // port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/llama.py
@@ -340,108 +341,37 @@ public class LlamaModel: Module, LLMModel, KVCacheDimensionProvider {
     }
 }
 
-public struct LlamaConfiguration: Codable, Sendable {
-
-    var hiddenSize: Int
-    var hiddenLayers: Int
-    var intermediateSize: Int
-    var attentionHeads: Int
-    var headDimensions: Int?
-    var rmsNormEps: Float
-    var vocabularySize: Int
-    var kvHeads: Int
-    var maxPositionEmbeddings: Int?
-    var ropeTheta: Float = 10_000
-    var ropeTraditional: Bool = false
-    var ropeScaling: [String: StringOrNumber]?
-    var tieWordEmbeddings: Bool = true
-    var attentionBias: Bool = false
-    var mlpBias: Bool = false
-
-    public init(
-        hiddenSize: Int, hiddenLayers: Int, intermediateSize: Int, attentionHeads: Int,
-        headDimensions: Int? = nil, rmsNormEps: Float, vocabularySize: Int, kvHeads: Int,
-        maxPositionEmbeddings: Int? = nil, ropeTheta: Float = 10_000, ropeTraditional: Bool = false,
-        ropeScaling: [String: StringOrNumber]? = nil, tieWordEmbeddings: Bool = true,
-        attentionBias: Bool = false, mlpBias: Bool = false
-    ) {
-        self.hiddenSize = hiddenSize
-        self.hiddenLayers = hiddenLayers
-        self.intermediateSize = intermediateSize
-        self.attentionHeads = attentionHeads
-        self.headDimensions = headDimensions
-        self.rmsNormEps = rmsNormEps
-        self.vocabularySize = vocabularySize
-        self.kvHeads = kvHeads
-        self.maxPositionEmbeddings = maxPositionEmbeddings
-        self.ropeTheta = ropeTheta
-        self.ropeTraditional = ropeTraditional
-        self.ropeScaling = ropeScaling
-        self.tieWordEmbeddings = tieWordEmbeddings
-        self.attentionBias = attentionBias
-        self.mlpBias = mlpBias
-    }
+@Codable
+public struct LlamaConfiguration: Sendable {
+
+    @CodingKey("hidden_size") public var hiddenSize: Int
+    @CodingKey("num_hidden_layers") public var hiddenLayers: Int
+    @CodingKey("intermediate_size") public var intermediateSize: Int
+    @CodingKey("num_attention_heads") public var attentionHeads: Int
+    @CodingKey("head_dim") public var headDimensions: Int?
+    @CodingKey("rms_norm_eps") public var rmsNormEps: Float
+    @CodingKey("vocab_size") public var vocabularySize: Int
+    @CodingKey("num_key_value_heads", "num_attention_heads") public var kvHeads: Int
+    @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int?
+    @CodingKey("rope_theta") public var ropeTheta: Float = 10_000
+    @CodingKey("rope_traditional") public var ropeTraditional: Bool = false
+    @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]?
+    @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true
+    @CodingKey("attention_bias") public var attentionBias: Bool = false
+    @CodingKey("mlp_bias") public var mlpBias: Bool = false
 
     var resolvedHeadDimensions: Int {
         headDimensions ?? (hiddenSize / attentionHeads)
     }
 
-    enum CodingKeys: String, CodingKey {
-        case hiddenSize = "hidden_size"
-        case hiddenLayers = "num_hidden_layers"
-        case intermediateSize = "intermediate_size"
-        case attentionHeads = "num_attention_heads"
-        case headDimensions = "head_dim"
-        case rmsNormEps = "rms_norm_eps"
-        case vocabularySize = "vocab_size"
-        case kvHeads = "num_key_value_heads"
-        case maxPositionEmbeddings = "max_position_embeddings"
-        case ropeTheta = "rope_theta"
-        case ropeTraditional = "rope_traditional"
-        case ropeScaling = "rope_scaling"
-        case tieWordEmbeddings = "tie_word_embeddings"
-        case attentionBias = "attention_bias"
-        case mlpBias = "mlp_bias"
-    }
-
-    public init(from decoder: Swift.Decoder) throws {
-        let container = try decoder.container(keyedBy: CodingKeys.self)
-
-        hiddenSize = try container.decode(Int.self, forKey: .hiddenSize)
-        hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers)
-        intermediateSize = try container.decode(Int.self, forKey: .intermediateSize)
-        attentionHeads = try container.decode(Int.self, forKey: .attentionHeads)
-        headDimensions = try container.decodeIfPresent(Int.self, forKey: .headDimensions)
-        rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps)
-        vocabularySize = try container.decode(Int.self, forKey: .vocabularySize)
-        kvHeads = try container.decodeIfPresent(Int.self, forKey: .kvHeads) ?? attentionHeads
-        maxPositionEmbeddings = try container.decodeIfPresent(
-            Int.self, forKey: .maxPositionEmbeddings)
-        if let ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) {
-            self.ropeTheta = ropeTheta
-        }
-        if let ropeTraditional = try container.decodeIfPresent(Bool.self, forKey: .ropeTraditional)
-        {
-            self.ropeTraditional = ropeTraditional
-        }
-        ropeScaling = try container.decodeIfPresent(
-            [String: StringOrNumber].self, forKey: .ropeScaling)
-        if let tieWordEmbeddings = try container.decodeIfPresent(
-            Bool.self, forKey: .tieWordEmbeddings)
-        {
-            self.tieWordEmbeddings = tieWordEmbeddings
-        }
-        if let attentionBias = try container.decodeIfPresent(Bool.self, forKey: .attentionBias) {
-            self.attentionBias = attentionBias
-        }
-        if let mlpBias = try container.decodeIfPresent(Bool.self, forKey: .mlpBias) {
-            self.mlpBias = mlpBias
-        }
+    public func didDecode(from decoder: any Decoder) throws {
+        let container = try decoder.container(keyedBy: AnyCodingKey.self)
+        let codingKey = AnyCodingKey("rope_scaling")
 
         if let ropeScaling {
             if ropeScaling["factor"] == nil {
                 throw DecodingError.dataCorruptedError(
-                    forKey: .ropeScaling, in: container,
+                    forKey: codingKey, in: container,
                     debugDescription: "rope_scaling must contain 'factor'")
             }
             if let ropeType = ropeScaling["type"] ?? ropeScaling["rope_type"] {
@@ -452,15 +382,15 @@ public struct LlamaConfiguration: Codable, Sendable {
                     ]
                     if !options.contains(ropeType) {
                         throw DecodingError.dataCorruptedError(
-                            forKey: .ropeScaling, in: container,
+                            forKey: codingKey, in: container,
                             debugDescription:
                                 "rope_scaling 'type' currently only supports 'linear', 'dynamic', or 'llama3'"
                         )
                     }
                 }
             } else {
                 throw DecodingError.dataCorruptedError(
-                    forKey: .ropeScaling, in: container,
+                    forKey: codingKey, in: container,
                     debugDescription: "rope_scaling must contain either 'type' or 'rope_type'")
             }
         }