Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,14 @@ jobs:
run: xcodebuild -showComponent MetalToolchain

- name: Build (Xcode, macOS)
# Note: -skipMacroValidation causes CI to trust the Codable macros
shell: sh
run: |
xcodebuild -version
xcrun --show-sdk-build-version
swift --version
rm -rf ~/Library/Developer/Xcode/DerivedData/*
xcodebuild build-for-testing -scheme mlx-swift-lm-Package -destination 'platform=macOS'
xcodebuild build-for-testing -scheme mlx-swift-lm-Package -destination 'platform=macOS' -skipMacroValidation

- name: Run Tests (Xcode, macOS)
shell: sh
Expand Down
5 changes: 5 additions & 0 deletions Libraries/MLXLLM/Codable+Support.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import Foundation

/// Resolves the unqualified name `Decoder` to the standard library's `Swift.Decoder`.
///
/// `swift-transformers` also declares a public `Decoder`, and that declaration conflicts
/// with the `Codable` implementations.
public typealias Decoder = Swift.Decoder
17 changes: 6 additions & 11 deletions Libraries/MLXLLM/Documentation.docc/adding-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,12 @@ and create a `.swift` file for your new model:
Create a configuration struct that matches the model's `config.json`, declaring whichever parameters your model needs.

```swift
public struct YourModelConfiguration: Codable, Sendable {
public let hiddenSize: Int

// use this pattern for values that need defaults
public let _layerNormEps: Float?
public var layerNormEps: Float { _layerNormEps ?? 1e-6 }

enum CodingKeys: String, CodingKey {
case hiddenSize = "hidden_size"
case _layerNormEps = "layer_norm_eps"
}
import ReerCodable

@Codable
public struct YourModelConfiguration: Sendable {
@CodingKey("hidden_size") public var hiddenSize: Int
@CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-6
}
```

Expand Down
71 changes: 15 additions & 56 deletions Libraries/MLXLLM/Models/Cohere.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import Foundation
import MLX
import MLXLMCommon
import MLXNN
import ReerCodable

// port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/cohere.py

Expand Down Expand Up @@ -172,63 +173,21 @@ public class CohereModel: Module, LLMModel, KVCacheDimensionProvider {
}
}

public struct CohereConfiguration: Codable, Sendable {

var hiddenSize: Int
var hiddenLayers: Int
var intermediateSize: Int
var attentionHeads: Int
var layerNormEps: Float
var vocabularySize: Int
var kvHeads: Int
var ropeTheta: Float = 8000000.0
var ropeTraditional: Bool = true
var ropeScaling: [String: StringOrNumber]? = nil
var logitScale: Float

enum CodingKeys: String, CodingKey {
case hiddenSize = "hidden_size"
case hiddenLayers = "num_hidden_layers"
case intermediateSize = "intermediate_size"
case attentionHeads = "num_attention_heads"
case kvHeads = "num_key_value_heads"
case ropeTheta = "rope_theta"
case vocabularySize = "vocab_size"
case layerNormEps = "layer_norm_eps"
case logitScale = "logit_scale"
case ropeTraditional = "rope_traditional"
case ropeScaling = "rope_scaling"
}
@Codable
public struct CohereConfiguration: Sendable {

@CodingKey("hidden_size") public var hiddenSize: Int = 8192
@CodingKey("num_hidden_layers") public var hiddenLayers: Int = 40
@CodingKey("intermediate_size") public var intermediateSize: Int = 22528
@CodingKey("num_attention_heads") public var attentionHeads: Int = 64
@CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-5
@CodingKey("vocab_size") public var vocabularySize: Int = 256000
@CodingKey("num_key_value_heads") public var kvHeads: Int = 64
@CodingKey("rope_theta") public var ropeTheta: Float = 8000000.0
@CodingKey("rope_traditional") public var ropeTraditional: Bool = true
@CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil
@CodingKey("logit_scale") public var logitScale: Float = 0.0625

public init(from decoder: Decoder) throws {
// custom implementation to handle optional keys with required values
let container: KeyedDecodingContainer<CohereConfiguration.CodingKeys> =
try decoder.container(
keyedBy: CohereConfiguration.CodingKeys.self)

self.hiddenSize = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.hiddenSize)
self.hiddenLayers = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.hiddenLayers)
self.intermediateSize = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.intermediateSize)
self.attentionHeads = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.attentionHeads)
self.layerNormEps = try container.decode(
Float.self, forKey: CohereConfiguration.CodingKeys.layerNormEps)
self.vocabularySize = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.vocabularySize)
self.kvHeads = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.kvHeads)
self.ropeTheta =
try container.decodeIfPresent(
Float.self, forKey: CohereConfiguration.CodingKeys.ropeTheta)
?? 8000000.0
self.ropeScaling = try container.decodeIfPresent(
[String: StringOrNumber].self, forKey: CohereConfiguration.CodingKeys.ropeScaling)
self.logitScale = try container.decode(
Float.self, forKey: CohereConfiguration.CodingKeys.logitScale)
}
}

// MARK: - LoRA
Expand Down
80 changes: 16 additions & 64 deletions Libraries/MLXLLM/Models/Gemma2.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import Foundation
import MLX
import MLXLMCommon
import MLXNN
import ReerCodable
import Tokenizers

// Port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/gemma2.py
Expand Down Expand Up @@ -204,70 +205,21 @@ public class Gemma2Model: Module, LLMModel, KVCacheDimensionProvider {
}
}

public struct Gemma2Configuration: Codable {
var hiddenSize: Int
var hiddenLayers: Int
var intermediateSize: Int
var attentionHeads: Int
var headDimensions: Int
var rmsNormEps: Float
var vocabularySize: Int
var kvHeads: Int
var ropeTheta: Float = 10_000
var ropeTraditional: Bool = false
var attnLogitSoftcapping: Float = 50.0
var finalLogitSoftcapping: Float = 30.0
var queryPreAttnScalar: Float = 144.0

enum CodingKeys: String, CodingKey {
case hiddenSize = "hidden_size"
case hiddenLayers = "num_hidden_layers"
case intermediateSize = "intermediate_size"
case attentionHeads = "num_attention_heads"
case headDimensions = "head_dim"
case rmsNormEps = "rms_norm_eps"
case vocabularySize = "vocab_size"
case kvHeads = "num_key_value_heads"
case ropeTheta = "rope_theta"
case ropeTraditional = "rope_traditional"
case attnLogitSoftcapping = "attn_logit_softcapping"
case finalLogitSoftcapping = "final_logit_softcapping"
case queryPreAttnScalar = "query_pre_attn_scalar"
}

public init(from decoder: Swift.Decoder) throws {
// Custom implementation to handle optional keys with required values
let container: KeyedDecodingContainer<CodingKeys> = try decoder.container(
keyedBy: CodingKeys.self)

self.hiddenSize = try container.decode(
Int.self, forKey: CodingKeys.hiddenSize)
self.hiddenLayers = try container.decode(
Int.self, forKey: CodingKeys.hiddenLayers)
self.intermediateSize = try container.decode(
Int.self, forKey: CodingKeys.intermediateSize)
self.attentionHeads = try container.decode(
Int.self, forKey: CodingKeys.attentionHeads)
self.headDimensions = try container.decode(
Int.self, forKey: CodingKeys.headDimensions)
self.rmsNormEps = try container.decode(
Float.self, forKey: CodingKeys.rmsNormEps)
self.vocabularySize = try container.decode(
Int.self, forKey: CodingKeys.vocabularySize)
self.kvHeads = try container.decode(Int.self, forKey: CodingKeys.kvHeads)
self.ropeTheta =
try container.decodeIfPresent(Float.self, forKey: CodingKeys.ropeTheta)
?? 10_000
self.ropeTraditional =
try container.decodeIfPresent(
Bool.self, forKey: CodingKeys.ropeTraditional) ?? false
self.attnLogitSoftcapping = try container.decode(
Float.self, forKey: CodingKeys.attnLogitSoftcapping)
self.finalLogitSoftcapping = try container.decode(
Float.self, forKey: CodingKeys.finalLogitSoftcapping)
self.queryPreAttnScalar = try container.decode(
Float.self, forKey: CodingKeys.queryPreAttnScalar)
}
/// Configuration values for the Gemma 2 model.
///
/// Each property is annotated with `@CodingKey` naming the snake_case key it is
/// coded under (for example `hidden_size` for `hiddenSize`). `Codable` conformance
/// is supplied by the `@Codable` macro rather than a hand-written `CodingKeys`
/// enum and `init(from:)`.
///
/// NOTE(review): properties declared with an initial value (`ropeTheta` through
/// `queryPreAttnScalar`) presumably fall back to that value when the key is
/// missing from the decoded data — confirm against the ReerCodable documentation.
@Codable
public struct Gemma2Configuration: Sendable {
// Properties without an initial value.
@CodingKey("hidden_size") public var hiddenSize: Int
@CodingKey("num_hidden_layers") public var hiddenLayers: Int
@CodingKey("intermediate_size") public var intermediateSize: Int
@CodingKey("num_attention_heads") public var attentionHeads: Int
@CodingKey("head_dim") public var headDimensions: Int
@CodingKey("rms_norm_eps") public var rmsNormEps: Float
@CodingKey("vocab_size") public var vocabularySize: Int
@CodingKey("num_key_value_heads") public var kvHeads: Int
// Properties with an initial value.
@CodingKey("rope_theta") public var ropeTheta: Float = 10_000
@CodingKey("rope_traditional") public var ropeTraditional: Bool = false
@CodingKey("attn_logit_softcapping") public var attnLogitSoftcapping: Float = 50.0
@CodingKey("final_logit_softcapping") public var finalLogitSoftcapping: Float = 30.0
@CodingKey("query_pre_attn_scalar") public var queryPreAttnScalar: Float = 144.0
}

// MARK: - LoRA
Expand Down
120 changes: 25 additions & 95 deletions Libraries/MLXLLM/Models/Llama.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import Foundation
import MLX
import MLXLMCommon
import MLXNN
import ReerCodable
import Tokenizers

// port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/llama.py
Expand Down Expand Up @@ -340,108 +341,37 @@ public class LlamaModel: Module, LLMModel, KVCacheDimensionProvider {
}
}

public struct LlamaConfiguration: Codable, Sendable {

var hiddenSize: Int
var hiddenLayers: Int
var intermediateSize: Int
var attentionHeads: Int
var headDimensions: Int?
var rmsNormEps: Float
var vocabularySize: Int
var kvHeads: Int
var maxPositionEmbeddings: Int?
var ropeTheta: Float = 10_000
var ropeTraditional: Bool = false
var ropeScaling: [String: StringOrNumber]?
var tieWordEmbeddings: Bool = true
var attentionBias: Bool = false
var mlpBias: Bool = false

public init(
hiddenSize: Int, hiddenLayers: Int, intermediateSize: Int, attentionHeads: Int,
headDimensions: Int? = nil, rmsNormEps: Float, vocabularySize: Int, kvHeads: Int,
maxPositionEmbeddings: Int? = nil, ropeTheta: Float = 10_000, ropeTraditional: Bool = false,
ropeScaling: [String: StringOrNumber]? = nil, tieWordEmbeddings: Bool = true,
attentionBias: Bool = false, mlpBias: Bool = false
) {
self.hiddenSize = hiddenSize
self.hiddenLayers = hiddenLayers
self.intermediateSize = intermediateSize
self.attentionHeads = attentionHeads
self.headDimensions = headDimensions
self.rmsNormEps = rmsNormEps
self.vocabularySize = vocabularySize
self.kvHeads = kvHeads
self.maxPositionEmbeddings = maxPositionEmbeddings
self.ropeTheta = ropeTheta
self.ropeTraditional = ropeTraditional
self.ropeScaling = ropeScaling
self.tieWordEmbeddings = tieWordEmbeddings
self.attentionBias = attentionBias
self.mlpBias = mlpBias
}
@Codable
public struct LlamaConfiguration: Sendable {

@CodingKey("hidden_size") public var hiddenSize: Int
@CodingKey("num_hidden_layers") public var hiddenLayers: Int
@CodingKey("intermediate_size") public var intermediateSize: Int
@CodingKey("num_attention_heads") public var attentionHeads: Int
@CodingKey("head_dim") public var headDimensions: Int?
@CodingKey("rms_norm_eps") public var rmsNormEps: Float
@CodingKey("vocab_size") public var vocabularySize: Int
@CodingKey("num_key_value_heads", "num_attention_heads") public var kvHeads: Int
@CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int?
@CodingKey("rope_theta") public var ropeTheta: Float = 10_000
@CodingKey("rope_traditional") public var ropeTraditional: Bool = false
@CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]?
@CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true
@CodingKey("attention_bias") public var attentionBias: Bool = false
@CodingKey("mlp_bias") public var mlpBias: Bool = false

/// The per-head dimension to use: the explicit `headDimensions` when it is set,
/// otherwise `hiddenSize / attentionHeads`.
var resolvedHeadDimensions: Int {
    if let headDimensions {
        return headDimensions
    }
    // No explicit value: derive it from the hidden size and head count.
    return hiddenSize / attentionHeads
}

enum CodingKeys: String, CodingKey {
case hiddenSize = "hidden_size"
case hiddenLayers = "num_hidden_layers"
case intermediateSize = "intermediate_size"
case attentionHeads = "num_attention_heads"
case headDimensions = "head_dim"
case rmsNormEps = "rms_norm_eps"
case vocabularySize = "vocab_size"
case kvHeads = "num_key_value_heads"
case maxPositionEmbeddings = "max_position_embeddings"
case ropeTheta = "rope_theta"
case ropeTraditional = "rope_traditional"
case ropeScaling = "rope_scaling"
case tieWordEmbeddings = "tie_word_embeddings"
case attentionBias = "attention_bias"
case mlpBias = "mlp_bias"
}

public init(from decoder: Swift.Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self)

hiddenSize = try container.decode(Int.self, forKey: .hiddenSize)
hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers)
intermediateSize = try container.decode(Int.self, forKey: .intermediateSize)
attentionHeads = try container.decode(Int.self, forKey: .attentionHeads)
headDimensions = try container.decodeIfPresent(Int.self, forKey: .headDimensions)
rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps)
vocabularySize = try container.decode(Int.self, forKey: .vocabularySize)
kvHeads = try container.decodeIfPresent(Int.self, forKey: .kvHeads) ?? attentionHeads
maxPositionEmbeddings = try container.decodeIfPresent(
Int.self, forKey: .maxPositionEmbeddings)
if let ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) {
self.ropeTheta = ropeTheta
}
if let ropeTraditional = try container.decodeIfPresent(Bool.self, forKey: .ropeTraditional)
{
self.ropeTraditional = ropeTraditional
}
ropeScaling = try container.decodeIfPresent(
[String: StringOrNumber].self, forKey: .ropeScaling)
if let tieWordEmbeddings = try container.decodeIfPresent(
Bool.self, forKey: .tieWordEmbeddings)
{
self.tieWordEmbeddings = tieWordEmbeddings
}
if let attentionBias = try container.decodeIfPresent(Bool.self, forKey: .attentionBias) {
self.attentionBias = attentionBias
}
if let mlpBias = try container.decodeIfPresent(Bool.self, forKey: .mlpBias) {
self.mlpBias = mlpBias
}
public func didDecode(from decoder: any Decoder) throws {
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an interesting one because it has some logic to post-process the configuration.

let container = try decoder.container(keyedBy: AnyCodingKey.self)
let codingKey = AnyCodingKey("rope_scaling")

if let ropeScaling {
if ropeScaling["factor"] == nil {
throw DecodingError.dataCorruptedError(
forKey: .ropeScaling, in: container,
forKey: codingKey, in: container,
debugDescription: "rope_scaling must contain 'factor'")
}
if let ropeType = ropeScaling["type"] ?? ropeScaling["rope_type"] {
Expand All @@ -452,15 +382,15 @@ public struct LlamaConfiguration: Codable, Sendable {
]
if !options.contains(ropeType) {
throw DecodingError.dataCorruptedError(
forKey: .ropeScaling, in: container,
forKey: codingKey, in: container,
debugDescription:
"rope_scaling 'type' currently only supports 'linear', 'dynamic', or 'llama3'"
)
}
}
} else {
throw DecodingError.dataCorruptedError(
forKey: .ropeScaling, in: container,
forKey: codingKey, in: container,
debugDescription: "rope_scaling must contain either 'type' or 'rope_type'")
}
}
Expand Down
Loading
Loading