Skip to content

Commit 94200f2

Browse files
fix: add @ModuleInfo to all Module-typed properties for weight update support
Module-typed properties declared as plain `let` without @ModuleInfo cannot be updated via Module.update(modules:verify:), which is called during quantization. This causes a fatal crash when loading quantized models: MLXNN.UpdateError.needModuleInfo("Unable to get @ModuleInfo for BertModel.pooler -- must be wrapped to receive updates") The quantize() function replaces Linear modules with QuantizedLinear via update(modules:), which requires @ModuleInfo setters. Without them, the non-throwing update(modules:) wrapper hits try! on the thrown error → SIGABRT. Fixed 36 properties across MLXEmbedders, MLXLLM, and MLXVLM: - MLXEmbedders: BertModel.pooler, NomicBertModel.pooler, Qwen3 TransformerBlock.mlp, Qwen3ModelInner.norm - MLXLLM: norm properties in 29 model classes, plus FalconH1Mixer.conv1d - MLXVLM: Idefics3 TransformerBlock.mlp, Idefics3 LanguageModel.norm, Ministral3ModelInner.norm, LanguageModelInner.norm Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent bc3c20e commit 94200f2

36 files changed

+80
-80
lines changed

Libraries/MLXEmbedders/Models/Bert.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ public class BertModel: Module, EmbeddingModel {
259259
@ModuleInfo(key: "embeddings") fileprivate var embedder: BertEmbedding
260260

261261
/// A linear layer used to "pool" the [CLS] token into a single sentence vector.
262-
let pooler: Linear?
262+
@ModuleInfo var pooler: Linear?
263263

264264
/// The stack of Transformer layers.
265265
fileprivate let encoder: Encoder
@@ -280,10 +280,10 @@ public class BertModel: Module, EmbeddingModel {
280280

281281
if lmHead {
282282
_lmHead.wrappedValue = LMHead(config)
283-
self.pooler = nil
283+
_pooler.wrappedValue = nil
284284
} else {
285285
// Pooler projects the [CLS] token to a hidden state of the same size
286-
pooler = Linear(config.embedDim, config.embedDim)
286+
_pooler.wrappedValue = Linear(config.embedDim, config.embedDim)
287287
_lmHead.wrappedValue = nil
288288
}
289289
}

Libraries/MLXEmbedders/Models/NomicBert.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ public class NomicBertModel: Module, EmbeddingModel {
671671

672672
/// The optional pooler layer.
673673
/// Used to extract a single vector representation for the whole sequence (usually from the [CLS] token).
674-
let pooler: Linear?
674+
@ModuleInfo var pooler: Linear?
675675

676676
/// The stack of Transformer blocks.
677677
fileprivate let encoder: Encoder
@@ -696,9 +696,9 @@ public class NomicBertModel: Module, EmbeddingModel {
696696

697697
// Initialize Pooler (for sentence embeddings)
698698
if pooler {
699-
self.pooler = Linear(config.embedDim, config.embedDim, bias: false)
699+
_pooler.wrappedValue = Linear(config.embedDim, config.embedDim, bias: false)
700700
} else {
701-
self.pooler = nil
701+
_pooler.wrappedValue = nil
702702
}
703703

704704
// Initialize LM Head (for training/masked prediction)

Libraries/MLXEmbedders/Models/Qwen3.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ private class TransformerBlock: Module {
169169
@ModuleInfo(key: "self_attn") var attention: Attention
170170

171171
/// The feed-forward network (SwiGLU).
172-
let mlp: MLP
172+
@ModuleInfo var mlp: MLP
173173

174174
/// Normalization applied before the attention layer.
175175
@ModuleInfo(key: "input_layernorm") var inputLayerNorm: RMSNorm
@@ -182,7 +182,7 @@ private class TransformerBlock: Module {
182182
public init(_ args: Qwen3Configuration) {
183183
// Initialize the two main processing sub-layers
184184
_attention.wrappedValue = Attention(args)
185-
self.mlp = MLP(dimensions: args.hiddenSize, hiddenDimensions: args.intermediateSize)
185+
_mlp.wrappedValue = MLP(dimensions: args.hiddenSize, hiddenDimensions: args.intermediateSize)
186186

187187
// Initialize RMSNorm layers with the specified epsilon for numerical stability
188188
_inputLayerNorm.wrappedValue = RMSNorm(
@@ -236,7 +236,7 @@ private class Qwen3ModelInner: Module {
236236
fileprivate let layers: [TransformerBlock]
237237

238238
/// The final normalization layer applied after all transformer blocks.
239-
let norm: RMSNorm
239+
@ModuleInfo var norm: RMSNorm
240240

241241
/// Initializes the model backbone.
242242
/// - Parameter args: Configuration containing `vocabularySize`, `hiddenLayers`, and `hiddenSize`.
@@ -255,7 +255,7 @@ private class Qwen3ModelInner: Module {
255255
}
256256

257257
// 3. Initialize final RMSNorm
258-
self.norm = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
258+
_norm.wrappedValue = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
259259
}
260260

261261
/// Forward pass through the model backbone.

Libraries/MLXLLM/Models/Apertus.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ private class ApertusModelInner: Module {
303303
@ModuleInfo(key: "embed_tokens") var embedTokens: Embedding
304304

305305
let layers: [ApertusBlock]
306-
let norm: RMSNorm
306+
@ModuleInfo var norm: RMSNorm
307307

308308
public init(_ args: ApertusConfiguration) {
309309
precondition(args.vocabSize > 0)
@@ -313,7 +313,7 @@ private class ApertusModelInner: Module {
313313
dimensions: args.hiddenSize
314314
)
315315
self.layers = (0 ..< args.numHiddenLayers).map { _ in ApertusBlock(args) }
316-
self.norm = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
316+
_norm.wrappedValue = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
317317
}
318318

319319
public func callAsFunction(

Libraries/MLXLLM/Models/BaichuanM1.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ public class BaichuanM1ModelInner: Module {
203203
@ModuleInfo(key: "embed_tokens") var embedTokens: Embedding
204204

205205
fileprivate let layers: [BaichuanM1DecoderLayer]
206-
let norm: RMSNorm
206+
@ModuleInfo var norm: RMSNorm
207207

208208
init(_ config: BaichuanM1Configuration) {
209209
self.args = config
@@ -212,7 +212,7 @@ public class BaichuanM1ModelInner: Module {
212212
self.layers = (0 ..< config.hiddenLayers).map {
213213
BaichuanM1DecoderLayer(config, layerIdx: $0)
214214
}
215-
norm = RMSNorm(dimensions: config.hiddenSize, eps: config.rmsNormEps)
215+
_norm.wrappedValue = RMSNorm(dimensions: config.hiddenSize, eps: config.rmsNormEps)
216216
}
217217

218218
func callAsFunction(

Libraries/MLXLLM/Models/BailingMoe.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ class BailingMoeTransformerBlock: Module {
313313
public class BailingMoeModelInner: Module {
314314
@ModuleInfo(key: "word_embeddings") var embedTokens: Embedding
315315
let layers: [BailingMoeTransformerBlock]
316-
let norm: RMSNorm
316+
@ModuleInfo var norm: RMSNorm
317317

318318
init(_ args: BailingMoeConfiguration) {
319319
precondition(args.vocabularySize > 0)
@@ -322,7 +322,7 @@ public class BailingMoeModelInner: Module {
322322
self.layers = (0 ..< args.hiddenLayers).map {
323323
BailingMoeTransformerBlock(args, layerIdx: $0)
324324
}
325-
self.norm = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
325+
_norm.wrappedValue = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
326326
}
327327

328328
func callAsFunction(_ inputs: MLXArray, cache: [KVCache]? = nil) -> MLXArray {

Libraries/MLXLLM/Models/Bitnet.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ public class BitnetModelInner: Module {
405405
@ModuleInfo(key: "embed_tokens") var embedTokens: Embedding
406406

407407
fileprivate let layers: [BitnetTransformerBlock]
408-
var norm: RMSNorm
408+
@ModuleInfo var norm: RMSNorm
409409

410410
init(_ args: BitnetConfiguration) {
411411
precondition(args.vocabularySize > 0)
@@ -417,7 +417,7 @@ public class BitnetModelInner: Module {
417417
layers = (0 ..< args.hiddenLayers).map { _ in
418418
BitnetTransformerBlock(args)
419419
}
420-
norm = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
420+
_norm.wrappedValue = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
421421
}
422422

423423
func callAsFunction(_ inputs: MLXArray, cache: [KVCache]? = nil) -> MLXArray {

Libraries/MLXLLM/Models/Cohere.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ public class CohereModelInner: Module {
121121
@ModuleInfo(key: "embed_tokens") var embedTokens: Embedding
122122

123123
fileprivate let layers: [CohereTransformerBlock]
124-
let norm: LayerNorm
124+
@ModuleInfo var norm: LayerNorm
125125

126126
public init(_ args: CohereConfiguration) {
127127
precondition(args.vocabularySize > 0)
@@ -133,7 +133,7 @@ public class CohereModelInner: Module {
133133
.map { _ in
134134
CohereTransformerBlock(args)
135135
}
136-
self.norm = LayerNorm(dimensions: args.hiddenSize, eps: args.layerNormEps)
136+
_norm.wrappedValue = LayerNorm(dimensions: args.hiddenSize, eps: args.layerNormEps)
137137
}
138138

139139
public func callAsFunction(_ inputs: MLXArray, cache: [KVCache]? = nil) -> MLXArray {

Libraries/MLXLLM/Models/Ernie4_5.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ class Ernie45DecoderLayer: Module {
173173
public class Ernie45ModelInner: Module {
174174
@ModuleInfo(key: "embed_tokens") var embedTokens: Embedding
175175
let layers: [Ernie45DecoderLayer]
176-
let norm: RMSNorm
176+
@ModuleInfo var norm: RMSNorm
177177

178178
public init(_ args: Ernie45Configuration) {
179179
self._embedTokens.wrappedValue = Embedding(
@@ -182,7 +182,7 @@ public class Ernie45ModelInner: Module {
182182
self.layers = (0 ..< args.numHiddenLayers).map { _ in
183183
Ernie45DecoderLayer(args)
184184
}
185-
self.norm = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
185+
_norm.wrappedValue = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
186186
}
187187

188188
public func callAsFunction(_ inputs: MLXArray, cache: [KVCache]? = nil) -> MLXArray {

Libraries/MLXLLM/Models/Exaone4.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ public class Exaone4ModelInner: Module {
141141
@ModuleInfo(key: "embed_tokens") var embedTokens: Embedding
142142

143143
fileprivate let layers: [Exaone4TransformerBlock]
144-
let norm: RMSNorm
144+
@ModuleInfo var norm: RMSNorm
145145

146146
public init(_ args: Exaone4Configuration) {
147147
precondition(args.vocabularySize > 0)
@@ -162,7 +162,7 @@ public class Exaone4ModelInner: Module {
162162
}
163163
return Exaone4TransformerBlock(args, isLocal: isLocal)
164164
}
165-
self.norm = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
165+
_norm.wrappedValue = RMSNorm(dimensions: args.hiddenSize, eps: args.rmsNormEps)
166166
}
167167

168168
public func callAsFunction(_ inputs: MLXArray, cache: [KVCache]? = nil) -> MLXArray {

0 commit comments

Comments (0)