@@ -9,13 +9,13 @@ log_level: "INFO"
 # LLM configuration optimized for algorithmic pattern evolution
 llm:
   primary_model: "gemini-2.5-flash-preview-05-20"
-  primary_model_weight: 0.7
+  primary_model_weight: 0.6
   secondary_model: "gemini-2.5-pro-preview-05-06"
-  secondary_model_weight: 0.3
+  secondary_model_weight: 0.4
   api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
-  temperature: 0.8
+  temperature: 0.7
   top_p: 0.95
-  max_tokens: 24000
+  max_tokens: 32000
   timeout: 900  # Longer timeout for complex optimization reasoning
 
 # Specialized prompt for memory and algorithmic optimization with MLX API safety
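The weight change above shifts the primary/secondary split from 0.7/0.3 to 0.6/0.4, i.e. how often each Gemini model should be picked for a generation call. As a rough, hedged illustration of what such a split means in practice, here is a minimal sketch of weight-proportional model selection; the function name and sampling strategy are assumptions for illustration, not the repository's actual ensemble code.

```python
import random

# Hypothetical sketch: choose a model per LLM call in proportion to the
# configured weights (0.6 / 0.4 after this change). Not the actual
# selection logic used by the evolution loop.
MODEL_WEIGHTS = {
    "gemini-2.5-flash-preview-05-20": 0.6,
    "gemini-2.5-pro-preview-05-06": 0.4,
}

def pick_model(rng: random.Random) -> str:
    names = list(MODEL_WEIGHTS)
    weights = list(MODEL_WEIGHTS.values())
    return rng.choices(names, weights=weights, k=1)[0]

rng = random.Random(0)
counts = {name: 0 for name in MODEL_WEIGHTS}
for _ in range(1000):
    counts[pick_model(rng)] += 1
print(counts)  # Roughly 600 / 400 over many draws
```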
@@ -280,18 +280,39 @@ prompt:
     ```python
     # WRONG: Using JAX-style has_aux parameter
     (scaled_loss_val, unscaled_loss_val), grads = mx.value_and_grad(loss_fn, has_aux=True)(model)
+    # This causes unscaled_loss_val to be a tuple! float(tuple) fails!
+
+    # WRONG: Multiple return values from loss function when using value_and_grad
+    def loss_fn(model):
+        logits = model(inputs)
+        loss = nn.losses.cross_entropy(logits, targets)
+        return loss, some_aux_data  # WRONG! Creates tuple!
+
+    loss_tuple, grads = mx.value_and_grad(loss_fn)(model)  # loss_tuple is (loss, aux_data)
+    return float(loss_tuple)  # ERROR: float() argument must be a real number, not 'tuple'
 
     # RIGHT: MLX only supports simple value_and_grad
+    def loss_fn(model):
+        logits = model(inputs)
+        loss = nn.losses.cross_entropy(logits, targets)
+        return loss  # Return ONLY the loss, not a tuple
+
     loss_value, grads = mx.value_and_grad(loss_fn)(model)
+    return float(loss_value), should_update  # loss_value is now a scalar
 
-    # If you need scaled loss, handle it in the loss function itself:
+    # RIGHT: If you need auxiliary data, compute it separately
     def loss_fn(model):
         logits = model(inputs)
         loss = nn.losses.cross_entropy(logits, targets)
-        # Scale inside the function if needed
-        return loss / max(total_accumulation_steps, 1)
+        return loss  # Only return loss for value_and_grad
 
     loss_value, grads = mx.value_and_grad(loss_fn)(model)
+    # Compute auxiliary data separately if needed
+    with mx.no_grad():  # Don't need gradients for aux computation
+        logits = model(inputs)
+        accuracy = compute_accuracy(logits, targets)
+
+    return float(loss_value), should_update
     ```
 
     ❌ **'NoneType' object is not subscriptable**
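Pulling the "RIGHT" pattern from the hunk above into one place, here is a minimal, self-contained sketch of a single MLX training step: the differentiated function returns only a scalar loss, and auxiliary metrics come from a separate forward pass (MLX only builds gradients inside its grad transformations, so no extra gradient context is strictly required). The toy model, data shapes, and the use of the `mlx.nn` `value_and_grad` helper are illustrative assumptions, not code from this config.

```python
import mlx.core as mx
import mlx.nn as nn
import mlx.optimizers as optim

# Illustrative toy data and model; shapes and hyperparameters are made up.
x = mx.random.normal((32, 16))
y = mx.random.randint(0, 4, (32,))

model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))
optimizer = optim.Adam(learning_rate=1e-3)

def loss_fn(model, x, y):
    # Return ONLY a scalar loss: MLX's value_and_grad has no has_aux.
    return nn.losses.cross_entropy(model(x), y, reduction="mean")

# nn.value_and_grad differentiates loss_fn wrt the module's trainable parameters.
loss_and_grad = nn.value_and_grad(model, loss_fn)
loss, grads = loss_and_grad(model, x, y)
optimizer.update(model, grads)
mx.eval(model.parameters(), optimizer.state)

# Auxiliary metrics come from a plain forward pass outside the grad transform.
preds = mx.argmax(model(x), axis=1)
accuracy = mx.mean((preds == y).astype(mx.float32))
print(float(loss), float(accuracy))
```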