def evolved_scaled_dot_product_attention(q, k, v, scale=1.0, mask=None):
    """
    Metal kernel-based attention implementation with working building blocks.

    This function uses simple, working Metal kernels that can be evolved
    into more complex optimizations, starting simple and building up complexity.

    Args:
        q: Query tensor [B, num_heads, L, head_dim]
        k: Key tensor [B, num_kv_heads, L_kv, head_dim]
        v: Value tensor [B, num_kv_heads, L_kv, head_dim]
        scale: Scaling factor (typically 1/sqrt(head_dim))
        mask: Attention mask or mask type string

    Returns:
        Attention output with the same shape as the queries
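
    Example (illustrative shapes, consistent with the signature above):
        >>> q = mx.random.normal((1, 8, 128, 64))
        >>> k = v = mx.random.normal((1, 8, 128, 64))
        >>> out = evolved_scaled_dot_product_attention(
        ...     q, k, v, scale=1.0 / math.sqrt(64), mask="causal"
        ... )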
3737 """
38-
38+
3939 # EVOLVE-BLOCK-START
4040 """
4141 WORKING METAL KERNEL IMPLEMENTATION
@@ -55,13 +55,13 @@ def evolved_scaled_dot_product_attention(q, k, v, scale=1.0, mask=None):
    - Implement custom softmax kernels
    - Eventually fuse entire attention pipeline
    """

    # Extract dimensions
    B, n_q_heads, L, head_dim = q.shape
    n_kv_heads = k.shape[1]
    kL = k.shape[2]
    n_repeats = n_q_heads // n_kv_heads
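    # Example: 8 query heads with 4 KV heads gives n_repeats == 2, i.e. each KV
    # head is shared by two query heads (grouped-query attention).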

    # WORKING METAL KERNEL: Element-wise scaling
    # This is a simple, working kernel that can be evolved
    try:
@@ -72,32 +72,32 @@ def evolved_scaled_dot_product_attention(q, k, v, scale=1.0, mask=None):
            }
            out[elem] = q[elem] * scale_val;
        """

        scale_kernel = mx.fast.metal_kernel(
            name="scale_query",
            input_names=["q", "scale_val"],
            output_names=["out"],
            source=scale_source,
        )

        # Create scale as a scalar array for the kernel
        scale_array = mx.array(float(scale), dtype=q.dtype)

        q_scaled = scale_kernel(
            inputs=[q, scale_array],
            template=[("T", q.dtype)],
            output_shapes=[q.shape],
            output_dtypes=[q.dtype],
            grid=(q.size, 1, 1),
            threadgroup=(256, 1, 1),
        )[0]
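        # Launch-geometry note (assumption about mx.fast.metal_kernel semantics):
        # grid gives the total thread count, so this dispatches one thread per
        # element of q, grouped into threadgroups of 256 threads.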

        # Metal kernel scaling successful (remove noisy print)

    except Exception as e:
        # Fallback to reference implementation on any Metal kernel error
        q_scaled = q * scale

    # Handle GQA with reference implementation (can be evolved later)
    if n_repeats > 1:
        q_reshaped = mx.reshape(q_scaled, [B, n_kv_heads, n_repeats, L, head_dim])
@@ -107,11 +107,11 @@ def evolved_scaled_dot_product_attention(q, k, v, scale=1.0, mask=None):
        q_reshaped = q_scaled
        k_expanded = k
        v_expanded = v
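        # Note on the GQA branch above (a common pattern, assumed rather than
        # taken from the elided lines of this file): k and v can be broadcast
        # across the repeat axis, e.g.
        #   k_expanded = mx.expand_dims(k, 2)  # [B, n_kv_heads, 1, L_kv, head_dim]
        #   v_expanded = mx.expand_dims(v, 2)
        # so the batched matmul below broadcasts each KV head over its query group.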

    # Compute attention scores with reference implementation (can be evolved)
    # Evolution opportunity: Replace with custom matmul kernel
    scores = q_reshaped @ mx.swapaxes(k_expanded, -1, -2)

    # Apply mask with reference implementation (can be evolved)
    if mask is not None:
        if isinstance(mask, str) and mask == "causal":
@@ -120,7 +120,7 @@ def evolved_scaled_dot_product_attention(q, k, v, scale=1.0, mask=None):
            k_indices = mx.arange(kL)
            causal_mask = q_indices[:, None] >= k_indices[None]
            scores = mx.where(causal_mask, scores, -mx.array(np.float32(np.inf)))
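            # Worked example (L == kL == 3): causal_mask is
            #   [[True, False, False],
            #    [True, True,  False],
            #    [True, True,  True ]]
            # so each query attends only to keys at or before its own position.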
        elif hasattr(mask, "dtype") and mask.dtype == mx.bool_:
            if n_repeats > 1 and mask.ndim >= 3:
                if mask.shape[-3] == 1:
                    mask = mx.expand_dims(mask, -3)
@@ -129,19 +129,19 @@ def evolved_scaled_dot_product_attention(q, k, v, scale=1.0, mask=None):
            scores = mx.where(mask, scores, -mx.array(np.float32(np.inf)))
        else:
            scores = scores + mask

    # Apply softmax with reference implementation (can be evolved)
    # Evolution opportunity: Replace with custom softmax kernel
    attention_weights = mx.softmax(scores, axis=-1, precise=True)
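    # Sketch of what a custom softmax kernel would need to reproduce (the stable
    # formulation is an assumption suggested by precise=True, not taken from MLX):
    #   m = mx.max(scores, axis=-1, keepdims=True)
    #   attention_weights = mx.exp(scores - m) / mx.sum(mx.exp(scores - m), axis=-1, keepdims=True)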

    # Apply attention weights to values (can be evolved)
    # Evolution opportunity: Replace with custom matmul kernel
    out = attention_weights @ v_expanded

    # Reshape back if needed
    if n_repeats > 1:
        out = mx.reshape(out, [B, n_q_heads, L, head_dim])

    return out
    # EVOLVE-BLOCK-END

@@ -157,58 +157,61 @@ def create_benchmark_attention_function():
def test_basic_functionality():
    """Test that the Metal kernel attention works with real kernels"""
    print("Testing Working Metal Kernel attention functionality...")

    # Small test case to verify kernels work
    B, qL, kL, D, qH, kH = 1, 32, 32, 64, 4, 4
    scale = 1.0 / math.sqrt(D)

    # Create test inputs
    q = mx.random.normal((B, qH, qL, D))
    k = mx.random.normal((B, kH, kL, D))
    v = mx.random.normal((B, kH, kL, D))

    # Test with working Metal kernel
    print(" Testing with working Metal scaling kernel...")
    output = evolved_scaled_dot_product_attention(q, k, v, scale=scale)
    print(f" ✓ Working kernel test: input {q.shape} -> output {output.shape}")

    # Test correctness by comparing with reference
    print(" Verifying correctness against reference implementation...")
    from spda_benchmark import mlx_ref_attn

    reference_output = mlx_ref_attn(q, k, v, scale=scale)

    # Check if outputs are close
    max_diff = float(mx.max(mx.abs(output - reference_output)))
    mse = float(mx.mean((output - reference_output) ** 2))
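    # max_diff catches the single worst element, while MSE averages squared error
    # over all elements and can hide isolated outliers; together they give a
    # fuller picture of accuracy.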

    print(f" ✓ Max difference vs reference: {max_diff:.2e}")
    print(f" ✓ MSE vs reference: {mse:.2e}")

    if mse < 1e-6:
        print(" ✓ Accuracy test PASSED")
    else:
        print(" ⚠️ Accuracy test FAILED - need to fix implementation")

    # Test with different configurations
    test_configs = [
        (1, 32, 32, 64, 8, 8, None),  # No mask
        (1, 64, 64, 64, 8, 8, "causal"),  # Causal mask
        (1, 32, 32, 64, 8, 4, None),  # GQA
    ]
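    # Each tuple is (B, qL, kL, D, qH, kH, mask); the last config has fewer KV
    # heads than query heads, which exercises the GQA path.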

    for B, qL, kL, D, qH, kH, mask_type in test_configs:
        q_test = mx.random.normal((B, qH, qL, D))
        k_test = mx.random.normal((B, kH, kL, D))
        v_test = mx.random.normal((B, kH, kL, D))

        try:
            output_test = evolved_scaled_dot_product_attention(
                q_test, k_test, v_test, scale=scale, mask=mask_type
            )
            print(f" ✓ Config test passed: seq={qL}, heads={qH}/{kH}, mask={mask_type}")
        except Exception as e:
            print(
                f" ❌ Config test failed: seq={qL}, heads={qH}/{kH}, mask={mask_type}, error={e}"
            )

    print("🚀 Working Metal Kernel attention tests completed!")
    print(" - Simple Metal scaling kernel working")
    print(" - Reference implementation for complex operations")