
Commit cc4258c

feat(examples): add linear regression (#199)
1 parent 9d196e5 commit cc4258c

File tree

2 files changed: +110 −0 lines changed


mlx-rs/README.md

Lines changed: 51 additions & 0 deletions
@@ -66,6 +66,57 @@ mlx-rs = "0.21.0"
* `metal` - enables metal (GPU) usage in MLX
* `accelerate` - enables using the accelerate framework in MLX

## Important Notes on Automatic Differentiation

When using automatic differentiation in mlx-rs, there is an important difference in how closures work compared to Python's MLX. In Python, variables are implicitly captured and properly traced in the compute graph. In Rust, however, you must be explicit about which arrays should be traced.

❌ This approach may cause segfaults:

```rust
// Don't do this
let x = random::normal::<f32>(&[num_examples, num_features], None, None, None)?;
let y = x.matmul(&w_star)? + eps;

let loss_fn = |w: &Array| -> Result<Array, Exception> {
    let y_pred = x.matmul(w)?; // x and y are captured from the outer scope
    let loss = Array::from_float(0.5) * ops::mean(&ops::square(&(y_pred - &y))?, None, None)?;
    Ok(loss)
};

let grad_fn = transforms::grad(loss_fn, &[0]);
```

✅ Instead, pass all required arrays as inputs to ensure proper tracing:

```rust
let loss_fn = |inputs: &[Array]| -> Result<Array, Exception> {
    let w = &inputs[0];
    let x = &inputs[1];
    let y = &inputs[2];

    let y_pred = x.matmul(w)?;
    let loss = Array::from_float(0.5) * ops::mean(&ops::square(y_pred - y)?, None, None)?;
    Ok(loss)
};

let argnums = &[0]; // Specify which argument to differentiate with respect to

// Pass all required arrays in the inputs slice
let inputs = vec![w, x, y];
let grad = transforms::grad(loss_fn, argnums)(&inputs)?;
```

When using gradients in a training loop, remember to update the appropriate array in your inputs:

```rust
let mut inputs = vec![w, x, y];

for _ in 0..num_iterations {
    let grad = transforms::grad(loss_fn, argnums)(&inputs)?;
    inputs[0] = &inputs[0] - Array::from_float(learning_rate) * grad; // Update the weight array
    inputs[0].eval()?;
}
```

We are actively working on improving this API to make it more ergonomic and closer to Python's behavior. For now, explicitly passing all required arrays as shown above is the recommended approach.

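The explicit-inputs pattern is not specific to mlx-rs. As a dependency-free sketch (plain `Vec<f64>` stands in for `Array`, and a central finite difference stands in for `transforms::grad` — both are illustrative stand-ins, not mlx-rs APIs), the same "every traced value travels through the inputs slice" shape looks like this:

```rust
// Sketch only: Vec<f64> stands in for an mlx Array, and a central
// finite difference stands in for transforms::grad. The point is the
// API shape: everything the loss depends on is passed through the
// `inputs` slice instead of being captured by the closure.

fn loss_fn(inputs: &[Vec<f64>]) -> f64 {
    let (w, x, y) = (&inputs[0], &inputs[1], &inputs[2]);
    // 0.5 * mean((x * w - y)^2) for a single-feature model
    let n = y.len() as f64;
    let sum: f64 = x
        .iter()
        .zip(y)
        .map(|(xi, yi)| (xi * w[0] - yi).powi(2))
        .sum();
    0.5 * sum / n
}

// Numerical gradient of loss_fn with respect to inputs[0][0].
fn grad_w(inputs: &[Vec<f64>]) -> f64 {
    let h = 1e-6;
    let mut plus = inputs.to_vec();
    let mut minus = inputs.to_vec();
    plus[0][0] += h;
    minus[0][0] -= h;
    (loss_fn(&plus) - loss_fn(&minus)) / (2.0 * h)
}

fn train() -> f64 {
    let w = vec![0.0];
    let x = vec![1.0, 2.0, 3.0];
    let y = vec![2.0, 4.0, 6.0]; // labels generated with true weight 2.0
    let mut inputs = vec![w, x, y];
    let learning_rate = 0.1;
    for _ in 0..200 {
        let grad = grad_w(&inputs);
        inputs[0][0] -= learning_rate * grad; // update the weight in the inputs slice
    }
    inputs[0][0]
}

fn main() {
    println!("learned w = {:.4}", train());
}
```

Because the loss reads its operands from `inputs` rather than from captured variables, updating `inputs[0]` in the loop is all that is needed for the next gradient call to see the new weights.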
## Versioning

For simplicity, the main crate `mlx-rs` follows MLX's versioning, allowing you to easily see which MLX version you're using under the hood. The `mlx-sys` crate follows the versioning of `mlx-c`, as that is the version from which the API is generated.
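
As an illustration (the version number is taken from the snippet earlier in this README and may be out of date), a `Cargo.toml` entry pinning the crate while enabling one of the feature flags listed above might look like:

```toml
[dependencies]
# mlx-rs 0.21.x tracks MLX 0.21; the "metal" feature enables GPU usage in MLX
mlx-rs = { version = "0.21.0", features = ["metal"] }
```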
Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
```rust
use mlx_rs::error::Exception;
use mlx_rs::{ops, random, transforms, Array};
use std::error::Error;

fn main() -> Result<(), Box<dyn Error>> {
    let num_features: i32 = 100;
    let num_examples: i32 = 1000;
    let num_iterations: i32 = 10000;
    let learning_rate: f32 = 0.01;

    // True weight vector
    let w_star = random::normal::<f32>(&[num_features], None, None, None)?;

    // Input examples (design matrix)
    let x = random::normal::<f32>(&[num_examples, num_features], None, None, None)?;

    // Noisy labels
    let eps = random::normal::<f32>(&[num_examples], None, None, None)? * 1e-2;
    let y = x.matmul(&w_star)? + eps;

    // Initialize random weights
    let w = random::normal::<f32>(&[num_features], None, None, None)? * 1e-2;

    let loss_fn = |inputs: &[Array]| -> Result<Array, Exception> {
        let w = &inputs[0];
        let x = &inputs[1];
        let y = &inputs[2];

        let y_pred = x.matmul(w)?;
        let loss = Array::from_float(0.5) * ops::mean(&ops::square(y_pred - y)?, None, None)?;
        Ok(loss)
    };

    let mut grad_fn = transforms::grad(loss_fn, &[0]);

    let now = std::time::Instant::now();
    let mut inputs = [w, x, y];

    for _ in 0..num_iterations {
        let grad = grad_fn(&inputs)?;
        inputs[0] = &inputs[0] - Array::from_float(learning_rate) * grad;
        inputs[0].eval()?;
    }

    let elapsed = now.elapsed();

    let loss = loss_fn(&inputs)?;
    let error_norm = ops::sum(&ops::square(&(&inputs[0] - &w_star))?, None, None)?.sqrt()?;
    let throughput = num_iterations as f32 / elapsed.as_secs_f32();

    println!(
        "Loss {:.5}, L2 distance: |w-w*| = {:.5}, Throughput {:.5} (it/s)",
        loss.item::<f32>(),
        error_norm.item::<f32>(),
        throughput
    );

    Ok(())
}
```
