 #include <stdio.h>
 
 static float elu_alpha_value = 1.0f;
+static float huber_delta_value = 1.0f;
 
 Tensor nn_linear(Tensor input, Tensor weight, Tensor bias) {
     Tensor tmp = Tensor_matmul(input, weight);
     tmp = Tensor_add(tmp, bias);
     return tmp;
 }
 
-/* nn.relu */
 static Tensor GradFn_relu(Tensor self, int i) {
     Tensor input = self.node->inputs[i];
     Tensor res = Tensor_new(input.shape, false);
@@ -365,7 +365,6 @@ Tensor nn_softmax(Tensor self) {
     return res;
 }
 
-/* nn.cross_entropy */
 static Tensor GradFn_crossentropy(Tensor self, int i) {
     if (i == 1) {  // Gradient w.r.t. y_pred
         Tensor y_true = self.node->inputs[0];
@@ -498,5 +497,142 @@ Tensor nn_softmax_crossentropy(Tensor y_true, Tensor logits) {
         res.node->name = "SoftmaxCrossEntropy";
     }
 
+    return res;
+}
+
+static Tensor GradFn_mse_loss(Tensor self, int i) {
+    if (i == 1) {  // Gradient w.r.t. y_pred
+        Tensor y_true = self.node->inputs[0];
+        Tensor y_pred = self.node->inputs[1];
+        int n = y_pred.data->numel;
+
+        // d/dy_pred of mean((y_pred - y_true)^2) is 2 * error / n.
+        Tensor grad = Tensor_new(y_pred.shape, false);
+        for (int j = 0; j < n; j++) {
+            grad.data->flex[j] = 2.0f * (y_pred.data->flex[j] - y_true.data->flex[j]) / n;
+        }
+        return grad;
+    }
+    return Tensor_zeros((TensorShape){1}, false);  // y_true never receives a gradient
+}
+
+Tensor nn_mse_loss(Tensor y_true, Tensor y_pred) {
+    bool requires_grad = !cten_is_eval() && y_pred.node != NULL;
+
+    // Run the forward math in eval mode so it does not record a graph.
+    cten_begin_eval();
+    Tensor error = Tensor_sub(y_pred, y_true);
+    Tensor squared_error = Tensor_square(error);
+    Tensor loss = Tensor_mean(squared_error);
+    cten_end_eval();
+
+    Tensor res = Tensor_new((TensorShape){1}, requires_grad);
+    res.data->flex[0] = loss.data->flex[0];
+
+    if (requires_grad) {
+        res.node->grad_fn = GradFn_mse_loss;
+        res.node->inputs[0] = y_true;
+        res.node->inputs[1] = y_pred;
+        res.node->n_inputs = 2;
+        res.node->name = "MSELoss";
+    }
+    return res;
+}
+
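A quick smoke test of the forward value. This is a hypothetical sketch, not part of the commit: it only reuses the (TensorShape){n} constructor and .data->flex access patterns visible in this file and assumes nothing else about the API.

/* Hypothetical usage sketch for nn_mse_loss. */
Tensor y_true = Tensor_new((TensorShape){4}, false);
Tensor y_pred = Tensor_new((TensorShape){4}, false);
float t[4] = {1.0f, 2.0f, 3.0f, 4.0f};
float p[4] = {1.5f, 2.0f, 2.0f, 5.0f};
for (int i = 0; i < 4; i++) {
    y_true.data->flex[i] = t[i];
    y_pred.data->flex[i] = p[i];
}
Tensor loss = nn_mse_loss(y_true, y_pred);
/* errors are 0.5, 0, -1, 1, so the mean of squares is 2.25 / 4 = 0.5625 */
printf("mse = %f\n", loss.data->flex[0]);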
+static Tensor GradFn_mae_loss(Tensor self, int i) {
+    if (i == 1) {  // Gradient w.r.t. y_pred
+        Tensor y_true = self.node->inputs[0];
+        Tensor y_pred = self.node->inputs[1];
+        int n = y_pred.data->numel;
+
+        // d/dy_pred of mean(|y_pred - y_true|) is sign(error) / n; the
+        // subgradient 0 is chosen at error == 0, where |x| is not differentiable.
+        Tensor grad = Tensor_new(y_pred.shape, false);
+        for (int j = 0; j < n; j++) {
+            float error = y_pred.data->flex[j] - y_true.data->flex[j];
+            if (error > 0) {
+                grad.data->flex[j] = 1.0f / n;
+            } else if (error < 0) {
+                grad.data->flex[j] = -1.0f / n;
+            } else {
+                grad.data->flex[j] = 0.0f;
+            }
+        }
+        return grad;
+    }
+    return Tensor_zeros((TensorShape){1}, false);  // y_true never receives a gradient
+}
+
+Tensor nn_mae_loss(Tensor y_true, Tensor y_pred) {
+    bool requires_grad = !cten_is_eval() && y_pred.node != NULL;
+
+    cten_begin_eval();
+    Tensor error = Tensor_sub(y_pred, y_true);
+    Tensor abs_error = Tensor_abs(error);
+    Tensor loss = Tensor_mean(abs_error);
+    cten_end_eval();
+
+    Tensor res = Tensor_new((TensorShape){1}, requires_grad);
+    res.data->flex[0] = loss.data->flex[0];
+
+    if (requires_grad) {
+        res.node->grad_fn = GradFn_mae_loss;
+        res.node->inputs[0] = y_true;
+        res.node->inputs[1] = y_pred;
+        res.node->n_inputs = 2;
+        res.node->name = "MAELoss";
+    }
+    return res;
+}
+
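The analytic sign(error) / n can be sanity-checked against a finite difference using only the forward function. Again a hypothetical sketch built from the constructors this file already uses:

/* Hypothetical check: bump one prediction by h and compare the loss delta
 * against the analytic gradient sign(error) / n = 1 / 4. */
const float h = 1e-3f;
Tensor y_true = Tensor_new((TensorShape){4}, false);
Tensor y_pred = Tensor_new((TensorShape){4}, false);
for (int i = 0; i < 4; i++) {
    y_true.data->flex[i] = (float)i;
    y_pred.data->flex[i] = (float)i + 0.5f;  // error is +0.5 everywhere
}
float base = nn_mae_loss(y_true, y_pred).data->flex[0];
y_pred.data->flex[2] += h;
float bumped = nn_mae_loss(y_true, y_pred).data->flex[0];
printf("numeric %.4f vs analytic %.4f\n", (bumped - base) / h, 0.25f);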
+static Tensor GradFn_huber_loss(Tensor self, int i) {
+    if (i == 1) {  // Gradient w.r.t. y_pred
+        Tensor y_true = self.node->inputs[0];
+        Tensor y_pred = self.node->inputs[1];
+        float delta = huber_delta_value;
+        int n = y_pred.data->numel;
+
+        Tensor grad = Tensor_new(y_pred.shape, false);
+        // The gradient of the mean Huber loss is error / n in the quadratic
+        // region and delta * sign(error) / n in the linear region.
+        for (int j = 0; j < n; j++) {
+            float error = y_pred.data->flex[j] - y_true.data->flex[j];
+            if (fabsf(error) <= delta) {
+                grad.data->flex[j] = error / n;
+            } else if (error > 0) {
+                grad.data->flex[j] = delta / n;
+            } else {
+                grad.data->flex[j] = -delta / n;
+            }
+        }
+        return grad;
+    }
+    return Tensor_zeros((TensorShape){1}, false);  // y_true never receives a gradient
+}
+
+Tensor nn_huber_loss(Tensor y_true, Tensor y_pred, float delta) {
+    huber_delta_value = delta;  // Store delta for the backward pass
+    bool requires_grad = !cten_is_eval() && y_pred.node != NULL;
+
+    int n = y_pred.data->numel;
+    float total_loss = 0.0f;
+    for (int i = 0; i < n; i++) {
+        float error = y_pred.data->flex[i] - y_true.data->flex[i];
+        float abs_error = fabsf(error);
+        if (abs_error <= delta) {
+            total_loss += 0.5f * error * error;  // quadratic (MSE) part
+        } else {
+            total_loss += delta * (abs_error - 0.5f * delta);  // linear (MAE) part
+        }
+    }
+
+    Tensor res = Tensor_new((TensorShape){1}, requires_grad);
+    res.data->flex[0] = total_loss / n;  // mean Huber loss
+
+    if (requires_grad) {
+        res.node->grad_fn = GradFn_huber_loss;
+        res.node->inputs[0] = y_true;
+        res.node->inputs[1] = y_pred;
+        res.node->n_inputs = 2;
+        res.node->name = "HuberLoss";
+    }
     return res;
 }
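One worked call, again as an illustrative sketch only: with delta = 1, errors of +0.5 and -0.5 land in the quadratic branch and an error of 3 in the linear branch, so the expected mean is (0.125 + 0.125 + 2.5) / 3, about 0.9167. Note also that because delta is stashed in the file-scope huber_delta_value, the backward pass always sees the delta from the most recent nn_huber_loss call; building two Huber losses with different deltas before running backward would silently reuse the last one.

/* Hypothetical usage sketch for nn_huber_loss. */
Tensor y_true = Tensor_new((TensorShape){3}, false);
Tensor y_pred = Tensor_new((TensorShape){3}, false);
float p[3] = {0.5f, -0.5f, 3.0f};
for (int i = 0; i < 3; i++) {
    y_true.data->flex[i] = 0.0f;
    y_pred.data->flex[i] = p[i];
}
Tensor loss = nn_huber_loss(y_true, y_pred, 1.0f);
printf("huber = %f\n", loss.data->flex[0]);  /* prints about 0.916667 */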