
Commit 5e07b21

Merge pull request #23 from Advaitgaur004/reduction-operator
Add: autograd support for min and max tensor operations
2 parents cd2a310 + a11826e commit 5e07b21

File tree

9 files changed: +939 −8 lines changed

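The headline change: Tensor_max and Tensor_min become arity-dispatched macros. Called with one argument they reduce over all elements to a scalar tensor; called with a dim argument they reduce along that dimension and also return the argmax/argmin positions. A hypothetical usage sketch against the API added in this diff (the demo function and however x gets built are assumptions, not part of the commit):

#include "cten.h"

/* Hypothetical driver; constructing x is outside this diff. */
void demo(Tensor x) {
    /* One argument: full reduction to a scalar tensor. */
    Tensor m = Tensor_max(x);                 /* expands to Tensor_max_all(x) */

    /* Two arguments: reduce along dim 1, keeping argmax indices. */
    TensorMaxMinResult r = Tensor_max(x, 1);  /* expands to Tensor_max_dim(x, 1) */
    /* r.values: per-row maxima; r.indices: positions along dim 1. */
    (void)m; (void)r;
}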

include/cten.h

Lines changed: 13 additions & 2 deletions

@@ -7,6 +7,10 @@
 #include <stdarg.h>
 #include <limits.h>

+#define _CTEN_PICK_REDUCE(_1, _2, NAME, ...) NAME
+#define Tensor_max(...) _CTEN_PICK_REDUCE(__VA_ARGS__, Tensor_max_dim, Tensor_max_all)(__VA_ARGS__)
+#define Tensor_min(...) _CTEN_PICK_REDUCE(__VA_ARGS__, Tensor_min_dim, Tensor_min_all)(__VA_ARGS__)
+
 #define _CTEN_PICK(_1,_2,NAME,...) NAME
 #define Tensor_mean(...) _CTEN_PICK(__VA_ARGS__, Tensor_mean_dim, Tensor_mean_all)(__VA_ARGS__)
 #define Tensor_sum(...) _CTEN_PICK(__VA_ARGS__, Tensor_sum_dim, Tensor_sum_all )(__VA_ARGS__)

@@ -33,6 +37,11 @@ typedef struct GradNode {
     const char* name;
 } GradNode;

+typedef struct {
+    Tensor values;
+    Tensor indices;
+} TensorMaxMinResult;
+
 void cten_initilize();
 void cten_finalize();

@@ -81,8 +90,10 @@ Tensor Tensor_mean_dim(Tensor self, int dim);
 Tensor Tensor_sum_all (Tensor self);
 Tensor Tensor_sum_dim (Tensor self, int dim);

-Tensor Tensor_max(Tensor self);
-Tensor Tensor_min(Tensor self);
+Tensor Tensor_max_all(Tensor self);
+TensorMaxMinResult Tensor_max_dim(Tensor self, int dim);
+Tensor Tensor_min_all(Tensor self);
+TensorMaxMinResult Tensor_min_dim(Tensor self, int dim);

 void Tensor_argmax(Tensor self, int* out);
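The _CTEN_PICK_REDUCE macro is the standard variadic arity-dispatch trick: the caller's arguments are padded with the two candidate function names, and whichever name lands in the third slot is the one that gets called. A minimal standalone sketch of the same pattern (the PICK/reduce/reduce_all/reduce_dim names are illustrative, not cten's):

#include <stdio.h>

/* Same arity dispatch as _CTEN_PICK_REDUCE: the third argument
 * position selects which function name survives. */
#define PICK(_1, _2, NAME, ...) NAME
#define reduce(...) PICK(__VA_ARGS__, reduce_dim, reduce_all)(__VA_ARGS__)

static void reduce_all(int x)          { printf("all: %d\n", x); }
static void reduce_dim(int x, int dim) { printf("dim %d of %d\n", dim, x); }

int main(void) {
    reduce(42);     /* PICK(42, reduce_dim, reduce_all) -> reduce_all(42) */
    reduce(42, 1);  /* PICK(42, 1, reduce_dim, reduce_all) -> reduce_dim(42, 1) */
    return 0;
}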

src/basic.c

Lines changed: 3 additions & 4 deletions

@@ -140,12 +140,11 @@ void Tensor_backward(Tensor self, Tensor grad) {
     }

     for(int i = 0; i < self.node->n_inputs; i++) {
-        if (self.node->inputs[i].data == NULL) {
+        Tensor input_tensor = self.node->inputs[i];
+        if (input_tensor.node == NULL) {
             continue;
         }

-        Tensor input_tensor = self.node->inputs[i];
-
         // Step 1: Get the local gradient (the partial derivative). --> For z = f(x, y), this would be dz/dx or dz/dy.
         Tensor input_grad = self.node->grad_fn(self, i);

@@ -154,7 +153,7 @@
         int input_ndim = TensorShape_dim(input_tensor.shape);
         int grad_ndim = TensorShape_dim(grad.shape);

-        if ((strcmp(self.node->name, "Sum") == 0 || strcmp(self.node->name, "Mean") == 0) && input_ndim > grad_ndim) {
+        if ((strcmp(self.node->name, "Sum") == 0 || strcmp(self.node->name, "Mean") == 0 || strcmp(self.node->name, "MaxDim") == 0 || strcmp(self.node->name, "MinDim") == 0) && input_ndim > grad_ndim) {
            // Find the dimension that was reduced. We assume the non-reduced dimensions match in size.
            int unsqueeze_dim = -1;
            int grad_idx = 0;
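MaxDim/MinDim join Sum and Mean here because a dim-reduction hands back a gradient with one fewer dimension than its input, and Tensor_backward must re-insert (unsqueeze) the reduced axis before accumulating: reducing a (2,3) tensor along dim 1 yields a grad of shape (2,), which must be viewed as (2,1) first. A small sketch of that reduced-dimension search on bare int arrays (find_reduced_dim is an illustrative helper, not a cten function):

#include <stdio.h>

/* Walk both shapes left to right; the first position where they
 * disagree (or where the output shape runs out) is the reduced dim. */
static int find_reduced_dim(const int* in_shape, int in_ndim,
                            const int* out_shape, int out_ndim) {
    int out_d = 0;
    for (int d = 0; d < in_ndim; d++) {
        if (out_d >= out_ndim || in_shape[d] != out_shape[out_d]) return d;
        out_d++;
    }
    return -1; /* nothing was reduced */
}

int main(void) {
    int in_shape[] = {2, 3};  /* input of Tensor_max_dim(x, 1) */
    int out_shape[] = {2};    /* values/indices come back as shape (2,) */
    /* prints 1: the grad of shape (2,) must be unsqueezed to (2,1) */
    printf("reduced dim = %d\n",
           find_reduced_dim(in_shape, 2, out_shape, 1));
    return 0;
}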

src/operator.c

Lines changed: 132 additions & 0 deletions

@@ -14,6 +14,12 @@
 #ifdef Tensor_sum
 #undef Tensor_sum
 #endif
+#ifdef Tensor_max
+#undef Tensor_max
+#endif
+#ifdef Tensor_min
+#undef Tensor_min
+#endif

 static Tensor GradFn_add(Tensor self, int i) {
     // f(x, y) = x + y; f'(x) = 1; f'(y) = 1

@@ -450,5 +456,131 @@ Tensor Tensor_sub(Tensor self, Tensor other) {
         res.node->n_inputs = 2;
         res.node->name = "Sub";
     }
+    return res;
+}
+
+Tensor GradFn_reduce_dim(Tensor self, int i) {
+    Tensor input = self.node->inputs[0];
+    Tensor indices_tensor = self.node->inputs[1];
+    Tensor grad_out = Tensor_zeros(input.shape, false);
+
+    int out_numel = indices_tensor.data->numel;
+    int ndim = TensorShape_dim(input.shape);
+    int reduced_dim = -1;
+
+    for(int d = 0, out_d = 0; d < ndim; d++){
+        if(out_d >= TensorShape_dim(self.shape) || input.shape[d] != self.shape[out_d]){
+            reduced_dim = d;
+            break;
+        }
+        out_d++;
+    }
+    cten_assert(reduced_dim != -1, "Could not determine reduced dimension in gradient calculation");
+
+    for (int j = 0; j < out_numel; j++) {
+        int index_along_dim = (int)indices_tensor.data->flex[j];
+
+        int linear_idx = 0, stride = 1, out_j_rem = j, out_shape_idx = TensorShape_dim(self.shape) - 1;
+        for (int k = ndim - 1; k >= 0; --k) {
+            int current_dim_idx;
+            if (k == reduced_dim) {
+                current_dim_idx = index_along_dim;
+            } else {
+                int dim_k = self.shape[out_shape_idx--];
+                current_dim_idx = out_j_rem % dim_k;
+                out_j_rem /= dim_k;
+            }
+            linear_idx += current_dim_idx * stride;
+            stride *= input.shape[k];
+        }
+        grad_out.data->flex[linear_idx] = 1.0f;
+    }
+    return grad_out;
+}
+
+Tensor GradFn_max_all(Tensor self, int i) {
+    Tensor input = self.node->inputs[i];
+    Tensor res = Tensor_zeros(input.shape, false);
+    float max_val = self.data->flex[0];
+
+    int max_count = 0;
+    for (int j = 0; j < input.data->numel; j++) {
+        if (input.data->flex[j] == max_val) max_count++;
+    }
+
+    float grad_value = (max_count > 0) ? 1.0f / max_count : 0.0f;
+    for (int j = 0; j < input.data->numel; j++) {
+        if (input.data->flex[j] == max_val) res.data->flex[j] = grad_value;
+    }
+    return res;
+}
+
+Tensor Tensor_max(Tensor self) {
+    if (self.data->numel == 0){
+        cten_assert(false, "Error: max() on an empty tensor.");
+    }
+    bool requires_grad = !cten_is_eval() && (self.node != NULL);
+    Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, requires_grad);
+
+    float max_val = self.data->flex[0];
+    for (int i = 1; i < self.data->numel; i++) {
+        if (self.data->flex[i] > max_val) {
+            max_val = self.data->flex[i];
+        }
+    }
+
+    res.data->flex[0] = max_val;
+
+    if (requires_grad) {
+        res.node->grad_fn = GradFn_max_all;
+        res.node->inputs[0] = self;
+        res.node->n_inputs = 1;
+        res.node->name = "MaxAll";
+    }
+
+    return res;
+}
+
+Tensor GradFn_min_all(Tensor self, int i) {
+    Tensor input = self.node->inputs[i];
+    Tensor res = Tensor_zeros(input.shape, false);
+    float min_val = self.data->flex[0];
+
+    int min_count = 0;
+    for (int j = 0; j < input.data->numel; j++) {
+        if (input.data->flex[j] == min_val) min_count++;
+    }
+
+    float grad_value = (min_count > 0) ? 1.0f / min_count : 0.0f;
+    for (int j = 0; j < input.data->numel; j++) {
+        if (input.data->flex[j] == min_val) res.data->flex[j] = grad_value;
+    }
+    return res;
+}
+
+Tensor Tensor_min(Tensor self) {
+    if (self.data->numel == 0){
+        cten_assert(false, "Error: min() on an empty tensor.");
+    }
+    bool requires_grad = !cten_is_eval() && (self.node != NULL);
+    Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, requires_grad);
+
+    // Find minimum value
+    float min_val = self.data->flex[0];
+    for (int i = 1; i < self.data->numel; i++) {
+        if (self.data->flex[i] < min_val) {
+            min_val = self.data->flex[i];
+        }
+    }
+
+    res.data->flex[0] = min_val;
+
+    if (requires_grad) {
+        res.node->grad_fn = GradFn_min_all;
+        res.node->inputs[0] = self;
+        res.node->n_inputs = 1;
+        res.node->name = "MinAll";
+    }
+
     return res;
 }
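GradFn_max_all and GradFn_min_all split the incoming gradient evenly across ties: every element equal to the extremum receives 1/count. A quick numeric check of that rule on a plain float array (no cten types involved):

/* Tie-splitting check: for x = {1, 3, 3}, max = 3 appears twice,
 * so the subgradient is {0, 0.5, 0.5}. */
#include <stdio.h>

int main(void) {
    float x[] = {1.0f, 3.0f, 3.0f};
    int n = 3, count = 0;
    float max_val = x[0];
    for (int i = 1; i < n; i++) if (x[i] > max_val) max_val = x[i];
    for (int i = 0; i < n; i++) if (x[i] == max_val) count++;
    for (int i = 0; i < n; i++)
        printf("grad[%d] = %g\n", i, x[i] == max_val ? 1.0f / count : 0.0f);
    return 0;
}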

src/utils.c

Lines changed: 153 additions & 2 deletions

@@ -16,6 +16,9 @@ bool va_arg_is_present(va_list args) {

 Tensor GradFn_mean(Tensor self, int i);
 Tensor GradFn_sum(Tensor self, int i);
+Tensor GradFn_max_all(Tensor self, int i);
+Tensor GradFn_min_all(Tensor self, int i);
+Tensor GradFn_reduce_dim(Tensor self, int i);

 Tensor Tensor_mean_all(Tensor self) {
     float total = 0.0f;

@@ -67,6 +70,155 @@ Tensor Tensor_sum_dim(Tensor self, int dim) {
     return res;
 }

+Tensor Tensor_max_all(Tensor self) {
+    bool requires_grad = !cten_is_eval() && (self.node != NULL);
+    Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, requires_grad);
+
+    if (self.data->numel == 0) cten_assert(false, "max on empty tensor");
+    float max_val = self.data->flex[0];
+    for (int i = 1; i < self.data->numel; i++) {
+        if (self.data->flex[i] > max_val) {
+            max_val = self.data->flex[i];
+        }
+    }
+    res.data->flex[0] = max_val;
+
+    if (requires_grad) {
+        res.node->grad_fn = GradFn_max_all;
+        res.node->inputs[0] = self;
+        res.node->n_inputs = 1;
+        res.node->name = "MaxAll";
+    }
+    return res;
+}
+
+TensorMaxMinResult Tensor_max_dim(Tensor self, int dim) {
+    int ndim = TensorShape_dim(self.shape);
+    dim = TensorShape_asdim(self.shape, dim);
+
+    TensorShape out_shape = {0};
+    int out_shape_len = 0;
+    for (int i = 0; i < ndim; i++) {
+        if (i != dim) out_shape[out_shape_len++] = self.shape[i];
+    }
+
+    bool requires_grad = !cten_is_eval() && (self.node != NULL);
+    Tensor values = Tensor_new(out_shape, requires_grad);
+    Tensor indices = Tensor_new(out_shape, false);
+
+    int dim_size = self.shape[dim];
+    for (int i = 0; i < values.data->numel; ++i) {
+        float best_val = -INFINITY;
+        int best_idx = -1;
+
+        for (int j = 0; j < dim_size; ++j) {
+            int in_linear_idx = 0, stride = 1, out_i_rem = i, out_idx_tracker = out_shape_len - 1;
+            for (int k = ndim - 1; k >= 0; --k) {
+                int current_dim_idx;
+                if (k == dim) {
+                    current_dim_idx = j;
+                } else {
+                    int dim_k = out_shape[out_idx_tracker--];
+                    current_dim_idx = out_i_rem % dim_k;
+                    out_i_rem /= dim_k;
+                }
+                in_linear_idx += current_dim_idx * stride;
+                stride *= self.shape[k];
+            }
+            float current_val = self.data->flex[in_linear_idx];
+            if (current_val > best_val) { best_val = current_val; best_idx = j; }
+        }
+        values.data->flex[i] = best_val;
+        indices.data->flex[i] = (float)best_idx;
+    }
+
+    if (requires_grad) {
+        values.node->grad_fn = GradFn_reduce_dim;
+        values.node->inputs[0] = self;
+        values.node->inputs[1] = indices;
+        values.node->n_inputs = 2;
+        values.node->name = "MaxDim";
+    }
+
+    TensorMaxMinResult result = {values, indices};
+    return result;
+}
+
+Tensor Tensor_min_all(Tensor self) {
+    bool requires_grad = !cten_is_eval() && (self.node != NULL);
+    Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, requires_grad);
+
+    if (self.data->numel == 0) cten_assert(false, "min on empty tensor");
+    float min_val = self.data->flex[0];
+    for (int i = 1; i < self.data->numel; i++) {
+        if (self.data->flex[i] < min_val) {
+            min_val = self.data->flex[i];
+        }
+    }
+    res.data->flex[0] = min_val;
+
+    if (requires_grad) {
+        res.node->grad_fn = GradFn_min_all;
+        res.node->inputs[0] = self;
+        res.node->n_inputs = 1;
+        res.node->name = "MinAll";
+    }
+    return res;
+}
+
+TensorMaxMinResult Tensor_min_dim(Tensor self, int dim) {
+    int ndim = TensorShape_dim(self.shape);
+    dim = TensorShape_asdim(self.shape, dim);
+
+    TensorShape out_shape = {0};
+    int out_shape_len = 0;
+    for (int i = 0; i < ndim; i++) {
+        if (i != dim) out_shape[out_shape_len++] = self.shape[i];
+    }
+
+    bool requires_grad = !cten_is_eval() && (self.node != NULL);
+    Tensor values = Tensor_new(out_shape, requires_grad);
+    Tensor indices = Tensor_new(out_shape, false);
+
+    int dim_size = self.shape[dim];
+    for (int i = 0; i < values.data->numel; ++i) {
+        float best_val = INFINITY;
+        int best_idx = -1;
+
+        for (int j = 0; j < dim_size; ++j) {
+            int in_linear_idx = 0, stride = 1, out_i_rem = i, out_idx_tracker = out_shape_len - 1;
+            for (int k = ndim - 1; k >= 0; --k) {
+                int current_dim_idx;
+                if (k == dim) {
+                    current_dim_idx = j;
+                } else {
+                    int dim_k = out_shape[out_idx_tracker--];
+                    current_dim_idx = out_i_rem % dim_k;
+                    out_i_rem /= dim_k;
+                }
+                in_linear_idx += current_dim_idx * stride;
+                stride *= self.shape[k];
+            }
+            float current_val = self.data->flex[in_linear_idx];
+            if (current_val < best_val) { best_val = current_val; best_idx = j; }
+        }
+        values.data->flex[i] = best_val;
+        indices.data->flex[i] = (float)best_idx;
+    }
+
+    if (requires_grad) {
+        values.node->grad_fn = GradFn_reduce_dim;
+        values.node->inputs[0] = self;
+        values.node->inputs[1] = indices;
+        values.node->n_inputs = 2;
+        values.node->name = "MinDim";
+    }
+
+    TensorMaxMinResult result = {values, indices};
+    return result;
+}
+
+
 void cten_assert(bool cond, const char* fmt, ...) {
     if(!cond) {
         va_list args;

@@ -91,7 +243,6 @@ void cten_assert_dim(const char* title, int a, int b) {
     cten_assert(a == b, "%s: %d != %d", title, a, b);
 }

-
 bool cten_elemwise_broadcast(Tensor* a, Tensor* b) {
     Tensor orig_a = *a;
     Tensor orig_b = *b;

@@ -366,4 +517,4 @@ Tensor Tensor_unsqueeze(Tensor self, int dim) {
     memcpy(res.shape, new_shape, sizeof(TensorShape));

     return res;
-}
+}
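Tensor_max_dim and Tensor_min_dim rebuild, for each output slot, the matching input linear index digit by digit from the rightmost dimension, substituting the candidate index j at the reduced axis; GradFn_reduce_dim replays the same arithmetic to scatter gradients back. A standalone sketch of that index walk for a 2x3 row-major array reduced along dim 1 (plain arrays and assumed values, not cten types):

#include <stdio.h>

int main(void) {
    int shape[2] = {2, 3};       /* input shape, row-major */
    int out_shape[1] = {2};      /* shape after dropping dim 1 */
    int dim = 1, ndim = 2, out_len = 1;
    float x[6] = {4, 9, 1, 7, 2, 8};  /* rows {4,9,1} and {7,2,8} */

    for (int i = 0; i < 2; i++) {            /* one output slot per row */
        float best = -1e30f; int best_j = -1;
        for (int j = 0; j < shape[dim]; j++) {
            /* rebuild the input linear index from the rightmost dim */
            int lin = 0, stride = 1, rem = i, t = out_len - 1;
            for (int k = ndim - 1; k >= 0; --k) {
                int idx;
                if (k == dim) idx = j;
                else { idx = rem % out_shape[t]; rem /= out_shape[t]; t--; }
                lin += idx * stride;
                stride *= shape[k];
            }
            if (x[lin] > best) { best = x[lin]; best_j = j; }
        }
        /* prints: row 0: max 9 at index 1, row 1: max 8 at index 2 */
        printf("row %d: max %g at index %d\n", i, best, best_j);
    }
    return 0;
}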
