Add max/min reductions (whole tensor and along one dimension) with autograd.

Tensor_max(t) / Tensor_min(t) reduce over every element and return a
one-element tensor. Tensor_max(t, dim) / Tensor_min(t, dim) reduce along
`dim` and return a TensorMaxMinResult {values, indices}.

Review fixes folded into this revision of the patch:
  * GradFn_reduce_dim no longer infers the reduced dimension from shapes
    (ambiguous when neighbouring dimensions have equal size, e.g. 3x3);
    the forward pass records the flat offset of each selected element.
  * Tensor_max_dim / Tensor_min_dim seed the search from the first element
    of each slice, so all-NaN / all-infinite slices no longer yield index -1.
    Both now share one static helper.
  * Tensor_max_all / Tensor_min_all check for an empty tensor before
    allocating the result.
  * Duplicated bodies in src/operator.c forward to a single implementation.
  * Tensor_max / Tensor_min macros reuse the existing _CTEN_PICK helper.
  * New and touched files end with a newline.

NOTE(review):
  * The header names of the `#include` context lines in include/cten.h were
    lost when this patch was extracted; they are reproduced as found and must
    be restored before the patch can apply. The `#include <stdio.h>` line in
    the new test files is a best guess for the same reason -- confirm.
  * Indentation of context lines was reconstructed; `index` blob hashes are
    stale for files whose added content changed.
  * Tensor_backward (src/basic.c) still locates the reduced dimension by
    comparing shapes when it unsqueezes the upstream gradient; that
    pre-existing heuristic is not addressed here.

diff --git a/include/cten.h b/include/cten.h
index cb7f63f..4ce35bd 100644
--- a/include/cten.h
+++ b/include/cten.h
@@ -7,6 +7,10 @@
 #include
 #include
 
+// Arity dispatch reuses _CTEN_PICK, defined just below (macros expand at the point of use).
+#define Tensor_max(...) _CTEN_PICK(__VA_ARGS__, Tensor_max_dim, Tensor_max_all)(__VA_ARGS__)
+#define Tensor_min(...) _CTEN_PICK(__VA_ARGS__, Tensor_min_dim, Tensor_min_all)(__VA_ARGS__)
+
 #define _CTEN_PICK(_1,_2,NAME,...) NAME
 #define Tensor_mean(...) _CTEN_PICK(__VA_ARGS__, Tensor_mean_dim, Tensor_mean_all)(__VA_ARGS__)
 #define Tensor_sum(...) _CTEN_PICK(__VA_ARGS__, Tensor_sum_dim, Tensor_sum_all )(__VA_ARGS__)
@@ -33,6 +37,12 @@ typedef struct GradNode {
     const char* name;
 } GradNode;
 
+// Result of a max/min reduction along one dimension.
+typedef struct {
+    Tensor values;   // reduced extrema; shape is the input shape with `dim` removed
+    Tensor indices;  // position of each extremum along `dim`, stored as floats
+} TensorMaxMinResult;
+
 void cten_initilize();
 void cten_finalize();
 
@@ -81,8 +91,12 @@ Tensor Tensor_mean_dim(Tensor self, int dim);
 Tensor Tensor_sum_all (Tensor self);
 Tensor Tensor_sum_dim (Tensor self, int dim);
 
-Tensor Tensor_max(Tensor self);
-Tensor Tensor_min(Tensor self);
+// *_all reduce over every element and return a one-element tensor;
+// *_dim reduce along `dim` and also report where each extremum was found.
+Tensor Tensor_max_all(Tensor self);
+TensorMaxMinResult Tensor_max_dim(Tensor self, int dim);
+Tensor Tensor_min_all(Tensor self);
+TensorMaxMinResult Tensor_min_dim(Tensor self, int dim);
 
 void Tensor_argmax(Tensor self, int* out);
 
diff --git a/src/basic.c b/src/basic.c
index 14834e8..15f6bdd 100644
--- a/src/basic.c
+++ b/src/basic.c
@@ -140,12 +140,11 @@ void Tensor_backward(Tensor self, Tensor grad) {
     }
 
     for(int i = 0; i < self.node->n_inputs; i++) {
-        if (self.node->inputs[i].data == NULL) {
+        Tensor input_tensor = self.node->inputs[i];
+        if (input_tensor.node == NULL) {
             continue;
         }
 
-        Tensor input_tensor = self.node->inputs[i];
-
         // Step 1: Get the local gradient (the partial derivative). --> For z = f(x, y), this would be dz/dx or dz/dy.
         Tensor input_grad = self.node->grad_fn(self, i);
 
@@ -154,7 +153,7 @@ void Tensor_backward(Tensor self, Tensor grad) {
             int input_ndim = TensorShape_dim(input_tensor.shape);
             int grad_ndim = TensorShape_dim(grad.shape);
 
-            if ((strcmp(self.node->name, "Sum") == 0 || strcmp(self.node->name, "Mean") == 0) && input_ndim > grad_ndim) {
+            if ((strcmp(self.node->name, "Sum") == 0 || strcmp(self.node->name, "Mean") == 0 || strcmp(self.node->name, "MaxDim") == 0 || strcmp(self.node->name, "MinDim") == 0) && input_ndim > grad_ndim) {
                 // Find the dimension that was reduced. We assume the non-reduced dimensions match in size.
                 int unsqueeze_dim = -1;
                 int grad_idx = 0;
diff --git a/src/operator.c b/src/operator.c
index 365446c..8fdb1d7 100644
--- a/src/operator.c
+++ b/src/operator.c
@@ -14,6 +14,12 @@
 #ifdef Tensor_sum
 #undef Tensor_sum
 #endif
+#ifdef Tensor_max
+#undef Tensor_max
+#endif
+#ifdef Tensor_min
+#undef Tensor_min
+#endif
 
 static Tensor GradFn_add(Tensor self, int i) {
     // f(x, y) = x + y; f'(x) = 1; f'(y) = 1
@@ -450,5 +456,59 @@ Tensor Tensor_sub(Tensor self, Tensor other) {
         res.node->n_inputs = 2;
         res.node->name = "Sub";
     }
     return res;
-}
\ No newline at end of file
+}
+
+// Backward pass for Tensor_max_dim / Tensor_min_dim.
+// inputs[1] stores, per output element, the flat offset of the selected element
+// inside inputs[0]; the reduced dimension is never inferred from shapes, which is
+// ambiguous when neighbouring dimensions have the same size (e.g. a 3x3 input).
+Tensor GradFn_reduce_dim(Tensor self, int i) {
+    (void)i;  // the mask is always built for inputs[0]
+    Tensor input = self.node->inputs[0];
+    Tensor flat_indices = self.node->inputs[1];
+    Tensor grad_out = Tensor_zeros(input.shape, false);
+
+    for (int j = 0; j < flat_indices.data->numel; j++) {
+        int offset = (int)flat_indices.data->flex[j];
+        grad_out.data->flex[offset] = 1.0f;
+    }
+    return grad_out;
+}
+
+// Shared backward for the whole-tensor max/min reductions: every input element
+// equal to the reduced value gets an equal share of the gradient (ties split evenly).
+static Tensor GradFn_extremum_all(Tensor self, int i) {
+    Tensor input = self.node->inputs[i];
+    Tensor res = Tensor_zeros(input.shape, false);
+    float extremum = self.data->flex[0];
+
+    int hits = 0;
+    for (int j = 0; j < input.data->numel; j++) {
+        if (input.data->flex[j] == extremum) hits++;
+    }
+
+    float share = (hits > 0) ? 1.0f / hits : 0.0f;
+    for (int j = 0; j < input.data->numel; j++) {
+        if (input.data->flex[j] == extremum) res.data->flex[j] = share;
+    }
+    return res;
+}
+
+Tensor GradFn_max_all(Tensor self, int i) {
+    return GradFn_extremum_all(self, i);
+}
+
+Tensor GradFn_min_all(Tensor self, int i) {
+    return GradFn_extremum_all(self, i);
+}
+
+// Plain-function spellings (the header macros are #undef'd at the top of this
+// file); both forward to the single implementation in utils.c.
+Tensor Tensor_max(Tensor self) {
+    return Tensor_max_all(self);
+}
+
+Tensor Tensor_min(Tensor self) {
+    return Tensor_min_all(self);
+}
diff --git a/src/utils.c b/src/utils.c
index 592efc4..86babc7 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -16,6 +16,9 @@ bool va_arg_is_present(va_list args) {
 
 Tensor GradFn_mean(Tensor self, int i);
 Tensor GradFn_sum(Tensor self, int i);
+Tensor GradFn_max_all(Tensor self, int i);
+Tensor GradFn_min_all(Tensor self, int i);
+Tensor GradFn_reduce_dim(Tensor self, int i);
 
 Tensor Tensor_mean_all(Tensor self) {
     float total = 0.0f;
@@ -67,6 +70,135 @@ Tensor Tensor_sum_dim(Tensor self, int dim) {
     return res;
 }
 
+// Largest element of `self`, returned as a one-element tensor.
+Tensor Tensor_max_all(Tensor self) {
+    // Validate before allocating the result tensor.
+    cten_assert(self.data->numel > 0, "max on empty tensor");
+
+    bool requires_grad = !cten_is_eval() && (self.node != NULL);
+    Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, requires_grad);
+
+    float max_val = self.data->flex[0];
+    for (int i = 1; i < self.data->numel; i++) {
+        if (self.data->flex[i] > max_val) {
+            max_val = self.data->flex[i];
+        }
+    }
+    res.data->flex[0] = max_val;
+
+    if (requires_grad) {
+        res.node->grad_fn = GradFn_max_all;
+        res.node->inputs[0] = self;
+        res.node->n_inputs = 1;
+        res.node->name = "MaxAll";
+    }
+    return res;
+}
+
+// Shared implementation of Tensor_max_dim / Tensor_min_dim.
+// `want_max` selects the comparison; `node_name` labels the autograd node.
+// Indexes `self` as contiguous row-major data.
+static TensorMaxMinResult reduce_extremum_dim(Tensor self, int dim, bool want_max, const char* node_name) {
+    int ndim = TensorShape_dim(self.shape);
+    dim = TensorShape_asdim(self.shape, dim);
+
+    // Output shape is the input shape with `dim` removed.
+    TensorShape out_shape = {0};
+    int out_ndim = 0;
+    for (int d = 0; d < ndim; d++) {
+        if (d != dim) out_shape[out_ndim++] = self.shape[d];
+    }
+
+    int dim_size = self.shape[dim];
+    cten_assert(dim_size > 0, "%s: cannot reduce over an empty dimension", node_name);
+
+    // Stepping one position along `dim` moves `inner` elements in memory.
+    int inner = 1;
+    for (int d = dim + 1; d < ndim; d++) {
+        inner *= self.shape[d];
+    }
+
+    bool requires_grad = !cten_is_eval() && (self.node != NULL);
+    Tensor values = Tensor_new(out_shape, requires_grad);
+    Tensor indices = Tensor_new(out_shape, false);
+    Tensor flat_indices = {0};
+    if (requires_grad) {
+        // Flat offsets are stored as floats, which are exact only up to 2^24.
+        cten_assert(self.data->numel <= (1 << 24), "%s: tensor too large to track gradients", node_name);
+        flat_indices = Tensor_new(out_shape, false);
+    }
+
+    for (int i = 0; i < values.data->numel; i++) {
+        // Offset of the first element along `dim` for output position i.
+        int base = (i / inner) * dim_size * inner + (i % inner);
+
+        // Seed from the first element so that all-NaN or all-infinite slices
+        // still produce a valid index instead of -1.
+        float best_val = self.data->flex[base];
+        int best_idx = 0;
+        for (int j = 1; j < dim_size; j++) {
+            float candidate = self.data->flex[base + j * inner];
+            bool better = want_max ? (candidate > best_val) : (candidate < best_val);
+            if (better) {  // strict comparison keeps the first occurrence on ties
+                best_val = candidate;
+                best_idx = j;
+            }
+        }
+
+        values.data->flex[i] = best_val;
+        indices.data->flex[i] = (float)best_idx;
+        if (requires_grad) {
+            flat_indices.data->flex[i] = (float)(base + best_idx * inner);
+        }
+    }
+
+    if (requires_grad) {
+        values.node->grad_fn = GradFn_reduce_dim;
+        values.node->inputs[0] = self;
+        values.node->inputs[1] = flat_indices;  // consumed by GradFn_reduce_dim
+        values.node->n_inputs = 2;
+        values.node->name = node_name;
+    }
+
+    TensorMaxMinResult result = {values, indices};
+    return result;
+}
+
+// Maximum along `dim`: reduced values plus the index of each maximum along `dim`.
+TensorMaxMinResult Tensor_max_dim(Tensor self, int dim) {
+    return reduce_extremum_dim(self, dim, true, "MaxDim");
+}
+
+// Smallest element of `self`, returned as a one-element tensor.
+Tensor Tensor_min_all(Tensor self) {
+    // Validate before allocating the result tensor.
+    cten_assert(self.data->numel > 0, "min on empty tensor");
+
+    bool requires_grad = !cten_is_eval() && (self.node != NULL);
+    Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, requires_grad);
+
+    float min_val = self.data->flex[0];
+    for (int i = 1; i < self.data->numel; i++) {
+        if (self.data->flex[i] < min_val) {
+            min_val = self.data->flex[i];
+        }
+    }
+    res.data->flex[0] = min_val;
+
+    if (requires_grad) {
+        res.node->grad_fn = GradFn_min_all;
+        res.node->inputs[0] = self;
+        res.node->n_inputs = 1;
+        res.node->name = "MinAll";
+    }
+    return res;
+}
+
+// Minimum along `dim`: reduced values plus the index of each minimum along `dim`.
+TensorMaxMinResult Tensor_min_dim(Tensor self, int dim) {
+    return reduce_extremum_dim(self, dim, false, "MinDim");
+}
+
 void cten_assert(bool cond, const char* fmt, ...) {
     if(!cond) {
         va_list args;
@@ -91,7 +223,6 @@ void cten_assert_dim(const char* title, int a, int b) {
     cten_assert(a == b, "%s: %d != %d", title, a, b);
 }
 
-
 bool cten_elemwise_broadcast(Tensor* a, Tensor* b) {
     Tensor orig_a = *a;
     Tensor orig_b = *b;
@@ -366,4 +497,4 @@ Tensor Tensor_unsqueeze(Tensor self, int dim) {
 
     memcpy(res.shape, new_shape, sizeof(TensorShape));
     return res;
-}
\ No newline at end of file
+}
diff --git a/tests/Backward/test_max_backward.c b/tests/Backward/test_max_backward.c
new file mode 100644
index 0000000..ec8c7c0
--- /dev/null
+++ b/tests/Backward/test_max_backward.c
@@ -0,0 +1,156 @@
+#include "../../include/cten.h"
+#include "../test_utils.h"
+#include "../csv_reporter.h"
+#include "../test_config.h"
+#include <stdio.h>
+
+void test_max_backward() {
+    const char* op_name = "max_backward";
+    PoolId pool_id = 0;
+    cten_begin_malloc(pool_id);
+
+    // Test Case 1: Vector with a unique maximum value
+    {
+        const char* tc_name = "max_vector_unique_backward";
+        TensorShape v_shape = {3};
+        float data[] = {2.0f, 8.0f, 5.0f};
+        float exp_grad[] = {0.0f, 1.0f, 0.0f};
+
+        Tensor t = create_test_tensor(v_shape, data, true);
+        Tensor z = Tensor_max(t);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(z, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 2: Vector with duplicate maximum values
+    {
+        const char* tc_name = "max_vector_duplicate_backward";
+        TensorShape v_shape = {4};
+        float data[] = {9.0f, 3.0f, 9.0f, 1.0f};
+        float exp_grad[] = {0.5f, 0.0f, 0.5f, 0.0f};
+
+        Tensor t = create_test_tensor(v_shape, data, true);
+        Tensor z = Tensor_max(t);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(z, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 3: Matrix with a unique maximum value
+    {
+        const char* tc_name = "max_matrix_unique_backward";
+        TensorShape m_shape = {2, 2};
+        float data[] = {1.0f, 2.0f, 10.0f, 4.0f};
+        float exp_grad[] = {0.0f, 0.0f, 1.0f, 0.0f};
+
+        Tensor t = create_test_tensor(m_shape, data, true);
+        Tensor z = Tensor_max(t);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(z, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 4: Complex computation graph (z = max(x) * y)
+    {
+        const char* tc_name = "max_complex_graph_backward";
+        TensorShape v_shape = {3};
+        TensorShape s_shape = {1};
+        float x_data[] = {1.0f, 5.0f, 2.0f};
+        float y_data[] = {4.0f};
+
+        // Let m = max(x). z = m * y.
+        // dz/dx = dz/dm * dm/dx
+        // dz/dm = y = 4.0
+        // dm/dx = [0, 1, 0]
+        // dz/dx = 4.0 * [0, 1, 0] = [0, 4.0, 0]
+        float exp_grad_x[] = {0.0f, 4.0f, 0.0f};
+        // dz/dy = m = 5.0
+        float exp_grad_y[] = {5.0f};
+
+        Tensor x = create_test_tensor(v_shape, x_data, true);
+        Tensor y = create_test_tensor(s_shape, y_data, true);
+
+        Tensor m = Tensor_max(x); // m = 5.0
+        Tensor z = Tensor_mul(m, y); // z = 20.0
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(z, grad_dummy);
+
+        Tensor expected_grad_x_tensor = create_test_tensor(v_shape, exp_grad_x, false);
+        Tensor expected_grad_y_tensor = create_test_tensor(s_shape, exp_grad_y, false);
+
+        compare_tensors(&x.node->grad, &expected_grad_x_tensor, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+        compare_tensors(&y.node->grad, &expected_grad_y_tensor, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 5: Gradient of max over a dimension (dim=1)
+    {
+        const char* tc_name = "max_matrix_dim1_backward";
+        TensorShape m_shape = {2, 3};
+        float data[] = {1.0f, 9.0f, 3.0f, 8.0f, 5.0f, 6.0f};
+        float exp_grad[] = {0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f};
+
+        Tensor t = create_test_tensor(m_shape, data, true);
+        TensorMaxMinResult max_res = Tensor_max(t, 1);
+        Tensor loss = Tensor_sum(max_res.values);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(loss, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 6: Gradient of max over a dimension (dim=0)
+    {
+        const char* tc_name = "max_matrix_dim0_backward";
+        TensorShape m_shape = {3, 2};
+        float data[] = {5.0f, 2.0f, 1.0f, 9.0f, 7.0f, 8.0f};
+        float exp_grad[] = {0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f};
+
+        Tensor t = create_test_tensor(m_shape, data, true);
+        TensorMaxMinResult max_res = Tensor_max(t, 0);
+        Tensor loss = Tensor_sum(max_res.values);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(loss, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 7: Gradient of max over a dimension with duplicate maximums
+    {
+        const char* tc_name = "max_matrix_dim_duplicate_backward";
+        TensorShape m_shape = {2, 4};
+        float data[] = {5.0f, 9.0f, 7.0f, 9.0f, 8.0f, 6.0f, 8.0f, 1.0f};
+
+        // Max along dim=1 will select the first occurrence of the maximum.
+        // For row 0, max is 9.0 at index 1.
+        // For row 1, max is 8.0 at index 0.
+        // The gradient only flows back to these specific indices.
+        float exp_grad[] = {0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f};
+
+        Tensor t = create_test_tensor(m_shape, data, true);
+        TensorMaxMinResult max_res = Tensor_max(t, 1);
+        Tensor loss = Tensor_sum(max_res.values);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(loss, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    cten_free(pool_id);
+}
diff --git a/tests/Backward/test_min_backward.c b/tests/Backward/test_min_backward.c
new file mode 100644
index 0000000..a5dad23
--- /dev/null
+++ b/tests/Backward/test_min_backward.c
@@ -0,0 +1,156 @@
+#include "../../include/cten.h"
+#include "../test_utils.h"
+#include "../csv_reporter.h"
+#include "../test_config.h"
+#include <stdio.h>
+
+void test_min_backward() {
+    const char* op_name = "min_backward";
+    PoolId pool_id = 0;
+    cten_begin_malloc(pool_id);
+
+    // Test Case 1: Vector with a unique minimum value
+    {
+        const char* tc_name = "min_vector_unique_backward";
+        TensorShape v_shape = {3};
+        float data[] = {8.0f, 2.0f, 5.0f};
+        float exp_grad[] = {0.0f, 1.0f, 0.0f};
+
+        Tensor t = create_test_tensor(v_shape, data, true);
+        Tensor z = Tensor_min(t);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(z, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 2: Vector with duplicate minimum values
+    {
+        const char* tc_name = "min_vector_duplicate_backward";
+        TensorShape v_shape = {4};
+        float data[] = {9.0f, 1.0f, 5.0f, 1.0f};
+        float exp_grad[] = {0.0f, 0.5f, 0.0f, 0.5f};
+
+        Tensor t = create_test_tensor(v_shape, data, true);
+        Tensor z = Tensor_min(t);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(z, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 3: Matrix with a unique minimum value
+    {
+        const char* tc_name = "min_matrix_unique_backward";
+        TensorShape m_shape = {2, 2};
+        float data[] = {10.0f, 2.0f, 8.0f, 4.0f};
+        float exp_grad[] = {0.0f, 1.0f, 0.0f, 0.0f};
+
+        Tensor t = create_test_tensor(m_shape, data, true);
+        Tensor z = Tensor_min(t);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(z, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 4: Complex computation graph (z = min(x) + y)
+    {
+        const char* tc_name = "min_complex_graph_backward";
+        TensorShape v_shape = {3};
+        TensorShape s_shape = {1};
+        float x_data[] = {8.0f, 3.0f, 9.0f};
+        float y_data[] = {10.0f};
+
+        // Let m = min(x). z = m + y.
+        // dz/dx = dz/dm * dm/dx
+        // dz/dm = 1.0 (from add op)
+        // dm/dx = [0, 1, 0]
+        // dz/dx = 1.0 * [0, 1, 0] = [0, 1.0, 0]
+        float exp_grad_x[] = {0.0f, 1.0f, 0.0f};
+        // dz/dy = 1.0
+        float exp_grad_y[] = {1.0f};
+
+        Tensor x = create_test_tensor(v_shape, x_data, true);
+        Tensor y = create_test_tensor(s_shape, y_data, true);
+
+        Tensor m = Tensor_min(x); // m = 3.0
+        Tensor z = Tensor_add(m, y); // z = 13.0
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(z, grad_dummy);
+
+        Tensor expected_grad_x_tensor = create_test_tensor(v_shape, exp_grad_x, false);
+        Tensor expected_grad_y_tensor = create_test_tensor(s_shape, exp_grad_y, false);
+
+        compare_tensors(&x.node->grad, &expected_grad_x_tensor, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+        compare_tensors(&y.node->grad, &expected_grad_y_tensor, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 5: Gradient of min over a dimension (dim=1)
+    {
+        const char* tc_name = "min_matrix_dim1_backward";
+        TensorShape m_shape = {2, 3};
+        float data[] = {5.0f, 7.0f, -1.0f, -8.0f, 2.0f, 6.0f};
+        float exp_grad[] = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f};
+
+        Tensor t = create_test_tensor(m_shape, data, true);
+        TensorMaxMinResult min_res = Tensor_min(t, 1);
+        Tensor loss = Tensor_sum(min_res.values);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(loss, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 6: Gradient of min over a dimension (dim=0)
+    {
+        const char* tc_name = "min_matrix_dim0_backward";
+        TensorShape m_shape = {3, 2};
+        float data[] = {5.0f, 2.0f, -1.0f, 9.0f, 7.0f, -8.0f};
+        float exp_grad[] = {0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f};
+
+        Tensor t = create_test_tensor(m_shape, data, true);
+        TensorMaxMinResult min_res = Tensor_min(t, 0);
+        Tensor loss = Tensor_sum(min_res.values);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(loss, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 7: Gradient of min over a dimension with duplicate minimums
+    {
+        const char* tc_name = "min_matrix_dim_duplicate_backward";
+        TensorShape m_shape = {2, 4};
+        float data[] = {5.0f, -8.0f, 7.0f, -8.0f, 2.0f, 6.0f, 2.0f, 9.0f};
+
+        // Min along dim=1 will select the first occurrence of the minimum.
+        // For row 0, min is -8.0 at index 1.
+        // For row 1, min is 2.0 at index 0.
+        // The gradient only flows back to these specific indices.
+        float exp_grad[] = {0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f};
+
+        Tensor t = create_test_tensor(m_shape, data, true);
+        TensorMaxMinResult min_res = Tensor_min(t, 1);
+        Tensor loss = Tensor_sum(min_res.values);
+
+        Tensor grad_dummy = {0};
+        Tensor_backward(loss, grad_dummy);
+
+        Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false);
+        compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    cten_free(pool_id);
+}
diff --git a/tests/Operator/test_max.c b/tests/Operator/test_max.c
new file mode 100644
index 0000000..dfddec7
--- /dev/null
+++ b/tests/Operator/test_max.c
@@ -0,0 +1,154 @@
+#include "../../include/cten.h"
+#include "../test_utils.h"
+#include "../csv_reporter.h"
+#include "../test_config.h"
+#include <stdio.h>
+
+void test_max_operator() {
+    const char* op_name = "max";
+    PoolId pool_id = 0;
+
+    cten_begin_malloc(pool_id);
+
+    // Test Case 1: Max of a scalar tensor
+    {
+        const char* tc_name = "max_scalar";
+        TensorShape s_shape = {1};
+        float d1[] = {2.7885f};
+        float exp_d[] = {2.7885f};
+        Tensor t1 = create_test_tensor(s_shape, d1, false);
+        Tensor expected_res = create_test_tensor(s_shape, exp_d, false);
+        Tensor actual_res = Tensor_max(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 2: Max of a vector tensor
+    {
+        const char* tc_name = "max_vector";
+        TensorShape v_shape = {5};
+        float d1[] = {8.7458f, 4.147f, 0.9326f, 7.1226f, 2.5115f};
+        float exp_d[] = {8.7458f};
+        TensorShape exp_shape = {1, 0, 0, 0};
+        Tensor t1 = create_test_tensor(v_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor actual_res = Tensor_max(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 3: Max of a matrix tensor
+    {
+        const char* tc_name = "max_matrix";
+        TensorShape m_shape = {2, 3};
+        float d1[] = {7.6507f, -6.481f, 2.9918f, -6.1952f, -9.0693f, 4.4308f};
+        float exp_d[] = {7.6507f};
+        TensorShape exp_shape = {1, 0, 0, 0};
+        Tensor t1 = create_test_tensor(m_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor actual_res = Tensor_max(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 4: Max of a tensor with negative numbers
+    {
+        const char* tc_name = "max_vector_negative";
+        TensorShape v_shape = {4};
+        float d1[] = {-8.687f, -0.9767f, -9.2835f, -6.0498f};
+        float exp_d[] = {-0.9767f};
+        TensorShape exp_shape = {1, 0, 0, 0};
+        Tensor t1 = create_test_tensor(v_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor actual_res = Tensor_max(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 5: Max with duplicate maximum values
+    {
+        const char* tc_name = "max_duplicate";
+        TensorShape v_shape = {5};
+        float d1[] = {6.1886f, -9.87f, 5.8818f, 5.8818f, 6.1886f};
+        float exp_d[] = {6.1886f};
+        TensorShape exp_shape = {1, 0, 0, 0};
+        Tensor t1 = create_test_tensor(v_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor actual_res = Tensor_max(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 6: Max of a 3D tensor
+    {
+        const char* tc_name = "max_3d_tensor";
+        TensorShape t_shape = {2, 2, 2};
+        float d1[] = {-6.8904f, 9.1443f, -3.2681f, -8.1451f, -8.0657f, 6.9499f, 2.0745f, 6.1426f};
+        float exp_d[] = {9.1443f};
+        TensorShape exp_shape = {1, 0, 0, 0};
+        Tensor t1 = create_test_tensor(t_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor actual_res = Tensor_max(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 7: Max over a specific dimension of a matrix (dim=0) (here dim=-2 is used to represent dim=0)
+    {
+        const char* tc_name = "max_matrix_dim_0";
+        TensorShape m_shape = {2, 3};
+        float d1[] = {5.0f, 9.0f, 7.0f, 2.0f, 1.0f, 8.0f};
+        float exp_d[] = {5.0f, 9.0f, 8.0f};
+        float exp_idx[] = {0.0f, 0.0f, 1.0f};
+        TensorShape exp_shape = {3};
+
+        Tensor t1 = create_test_tensor(m_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false);
+
+        TensorMaxMinResult actual = Tensor_max(t1, -2);
+
+        compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+        compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 8: Max over a specific dimension of a matrix (dim=1)
+    {
+        const char* tc_name = "max_matrix_dim_1";
+        TensorShape m_shape = {2, 3};
+        float d1[] = {5.0f, 9.0f, 7.0f, 2.0f, 1.0f, 8.0f};
+        float exp_d[] = {9.0f, 8.0f};
+        float exp_idx[] = {1.0f, 2.0f};
+        TensorShape exp_shape = {2};
+
+        Tensor t1 = create_test_tensor(m_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false);
+
+        TensorMaxMinResult actual = Tensor_max(t1, 1);
+
+        compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+        compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 9: Max over a dimension with duplicate max values (should return first index)
+    {
+        const char* tc_name = "max_matrix_dim_1_duplicate";
+        TensorShape m_shape = {2, 4};
+        float d1[] = {1.0f, 9.0f, 5.0f, 9.0f, 8.0f, 8.0f, 2.0f, 7.0f};
+        float exp_d[] = {9.0f, 8.0f};
+        float exp_idx[] = {1.0f, 0.0f};
+        TensorShape exp_shape = {2};
+
+        Tensor t1 = create_test_tensor(m_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false);
+
+        TensorMaxMinResult actual = Tensor_max(t1, -1);
+
+        compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+        compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE);
+    }
+
+    cten_free(pool_id);
+}
diff --git a/tests/Operator/test_min.c b/tests/Operator/test_min.c
new file mode 100644
index 0000000..f93f85d
--- /dev/null
+++ b/tests/Operator/test_min.c
@@ -0,0 +1,156 @@
+#include "../../include/cten.h"
+#include "../test_utils.h"
+#include "../csv_reporter.h"
+#include "../test_config.h"
+#include <stdio.h>
+
+void test_min_operator() {
+    const char* op_name = "min";
+    PoolId pool_id = 0;
+
+    cten_begin_malloc(pool_id);
+
+    // Test Case 1: Min of a scalar tensor
+    {
+        const char* tc_name = "min_scalar";
+        TensorShape s_shape = {1};
+        float d1[] = {2.7885f};
+        float exp_d[] = {2.7885f};
+        Tensor t1 = create_test_tensor(s_shape, d1, false);
+        Tensor expected_res = create_test_tensor(s_shape, exp_d, false);
+        Tensor actual_res = Tensor_min(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 2: Min of a vector tensor
+    {
+        const char* tc_name = "min_vector";
+        TensorShape v_shape = {5};
+        float d1[] = {-9.4998f, -4.4994f, -5.5358f, 4.7294f, 3.534f};
+        float exp_d[] = {-9.4998f};
+        TensorShape exp_shape = {1, 0, 0, 0};
+        Tensor t1 = create_test_tensor(v_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor actual_res = Tensor_min(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 3: Min of a matrix tensor
+    {
+        const char* tc_name = "min_matrix";
+        TensorShape m_shape = {2, 3};
+        float d1[] = {7.8436f, -8.2612f, -1.5616f, -9.4041f, -5.6272f, 0.1071f};
+        float exp_d[] = {-9.4041f};
+        TensorShape exp_shape = {1, 0, 0, 0};
+        Tensor t1 = create_test_tensor(m_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor actual_res = Tensor_min(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 4: Min of a tensor with negative numbers
+    {
+        const char* tc_name = "min_vector_negative";
+        TensorShape v_shape = {4};
+        float d1[] = {-9.7373f, -8.0315f, -3.5661f, -4.6051f};
+        float exp_d[] = {-9.7373f};
+        TensorShape exp_shape = {1, 0, 0, 0};
+        Tensor t1 = create_test_tensor(v_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor actual_res = Tensor_min(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 5: Min with duplicate minimum values
+    {
+        const char* tc_name = "min_duplicate";
+        TensorShape v_shape = {5};
+        float d1[] = {1.7853f, -9.87f, -2.7956f, -2.7956f, -9.87f};
+        float exp_d[] = {-9.87f};
+        TensorShape exp_shape = {1, 0, 0, 0};
+        Tensor t1 = create_test_tensor(v_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor actual_res = Tensor_min(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 6: Min of a 3D tensor
+    {
+        const char* tc_name = "min_3d_tensor";
+        TensorShape t_shape = {2, 2, 2};
+        float d1[] = {-6.8904f, 9.1443f, -3.2681f, -8.1451f, -8.0657f, 6.9499f, 2.0745f, 6.1426f};
+        float exp_d[] = {-8.1451f};
+        TensorShape exp_shape = {1, 0, 0, 0};
+        Tensor t1 = create_test_tensor(t_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor actual_res = Tensor_min(t1);
+
+        compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 7: Min over a specific dimension of a matrix (dim=0)
+    {
+        const char* tc_name = "min_matrix_dim_0";
+        TensorShape m_shape = {2, 3};
+        float d1[] = {5.0f, -1.0f, 7.0f, 2.0f, -8.0f, 6.0f};
+        float exp_d[] = {2.0f, -8.0f, 6.0f};
+        float exp_idx[] = {1.0f, 1.0f, 1.0f};
+        TensorShape exp_shape = {3};
+
+        Tensor t1 = create_test_tensor(m_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false);
+
+        TensorMaxMinResult actual = Tensor_min(t1, 0);
+
+        compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+        compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 8: Min over a specific dimension of a matrix (dim=1)
+    {
+        const char* tc_name = "min_matrix_dim_1";
+        TensorShape m_shape = {2, 3};
+        float d1[] = {5.0f, -1.0f, 7.0f, 2.0f, -8.0f, 6.0f};
+        float exp_d[] = {-1.0f, -8.0f};
+        float exp_idx[] = {1.0f, 1.0f};
+        TensorShape exp_shape = {2};
+
+        Tensor t1 = create_test_tensor(m_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false);
+
+        TensorMaxMinResult actual = Tensor_min(t1, 1);
+
+        compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+        compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE);
+    }
+
+    // Test Case 9: Min over a dimension of a 3D tensor (dim=2)
+    {
+        const char* tc_name = "min_3d_tensor_dim_2";
+        TensorShape t_shape = {2, 2, 3};
+        float d1[] = {1.0f, 8.0f, -3.0f, 4.0f, 2.0f, 9.0f,
+                      7.0f, 0.0f, 5.0f, -4.0f, -1.0f, -2.0f};
+
+        float exp_d[] = {-3.0f, 2.0f, 0.0f, -4.0f};
+        float exp_idx[] = {2.0f, 1.0f, 1.0f, 0.0f};
+        TensorShape exp_shape = {2, 2};
+
+        Tensor t1 = create_test_tensor(t_shape, d1, false);
+        Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
+        Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false);
+
+        TensorMaxMinResult actual = Tensor_min(t1, 2);
+
+        compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE);
+        compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE);
+    }
+
+    cten_free(pool_id);
+}
diff --git a/tests/cten_tests.c b/tests/cten_tests.c
index ca6b231..0c4e9f0 100644
--- a/tests/cten_tests.c
+++ b/tests/cten_tests.c
@@ -25,6 +25,8 @@ void test_pow_operator();
 void test_reciprocal_operator();
 void test_square_operator();
 void test_div_operator();
+void test_max_operator();
+void test_min_operator();
 
 // Backward tests
 void test_add_backward();
@@ -33,6 +35,8 @@ void test_matmul_backward();
 void test_sub_backward();
 void test_relu_backward();
 void test_linear_backward();
+void test_min_backward();
+void test_max_backward();
 void test_sum_backward();
 void test_mean_backward();
 
@@ -98,6 +102,12 @@ int main() {
     test_div_operator();
     printf("Div operator tests finished.\n");
 
+    test_max_operator();
+    printf("Max operator tests finished.\n");
+
+    test_min_operator();
+    printf("Min operator tests finished.\n");
+
     // Backward tests
     test_add_backward();
     printf("Add backward tests finished.\n");
@@ -117,6 +127,12 @@ int main() {
     test_linear_backward();
     printf("Linear backward tests finished.\n");
 
+    test_min_backward();
+    printf("Min backward tests finished.\n");
+
+    test_max_backward();
+    printf("Max backward tests finished.\n");
+
     test_sum_backward();
     printf("Sum backward tests finished.\n");
 