diff --git a/include/layers/ReduceLayer.hpp b/include/layers/ReduceLayer.hpp
new file mode 100644
index 00000000..e2400600
--- /dev/null
+++ b/include/layers/ReduceLayer.hpp
@@ -0,0 +1,35 @@
+#pragma once
+#include <cstdint>
+#include <string>
+
+#include "layers/Layer.hpp"
+#include "layers/Tensor.hpp"
+
+namespace it_lab_ai {
+
+class ReduceLayer : public Layer {
+ public:
+  enum class Operation : uint8_t { kSum, kMean, kMult, kMax, kMin };
+
+  ReduceLayer(Operation op, int64_t keepdims = 0);
+  explicit ReduceLayer(int64_t keepdims = 0)
+      : ReduceLayer(Operation::kSum, keepdims) {}
+  void run(const Tensor& input, Tensor& output) override;
+  void run(const Tensor& input, const Tensor& axes, Tensor& output);
+
+  static std::string get_name() { return "ReduceLayer"; }
+
+ private:
+  Operation op_;
+  int64_t keepdims_;
+  static void normalize_axes(const Shape& input_shape,
+                             std::vector<int64_t>& axes);
+  Shape calculate_output_shape(const Shape& input_shape,
+                               const std::vector<int64_t>& axes) const;
+
+  template <typename T>
+  void compute(const Tensor& input, const Shape& output_shape,
+               const std::vector<int64_t>& axes, Tensor& output) const;
+};
+
+}  // namespace it_lab_ai
\ No newline at end of file
diff --git a/include/layers/Shape.hpp b/include/layers/Shape.hpp
index 6200a70a..d6d1fad7 100644
--- a/include/layers/Shape.hpp
+++ b/include/layers/Shape.hpp
@@ -39,17 +39,16 @@ class Shape {
   }
   size_t dims() const noexcept { return dims_.size(); }
   size_t get_index(const std::vector<size_t>& coords) const;
-  friend std::ostream& operator<<(std::ostream& os, const Shape& shape);
-  bool operator==(const Shape& other) const noexcept {
-    if (dims_.size() != other.dims_.size()) {
-      return false;
+  bool operator==(const Shape& other) const {
+    if (dims_.size() != other.dims_.size()) return false;
+    for (size_t i = 0; i < dims_.size(); ++i) {
+      if (dims_[i] != other.dims_[i]) return false;
     }
-    return std::equal(dims_.begin(), dims_.end(), other.dims_.begin());
+    return true;
   }
-  bool operator!=(const Shape& other) const noexcept {
-    return !(*this == other);
-  }
+  bool operator!=(const Shape& other) const { return !(*this == other); }
+  friend std::ostream& operator<<(std::ostream& os, const Shape& shape);
 
  private:
   std::vector<size_t> dims_;
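Reviewer note: a minimal usage sketch of the ReduceLayer API declared above, mirroring the conventions of the tests in this PR (`make_tensor` and `get` are assumed to behave as they do there); not part of the diff.

```cpp
#include "layers/ReduceLayer.hpp"

using namespace it_lab_ai;

int main() {
  // Mean over axis 0 of a 2x2 tensor, keeping the reduced dimension.
  ReduceLayer layer(ReduceLayer::Operation::kMean, /*keepdims=*/1);
  Tensor input = make_tensor<float>({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
  Tensor axes = make_tensor<int>({0});  // the axes tensor must be of type int
  Tensor output;
  layer.run(input, axes, output);
  // output now has shape {1, 2} and holds {2.0f, 3.0f}.
  return 0;
}
```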
diff --git a/src/layers/ReduceLayer.cpp b/src/layers/ReduceLayer.cpp
new file mode 100644
index 00000000..0ed989a5
--- /dev/null
+++ b/src/layers/ReduceLayer.cpp
@@ -0,0 +1,213 @@
+#include "layers/ReduceLayer.hpp"
+
+#include <algorithm>
+#include <limits>
+#include <numeric>
+
+namespace it_lab_ai {
+
+ReduceLayer::ReduceLayer(Operation op, int64_t keepdims)
+    : op_(op), keepdims_(keepdims) {}
+
+void ReduceLayer::normalize_axes(const Shape& input_shape,
+                                 std::vector<int64_t>& axes) {
+  const auto rank = static_cast<int64_t>(input_shape.dims());
+
+  if (rank == 0) {
+    if (!axes.empty()) {
+      throw std::runtime_error("ReduceLayer: Axis specified for scalar input");
+    }
+    return;
+  }
+
+  if (axes.empty()) {
+    axes.resize(rank);
+    std::iota(axes.begin(), axes.end(), 0);
+    return;
+  }
+
+  for (auto& axis : axes) {
+    if (axis < -rank || axis >= rank) {
+      throw std::runtime_error(
+          "ReduceLayer: Axis out of range. Valid range is [-" +
+          std::to_string(rank) + ", " + std::to_string(rank - 1) + "]");
+    }
+
+    if (axis < 0) {
+      axis += rank;
+    }
+  }
+
+  std::sort(axes.begin(), axes.end());
+  axes.erase(std::unique(axes.begin(), axes.end()), axes.end());
+}
+
+Shape ReduceLayer::calculate_output_shape(
+    const Shape& input_shape, const std::vector<int64_t>& axes) const {
+  if (input_shape.dims() == 0) {
+    return Shape({});
+  }
+
+  std::vector<size_t> new_dims;
+
+  if (keepdims_) {
+    new_dims.resize(input_shape.dims(), 1);
+    for (int64_t i = 0; i < static_cast<int64_t>(input_shape.dims()); ++i) {
+      bool is_axis = std::find(axes.begin(), axes.end(), i) != axes.end();
+      if (!is_axis) {
+        new_dims[i] = input_shape[i];
+      }
+    }
+  } else {
+    for (int64_t i = 0; i < static_cast<int64_t>(input_shape.dims()); ++i) {
+      bool is_axis = std::find(axes.begin(), axes.end(), i) != axes.end();
+      if (!is_axis) {
+        new_dims.push_back(input_shape[i]);
+      }
+    }
+    if (new_dims.empty()) {
+      new_dims.push_back(1);
+    }
+  }
+
+  return Shape(new_dims);
+}
+
+template <typename T>
+void ReduceLayer::compute(const Tensor& input, const Shape& output_shape,
+                          const std::vector<int64_t>& axes,
+                          Tensor& output) const {
+  const auto& input_data = *input.as<T>();
+  std::vector<T> output_data(output_shape.count());
+  std::vector<size_t> counts(output_shape.count(), 0);
+
+  switch (op_) {
+    case Operation::kSum:
+    case Operation::kMean:
+      std::fill(output_data.begin(), output_data.end(), T(0));
+      break;
+    case Operation::kMult:
+      std::fill(output_data.begin(), output_data.end(), T(1));
+      break;
+    case Operation::kMax:
+      std::fill(output_data.begin(), output_data.end(),
+                std::numeric_limits<T>::lowest());
+      break;
+    case Operation::kMin:
+      std::fill(output_data.begin(), output_data.end(),
+                std::numeric_limits<T>::max());
+      break;
+  }
+
+  const auto& input_shape = input.get_shape();
+  const auto input_rank = static_cast<int64_t>(input_shape.dims());
+
+  std::vector<size_t> in_coords(input_rank, 0);
+  for (size_t in_idx = 0; in_idx < input_data.size(); ++in_idx) {
+    std::vector<size_t> out_coords;
+    if (keepdims_) {
+      out_coords.resize(input_rank, 0);
+      for (int64_t i = 0; i < input_rank; ++i) {
+        if (std::find(axes.begin(), axes.end(), i) == axes.end()) {
+          out_coords[i] = in_coords[i];
+        }
+      }
+    } else {
+      for (int64_t i = 0; i < input_rank; ++i) {
+        if (std::find(axes.begin(), axes.end(), i) == axes.end()) {
+          out_coords.push_back(in_coords[i]);
+        }
+      }
+    }
+
+    size_t out_idx = 0;
+    size_t stride = 1;
+    for (size_t i = out_coords.size(); i-- > 0;) {
+      out_idx += out_coords[i] * stride;
+      stride *= output_shape[i];
+    }
+
+    switch (op_) {
+      case Operation::kSum:
+      case Operation::kMean:
+        output_data[out_idx] += input_data[in_idx];
+        counts[out_idx]++;
+        break;
+      case Operation::kMult:
+        output_data[out_idx] *= input_data[in_idx];
+        break;
+      case Operation::kMax:
+        if (input_data[in_idx] > output_data[out_idx]) {
+          output_data[out_idx] = input_data[in_idx];
+        }
+        break;
+      case Operation::kMin:
+        if (input_data[in_idx] < output_data[out_idx]) {
+          output_data[out_idx] = input_data[in_idx];
+        }
+        break;
+    }
+
+    for (int64_t i = input_rank; i-- > 0;) {
+      ++in_coords[i];
+      if (in_coords[i] < input_shape[i]) break;
+      in_coords[i] = 0;
+    }
+  }
+
+  if (op_ == Operation::kMean) {
+    for (size_t i = 0; i < output_data.size(); ++i) {
+      if (counts[i] != 0) {
+        output_data[i] /= static_cast<T>(counts[i]);
+      }
+    }
+  }
+
+  output = make_tensor(output_data, output_shape);
+}
+
+template void ReduceLayer::compute<float>(const Tensor&, const Shape&,
+                                          const std::vector<int64_t>&,
+                                          Tensor&) const;
+template void ReduceLayer::compute<int>(const Tensor&, const Shape&,
+                                        const std::vector<int64_t>&,
+                                        Tensor&) const;
+
+void ReduceLayer::run(const Tensor& input, Tensor& output) {
+  run(input, Tensor(), output);
+}
+
+void ReduceLayer::run(const Tensor& input, const Tensor& axes, Tensor& output) {
+  if (input.get_shape().count() == 0) {
+    output = make_tensor<float>({0.0F}, {});
+    return;
+  }
+
+  std::vector<int64_t> axes_indices;
+  if (axes.get_shape().dims() > 0) {
+    if (axes.get_type() == Type::kInt) {
+      const auto* axes_data = axes.as<int>();
+      axes_indices.assign(axes_data->begin(), axes_data->end());
+    } else {
+      throw std::runtime_error("ReduceLayer: Axes tensor must be of type int");
+    }
+  }
+
+  normalize_axes(input.get_shape(), axes_indices);
+  Shape output_shape = calculate_output_shape(input.get_shape(), axes_indices);
+
+  switch (input.get_type()) {
+    case Type::kFloat:
+      compute<float>(input, output_shape, axes_indices, output);
+      break;
+    case Type::kInt:
+      compute<int>(input, output_shape, axes_indices, output);
+      break;
+    default:
+      throw std::runtime_error(
+          "ReduceLayer: Unsupported input tensor type. Only float and int are "
+          "supported");
+  }
+}
+
+}  // namespace it_lab_ai
\ No newline at end of file
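Reviewer note: the inner loop of compute() maps each input element to its output slot by zeroing (keepdims) or dropping (no keepdims) the reduced coordinates, then folding the remaining coordinates into a flat index, innermost dimension first. A self-contained sketch of that arithmetic for a {2, 2, 2} input reduced over axis 1 with keepdims:

```cpp
#include <cstdio>
#include <vector>

int main() {
  // Output shape after reducing axis 1 of {2, 2, 2} with keepdims: {2, 1, 2}.
  std::vector<size_t> out_shape = {2, 1, 2};
  // Input coordinates {1, 1, 1} with the reduced axis zeroed become {1, 0, 1}.
  std::vector<size_t> out_coords = {1, 0, 1};

  size_t out_idx = 0;
  size_t stride = 1;
  for (size_t i = out_coords.size(); i-- > 0;) {  // same loop as compute()
    out_idx += out_coords[i] * stride;
    stride *= out_shape[i];
  }
  std::printf("flat index = %zu\n", out_idx);  // prints 3
  return 0;
}
```

Input elements {1, 0, 1} and {1, 1, 1} both land on flat index 3, which is how values along the reduced axis accumulate into one output slot.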
diff --git a/test/single_layer/test_reducelayer.cpp b/test/single_layer/test_reducelayer.cpp
new file mode 100644
index 00000000..4fbf048c
--- /dev/null
+++ b/test/single_layer/test_reducelayer.cpp
@@ -0,0 +1,275 @@
+#include <gtest/gtest.h>
+
+#include "layers/ReduceLayer.hpp"
+#include "layers/Tensor.hpp"
+
+namespace it_lab_ai {
+
+TEST(ReduceLayer, DefaultConstructor) { ASSERT_NO_THROW(ReduceLayer layer); }
+
+TEST(ReduceLayer, SumAllAxesKeepDims) {
+  ReduceLayer layer(1);
+  Tensor input = make_tensor<float>({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
+  Tensor output;
+
+  layer.run(input, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({1, 1}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0}), 10.0f);
+}
+
+TEST(ReduceLayer, SumAlongAxis0) {
+  ReduceLayer layer(0);
+  Tensor input = make_tensor<float>({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
+  Tensor axes = make_tensor<int>({0});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({2}));
+  EXPECT_FLOAT_EQ(output.get<float>({0}), 4.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1}), 6.0f);
+}
+
+TEST(ReduceLayer, SumAlongAxis1KeepDims) {
+  ReduceLayer layer(1);
+  Tensor input = make_tensor<float>({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
+  Tensor axes = make_tensor<int>({1});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({2, 1}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0}), 3.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1, 0}), 7.0f);
+}
+
+TEST(ReduceLayer, InvalidAxisThrows) {
+  ReduceLayer layer;
+  Tensor input = make_tensor<float>({1.0f, 2.0f}, {2});
+  Tensor axes = make_tensor<int>({2});
+
+  Tensor output;
+  ASSERT_THROW(layer.run(input, axes, output), std::runtime_error);
+}
+
+TEST(ReduceLayer, IntTensorSupport) {
+  ReduceLayer layer(0);
+  Tensor input = make_tensor<int>({1, 2, 3, 4}, {2, 2});
+  Tensor axes = make_tensor<int>({0});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({2}));
+  EXPECT_EQ(output.get<int>({0}), 4);
+  EXPECT_EQ(output.get<int>({1}), 6);
+}
+
+TEST(ReduceLayer, 3DTensorReduction) {
+  ReduceLayer layer(1);
+  Tensor input = make_tensor<float>({1, 2, 3, 4, 5, 6, 7, 8}, {2, 2, 2});
+  Tensor axes = make_tensor<int>({2});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({2, 2, 1}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 0}), 3.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 1, 0}), 7.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1, 0, 0}), 11.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1, 1, 0}), 15.0f);
+}
+
+TEST(ReduceLayer, 3DReductionAxis1) {
+  ReduceLayer layer(1);
+  Tensor input = make_tensor<float>({1, 2, 3, 4, 5, 6, 7, 8}, {2, 2, 2});
+  Tensor axes = make_tensor<int>({1});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({2, 1, 2}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 0}), 4.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 1}), 6.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1, 0, 0}), 12.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1, 0, 1}), 14.0f);
+}
+
+TEST(ReduceLayer, 4DReductionAxis0) {
+  ReduceLayer layer(1);
+  Tensor input = make_tensor<float>(
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {2, 2, 2, 2});
+
+  Tensor axes = make_tensor<int>({0});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({1, 2, 2, 2}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 0, 0}), 1 + 9);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 0, 1}), 2 + 10);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 1, 0}), 3 + 11);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 1, 1}), 4 + 12);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 1, 0, 0}), 5 + 13);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 1, 0, 1}), 6 + 14);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 1, 1, 0}), 7 + 15);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 1, 1, 1}), 8 + 16);
+}
+
+TEST(ReduceLayer, 3DFullReduction) {
+  ReduceLayer layer(1);
+  Tensor input = make_tensor<float>({1, 2, 3, 4, 5, 6, 7, 8}, {2, 2, 2});
+
+  Tensor output;
+  layer.run(input, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({1, 1, 1}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 0}), 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8);
+}
+
+TEST(ReduceLayer, Resnet) {
+  ReduceLayer layer(1);
+  Tensor input = make_tensor<float>(
+      {1.0f,  2.0f,  3.0f,  4.0f,  5.0f,  6.0f,  7.0f,  8.0f,  9.0f,
+       10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
+       19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f,
+       28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f,
+       37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f,
+       46.0f, 47.0f, 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f},
+      {1, 2, 3, 3, 3});
+
+  Tensor axes = make_tensor<int>({1});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({1, 1, 3, 3, 3}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 0, 0, 0}), 1.0f + 28.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 2, 2, 2}), 27.0f + 54.0f);
+}
+
+TEST(ReduceLayer, NegativeAxisBasic) {
+  ReduceLayer layer(0);
+  Tensor input = make_tensor<float>({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
+  Tensor axes = make_tensor<int>({-1});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({2}));
+  EXPECT_FLOAT_EQ(output.get<float>({0}), 3.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1}), 7.0f);
+}
+
+TEST(ReduceLayer, NegativeAxis3DTensor) {
+  ReduceLayer layer(1);
+  Tensor input = make_tensor<float>({1, 2, 3, 4, 5, 6, 7, 8}, {2, 2, 2});
+  Tensor axes = make_tensor<int>({-2});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({2, 1, 2}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 0}), 4.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 1}), 6.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1, 0, 0}), 12.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1, 0, 1}), 14.0f);
+}
+
+TEST(ReduceLayer, ReduceMean) {
+  ReduceLayer layer(ReduceLayer::Operation::kMean, 1);
+  Tensor input = make_tensor<float>({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
+  Tensor output;
+  Tensor axes = make_tensor<int>({0});
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({1, 2}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0}), 2.0f);
+}
+
+TEST(ReduceLayer, ReduceMeanResnet) {
+  ReduceLayer layer(ReduceLayer::Operation::kMean, 1);
+  Tensor input = make_tensor<float>({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
+  Tensor output;
+  Tensor axes = make_tensor<int>({0});
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({1, 2}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0}), 2.0f);
+}
+
+TEST(ReduceLayer, MultAlongAxis0) {
+  ReduceLayer layer(ReduceLayer::Operation::kMult, 0);
+  Tensor input = make_tensor<float>({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
+  Tensor axes = make_tensor<int>({0});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({2}));
+  EXPECT_FLOAT_EQ(output.get<float>({0}), 3.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1}), 8.0f);
+}
+
+TEST(ReduceLayer, MaxAlongAxis1KeepDims) {
+  ReduceLayer layer(ReduceLayer::Operation::kMax, 1);
+  Tensor input = make_tensor<float>({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
+  Tensor axes = make_tensor<int>({1});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({2, 1}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0}), 2.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1, 0}), 4.0f);
+}
+
+TEST(ReduceLayer, Min3DTensorReduction) {
+  ReduceLayer layer(ReduceLayer::Operation::kMin, 1);
+  Tensor input = make_tensor<float>({1, 2, 3, 4, 5, 6, 7, 8}, {2, 2, 2});
+  Tensor axes = make_tensor<int>({2});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({2, 2, 1}));
+  EXPECT_FLOAT_EQ(output.get<float>({0, 0, 0}), 1.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({0, 1, 0}), 3.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1, 0, 0}), 5.0f);
+  EXPECT_FLOAT_EQ(output.get<float>({1, 1, 0}), 7.0f);
+}
+
+TEST(ReduceLayer, ResnetReduceMean) {
+  ReduceLayer layer(ReduceLayer::Operation::kMean, 1);
+  Tensor input = make_tensor<float>(
+      {1.0f,  2.0f,  3.0f,  4.0f,  5.0f,  6.0f,  7.0f,  8.0f,  9.0f,
+       10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
+       19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f},
+      {1, 1, 3, 3, 3});
+
+  Tensor axes = make_tensor<int>({2, 3});
+  Tensor output;
+
+  layer.run(input, axes, output);
+
+  EXPECT_EQ(output.get_shape(), Shape({1, 1, 1, 1, 3}));
+  EXPECT_FLOAT_EQ(
+      output.get<float>({0, 0, 0, 0, 0}),
+      (1.0f + 4.0f + 7.0f + 10.0f + 13.0f + 16.0f + 19.0f + 22.0f + 25.0f) /
+          9.0f);
+
+  EXPECT_FLOAT_EQ(
+      output.get<float>({0, 0, 0, 0, 1}),
+      (2.0f + 5.0f + 8.0f + 11.0f + 14.0f + 17.0f + 20.0f + 23.0f + 26.0f) /
+          9.0f);
+
+  EXPECT_FLOAT_EQ(
+      output.get<float>({0, 0, 0, 0, 2}),
+      (3.0f + 6.0f + 9.0f + 12.0f + 15.0f + 18.0f + 21.0f + 24.0f + 27.0f) /
+          9.0f);
+}
+
+}  // namespace it_lab_ai
\ No newline at end of file
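Reviewer note: normalize_axes() sorts the axis list and removes duplicates, so an axis given twice (for example once as 1 and once as -1 on a rank-2 tensor) is reduced only once, but the suite above does not pin that down. A hypothetical test in the same style (name and expectations are mine, not part of the diff):

```cpp
TEST(ReduceLayer, DuplicateAxesReducedOnce) {
  ReduceLayer layer(0);
  Tensor input = make_tensor<float>({1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
  // 1 and -1 both normalize to axis 1; std::unique collapses them.
  Tensor axes = make_tensor<int>({1, -1});
  Tensor output;

  layer.run(input, axes, output);

  EXPECT_EQ(output.get_shape(), Shape({2}));
  EXPECT_FLOAT_EQ(output.get<float>({0}), 3.0f);  // 1 + 2
  EXPECT_FLOAT_EQ(output.get<float>({1}), 7.0f);  // 3 + 4
}
```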