diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3d3995a85e7c..2e55c9eb8c03 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -52,7 +52,6 @@ repos:
     rev: 24.4.2
     hooks:
       - id: black
-        args: ["--check", "--diff", "--color"]
   - repo: https://github.com/pycqa/isort
     rev: 5.13.2
     hooks:
diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt
index da46cf99e3f5..d4ac718954a6 100644
--- a/dpnp/CMakeLists.txt
+++ b/dpnp/CMakeLists.txt
@@ -58,6 +58,7 @@ add_subdirectory(backend/extensions/fft)
 add_subdirectory(backend/extensions/lapack)
 add_subdirectory(backend/extensions/vm)
 add_subdirectory(backend/extensions/ufunc)
+add_subdirectory(backend/extensions/statistics)
 add_subdirectory(dpnp_algo)
 add_subdirectory(dpnp_utils)
diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt
new file mode 100644
index 000000000000..20c868066576
--- /dev/null
+++ b/dpnp/backend/extensions/statistics/CMakeLists.txt
@@ -0,0 +1,88 @@
+# *****************************************************************************
+# Copyright (c) 2016-2024, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+set(python_module_name _statistics_impl)
+set(_module_src
+    ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/histogram.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/histogram_common.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/statistics_py.cpp
+)
+
+pybind11_add_module(${python_module_name} MODULE ${_module_src})
+add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+
+if(_dpnp_sycl_targets)
+    # make fat binary
+    target_compile_options(
+        ${python_module_name}
+        PRIVATE
+        -fsycl-targets=${_dpnp_sycl_targets}
+    )
+    target_link_options(
+        ${python_module_name}
+        PRIVATE
+        -fsycl-targets=${_dpnp_sycl_targets}
+    )
+endif()
+
+if (WIN32)
+    if (${CMAKE_VERSION} VERSION_LESS "3.27")
+        # this is a workaround for target_link_options inserting the option
+        # after the -link option, which causes the linker to ignore it
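+        # (the ordering issue appears to be fixed in CMake 3.27, hence the
+        # version guard above)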
+ set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() +endif() + +set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) + +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + +if (WIN32) + target_compile_options(${python_module_name} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) +else() + target_compile_options(${python_module_name} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) +endif() + +target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) + +if (DPNP_GENERATE_COVERAGE) + target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) +endif() + +install(TARGETS ${python_module_name} + DESTINATION "dpnp/backend/extensions/statistics" +) diff --git a/dpnp/backend/extensions/statistics/common.cpp b/dpnp/backend/extensions/statistics/common.cpp new file mode 100644 index 000000000000..961205293725 --- /dev/null +++ b/dpnp/backend/extensions/statistics/common.cpp @@ -0,0 +1,124 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include "common.hpp" +#include "utils/type_dispatch.hpp" +#include + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace statistics +{ +namespace common +{ + +size_t get_max_local_size(const sycl::device &device) +{ + constexpr const int default_max_cpu_local_size = 256; + constexpr const int default_max_gpu_local_size = 0; + + return get_max_local_size(device, default_max_cpu_local_size, + default_max_gpu_local_size); +} + +size_t get_max_local_size(const sycl::device &device, + int cpu_local_size_limit, + int gpu_local_size_limit) +{ + int max_work_group_size = + device.get_info(); + if (device.is_cpu() && cpu_local_size_limit > 0) { + return std::min(cpu_local_size_limit, max_work_group_size); + } + else if (device.is_gpu() && gpu_local_size_limit > 0) { + return std::min(gpu_local_size_limit, max_work_group_size); + } + + return max_work_group_size; +} + +sycl::nd_range<1> + make_ndrange(size_t global_size, size_t local_range, size_t work_per_item) +{ + return make_ndrange(sycl::range<1>(global_size), + sycl::range<1>(local_range), + sycl::range<1>(work_per_item)); +} + +size_t get_local_mem_size_in_bytes(const sycl::device &device) +{ + // Reserving 1kb for runtime needs + constexpr const size_t reserve = 1024; + + return get_local_mem_size_in_bytes(device, reserve); +} + +size_t get_local_mem_size_in_bytes(const sycl::device &device, size_t reserve) +{ + size_t local_mem_size = + device.get_info(); + return local_mem_size - reserve; +} + +pybind11::dtype dtype_from_typenum(int dst_typenum) +{ + dpctl_td_ns::typenum_t dst_typenum_t = + static_cast(dst_typenum); + switch (dst_typenum_t) { + case dpctl_td_ns::typenum_t::BOOL: + return py::dtype("?"); + case dpctl_td_ns::typenum_t::INT8: + return py::dtype("i1"); + case dpctl_td_ns::typenum_t::UINT8: + return py::dtype("u1"); + case dpctl_td_ns::typenum_t::INT16: + return py::dtype("i2"); + case dpctl_td_ns::typenum_t::UINT16: + return py::dtype("u2"); + case dpctl_td_ns::typenum_t::INT32: + return py::dtype("i4"); + case dpctl_td_ns::typenum_t::UINT32: + return py::dtype("u4"); + case dpctl_td_ns::typenum_t::INT64: + return py::dtype("i8"); + case dpctl_td_ns::typenum_t::UINT64: + return py::dtype("u8"); + case dpctl_td_ns::typenum_t::HALF: + return py::dtype("f2"); + case dpctl_td_ns::typenum_t::FLOAT: + return py::dtype("f4"); + case dpctl_td_ns::typenum_t::DOUBLE: + return py::dtype("f8"); + case dpctl_td_ns::typenum_t::CFLOAT: + return py::dtype("c8"); + case dpctl_td_ns::typenum_t::CDOUBLE: + return py::dtype("c16"); + default: + throw py::value_error("Unrecognized dst_typeid"); + } +} + +} // namespace common +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/common.hpp b/dpnp/backend/extensions/statistics/common.hpp new file mode 100644 index 000000000000..518a3cf86a9e --- /dev/null +++ b/dpnp/backend/extensions/statistics/common.hpp @@ -0,0 +1,191 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include +#include + +// clang-format off +// math_utils.hpp doesn't include sycl header but uses sycl types +// so sycl.hpp must be included before math_utils.hpp +#include +#include "utils/math_utils.hpp" +// clang-format on + +namespace statistics +{ +namespace common +{ + +template +constexpr auto CeilDiv(N n, D d) +{ + return (n + d - 1) / d; +} + +template +constexpr auto Align(N n, D d) +{ + return CeilDiv(n, d) * d; +} + +template +struct AtomicOp +{ + static void add(T &lhs, const T value) + { + sycl::atomic_ref lh(lhs); + lh += value; + } +}; + +template +struct AtomicOp, Order, Scope> +{ + static void add(std::complex &lhs, const std::complex value) + { + T *_lhs = reinterpret_cast(lhs); + const T *_val = reinterpret_cast(value); + sycl::atomic_ref lh0(_lhs[0]); + lh0 += _val[0]; + sycl::atomic_ref lh1(_lhs[1]); + lh1 += _val[1]; + } +}; + +template +struct Less +{ + bool operator()(const T &lhs, const T &rhs) const + { + return std::less{}(lhs, rhs); + } +}; + +template +struct Less> +{ + bool operator()(const std::complex &lhs, + const std::complex &rhs) const + { + return dpctl::tensor::math_utils::less_complex(lhs, rhs); + } +}; + +template +struct IsNan +{ + static bool isnan(const T &v) + { + if constexpr (std::is_floating_point_v || + std::is_same_v) { + return sycl::isnan(v); + } + + return false; + } +}; + +template +struct IsNan> +{ + static bool isnan(const std::complex &v) + { + T real1 = std::real(v); + T imag1 = std::imag(v); + return sycl::isnan(real1) || sycl::isnan(imag1); + } +}; + +size_t get_max_local_size(const sycl::device &device); +size_t get_max_local_size(const sycl::device &device, + int cpu_local_size_limit, + int gpu_local_size_limit); + +inline size_t get_max_local_size(const sycl::queue &queue) +{ + return get_max_local_size(queue.get_device()); +} + +inline size_t get_max_local_size(const sycl::queue &queue, + int cpu_local_size_limit, + int gpu_local_size_limit) +{ + return get_max_local_size(queue.get_device(), cpu_local_size_limit, + gpu_local_size_limit); +} + +size_t get_local_mem_size_in_bytes(const sycl::device &device); +size_t get_local_mem_size_in_bytes(const sycl::device &device, size_t reserve); + +inline size_t get_local_mem_size_in_bytes(const sycl::queue &queue) +{ + return get_local_mem_size_in_bytes(queue.get_device()); +} + +inline size_t get_local_mem_size_in_bytes(const sycl::queue &queue, + size_t reserve) +{ + return 
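+        // delegates to the device overload, which subtracts `reserve`
+        // bytes from the device's reported local memory size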
get_local_mem_size_in_bytes(queue.get_device(), reserve); +} + +template +size_t get_local_mem_size_in_items(const sycl::device &device) +{ + return get_local_mem_size_in_bytes(device) / sizeof(T); +} + +template +size_t get_local_mem_size_in_items(const sycl::device &device, size_t reserve) +{ + return get_local_mem_size_in_bytes(device, sizeof(T) * reserve) / sizeof(T); +} + +template +sycl::nd_range make_ndrange(const sycl::range &global_range, + const sycl::range &local_range, + const sycl::range &work_per_item) +{ + sycl::range aligned_global_range; + + for (int i = 0; i < Dims; ++i) { + aligned_global_range[i] = + Align(CeilDiv(global_range[i], work_per_item[i]), local_range[i]); + } + + return sycl::nd_range(aligned_global_range, local_range); +} + +sycl::nd_range<1> + make_ndrange(size_t global_size, size_t local_range, size_t work_per_item); + +// This function is a copy from dpctl because it is not available in the public +// headers of dpctl. +pybind11::dtype dtype_from_typenum(int dst_typenum); + +} // namespace common +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/dispatch_table.hpp b/dpnp/backend/extensions/statistics/dispatch_table.hpp new file mode 100644 index 000000000000..ce11de891318 --- /dev/null +++ b/dpnp/backend/extensions/statistics/dispatch_table.hpp @@ -0,0 +1,292 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include + +#include "utils/type_dispatch.hpp" +#include +#include +#include +#include + +#include "common.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +namespace py = pybind11; + +namespace statistics +{ +namespace common +{ + +template +struct one_of +{ + static_assert(std::is_same_v>, + "one_of: second parameter cannot be empty std::tuple"); + static_assert(false, "one_of: second parameter must be std::tuple"); +}; + +template +struct one_of> +{ + static constexpr bool value = + std::is_same_v || one_of>::value; +}; + +template +struct one_of> +{ + static constexpr bool value = std::is_same_v; +}; + +template +constexpr bool one_of_v = one_of::value; + +template +using Table = FnT[dpctl_td_ns::num_types]; +template +using Table2 = Table[dpctl_td_ns::num_types]; + +using TypeId = int32_t; +using TypesPair = std::pair; + +struct int_pair_hash +{ + inline size_t operator()(const TypesPair &p) const + { + std::hash hasher; + return hasher(size_t(p.first) << (8 * sizeof(TypeId)) | + size_t(p.second)); + } +}; + +using SupportedTypesList = std::vector; +using SupportedTypesList2 = std::vector; +using SupportedTypesSet = std::unordered_set; +using SupportedTypesSet2 = std::unordered_set; + +using DType = py::dtype; +using DTypePair = std::pair; + +using SupportedDTypeList = std::vector; +using SupportedDTypeList2 = std::vector; + +template + typename Func> +struct TableBuilder2 +{ + template + struct impl + { + static constexpr bool is_defined = + one_of_v, SupportedTypes>; + + _FnT get() + { + if constexpr (is_defined) { + return Func::impl; + } + else { + return nullptr; + } + } + }; + + using type = + dpctl_td_ns::DispatchTableBuilder; +}; + +template +class DispatchTable2 +{ +public: + DispatchTable2(std::string first_name, std::string second_name) + : first_name(first_name), second_name(second_name) + { + } + + template + typename Func> + void populate_dispatch_table() + { + using TBulder = typename TableBuilder2::type; + TBulder builder; + + builder.populate_dispatch_table(table); + populate_supported_types(); + } + + FnT get_unsafe(int first_typenum, int second_typenum) const + { + auto array_types = dpctl_td_ns::usm_ndarray_types(); + const int first_type_id = + array_types.typenum_to_lookup_id(first_typenum); + const int second_type_id = + array_types.typenum_to_lookup_id(second_typenum); + + return table[first_type_id][second_type_id]; + } + + FnT get(int first_typenum, int second_typenum) const + { + auto fn = get_unsafe(first_typenum, second_typenum); + + if (fn == nullptr) { + auto array_types = dpctl_td_ns::usm_ndarray_types(); + const int first_type_id = + array_types.typenum_to_lookup_id(first_typenum); + const int second_type_id = + array_types.typenum_to_lookup_id(second_typenum); + + py::dtype first_dtype = dtype_from_typenum(first_type_id); + auto first_type_pos = + std::find(supported_first_type.begin(), + supported_first_type.end(), first_dtype); + if (first_type_pos == supported_first_type.end()) { + py::str types = py::str(py::cast(supported_first_type)); + py::str dtype = py::str(first_dtype); + + py::str err_msg = + py::str("'" + first_name + "' has unsupported type '") + + dtype + + py::str("'." 
+ " Supported types are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + + py::dtype second_dtype = dtype_from_typenum(second_type_id); + auto second_type_pos = + std::find(supported_second_type.begin(), + supported_second_type.end(), second_dtype); + if (second_type_pos == supported_second_type.end()) { + py::str types = py::str(py::cast(supported_second_type)); + py::str dtype = py::str(second_dtype); + + py::str err_msg = + py::str("'" + second_name + "' has unsupported type '") + + dtype + + py::str("'." + " Supported types are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + + py::str first_dtype_str = py::str(first_dtype); + py::str second_dtype_str = py::str(second_dtype); + py::str types = py::str(py::cast(all_supported_types)); + + py::str err_msg = + py::str("'" + first_name + "' and '" + second_name + + "' has unsupported types combination: ('") + + first_dtype_str + py::str("', '") + second_dtype_str + + py::str("')." + " Supported types combinations are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + + return fn; + } + + const SupportedDTypeList &get_supported_first_type() const + { + return supported_first_type; + } + + const SupportedDTypeList &get_supported_second_type() const + { + return supported_second_type; + } + + const SupportedDTypeList2 &get_all_supported_types() const + { + return all_supported_types; + } + +private: + void populate_supported_types() + { + SupportedTypesSet first_supported_types_set; + SupportedTypesSet second_supported_types_set; + SupportedTypesSet2 all_supported_types_set; + + for (int i = 0; i < dpctl_td_ns::num_types; ++i) { + for (int j = 0; j < dpctl_td_ns::num_types; ++j) { + if (table[i][j] != nullptr) { + all_supported_types_set.emplace(i, j); + first_supported_types_set.emplace(i); + second_supported_types_set.emplace(j); + } + } + } + + auto to_supported_dtype_list = [](const auto &supported_set, + auto &supported_list) { + SupportedTypesList lst(supported_set.begin(), supported_set.end()); + std::sort(lst.begin(), lst.end()); + supported_list.resize(supported_set.size()); + std::transform(lst.begin(), lst.end(), supported_list.begin(), + [](TypeId i) { return dtype_from_typenum(i); }); + }; + + to_supported_dtype_list(first_supported_types_set, + supported_first_type); + to_supported_dtype_list(second_supported_types_set, + supported_second_type); + + SupportedTypesList2 lst(all_supported_types_set.begin(), + all_supported_types_set.end()); + std::sort(lst.begin(), lst.end()); + all_supported_types.resize(all_supported_types_set.size()); + std::transform(lst.begin(), lst.end(), all_supported_types.begin(), + [](TypesPair p) { + return DTypePair(dtype_from_typenum(p.first), + dtype_from_typenum(p.second)); + }); + } + + std::string first_name; + std::string second_name; + + SupportedDTypeList supported_first_type; + SupportedDTypeList supported_second_type; + SupportedDTypeList2 all_supported_types; + + Table2 table; +}; + +} // namespace common +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/histogram.cpp b/dpnp/backend/extensions/statistics/histogram.cpp new file mode 100644 index 000000000000..4317280b5a8d --- /dev/null +++ b/dpnp/backend/extensions/statistics/histogram.cpp @@ -0,0 +1,287 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +// dpctl tensor headers +#include "dpctl4pybind11.hpp" +#include "utils/type_dispatch.hpp" + +#include "histogram.hpp" +#include "histogram_common.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +using dpctl::tensor::usm_ndarray; + +using namespace statistics::histogram; +using namespace statistics::common; + +namespace +{ + +template +struct HistogramEdges +{ + static constexpr bool const sync_after_init = DataStorage::sync_after_init; + using boundsT = std::tuple; + + HistogramEdges(const T *global_data, size_t size, sycl::handler &cgh) + : data(global_data, sycl::range<1>(size), cgh) + { + } + + template + void init(const sycl::nd_item<_Dims> &item) const + { + data.init(item); + } + + boundsT get_bounds() const + { + auto min = data.get_ptr()[0]; + auto max = data.get_ptr()[data.size() - 1]; + return {min, max}; + } + + template + size_t get_bin(const sycl::nd_item<_Dims> &, + const dT *val, + const boundsT &) const + { + uint32_t edges_count = data.size(); + uint32_t bins_count = edges_count - 1; + const auto *bins = data.get_ptr(); + + uint32_t bin = + std::upper_bound(bins, bins + edges_count, val[0], Less
<T>{}) -
+            bins - 1;
+        bin = std::min(bin, bins_count - 1);
+
+        return bin;
+    }
+
+    template <typename dT>
+    bool in_bounds(const dT *val, const boundsT &bounds) const
+    {
+        Less<T> _less;
+        return !_less(val[0], std::get<0>(bounds)) &&
+               !_less(std::get<1>(bounds), val[0]) && !IsNan<dT>
::isnan(val[0]); + } + +private: + DataStorage data; +}; + +template +using CachedEdges = HistogramEdges>; + +template +using UncachedEdges = HistogramEdges>; + +template +struct histogram_kernel +{ + static sycl::event impl(sycl::queue &exec_q, + const void *vin, + const void *vbins_edges, + const void *vweights, + void *vout, + const size_t bins_count, + const size_t size, + const std::vector &depends) + { + const T *in = static_cast(vin); + const BinsT *bins_edges = static_cast(vbins_edges); + const HistType *weights = static_cast(vweights); + HistType *out = static_cast(vout); + + auto device = exec_q.get_device(); + const auto local_size = get_max_local_size(device); + + constexpr uint32_t WorkPI = 128; // empirically found number + + const auto nd_range = make_ndrange(size, local_size, WorkPI); + + return exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + constexpr uint32_t dims = 1; + + auto dispatch_edges = [&](uint32_t local_mem, const auto &weights, + auto &hist) { + if (device.is_gpu() && (local_mem >= bins_count + 1)) { + auto edges = CachedEdges(bins_edges, bins_count + 1, cgh); + submit_histogram(in, size, dims, WorkPI, hist, edges, + weights, nd_range, cgh); + } + else { + auto edges = UncachedEdges(bins_edges, bins_count + 1, cgh); + submit_histogram(in, size, dims, WorkPI, hist, edges, + weights, nd_range, cgh); + } + }; + + auto dispatch_bins = [&](const auto &weights) { + const auto local_mem_size = + get_local_mem_size_in_items(device); + if (local_mem_size >= bins_count) { + const auto local_hist_count = get_local_hist_copies_count( + local_mem_size, local_size, bins_count); + + auto hist = HistWithLocalCopies( + out, bins_count, local_hist_count, cgh); + const auto free_local_mem = local_mem_size - hist.size(); + + dispatch_edges(free_local_mem, weights, hist); + } + else { + auto hist = HistGlobalMemory(out); + auto edges = UncachedEdges(bins_edges, bins_count + 1, cgh); + submit_histogram(in, size, dims, WorkPI, hist, edges, + weights, nd_range, cgh); + } + }; + + if (weights) { + auto _weights = Weights(weights); + dispatch_bins(_weights); + } + else { + auto _weights = NoWeights(); + dispatch_bins(_weights); + } + }); + } +}; + +template +using histogram_kernel_ = histogram_kernel; + +} // namespace + +using SupportedTypes = std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple>, + std::tuple>, + std::tuple>, + std::tuple>, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple>, + std::tuple>, + std::tuple, int64_t>, + std::tuple, int64_t>, + std::tuple, float>, + std::tuple, double>>; + +Histogram::Histogram() : dispatch_table("sample", "histogram") +{ + dispatch_table.populate_dispatch_table(); +} + +std::tuple + Histogram::call(const dpctl::tensor::usm_ndarray &sample, + const dpctl::tensor::usm_ndarray &bins, + std::optional &weights, + dpctl::tensor::usm_ndarray &histogram, + const std::vector &depends) +{ + validate(sample, bins, weights, histogram); + + if (sample.get_size() == 0) { + return {sycl::event(), sycl::event()}; + } + + const int sample_typenum = sample.get_typenum(); + const int hist_typenum = histogram.get_typenum(); + + auto histogram_func = dispatch_table.get(sample_typenum, hist_typenum); + + auto exec_q = sample.get_queue(); + + void *weights_ptr = + weights.has_value() ? 
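+        // raw USM pointer to the weights, or nullptr, which makes the
+        // kernel fall back to the NoWeights accessor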
weights.value().get_data() : nullptr; + + auto ev = + histogram_func(exec_q, sample.get_data(), bins.get_data(), weights_ptr, + histogram.get_data(), histogram.get_shape(0), + sample.get_shape(0), depends); + + sycl::event args_ev; + if (weights.has_value()) { + args_ev = dpctl::utils::keep_args_alive( + exec_q, {sample, bins, weights.value(), histogram}, {ev}); + } + else { + args_ev = dpctl::utils::keep_args_alive( + exec_q, {sample, bins, histogram}, {ev}); + } + + return {args_ev, ev}; +} + +std::unique_ptr hist; + +void statistics::histogram::populate_histogram(py::module_ m) +{ + using namespace std::placeholders; + + hist.reset(new Histogram()); + + auto hist_func = + [histp = hist.get()]( + const dpctl::tensor::usm_ndarray &sample, + const dpctl::tensor::usm_ndarray &bins, + std::optional &weights, + dpctl::tensor::usm_ndarray &histogram, + const std::vector &depends) { + return histp->call(sample, bins, weights, histogram, depends); + }; + + m.def("histogram", hist_func, "Compute the histogram of a dataset.", + py::arg("sample"), py::arg("bins"), py::arg("weights"), + py::arg("histogram"), py::arg("depends") = py::list()); + + auto histogram_dtypes = [histp = hist.get()]() { + return histp->dispatch_table.get_all_supported_types(); + }; + + m.def("histogram_dtypes", histogram_dtypes, + "Get the supported data types for histogram."); +} diff --git a/dpnp/backend/extensions/statistics/histogram.hpp b/dpnp/backend/extensions/statistics/histogram.hpp new file mode 100644 index 000000000000..e89737c12946 --- /dev/null +++ b/dpnp/backend/extensions/statistics/histogram.hpp @@ -0,0 +1,63 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include + +#include "dispatch_table.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace statistics +{ +namespace histogram +{ +struct Histogram +{ + using FnT = sycl::event (*)(sycl::queue &, + const void *, + const void *, + const void *, + void *, + const size_t, + const size_t, + const std::vector &); + + common::DispatchTable2 dispatch_table; + + Histogram(); + + std::tuple + call(const dpctl::tensor::usm_ndarray &input, + const dpctl::tensor::usm_ndarray &bins_edges, + std::optional &weights, + dpctl::tensor::usm_ndarray &output, + const std::vector &depends); +}; + +void populate_histogram(py::module_ m); +} // namespace histogram +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/histogram_common.cpp b/dpnp/backend/extensions/statistics/histogram_common.cpp new file mode 100644 index 000000000000..30197f74e422 --- /dev/null +++ b/dpnp/backend/extensions/statistics/histogram_common.cpp @@ -0,0 +1,244 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include +#include +#include +#include + +#include "dpctl4pybind11.hpp" +#include "utils/memory_overlap.hpp" +#include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" + +#include + +#include "histogram_common.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +using dpctl::tensor::usm_ndarray; +using dpctl_td_ns::typenum_t; + +namespace statistics +{ +using common::CeilDiv; + +namespace histogram +{ + +void validate(const usm_ndarray &sample, + const usm_ndarray &bins, + std::optional &weights, + const usm_ndarray &histogram) +{ + auto exec_q = sample.get_queue(); + using array_ptr = const usm_ndarray *; + + std::vector arrays{&sample, &bins, &histogram}; + std::unordered_map names = { + {arrays[0], "sample"}, {arrays[1], "bins"}, {arrays[2], "histogram"}}; + + array_ptr weights_ptr = nullptr; + + if (weights.has_value()) { + weights_ptr = &weights.value(); + arrays.push_back(weights_ptr); + names.insert({weights_ptr, "weights"}); + } + + auto get_name = [&](const array_ptr &arr) { + auto name_it = names.find(arr); + assert(name_it != names.end()); + + return "'" + name_it->second + "'"; + }; + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(histogram); + + auto unequal_queue = + std::find_if(arrays.cbegin(), arrays.cend(), [&](const array_ptr &arr) { + return arr->get_queue() != exec_q; + }); + + if (unequal_queue != arrays.cend()) { + throw py::value_error( + get_name(*unequal_queue) + + " parameter has incompatible queue with parameter " + + get_name(&sample)); + } + + auto non_contig_array = + std::find_if(arrays.cbegin(), arrays.cend(), [&](const array_ptr &arr) { + return !arr->is_c_contiguous(); + }); + + if (non_contig_array != arrays.cend()) { + throw py::value_error(get_name(*non_contig_array) + + " parameter is not c-contiguos"); + } + + auto check_overlaping = [&](const array_ptr &first, + const array_ptr &second) { + if (first == nullptr || second == nullptr) { + return; + } + + const auto &overlap = dpctl::tensor::overlap::MemoryOverlap(); + + if (overlap(*first, *second)) { + throw py::value_error(get_name(first) + + " has overlapping memory segments with " + + get_name(second)); + } + }; + + check_overlaping(&sample, &histogram); + check_overlaping(&bins, &histogram); + check_overlaping(weights_ptr, &histogram); + + if (bins.get_size() < 2) { + throw py::value_error(get_name(&bins) + + " parameter must have at least 2 elements"); + } + + if (histogram.get_size() < 1) { + throw py::value_error(get_name(&histogram) + + " parameter must have at least 1 element"); + } + + if (histogram.get_ndim() != 1) { + throw py::value_error(get_name(&histogram) + + " parameter must be 1d. Actual " + + std::to_string(histogram.get_ndim()) + "d"); + } + + if (weights_ptr) { + if (weights_ptr->get_ndim() != 1) { + throw py::value_error( + get_name(weights_ptr) + " parameter must be 1d. Actual " + + std::to_string(weights_ptr->get_ndim()) + "d"); + } + + auto sample_size = sample.get_size(); + auto weights_size = weights_ptr->get_size(); + if (sample.get_size() != weights_ptr->get_size()) { + throw py::value_error( + get_name(&sample) + " size (" + std::to_string(sample_size) + + ") and " + get_name(weights_ptr) + " size (" + + std::to_string(weights_size) + ")" + " must match"); + } + } + + if (sample.get_ndim() > 2) { + throw py::value_error( + get_name(&sample) + + " parameter must have no more than 2 dimensions. 
Actual " + + std::to_string(sample.get_ndim()) + "d"); + } + + if (sample.get_ndim() == 1) { + if (bins.get_ndim() != 1) { + throw py::value_error(get_name(&sample) + " parameter is 1d, but " + + get_name(&bins) + " is " + + std::to_string(bins.get_ndim()) + "d"); + } + } + else if (sample.get_ndim() == 2) { + auto sample_count = sample.get_shape(0); + auto expected_dims = sample.get_shape(1); + + if (bins.get_ndim() != expected_dims) { + throw py::value_error(get_name(&sample) + " parameter has shape {" + + std::to_string(sample_count) + "x" + + std::to_string(expected_dims) + "}" + + ", so " + get_name(&bins) + + " parameter expected to be " + + std::to_string(expected_dims) + + "d. " + "Actual " + + std::to_string(bins.get_ndim()) + "d"); + } + } + + py::ssize_t expected_hist_size = 1; + for (int i = 0; i < bins.get_ndim(); ++i) { + expected_hist_size *= (bins.get_shape(i) - 1); + } + + if (histogram.get_size() != expected_hist_size) { + throw py::value_error( + get_name(&histogram) + " and " + get_name(&bins) + + " shape mismatch. " + get_name(&histogram) + + " expected to have size = " + std::to_string(expected_hist_size) + + ". Actual " + std::to_string(histogram.get_size())); + } + + int64_t max_hist_size = std::numeric_limits::max() - 1; + if (histogram.get_size() > max_hist_size) { + throw py::value_error(get_name(&histogram) + + " parameter size expected to be less than " + + std::to_string(max_hist_size) + ". Actual " + + std::to_string(histogram.get_size())); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + auto hist_type = static_cast( + array_types.typenum_to_lookup_id(histogram.get_typenum())); + if (histogram.get_elemsize() == 8 && hist_type != typenum_t::CFLOAT) { + auto device = exec_q.get_device(); + bool _64bit_atomics = device.has(sycl::aspect::atomic64); + + if (!_64bit_atomics) { + auto device_name = device.get_info(); + throw py::value_error( + get_name(&histogram) + + " parameter has 64-bit type, but 64-bit atomics " + + " are not supported for " + device_name); + } + } +} + +uint32_t get_local_hist_copies_count(uint32_t loc_mem_size_in_items, + uint32_t local_size, + uint32_t hist_size_in_items) +{ + constexpr uint32_t local_copies_limit = 16; + constexpr uint32_t atomics_per_work_item = 4; + + const uint32_t preferred_local_copies = + CeilDiv(atomics_per_work_item * local_size, hist_size_in_items); + const uint32_t local_copies_fit_memory = + loc_mem_size_in_items / hist_size_in_items; + + uint32_t local_hist_count = + std::min(preferred_local_copies, local_copies_limit); + local_hist_count = std::min(local_hist_count, local_copies_fit_memory); + + return local_hist_count; +} + +} // namespace histogram +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp new file mode 100644 index 000000000000..9d2cbd163024 --- /dev/null +++ b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -0,0 +1,342 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "common.hpp" + +namespace dpctl +{ +namespace tensor +{ +class usm_ndarray; +} +} // namespace dpctl + +using dpctl::tensor::usm_ndarray; + +namespace statistics +{ +using common::AtomicOp; +using common::IsNan; +using common::Less; + +namespace histogram +{ + +template +struct CachedData +{ + static constexpr bool const sync_after_init = true; + using pointer_type = T *; + + using ncT = typename std::remove_const::type; + using LocalData = sycl::local_accessor; + + CachedData(T *global_data, sycl::range shape, sycl::handler &cgh) + { + this->global_data = global_data; + local_data = LocalData(shape, cgh); + } + + T *get_ptr() const + { + return &local_data[0]; + } + + template + void init(const sycl::nd_item<_Dims> &item) const + { + int32_t llid = item.get_local_linear_id(); + auto local_ptr = &local_data[0]; + int32_t size = local_data.size(); + auto group = item.get_group(); + int32_t local_size = group.get_local_linear_range(); + + for (int32_t i = llid; i < size; i += local_size) { + local_ptr[i] = global_data[i]; + } + } + + size_t size() const + { + return local_data.size(); + } + +private: + LocalData local_data; + T *global_data = nullptr; +}; + +template +struct UncachedData +{ + static constexpr bool const sync_after_init = false; + using Shape = sycl::range; + using pointer_type = T *; + + UncachedData(T *global_data, const Shape &shape, sycl::handler &) + { + this->global_data = global_data; + _shape = shape; + } + + T *get_ptr() const + { + return global_data; + } + + template + void init(const sycl::nd_item<_Dims> &) const + { + } + + size_t size() const + { + return _shape.size(); + } + +private: + T *global_data = nullptr; + Shape _shape; +}; + +template +struct HistLocalType +{ + using type = T; +}; + +template <> +struct HistLocalType +{ + using type = uint32_t; +}; + +template <> +struct HistLocalType +{ + using type = int32_t; +}; + +template ::type> +struct HistWithLocalCopies +{ + static constexpr bool const sync_after_init = true; + static constexpr bool const sync_before_finalize = true; + + using LocalHist = sycl::local_accessor; + + HistWithLocalCopies(T *global_data, + size_t bins_count, + int32_t copies_count, + sycl::handler &cgh) + { + local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh); + global_hist = global_data; + } + + template + void init(const sycl::nd_item<_Dims> &item, localT val = 0) const + { + uint32_t llid = 
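+        // each work-item strides across the flattened set of local
+        // histogram copies, filling every slot with `val` (zero by default)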
item.get_local_linear_id(); + auto *local_ptr = &local_hist[0][0]; + uint32_t size = local_hist.size(); + auto group = item.get_group(); + uint32_t local_size = group.get_local_linear_range(); + + for (uint32_t i = llid; i < size; i += local_size) { + local_ptr[i] = val; + } + } + + template + void add(const sycl::nd_item<_Dims> &item, int32_t bin, localT value) const + { + int32_t llid = item.get_local_linear_id(); + int32_t local_hist_count = local_hist.get_range().get(0); + int32_t local_copy_id = + local_hist_count == 1 ? 0 : llid % local_hist_count; + + AtomicOp::add(local_hist[local_copy_id] + [bin], + value); + } + + template + void finalize(const sycl::nd_item<_Dims> &item) const + { + int32_t llid = item.get_local_linear_id(); + int32_t bins_count = local_hist.get_range().get(1); + int32_t local_hist_count = local_hist.get_range().get(0); + auto group = item.get_group(); + int32_t local_size = group.get_local_linear_range(); + + for (int32_t i = llid; i < bins_count; i += local_size) { + auto value = local_hist[0][i]; + for (int32_t lhc = 1; lhc < local_hist_count; ++lhc) { + value += local_hist[lhc][i]; + } + if (value != T(0)) { + AtomicOp::add(global_hist[i], + value); + } + } + } + + uint32_t size() const + { + return local_hist.size(); + } + +private: + LocalHist local_hist; + T *global_hist = nullptr; +}; + +template +struct HistGlobalMemory +{ + static constexpr bool const sync_after_init = false; + static constexpr bool const sync_before_finalize = false; + + HistGlobalMemory(T *global_data) + { + global_hist = global_data; + } + + template + void init(const sycl::nd_item<_Dims> &) const + { + } + + template + void add(const sycl::nd_item<_Dims> &, int32_t bin, T value) const + { + AtomicOp::add(global_hist[bin], value); + } + + template + void finalize(const sycl::nd_item<_Dims> &) const + { + } + +private: + T *global_hist = nullptr; +}; + +template +struct NoWeights +{ + constexpr T get(size_t) const + { + return 1; + } +}; + +template +struct Weights +{ + Weights(T *weights) + { + data = weights; + } + + T get(size_t id) const + { + return data[id]; + } + +private: + T *data = nullptr; +}; + +template +class histogram_kernel; + +template +void submit_histogram(const T *in, + size_t size, + size_t dims, + uint32_t WorkPI, + const HistImpl &hist, + const Edges &edges, + const Weights &weights, + sycl::nd_range<1> nd_range, + sycl::handler &cgh) +{ + cgh.parallel_for>( + nd_range, [=](sycl::nd_item<1> item) { + auto id = item.get_group_linear_id(); + auto lid = item.get_local_linear_id(); + auto group = item.get_group(); + auto local_size = item.get_local_range(0); + + hist.init(item); + edges.init(item); + + if constexpr (HistImpl::sync_after_init || Edges::sync_after_init) { + sycl::group_barrier(group, sycl::memory_scope::work_group); + } + + auto bounds = edges.get_bounds(); + + for (uint32_t i = 0; i < WorkPI; ++i) { + auto data_idx = id * WorkPI * local_size + i * local_size + lid; + if (data_idx < size) { + auto *d = &in[data_idx * dims]; + + if (edges.in_bounds(d, bounds)) { + auto bin = edges.get_bin(item, d, bounds); + auto weight = weights.get(data_idx); + hist.add(item, bin, weight); + } + } + } + + if constexpr (HistImpl::sync_before_finalize) { + sycl::group_barrier(group, sycl::memory_scope::work_group); + } + + hist.finalize(item); + }); +} + +void validate(const usm_ndarray &sample, + const usm_ndarray &bins, + std::optional &weights, + const usm_ndarray &histogram); + +uint32_t get_local_hist_copies_count(uint32_t loc_mem_size_in_items, + uint32_t 
local_size, + uint32_t hist_size_in_items); + +} // namespace histogram +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/statistics_py.cpp b/dpnp/backend/extensions/statistics/statistics_py.cpp new file mode 100644 index 000000000000..e4f533cd7a02 --- /dev/null +++ b/dpnp/backend/extensions/statistics/statistics_py.cpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +// This file defines functions of dpnp.backend._lapack_impl extensions +// +//***************************************************************************** + +#include +#include + +#include "histogram.hpp" + +PYBIND11_MODULE(_statistics_impl, m) +{ + statistics::histogram::populate_histogram(m); +} diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index c0e95e588c1a..b5eea9f37bfc 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -42,9 +42,16 @@ import dpctl.utils as dpu import numpy +from dpctl.tensor._type_utils import _can_cast import dpnp +# pylint: disable=no-name-in-module +import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext + +# pylint: disable=no-name-in-module +from .dpnp_utils import map_dtype_to_device + __all__ = [ "digitize", "histogram", @@ -201,19 +208,6 @@ def _get_bin_edges(a, bins, range, usm_type): return bin_edges, None -def _search_sorted_inclusive(a, v): - """ - Like :obj:`dpnp.searchsorted`, but where the last item in `v` is placed - on the right. - In the context of a histogram, this makes the last bin edge inclusive - - """ - - return dpnp.concatenate( - (a.searchsorted(v[:-1], "left"), a.searchsorted(v[-1:], "right")) - ) - - def digitize(x, bins, right=False): """ Return the indices of the bins to which each value in input array belongs. 
@@ -306,6 +300,35 @@ def digitize(x, bins, right=False): return bins.size - dpnp.searchsorted(bins[::-1], x, side=side) +def _result_type_for_device(dtype1, dtype2, device): + rt = dpnp.result_type(dtype1, dtype2) + return map_dtype_to_device(rt, device) + + +def _align_dtypes(a_dtype, bins_dtype, ntype, supported_types, device): + has_fp64 = device.has_aspect_fp64 + has_fp16 = device.has_aspect_fp16 + + a_bin_dtype = _result_type_for_device(a_dtype, bins_dtype, device) + + # histogram implementation doesn't support uint64 as histogram type + # we can use int64 instead. Result would be correct even in case of overflow + if ntype == numpy.uint64: + ntype = dpnp.int64 + + if (a_bin_dtype, ntype) in supported_types: + return a_bin_dtype, ntype + + for sample_type, hist_type in supported_types: + if _can_cast( + a_bin_dtype, sample_type, has_fp16, has_fp64 + ) and _can_cast(ntype, hist_type, has_fp16, has_fp64): + return sample_type, hist_type + + # should not happen + return None, None + + def histogram(a, bins=10, range=None, density=None, weights=None): """ Compute the histogram of a data set. @@ -393,7 +416,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): a, weights, usm_type = _ravel_check_a_and_weights(a, weights) - bin_edges, uniform_bins = _get_bin_edges(a, bins, range, usm_type) + bin_edges, _ = _get_bin_edges(a, bins, range, usm_type) # Histogram is an integer or a float array depending on the weights. if weights is None: @@ -401,41 +424,74 @@ def histogram(a, bins=10, range=None, density=None, weights=None): else: ntype = weights.dtype - # The fast path uses bincount, but that only works for certain types - # of weight - # simple_weights = ( - # weights is None or - # dpnp.can_cast(weights.dtype, dpnp.double) or - # dpnp.can_cast(weights.dtype, complex) - # ) - # TODO: implement a fast path - simple_weights = False - - if uniform_bins is not None and simple_weights: - # TODO: implement fast algorithm for equal bins - pass - else: - # Compute via cumulative histogram - if weights is None: - sa = dpnp.sort(a) - cum_n = _search_sorted_inclusive(sa, bin_edges) - else: - zero = dpnp.zeros( - 1, dtype=ntype, sycl_queue=a.sycl_queue, usm_type=usm_type - ) - sorting_index = dpnp.argsort(a) - sa = a[sorting_index] - sw = weights[sorting_index] - cw = dpnp.concatenate((zero, sw.cumsum(dtype=ntype))) - bin_index = _search_sorted_inclusive(sa, bin_edges) - cum_n = cw[bin_index] + queue = a.sycl_queue + device = queue.sycl_device + + supported_types = statistics_ext.histogram_dtypes() + a_bin_dtype, hist_dtype = _align_dtypes( + a.dtype, bin_edges.dtype, ntype, supported_types, device + ) + + if a_bin_dtype is None or hist_dtype is None: + raise ValueError( + f"function '{histogram}' does not support input types " + f"({a.dtype}, {bin_edges.dtype}, {ntype}), " + "and the inputs could not be coerced to any " + "supported types" + ) - n = dpnp.diff(cum_n) + a_casted = dpnp.astype(a, a_bin_dtype, order="C", copy=False) + bin_edges_casted = dpnp.astype( + bin_edges, a_bin_dtype, order="C", copy=False + ) + weights_casted = ( + dpnp.astype(weights, hist_dtype, order="C", copy=False) + if weights is not None + else None + ) + + # histogram implementation uses atomics, but atomics doesn't work with + # host usm memory + n_usm_type = "device" if usm_type == "host" else usm_type + + # histogram implementation requires output array to be filled with zeros + n_casted = dpnp.zeros( + bin_edges.size - 1, + dtype=hist_dtype, + sycl_queue=a.sycl_queue, + usm_type=n_usm_type, + ) + + 
_manager = dpu.SequentialOrderManager[queue] + + a_usm = dpnp.get_usm_ndarray(a_casted) + bins_usm = dpnp.get_usm_ndarray(bin_edges_casted) + weights_usm = ( + dpnp.get_usm_ndarray(weights_casted) + if weights_casted is not None + else None + ) + n_usm = dpnp.get_usm_ndarray(n_casted) + + mem_ev, ht_ev = statistics_ext.histogram( + a_usm, + bins_usm, + weights_usm, + n_usm, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(mem_ev, ht_ev) + + if usm_type != n_usm_type: + n = dpnp.asarray(n_casted, dtype=ntype, usm_type=usm_type) + else: + n = dpnp.astype(n_casted, ntype, copy=False) if density: - # pylint: disable=possibly-used-before-assignment - db = dpnp.diff(bin_edges).astype(dpnp.default_float_type()) - return n / db / n.sum(), bin_edges + db = dpnp.astype( + dpnp.diff(bin_edges), dpnp.default_float_type(sycl_queue=queue) + ) + return n / db / dpnp.sum(n), bin_edges return n, bin_edges diff --git a/tests/test_histogram.py b/tests/test_histogram.py index 0e6e33fd99c4..c37e5a4316ff 100644 --- a/tests/test_histogram.py +++ b/tests/test_histogram.py @@ -492,6 +492,19 @@ def test_weights_another_sycl_queue(self): with assert_raises(ValueError): dpnp.histogram(v, weights=w) + @pytest.mark.parametrize( + "bins_count", + [10, 10**2, 10**3, 10**4, 10**5, 10**6], + ) + def test_different_bins_amount(self, bins_count): + v = numpy.linspace(0, bins_count, bins_count, dtype=numpy.float32) + iv = dpnp.array(v) + + expected_hist, expected_edges = numpy.histogram(v, bins=bins_count) + result_hist, result_edges = dpnp.histogram(iv, bins=bins_count) + assert_array_equal(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + class TestHistogramBinEdges: @pytest.mark.parametrize(
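
For illustration, a minimal sketch of driving the new histogram path from Python, based on the wiring in dpnp_iface_histograms.py above. It assumes a float32 sample with float32 weights, which should map onto one of the (sample, histogram) type pairs registered in SupportedTypes; array contents and bin counts are arbitrary.

    import dpnp
    import numpy

    # sample and weights allocated on the default SYCL device
    a = dpnp.arange(1000, dtype=dpnp.float32)
    w = dpnp.ones_like(a)

    # dpnp.histogram validates the arrays, aligns dtypes via _align_dtypes,
    # zero-fills the output array and submits the SYCL kernel through
    # _statistics_impl.histogram, with event ordering handled by dpctl's
    # SequentialOrderManager
    hist, edges = dpnp.histogram(a, bins=64, weights=w)

    # the result agrees with NumPy's reference implementation
    numpy.testing.assert_allclose(
        dpnp.asnumpy(hist),
        numpy.histogram(dpnp.asnumpy(a), bins=64,
                        weights=dpnp.asnumpy(w))[0],
    )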