Applying review comments

AlexanderKalistratov · AlexanderKalistratov · commit cd9008d3d46b · 2024-11-05T18:06:59.000+01:00
diff --git a/dpnp/backend/extensions/statistics/bincount.cpp b/dpnp/backend/extensions/statistics/bincount.cpp
@@ -72,9 +72,8 @@ struct BincountEdges
     template <typename dT>
     bool in_bounds(const dT *val, const boundsT &bounds) const
     {
-        Less<dT> _less;
-        return !_less(val[0], std::get<0>(bounds)) &&
-               !_less(std::get<1>(bounds), val[0]) && !IsNan<dT>::isnan(val[0]);
+        return check_in_bounds(val[0], std::get<0>(bounds),
+                               std::get<1>(bounds));
     }
 
 private:
@@ -87,8 +86,8 @@ struct BincountF
 {
     static sycl::event impl(sycl::queue &exec_q,
                             const void *vin,
-                            int64_t min,
-                            int64_t max,
+                            const int64_t min,
+                            const int64_t max,
                             const void *vweights,
                             void *vout,
                             const size_t,
@@ -100,22 +99,20 @@ struct BincountF
         // shift output pointer by min elements
         HistType *out = static_cast<HistType *>(vout) + min;
 
-        size_t needed_bins_count = (max - min) + 1;
-        auto device = exec_q.get_device();
+        const size_t needed_bins_count = (max - min) + 1;
 
-        uint32_t local_size = get_max_local_size(exec_q);
+        const uint32_t local_size = get_max_local_size(exec_q);
 
-        uint32_t WorkPI = 128; // empirically found number
-        auto nd_range = make_ndrange(size, local_size, WorkPI);
+        constexpr uint32_t WorkPI = 128; // empirically found number
+        const auto nd_range = make_ndrange(size, local_size, WorkPI);
 
         return exec_q.submit([&](sycl::handler &cgh) {
             cgh.depends_on(depends);
             constexpr uint32_t dims = 1;
 
             auto dispatch_bins = [&](const auto &weights) {
-                auto local_mem_size =
-                    device.get_info<sycl::info::device::local_mem_size>() /
-                    sizeof(T);
+                const auto local_mem_size =
+                    get_local_mem_size_in_items<T>(exec_q);
                 if (local_mem_size >= needed_bins_count) {
                     uint32_t local_hist_count = get_local_hist_copies_count(
                         local_mem_size, local_size, needed_bins_count);
diff --git a/dpnp/backend/extensions/statistics/common.hpp b/dpnp/backend/extensions/statistics/common.hpp
@@ -165,6 +165,19 @@ size_t get_local_mem_size_in_items(const sycl::device &device, size_t reserve)
     return get_local_mem_size_in_bytes(device, sizeof(T) * reserve) / sizeof(T);
 }
 
+template <typename T> // T: item type; the result is a count of T-sized items
+inline size_t get_local_mem_size_in_items(const sycl::queue &queue)
+{
+    return get_local_mem_size_in_items<T>(queue.get_device()); // queue convenience overload: delegates to a device-based overload (declared outside this hunk - confirm)
+}
+
+template <typename T> // T: item type; the result is a count of T-sized items
+inline size_t get_local_mem_size_in_items(const sycl::queue &queue,
+                                          size_t reserve) // reserve: number of T items to hold back (the device overload scales it by sizeof(T))
+{
+    return get_local_mem_size_in_items<T>(queue.get_device(), reserve); // queue convenience overload: delegates to the (device, reserve) overload
+}
+
 template <int Dims>
 sycl::nd_range<Dims> make_ndrange(const sycl::range<Dims> &global_range,
                                   const sycl::range<Dims> &local_range,
diff --git a/dpnp/backend/extensions/statistics/histogram.cpp b/dpnp/backend/extensions/statistics/histogram.cpp
@@ -94,9 +94,8 @@ struct HistogramEdges
     template <typename dT>
     bool in_bounds(const dT *val, const boundsT &bounds) const
     {
-        Less<dT> _less;
-        return !_less(val[0], std::get<0>(bounds)) &&
-               !_less(std::get<1>(bounds), val[0]) && !IsNan<dT>::isnan(val[0]);
+        return check_in_bounds(val[0], std::get<0>(bounds),
+                               std::get<1>(bounds));
     }
 
 private:
@@ -110,7 +109,7 @@ template <typename T>
 using UncachedEdges = HistogramEdges<T, UncachedData<const T, 1>>;
 
 template <typename T, typename BinsT, typename HistType = size_t>
-struct histogram_kernel
+struct HistogramF
 {
     static sycl::event impl(sycl::queue &exec_q,
                             const void *vin,
@@ -185,7 +184,7 @@ struct histogram_kernel
 };
 
 template <typename SampleType, typename HistType>
-using histogram_kernel_ = histogram_kernel<SampleType, SampleType, HistType>;
+using HistogramF_ = HistogramF<SampleType, SampleType, HistType>;
 
 } // namespace
 
@@ -212,7 +211,7 @@ using SupportedTypes = std::tuple<std::tuple<uint64_t, int64_t>,
 
 Histogram::Histogram() : dispatch_table("sample", "histogram")
 {
-    dispatch_table.populate_dispatch_table<SupportedTypes, histogram_kernel_>();
+    dispatch_table.populate_dispatch_table<SupportedTypes, HistogramF_>();
 }
 
 std::tuple<sycl::event, sycl::event>
diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp
@@ -278,6 +278,13 @@ struct Weights
     T *data = nullptr;
 };
 
+template <typename dT> // dT: value type shared by the sample and both bounds
+bool check_in_bounds(const dT &val, const dT &min, const dT &max)
+{
+    Less<dT> _less; // comparator defined elsewhere in the project
+    return !_less(val, min) && !_less(max, val) && !IsNan<dT>::isnan(val); // true iff val is not below min, not above max, and not NaN
+}
+
 template <typename T, typename HistImpl, typename Edges, typename Weights>
 class histogram_kernel;
 
diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py
@@ -38,7 +38,6 @@
 """
 
 import operator
-import warnings
 
 import dpctl.utils as dpu
 import numpy
@@ -104,16 +103,6 @@ def _ravel_check_a_and_weights(a, weights):
     dpnp.check_supported_arrays_type(a)
     usm_type = a.usm_type
 
-    # ensure that the array is a "subtractable" dtype
-    if a.dtype == dpnp.bool:
-        warnings.warn(
-            f"Converting input from {a.dtype} to {numpy.uint8} "
-            "for compatibility.",
-            RuntimeWarning,
-            stacklevel=3,
-        )
-        a = dpnp.astype(a, numpy.uint8)
-
     if weights is not None:
         # check that `weights` array has supported type
         dpnp.check_supported_arrays_type(weights)
@@ -323,22 +312,26 @@ def bincount(x, weights=None, minlength=None):
 
     Parameters
     ----------
-    x : {dpnp.ndarray, usm_ndarray}, 1 dimension, nonnegative ints
-        Input array.
-    weights : {dpnp.ndarray, usm_ndarray}, optional
+    x : {dpnp.ndarray, usm_ndarray}
+        Input 1-dimensional array with nonnegative integer values.
+    weights : {None, dpnp.ndarray, usm_ndarray}, optional
         Weights, array of the same shape as `x`.
-    minlength : int, optional
+        Default: ``None``
+    minlength : {None, int}, optional
         A minimum number of bins for the output array.
+        Default: ``None``
 
     Returns
     -------
     out : dpnp.ndarray of ints
         The result of binning the input array.
-        The length of `out` is equal to ``np.amax(x)+1``.
+        The length of `out` is equal to ``np.amax(x) + 1``.
 
     See Also
     --------
-    dpnp.histogram, dpnp.digitize, dpnp.unique
+    :obj:`dpnp.histogram` : Compute the histogram of a data set.
+    :obj:`dpnp.digitize` : Return the indices of the bins for each input value.
+    :obj:`dpnp.unique` : Find the unique elements of an array.
 
     Examples
     --------
@@ -349,25 +342,24 @@ def bincount(x, weights=None, minlength=None):
     array([1, 3, 1, 1, 0, 0, 0, 1])
 
     >>> x = np.array([0, 1, 1, 3, 2, 1, 7, 23])
-    >>> np.bincount(x).size == np.amax(x)+1
-    True
+    >>> np.bincount(x).size == np.amax(x) + 1
+    array(True)
 
     The input array needs to be of integer dtype, otherwise a
     TypeError is raised:
 
-    >>> np.bincount(np.arange(5, dtype=float))
+    >>> np.bincount(np.arange(5, dtype=np.float32))
     Traceback (most recent call last):
       ...
-    TypeError: Cannot cast array data from dtype('float64') to dtype('int64')
-    according to the rule 'safe'
+    TypeError: x must be an integer array
 
     A possible use of ``bincount`` is to perform sums over
-    variable-size chunks of an array, using the ``weights`` keyword.
+    variable-size chunks of an array, using the `weights` keyword.
 
-    >>> w = np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6]) # weights
+    >>> w = np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6], dtype=np.float32) # weights
     >>> x = np.array([0, 1, 1, 2, 2, 2])
-    >>> np.bincount(x,  weights=w)
-    array([ 0.3,  0.7,  1.1])
+    >>> np.bincount(x, weights=w)
+    array([0.3, 0.7, 1.1], dtype=float32)
 
     """
 
diff --git a/tests/helper.py b/tests/helper.py
@@ -1,4 +1,3 @@
-from collections.abc import Iterable
 from sys import platform
 
 import dpctl
diff --git a/tests/test_histogram.py b/tests/test_histogram.py
@@ -258,22 +258,6 @@ def test_outliers_normalization_weights(self):
         assert_allclose(result_hist, expected_hist)
         assert_allclose(result_edges, expected_edges)
 
-    @pytest.mark.parametrize("xp", [numpy, dpnp])
-    def test_bool_conversion(self, xp):
-        a = xp.array([1, 1, 0], dtype=numpy.uint8)
-        int_hist, int_edges = xp.histogram(a)
-
-        with suppress_warnings() as sup:
-            rec = sup.record(RuntimeWarning, "Converting input from .*")
-
-            v = xp.array([True, True, False])
-            hist, edges = xp.histogram(v)
-
-            # A warning should be issued
-            assert len(rec) == 1
-            assert_array_equal(hist, int_hist)
-            assert_array_equal(edges, int_edges)
-
     @pytest.mark.parametrize("density", [True, False])
     def test_weights(self, density):
         v = numpy.random.rand(100)
@@ -574,6 +558,13 @@ def test_weights_another_sycl_queue(self):
         with assert_raises(ValueError):
             dpnp.bincount(v, weights=w)
 
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    def test_weights_unsupported_dtype(self, xp):
+        v = dpnp.arange(5)
+        w = dpnp.arange(5, dtype=dpnp.complex64)
+        with assert_raises(ValueError):
+            dpnp.bincount(v, weights=w)
+
     @pytest.mark.parametrize(
         "bins_count",
         [10, 10**2, 10**3, 10**4, 10**5, 10**6],
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
@@ -2460,6 +2460,27 @@ def test_lstsq(m, n, nrhs, device):
         assert_sycl_queue_equal(param_dp.sycl_queue, b_dp.sycl_queue)
 
 
+@pytest.mark.parametrize("weights", [None, numpy.arange(7, 12)])
+@pytest.mark.parametrize(
+    "device",
+    valid_devices,
+    ids=[device.filter_string for device in valid_devices],
+)
+def test_bincount(weights, device):  # bincount on each device, with and without weights
+    v = numpy.arange(5)
+    w = weights
+
+    iv = dpnp.array(v, device=device)
+    iw = None if weights is None else dpnp.array(w, sycl_queue=iv.sycl_queue)  # weights share the input's queue
+
+    expected_hist = numpy.bincount(v, weights=w)  # numpy result is the reference
+    result_hist = dpnp.bincount(iv, weights=iw)
+    assert_array_equal(result_hist, expected_hist)
+
+    hist_queue = result_hist.sycl_queue
+    assert_sycl_queue_equal(hist_queue, iv.sycl_queue)  # output must stay on the input's queue
+
+
 @pytest.mark.parametrize("weights", [None, numpy.arange(7, 12)])
 @pytest.mark.parametrize(
     "device",
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
@@ -1475,11 +1475,10 @@ def test_bincount(usm_type_v, usm_type_w):
     v = dp.arange(5, usm_type=usm_type_v)
     w = dp.arange(7, 12, usm_type=usm_type_w)
 
-    hist, edges = dp.histogram(v, weights=w)
+    hist = dp.bincount(v, weights=w)
     assert v.usm_type == usm_type_v
     assert w.usm_type == usm_type_w
     assert hist.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w])
-    assert edges.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w])
 
 
 @pytest.mark.parametrize(
diff --git a/tests/third_party/cupy/statistics_tests/test_histogram.py b/tests/third_party/cupy/statistics_tests/test_histogram.py
@@ -271,7 +271,9 @@ def test_bincount_duplicated_value(self, xp, dtype):
         return xp.bincount(x)
 
     @for_all_dtypes_combination_bincount(names=["x_type", "w_type"])
-    @testing.numpy_cupy_allclose(accept_error=TypeError, type_check=False)
+    @testing.numpy_cupy_allclose(
+        accept_error=TypeError, type_check=has_support_aspect64()
+    )
     def test_bincount_with_weight(self, xp, x_type, w_type):
         x = testing.shaped_arange((3,), xp, x_type)
         w = testing.shaped_arange((3,), xp, w_type)

Original file line number	Diff line number	Diff line change
`@@ -94,9 +94,8 @@ struct HistogramEdges`
`94`	`94`	`template <typename dT>`
`95`	`95`	`bool in_bounds(const dT *val, const boundsT &bounds) const`
`96`	`96`	`{`
`97`		`- Less<dT> _less;`
`98`		`- return !_less(val[0], std::get<0>(bounds)) &&`
`99`		`- !_less(std::get<1>(bounds), val[0]) && !IsNan<dT>::isnan(val[0]);`
	`97`	`+ return check_in_bounds(val[0], std::get<0>(bounds),`
	`98`	`+ std::get<1>(bounds));`
`100`	`99`	`}`
`101`	`100`
`102`	`101`	`private:`
`@@ -110,7 +109,7 @@ template <typename T>`
`110`	`109`	`using UncachedEdges = HistogramEdges<T, UncachedData<const T, 1>>;`
`111`	`110`
`112`	`111`	`template <typename T, typename BinsT, typename HistType = size_t>`
`113`		`-struct histogram_kernel`
	`112`	`+struct HistogramF`
`114`	`113`	`{`
`115`	`114`	`static sycl::event impl(sycl::queue &exec_q,`
`116`	`115`	`const void *vin,`
`@@ -185,7 +184,7 @@ struct histogram_kernel`
`185`	`184`	`};`
`186`	`185`
`187`	`186`	`template <typename SampleType, typename HistType>`
`188`		`-using histogram_kernel_ = histogram_kernel<SampleType, SampleType, HistType>;`
	`187`	`+using HistogramF_ = HistogramF<SampleType, SampleType, HistType>;`
`189`	`188`
`190`	`189`	`} // namespace`
`191`	`190`
`@@ -212,7 +211,7 @@ using SupportedTypes = std::tuple<std::tuple<uint64_t, int64_t>,`
`212`	`211`
`213`	`212`	`Histogram::Histogram() : dispatch_table("sample", "histogram")`
`214`	`213`	`{`
`215`		`- dispatch_table.populate_dispatch_table<SupportedTypes, histogram_kernel_>();`
	`214`	`+ dispatch_table.populate_dispatch_table<SupportedTypes, HistogramF_>();`
`216`	`215`	`}`
`217`	`216`
`218`	`217`	`std::tuple<sycl::event, sycl::event>`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-from collections.abc import Iterable`
`2`	`1`	`from sys import platform`
`3`	`2`
`4`	`3`	`import dpctl`