|
47 | 47 |
|
48 | 48 | __all__ = [
|
49 | 49 | "histogram",
|
| 50 | + "histogram_bin_edges", |
50 | 51 | ]
|
51 | 52 |
|
52 | 53 | # range is a keyword argument to many functions, so save the builtin so they can
|
@@ -219,34 +220,36 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
|
219 | 220 | Input data. The histogram is computed over the flattened array.
|
220 | 221 | bins : {int, dpnp.ndarray, usm_ndarray, sequence of scalars}, optional
|
221 | 222 | If `bins` is an int, it defines the number of equal-width bins in the
|
222 |
| - given range (``10``, by default). |
| 223 | + given range. |
223 | 224 | If `bins` is a sequence, it defines a monotonically increasing array
|
224 | 225 | of bin edges, including the rightmost edge, allowing for non-uniform
|
225 | 226 | bin widths.
|
226 |
| - If `bins` is a string, it defines the method used to calculate the |
227 |
| - optimal bin width, as defined by :obj:`dpnp.histogram_bin_edges`. |
228 |
| - range : {2-tuple of float}, optional |
| 227 | + Default: ``10``. |
| 228 | + range : {None, 2-tuple of float}, optional |
229 | 229 | The lower and upper range of the bins. If not provided, range is simply
|
230 | 230 | ``(a.min(), a.max())``. Values outside the range are ignored. The first
|
231 | 231 | element of the range must be less than or equal to the second. `range`
|
232 | 232 | affects the automatic bin computation as well. While bin width is
|
233 | 233 | computed to be optimal based on the actual data within `range`, the bin
|
234 | 234 | count will fill the entire range including portions containing no data.
|
235 |
| - weights : {dpnp.ndarray, usm_ndarray}, optional |
| 235 | + Default: ``None``. |
| 236 | + density : {None, bool}, optional |
| 237 | + If ``False`` or ``None``, the result will contain the number of samples |
| 238 | + in each bin. If ``True``, the result is the value of the probability |
| 239 | + *density* function at the bin, normalized such that the *integral* over |
| 240 | + the range is ``1``. Note that the sum of the histogram values will not |
| 241 | + be equal to ``1`` unless bins of unity width are chosen; it is not |
| 242 | + a probability *mass* function. |
| 243 | + Default: ``None``. |
| 244 | + weights : {None, dpnp.ndarray, usm_ndarray}, optional |
236 | 245 | An array of weights, of the same shape as `a`. Each value in `a` only
|
237 | 246 | contributes its associated weight towards the bin count (instead of 1).
|
238 | 247 | If `density` is ``True``, the weights are normalized, so that the
|
239 | 248 | integral of the density over the range remains ``1``.
|
240 | 249 | Please note that the ``dtype`` of `weights` will also become the
|
241 | 250 | ``dtype`` of the returned accumulator (`hist`), so it must be large
|
242 | 251 | enough to hold accumulated values as well.
|
243 |
| - density : {bool}, optional |
244 |
| - If ``False``, the result will contain the number of samples in each bin. |
245 |
| - If ``True``, the result is the value of the probability *density* |
246 |
| - function at the bin, normalized such that the *integral* over the range |
247 |
| - is ``1``. Note that the sum of the histogram values will not be equal |
248 |
| - to ``1`` unless bins of unity width are chosen; it is not a probability |
249 |
| - *mass* function. |
| 252 | + Default: ``None``. |
250 | 253 |
|
251 | 254 | Returns
|
252 | 255 | -------
|
@@ -337,3 +340,88 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
|
337 | 340 | return n / db / n.sum(), bin_edges
|
338 | 341 |
|
339 | 342 | return n, bin_edges
|
| 343 | + |
| 344 | + |
| 345 | +def histogram_bin_edges(a, bins=10, range=None, weights=None): |
| 346 | + """ |
| 347 | + Calculate only the edges of the bins used by the |
| 348 | + :obj:`dpnp.histogram` function. |
| 349 | +
|
| 350 | + For full documentation refer to :obj:`numpy.histogram_bin_edges`. |
| 351 | +
|
| 352 | + Parameters |
| 353 | + ---------- |
| 354 | + a : {dpnp.ndarray, usm_ndarray} |
| 355 | + Input data. The histogram is computed over the flattened array. |
| 356 | + bins : {int, dpnp.ndarray, usm_ndarray, sequence of scalars}, optional |
| 357 | + If `bins` is an int, it defines the number of equal-width bins in the |
| 358 | + given range. |
| 359 | + If `bins` is a sequence, it defines the bin edges, including the |
| 360 | + rightmost edge, allowing for non-uniform bin widths. |
| 361 | + Default: ``10``. |
| 362 | + range : {None, 2-tuple of float}, optional |
| 363 | + The lower and upper range of the bins. If not provided, range is simply |
| 364 | + ``(a.min(), a.max())``. Values outside the range are ignored. The first |
| 365 | + element of the range must be less than or equal to the second. `range` |
| 366 | + affects the automatic bin computation as well. While bin width is |
| 367 | + computed to be optimal based on the actual data within `range`, the bin |
| 368 | + count will fill the entire range including portions containing no data. |
| 369 | + Default: ``None``. |
| 370 | + weights : {None, dpnp.ndarray, usm_ndarray}, optional |
| 371 | + An array of weights, of the same shape as `a`. Each value in `a` only |
| 372 | + contributes its associated weight towards the bin count (instead of 1). |
| 373 | + This is currently not used by any of the bin estimators, but may be in |
| 374 | + the future. |
| 375 | + Default: ``None``. |
| 376 | +
|
| 377 | + Returns |
| 378 | + ------- |
| 379 | + bin_edges : {dpnp.ndarray of floating data type} |
| 380 | + The edges to pass into :obj:`dpnp.histogram`. |
| 381 | +
|
| 382 | + See Also |
| 383 | + -------- |
| 384 | + :obj:`dpnp.histogram` : Compute the histogram of a data set. |
| 385 | +
|
| 386 | + Examples |
| 387 | + -------- |
| 388 | + >>> import dpnp as np |
| 389 | + >>> arr = np.array([0, 0, 0, 1, 2, 3, 3, 4, 5]) |
| 390 | + >>> np.histogram_bin_edges(arr, bins=2) |
| 391 | + array([0. , 2.5, 5. ]) |
| 392 | +
|
| 393 | + For consistency with :obj:`dpnp.histogram`, an array of pre-computed bins |
| 394 | + is passed through unmodified: |
| 395 | +
|
| 396 | + >>> np.histogram_bin_edges(arr, [1, 2]) |
| 397 | + array([1, 2]) |
| 398 | +
|
| 399 | + This function allows one set of bins to be computed, and reused across |
| 400 | + multiple histograms: |
| 401 | +
|
| 402 | + >>> shared_bins = np.histogram_bin_edges(arr, bins=5) |
| 403 | + >>> shared_bins |
| 404 | + array([0., 1., 2., 3., 4., 5.]) |
| 405 | +
|
| 406 | + >>> gid = np.array([0, 1, 1, 0, 1, 1, 0, 1, 1]) |
| 407 | + >>> hist_0, _ = np.histogram(arr[gid == 0], bins=shared_bins) |
| 408 | + >>> hist_1, _ = np.histogram(arr[gid == 1], bins=shared_bins) |
| 409 | +
|
| 410 | + >>> hist_0, hist_1 |
| 411 | + (array([1, 1, 0, 1, 0]), array([2, 0, 1, 1, 2])) |
| 412 | +
|
| 413 | + Which gives more easily comparable results than using separate bins for |
| 414 | + each histogram: |
| 415 | +
|
| 416 | + >>> hist_0, bins_0 = np.histogram(arr[gid == 0], bins=3) |
| 417 | + >>> hist_1, bins_1 = np.histogram(arr[gid == 1], bins=4) |
| 418 | + >>> hist_0, hist_1 |
| 419 | + (array([1, 1, 1]), array([2, 1, 1, 2])) |
| 420 | + >>> bins_0, bins_1 |
| 421 | + (array([0., 1., 2., 3.]), array([0. , 1.25, 2.5 , 3.75, 5. ])) |
| 422 | +
|
| 423 | + """ |
| 424 | + |
| 425 | + a, weights, usm_type = _ravel_check_a_and_weights(a, weights) |
| 426 | + bin_edges, _ = _get_bin_edges(a, bins, range, usm_type) |
| 427 | + return bin_edges |
0 commit comments