Commit 374d153

Merge pull request #248 from IntelPython/feature/add_numba_dpex_p_implementations
Add numba_dpex_p implementation for poly/np-bench
2 parents 8866480 + 5f39673 commit 374d153

File tree: 19 files changed, +723 -0 lines changed

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
# SPDX-FileCopyrightText: 2014 Jérôme Kieffer et al.
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

"""
Jérôme Kieffer and Giannis Ashiotis. Pyfai: a python library for
high performance azimuthal integration on gpu, 2014. In Proceedings of the
7th European Conference on Python in Science (EuroSciPy 2014).
"""

import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def get_bin_edges_prange(a, bins):
    bin_edges = np.zeros((bins + 1,), dtype=np.float64)
    a_min = a.min()
    a_max = a.max()
    delta = (a_max - a_min) / bins
    for i in nb.prange(bin_edges.shape[0]):
        bin_edges[i] = a_min + i * delta

    bin_edges[-1] = a_max  # Avoid roundoff error on last point
    return bin_edges


@dpjit
def compute_bin(x, bin_edges):
    # assuming uniform bins for now
    n = bin_edges.shape[0] - 1
    a_min = bin_edges[0]
    a_max = bin_edges[-1]

    # special case to mirror NumPy behavior for last bin
    if x == a_max:
        return n - 1  # a_max always in last bin

    return int(n * (x - a_min) / (a_max - a_min))


@dpjit
def histogram_prange(a, bins, weights):
    hist = np.zeros((bins,), dtype=a.dtype)
    bin_edges = get_bin_edges_prange(a, bins)

    for i in nb.prange(a.shape[0]):
        bin = compute_bin(a[i], bin_edges)
        hist[bin] += weights[i]

    return hist, bin_edges


@dpjit
def azimint_hist(data, radius, npt):
    histu = np.histogram(radius, npt)[0]
    # histw = np.histogram(radius, npt, weights=data)[0]
    histw = histogram_prange(radius, npt, weights=data)[0]
    return histw / histu
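
A minimal smoke-test sketch for this file (not part of the PR; the sizes and random inputs below are illustrative assumptions, and a default SYCL device is assumed to be available):

import dpnp as np

N, npt = 40000, 100
data = np.random.rand(N)    # per-pixel intensities
radius = np.random.rand(N)  # radial coordinate of each pixel
res = azimint_hist(data, radius, npt)
print(res.shape)  # (npt,): mean intensity per radial bin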
Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
# SPDX-FileCopyrightText: 2014 Jérôme Kieffer et al.
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

"""
Jérôme Kieffer and Giannis Ashiotis. Pyfai: a python library for
high performance azimuthal integration on gpu, 2014. In Proceedings of the
7th European Conference on Python in Science (EuroSciPy 2014).
"""

import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def azimint_naive(data, radius, npt):
    rmax = radius.max()
    res = np.zeros(npt, dtype=np.float64)
    for i in nb.prange(npt):
        r1 = rmax * i / npt
        r2 = rmax * (i + 1) / npt
        mask_r12 = np.logical_and((r1 <= radius), (radius < r2))
        values_r12 = data[mask_r12]
        res[i] = values_r12.mean()
    return res
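
This variant computes the per-bin mean directly from a boolean mask rather than via a weighted histogram; note its bins span [0, rmax] while the histogram version's span [radius.min(), radius.max()], so the two only coincide when radius.min() is 0. A hedged sketch with the same illustrative inputs as above:

import dpnp as np

N, npt = 40000, 100
data = np.random.rand(N)
radius = np.random.rand(N)
res = azimint_naive(data, radius, npt)  # shape (npt,)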
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def contour_integral(NR, NM, slab_per_bc, Ham, int_pts, Y):
    P0 = np.zeros((NR, NM), dtype=np.complex128)
    P1 = np.zeros((NR, NM), dtype=np.complex128)
    # for z in int_pts:
    for i in nb.prange(len(int_pts)):
        z = int_pts[i]
        Tz = np.zeros((NR, NR), dtype=np.complex128)
        for n in nb.prange(slab_per_bc + 1):
            zz = np.power(z, slab_per_bc / 2 - n)
            Tz += zz * Ham[n]
        if NR == NM:
            X = np.linalg.inv(Tz)
        else:
            X = np.linalg.solve(Tz, Y)
        if abs(z) < 1.0:
            X = -X
        P0 += X
        P1 += z * X

    return P0, P1
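
A hedged driver sketch for the kernel above (all sizes, the random Hamiltonian blocks, and the unit-circle quadrature points are assumptions for illustration, not from the PR):

import dpnp as np

NR = NM = 64
slab_per_bc = 2
# Random complex Hamiltonian blocks, one per slab
Ham = np.random.rand(slab_per_bc + 1, NR, NR) + 1j * np.random.rand(slab_per_bc + 1, NR, NR)
# Integration points on the unit circle
int_pts = np.exp(2j * np.pi * np.arange(32) / 32)
Y = np.random.rand(NR, NM) + 1j * np.random.rand(NR, NM)
P0, P1 = contour_integral(NR, NM, slab_per_bc, Ham, int_pts, Y)  # both (NR, NM)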
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause


import dpnp as np
import numba as nb
from numba_dpex import dpjit


# Deep learning convolutional operator (stride = 1)
@dpjit
def conv2d(input, weights):
    K = weights.shape[0]  # Assuming square kernel
    N = input.shape[0]
    H_out = input.shape[1] - K + 1
    W_out = input.shape[2] - K + 1
    C_in = input.shape[3]
    C_out = weights.shape[3]
    output = np.empty((N, H_out, W_out, C_out), dtype=np.float32)

    # Loop structure adapted from https://github.com/SkalskiP/ILearnDeepLearning.py/blob/ba0b5ba589d4e656141995e8d1a06d44db6ce58d/01_mysteries_of_neural_networks/06_numpy_convolutional_neural_net/src/layers/convolutional.py#L88
    for i in nb.prange(H_out):
        for j in nb.prange(W_out):
            # output[:, i, j, :] = np.sum(
            #     input[:, i:i + K, j:j + K, :, np.newaxis] *
            #     weights[np.newaxis, :, :, :],
            #     axis=(1, 2, 3),
            # )
            # Reshape supported only on contiguous arrays
            inp = input[:, i : i + K, j : j + K, :].copy()
            # Tuple of ints not supported in axis keyword
            output[:, i, j, :] = np.sum(
                np.sum(
                    np.sum(
                        np.reshape(inp, (N, K, K, C_in, 1))
                        * np.reshape(weights, (1, K, K, C_in, C_out)),
                        axis=1,
                    ),
                    axis=1,
                ),
                axis=1,
            )

    return output


@dpjit
def conv2d_bias(input, weights, bias):
    return conv2d(input, weights) + bias
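
The operator expects NHWC input layout with a K x K x C_in x C_out filter bank; a minimal smoke test under those assumptions (the concrete sizes are illustrative, not from the PR):

import dpnp as np

N, H, W, C_in, C_out, K = 4, 16, 16, 3, 8, 5
input = np.random.rand(N, H, W, C_in).astype(np.float32)
weights = np.random.rand(K, K, C_in, C_out).astype(np.float32)
bias = np.random.rand(C_out).astype(np.float32)
out = conv2d_bias(input, weights, bias)
print(out.shape)  # (4, 12, 12, 8): valid convolution, H_out = H - K + 1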
Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause


import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def relu(x):
    return np.maximum(x, 0)


# Deep learning convolutional operator (stride = 1)
@dpjit
def conv2d(input, weights):
    K = weights.shape[0]  # Assuming square kernel
    N = input.shape[0]
    H_out = input.shape[1] - K + 1
    W_out = input.shape[2] - K + 1
    C_in = input.shape[3]
    C_out = weights.shape[3]
    output = np.empty((N, H_out, W_out, C_out), dtype=np.float32)

    # Loop structure adapted from https://github.com/SkalskiP/ILearnDeepLearning.py/blob/ba0b5ba589d4e656141995e8d1a06d44db6ce58d/01_mysteries_of_neural_networks/06_numpy_convolutional_neural_net/src/layers/convolutional.py#L88
    for i in nb.prange(H_out):
        for j in nb.prange(W_out):
            # output[:, i, j, :] = np.sum(
            #     input[:, i:i + K, j:j + K, :, np.newaxis] *
            #     weights[np.newaxis, :, :, :],
            #     axis=(1, 2, 3),
            # )
            # Reshape supported only on contiguous arrays
            inp = input[:, i : i + K, j : j + K, :].copy()
            # Tuple of ints not supported in axis keyword
            output[:, i, j, :] = np.sum(
                np.sum(
                    np.sum(
                        np.reshape(inp, (N, K, K, C_in, 1))
                        * np.reshape(weights, (1, K, K, C_in, C_out)),
                        axis=1,
                    ),
                    axis=1,
                ),
                axis=1,
            )

    return output


# 2x2 maxpool operator, as used in LeNet-5
@dpjit
def maxpool2d(x):
    # output = np.empty(
    #     [x.shape[0], x.shape[1] // 2, x.shape[2] // 2, x.shape[3]],
    #     dtype=x.dtype)
    output = np.empty(
        (x.shape[0], x.shape[1] // 2, x.shape[2] // 2, x.shape[3]),
        dtype=x.dtype,
    )
    for i in nb.prange(x.shape[1] // 2):
        for j in nb.prange(x.shape[2] // 2):
            # output[:, i, j, :] = np.max(x[:, 2 * i:2 * i + 2,
            #                               2 * j:2 * j + 2, :],
            #                             axis=(1, 2))
            for k in nb.prange(x.shape[0]):
                for l in nb.prange(x.shape[3]):  # noqa: E741 math variable
                    output[k, i, j, l] = np.max(
                        x[k, 2 * i : 2 * i + 2, 2 * j : 2 * j + 2, l]
                    )
    return output


# LeNet-5 Convolutional Neural Network (inference mode)
@dpjit
def lenet5(
    input,
    conv1,
    conv1bias,
    conv2,
    conv2bias,
    fc1w,
    fc1b,
    fc2w,
    fc2b,
    fc3w,
    fc3b,
    N,
    C_before_fc1,
):
    x = relu(conv2d(input, conv1) + conv1bias)
    x = maxpool2d(x)
    x = relu(conv2d(x, conv2) + conv2bias)
    x = maxpool2d(x)
    x = np.reshape(x, (N, C_before_fc1))
    x = relu(x @ fc1w + fc1b)
    x = relu(x @ fc2w + fc2b)
    return x @ fc3w + fc3b
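
A hedged end-to-end sketch with classic LeNet-5 sizes (28x28 single-channel NHWC input, 5x5 kernels, 6/16 feature maps, 120/84/10 fully connected units); these concrete shapes are assumptions for illustration, not from the PR:

import dpnp as np

N = 4
input = np.random.rand(N, 28, 28, 1).astype(np.float32)
conv1 = np.random.rand(5, 5, 1, 6).astype(np.float32)
conv1bias = np.random.rand(6).astype(np.float32)
conv2 = np.random.rand(5, 5, 6, 16).astype(np.float32)
conv2bias = np.random.rand(16).astype(np.float32)
# Spatial size: 28 -> conv 24 -> pool 12 -> conv 8 -> pool 4, with 16 channels
C_before_fc1 = 4 * 4 * 16
fc1w = np.random.rand(C_before_fc1, 120).astype(np.float32)
fc1b = np.random.rand(120).astype(np.float32)
fc2w = np.random.rand(120, 84).astype(np.float32)
fc2b = np.random.rand(84).astype(np.float32)
fc3w = np.random.rand(84, 10).astype(np.float32)
fc3b = np.random.rand(10).astype(np.float32)
out = lenet5(input, conv1, conv1bias, conv2, conv2bias,
             fc1w, fc1b, fc2w, fc2b, fc3w, fc3b, N, C_before_fc1)
print(out.shape)  # (4, 10): class scores per batch element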
Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def relu(x):
    return np.maximum(x, 0)


# Numerically-stable version of softmax
@dpjit
def softmax(x):
    new_shape = (x.shape[0], 1)
    # tmp_max = np.max(x, axis=-1, keepdims=True)
    tmp_max = np.empty(new_shape, dtype=x.dtype)
    for i in nb.prange(x.shape[0]):
        tmp_max[i, 0] = np.max(x[i])
    tmp_out = np.exp(x - tmp_max)
    # tmp_sum = np.sum(tmp_out, axis=-1, keepdims=True)
    tmp_sum = np.reshape(np.sum(tmp_out, axis=-1), new_shape)
    return tmp_out / tmp_sum


# 3-layer MLP
@dpjit
def mlp(input, w1, b1, w2, b2, w3, b3):
    x = relu(input @ w1 + b1)
    x = relu(x @ w2 + b2)
    x = softmax(x @ w3 + b3)  # Softmax call can be omitted if necessary
    return x
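
A smoke-test sketch (the layer widths are illustrative assumptions); since the last layer is a softmax, each output row should sum to 1:

import dpnp as np

N, C_in, H1, H2, C_out = 8, 10, 32, 16, 4
input = np.random.rand(N, C_in)
w1, b1 = np.random.rand(C_in, H1), np.random.rand(H1)
w2, b2 = np.random.rand(H1, H2), np.random.rand(H2)
w3, b3 = np.random.rand(H2, C_out), np.random.rand(C_out)
out = mlp(input, w1, b1, w2, b2, w3, b3)
print(np.sum(out, axis=-1))  # ~1.0 per row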
Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def relu(x):
    return np.maximum(x, 0)


# Deep learning convolutional operator (stride = 1)
@dpjit
def conv2d(input, weights):
    K = weights.shape[0]  # Assuming square kernel
    N = input.shape[0]
    H_out = input.shape[1] - K + 1
    W_out = input.shape[2] - K + 1
    C_in = input.shape[3]
    C_out = weights.shape[3]
    output = np.empty((N, H_out, W_out, C_out), dtype=np.float32)

    # Loop structure adapted from https://github.com/SkalskiP/ILearnDeepLearning.py/blob/ba0b5ba589d4e656141995e8d1a06d44db6ce58d/01_mysteries_of_neural_networks/06_numpy_convolutional_neural_net/src/layers/convolutional.py#L88
    for i in nb.prange(H_out):
        for j in nb.prange(W_out):
            # output[:, i, j, :] = np.sum(
            #     input[:, i:i + K, j:j + K, :, np.newaxis] *
            #     weights[np.newaxis, :, :, :],
            #     axis=(1, 2, 3),
            # )
            # Reshape supported only on contiguous arrays
            inp = input[:, i : i + K, j : j + K, :].copy()
            # Tuple of ints not supported in axis keyword
            output[:, i, j, :] = np.sum(
                np.sum(
                    np.sum(
                        np.reshape(inp, (N, K, K, C_in, 1))
                        * np.reshape(weights, (1, K, K, C_in, C_out)),
                        axis=1,
                    ),
                    axis=1,
                ),
                axis=1,
            )

    return output


# Batch normalization operator, as used in ResNet
@dpjit
def batchnorm2d(x, eps=1e-5):
    # mean = np.mean(x, axis=0, keepdims=True)
    mean = np.empty(x.shape, dtype=x.dtype)
    mean[:] = np.sum(x, axis=0) / x.shape[0]
    # std = np.std(x, axis=0, keepdims=True)
    std = np.empty(x.shape, dtype=x.dtype)
    std[:] = np.sqrt(np.sum((x - mean) ** 2, axis=0) / x.shape[0])
    return (x - mean) / np.sqrt(std + eps)


# Bottleneck residual block (after initial convolution, without downsampling)
# in the ResNet-50 CNN (inference)
@dpjit
def resnet_basicblock(input, conv1, conv2, conv3):
    # Pad output of first convolution for second convolution
    padded = np.zeros(
        (input.shape[0], input.shape[1] + 2, input.shape[2] + 2, conv1.shape[3])
    )

    padded[:, 1:-1, 1:-1, :] = conv2d(input, conv1)
    x = batchnorm2d(padded)
    x = relu(x)

    x = conv2d(x, conv2)
    x = batchnorm2d(x)
    x = relu(x)
    x = conv2d(x, conv3)
    x = batchnorm2d(x)
    return relu(x + input)
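
A hedged sketch of driving the block (the shapes below are assumptions following the 1x1 / 3x3 / 1x1 bottleneck pattern; channel counts are illustrative). The zero-padding before the 3x3 convolution keeps the spatial size fixed, so the output shape matches the input, as the residual addition requires:

import dpnp as np

N, H, W, C, C_mid = 2, 14, 14, 8, 4
input = np.random.rand(N, H, W, C).astype(np.float32)
conv1 = np.random.rand(1, 1, C, C_mid).astype(np.float32)      # 1x1 reduce
conv2 = np.random.rand(3, 3, C_mid, C_mid).astype(np.float32)  # 3x3
conv3 = np.random.rand(1, 1, C_mid, C).astype(np.float32)      # 1x1 expand
out = resnet_basicblock(input, conv1, conv2, conv3)
print(out.shape)  # (2, 14, 14, 8), same as input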
