Add numba-mlir benchmarks

Hardcode84 · Hardcode84 · commit 34ea69e11b1a · 2023-05-01T22:09:55.000+02:00
diff --git a/.github/workflows/build_and_run.yml b/.github/workflows/build_and_run.yml
@@ -47,7 +47,7 @@ jobs:
           conda install numpy numba cython cmake ninja scikit-build pandas
           conda install scipy scikit-learn pybind11 tomli
           conda install -c pkgs/main libgcc-ng">=11.2.0" libstdcxx-ng">=11.2.0" libgomp">=11.2.0"
-          conda install -c dppy/label/dev -c intel -c main dpctl numba-dpex dpnp
+          conda install -c dppy/label/dev -c intel -c main dpctl numba-dpex dpnp numba-mlir
           pip install alembic
           conda list
 
@@ -61,6 +61,8 @@ jobs:
         run: |
           export OCL_ICD_FILENAMES=libintelocl.so
 
+          export NUMBA_MLIR_GPU_RUNTIME=sycl
+
           # Turn off numba-dpex autofall back
           export NUMBA_DPEX_FALLBACK_ON_CPU=0
           # Make sure numba-dpex is using native atomics in github CI
diff --git a/README.md b/README.md
@@ -74,3 +74,26 @@ SPDX-License-Identifier: Apache-2.0
     ```bash
     $ python -c "import dpbench; dpbench.run_benchmark(\"black_scholes\", "<absolute path to json file>")"
     ```
+
+## Running numba-mlir benchmarks
+1. Setting up conda environment and installing dependencies:
+
+    Use the same instructions as for the usual dpbench setup, but do not install numba-dpex.
+
+    Install the latest `numba-mlir` dev package:
+
+        $ conda install numba-mlir -c dppy/label/dev -c intel
+
+2. Build and run DPBench
+
+    Use the same commands to set up and run dpbench:
+
+        $ python -c "import dpbench; dpbench.run_benchmark(\"black_scholes\")" 2> /dev/null
+
+    or, to run a specific version:
+
+        $ python -c "import dpbench; dpbench.run_benchmark(\"black_scholes\",implementation_postfix=\"numba_mlir_k\")" 2> /dev/null
+
+    to run all `numba-mlir` benchmarks:
+
+        $ python -c "import dpbench; dpbench.run_benchmarks(implementations=[\"numba_mlir_n\",\"numba_mlir_p\",\"numba_mlir_k\"])" 2> /dev/null
diff --git a/dpbench/benchmarks/black_scholes/black_scholes_numba_mlir_k.py b/dpbench/benchmarks/black_scholes/black_scholes_numba_mlir_k.py
@@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from math import erf, exp, log, sqrt
+
+import numba_mlir.kernel as nb
+
+
+@nb.kernel
+def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put):
+    """Evaluate the Black-Scholes call and put price for one element.
+
+    The element index ``i`` is ``nb.get_global_id(0)``. The kernel reads
+    ``price[i]``, ``strike[i]`` and ``t[i]`` and writes ``call[i]`` and
+    ``put[i]``. ``rate`` and ``volatility`` are only used in scalar
+    arithmetic here, and ``nopt`` is accepted but not read in the body.
+    """
+    mr = -rate
+    sig_sig_two = volatility * volatility * 2
+
+    i = nb.get_global_id(0)
+
+    P = price[i]
+    S = strike[i]
+    T = t[i]
+
+    a = log(P / S)
+    b = T * mr
+
+    z = T * sig_sig_two
+    c = 0.25 * z
+    y = 1.0 / sqrt(z)
+
+    # w1/w2 are the usual d1/d2 terms already divided by sqrt(2), so that
+    # 0.5 + 0.5 * erf(w) below is the standard normal CDF of d1/d2.
+    w1 = (a - b + c) * y
+    w2 = (a - b - c) * y
+
+    d1 = 0.5 + 0.5 * erf(w1)
+    d2 = 0.5 + 0.5 * erf(w2)
+
+    # Strike scaled by exp(-rate * T).
+    Se = exp(b) * S
+
+    r = P * d1 - Se * d2
+    call[i] = r
+    # The put price is derived from the call price instead of recomputed.
+    put[i] = r - P + Se
+
+
+def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
+    """Launch ``_black_scholes_kernel`` with all arguments passed through.
+
+    The kernel is indexed with ``[nopt, nb.DEFAULT_LOCAL_SIZE]``
+    (presumably ``nopt`` work items with the library's default local size;
+    TODO confirm against the numba-mlir kernel API). Results land in
+    ``call`` and ``put``; nothing is returned.
+    """
+    _black_scholes_kernel[nopt, nb.DEFAULT_LOCAL_SIZE](
+        nopt, price, strike, t, rate, volatility, call, put
+    )
diff --git a/dpbench/benchmarks/black_scholes/black_scholes_numba_mlir_n.py b/dpbench/benchmarks/black_scholes/black_scholes_numba_mlir_n.py
@@ -0,0 +1,46 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from math import erf
+
+import numba_mlir as nb
+from numpy import exp, log, sqrt
+
+
+@nb.vectorize(nopython=True)
+def _nberf(x):
+    """Return ``math.erf(x)``.
+
+    Wrapped with ``nb.vectorize`` so that ``_black_scholes`` can call it on
+    whole array expressions.
+    """
+    return erf(x)
+
+
+@nb.njit(parallel=True, fastmath=True)
+def _black_scholes(price, strike, t, rate, volatility, call, put):
+    """Evaluate the Black-Scholes call and put prices with array expressions.
+
+    ``price``, ``strike`` and ``t`` are used whole (no explicit indexing).
+    The results are stored through ``call[:]`` and ``put[:]``; nothing is
+    returned.
+    """
+    mr = -rate
+    sig_sig_two = volatility * volatility * 2
+
+    P = price
+    S = strike
+    T = t
+
+    a = log(P / S)
+    b = T * mr
+
+    z = T * sig_sig_two
+    c = 0.25 * z
+    y = 1.0 / sqrt(z)
+
+    # w1/w2 are the usual d1/d2 terms already divided by sqrt(2), so that
+    # 0.5 + 0.5 * erf(w) below is the standard normal CDF of d1/d2.
+    w1 = (a - b + c) * y
+    w2 = (a - b - c) * y
+
+    d1 = 0.5 + 0.5 * _nberf(w1)
+    d2 = 0.5 + 0.5 * _nberf(w2)
+
+    # Strike scaled by exp(-rate * T).
+    Se = exp(b) * S
+
+    r = P * d1 - Se * d2
+    call[:] = r  # temporary `r` is necessary for faster `put` computation
+    put[:] = r - P + Se
+
+
+def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
+    """Run the compiled ``_black_scholes``; results go to ``call``/``put``.
+
+    ``nopt`` is part of the signature but is not forwarded.
+    """
+    _black_scholes(price, strike, t, rate, volatility, call, put)
diff --git a/dpbench/benchmarks/black_scholes/black_scholes_numba_mlir_p.py b/dpbench/benchmarks/black_scholes/black_scholes_numba_mlir_p.py
@@ -0,0 +1,43 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from math import erf, exp, log, sqrt
+
+import numba
+import numba_mlir as nb
+
+
+# blackscholes implemented as a parallel loop using numba.prange
+@nb.njit(parallel=True, fastmath=True)
+def _black_scholes(nopt, price, strike, t, rate, volatility, call, put):
+    """Evaluate the Black-Scholes call and put price for ``nopt`` elements.
+
+    Iterates ``i`` over ``numba.prange(nopt)``, reading ``price[i]``,
+    ``strike[i]`` and ``t[i]`` and writing ``call[i]`` and ``put[i]``.
+    ``rate`` and ``volatility`` are only used in scalar arithmetic here.
+    """
+    mr = -rate
+    sig_sig_two = volatility * volatility * 2
+
+    for i in numba.prange(nopt):
+        P = price[i]
+        S = strike[i]
+        T = t[i]
+
+        a = log(P / S)
+        b = T * mr
+
+        z = T * sig_sig_two
+        c = 0.25 * z
+        y = 1.0 / sqrt(z)
+
+        # w1/w2 are the usual d1/d2 terms already divided by sqrt(2), so
+        # 0.5 + 0.5 * erf(w) below is the standard normal CDF of d1/d2.
+        w1 = (a - b + c) * y
+        w2 = (a - b - c) * y
+
+        d1 = 0.5 + 0.5 * erf(w1)
+        d2 = 0.5 + 0.5 * erf(w2)
+
+        # Strike scaled by exp(-rate * T).
+        Se = exp(b) * S
+
+        r = P * d1 - Se * d2
+        call[i] = r
+        # The put price is derived from the call price, not recomputed.
+        put[i] = r - P + Se
+
+
+def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
+    """Run the compiled ``_black_scholes`` with all arguments unchanged.
+
+    Results are written into ``call`` and ``put``; nothing is returned.
+    """
+    _black_scholes(nopt, price, strike, t, rate, volatility, call, put)
diff --git a/dpbench/benchmarks/l2_norm/l2_norm_numba_mlir_k.py b/dpbench/benchmarks/l2_norm/l2_norm_numba_mlir_k.py
@@ -0,0 +1,20 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import numba_mlir.kernel as nb
+import numpy as np
+
+
+@nb.kernel
+def l2_norm_kernel(a, d):
+    """Store the square root of the sum of squares of ``a[i, :]`` in ``d[i]``.
+
+    ``i`` is this work item's ``nb.get_global_id(0)``.
+    """
+    i = nb.get_global_id(0)
+    # NOTE(review): despite its name, ``a_rows`` is ``a.shape[1]``, i.e. the
+    # length of the second axis that the loop below walks.
+    a_rows = a.shape[1]
+    # ``d[i]`` doubles as the running accumulator, so it is reset first.
+    d[i] = 0.0
+    for k in range(a_rows):
+        d[i] += a[i, k] * a[i, k]
+    d[i] = np.sqrt(d[i])
+
+
+def l2_norm(a, d):
+    """Launch ``l2_norm_kernel`` on ``a`` and ``d``.
+
+    The kernel is indexed with ``[a.shape[0], nb.DEFAULT_LOCAL_SIZE]``
+    (presumably one work item per row of ``a``; TODO confirm against the
+    numba-mlir kernel API). Results land in ``d``; nothing is returned.
+    """
+    l2_norm_kernel[a.shape[0], nb.DEFAULT_LOCAL_SIZE](a, d)
diff --git a/dpbench/benchmarks/l2_norm/l2_norm_numba_mlir_n.py b/dpbench/benchmarks/l2_norm/l2_norm_numba_mlir_n.py
@@ -0,0 +1,17 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import numba_mlir as nb
+import numpy as np
+
+
+@nb.njit(parallel=True, fastmath=True)
+def _l2_norm(a, d):
+    sq = np.square(a)
+    sum = sq.sum(axis=1)
+    d[:] = np.sqrt(sum)
+
+
+def l2_norm(a, d):
+    """Run the compiled ``_l2_norm``; the result is written into ``d``."""
+    _l2_norm(a, d)
diff --git a/dpbench/benchmarks/l2_norm/l2_norm_numba_mlir_p.py b/dpbench/benchmarks/l2_norm/l2_norm_numba_mlir_p.py
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import numba
+import numba_mlir as nb
+import numpy as np
+
+
+@nb.njit(parallel=True, fastmath=True)
+def _l2_norm(a, d):
+    for i in numba.prange(a.shape[0]):
+        for k in range(a.shape[1]):
+            d[i] += np.square(a[i, k])
+        d[i] = np.sqrt(d[i])
+
+
+def l2_norm(a, d):
+    """Run the compiled ``_l2_norm``; the result is written into ``d``."""
+    _l2_norm(a, d)
diff --git a/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_mlir_k.py b/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_mlir_k.py
@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import numba_mlir.kernel as nb
+import numpy as np
+
+
+@nb.kernel
+def _pairwise_distance_kernel(X1, X2, D):
+    """Fill row ``i`` of ``D`` with distances from ``X1[i]`` to each ``X2[j]``.
+
+    ``i`` is this work item's ``nb.get_global_id(0)``. For every ``j`` the
+    kernel stores ``sqrt(sum_k (X1[i, k] - X2[j, k]) ** 2)`` in ``D[i, j]``.
+    """
+    i = nb.get_global_id(0)
+
+    X2_rows = X2.shape[0]
+    # The inner loop bound comes from X1 only; X2 is indexed with the same k.
+    X1_cols = X1.shape[1]
+    for j in range(X2_rows):
+        d = 0.0
+        for k in range(X1_cols):
+            tmp = X1[i, k] - X2[j, k]
+            d += tmp * tmp
+        D[i, j] = np.sqrt(d)
+
+
+def pairwise_distance(X1, X2, D):
+    """Launch ``_pairwise_distance_kernel`` on ``X1``, ``X2`` and ``D``.
+
+    The kernel is indexed with ``[X1.shape[0], nb.DEFAULT_LOCAL_SIZE]``
+    (presumably one work item per row of ``X1``; TODO confirm against the
+    numba-mlir kernel API). Results land in ``D``; nothing is returned.
+    """
+    _pairwise_distance_kernel[X1.shape[0], nb.DEFAULT_LOCAL_SIZE](X1, X2, D)
diff --git a/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_mlir_n.py b/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_mlir_n.py
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import numba_mlir as nb
+import numpy as np
+
+
+@nb.njit(parallel=True, fastmath=True)
+def _pairwise_distance(X1, X2, D):
+    """Fill ``D`` with distances between rows of ``X1`` and rows of ``X2``.
+
+    Uses the expansion ``|u - v|^2 = |u|^2 - 2 * u.v + |v|^2``: ``D`` first
+    receives ``X1 @ X2.T``, is scaled by -2, gets both squared-norm vectors
+    added, and is finally square-rooted in place. Nothing is returned.
+    """
+    # Per-row sums of squares of each input.
+    x1 = np.sum(np.square(X1), axis=1)
+    x2 = np.sum(np.square(X2), axis=1)
+    # The third argument is the output array, so the product lands in D.
+    np.dot(X1, X2.T, D)
+    # D *= -2 TODO: in-place ops don't work as intended
+    D[:] = D * -2
+    # Reshape x1 into a column so it is added per row of D; x2 is added as
+    # a row.
+    x3 = x1.reshape(x1.size, 1)
+    np.add(D, x3, D)
+    np.add(D, x2, D)
+    # NOTE(review): rounding in the expansion could leave a slightly
+    # negative entry here, which sqrt would turn into NaN - confirm whether
+    # a clamp at 0 is needed for the benchmark inputs.
+    np.sqrt(D, D)
+
+
+def pairwise_distance(X1, X2, D):
+    """Run the compiled ``_pairwise_distance``; the result is written to ``D``."""
+    _pairwise_distance(X1, X2, D)
diff --git a/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_mlir_p.py b/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_mlir_p.py
@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import numba
+import numba_mlir as nb
+import numpy as np
+
+
+@nb.njit(parallel=True, fastmath=True)
+def _pairwise_distance(X1, X2, D):
+    """Naive pairwise distance implementation.
+
+    For every row ``i`` of ``X1`` and every row ``j`` of ``X2``, compute the
+    Euclidean distance between the two rows and store it in ``D[i, j]``.
+    Nothing is returned; ``D`` is filled in place.
+
+    Args:
+        X1 : First set of points, one point per row
+        X2 : Second set of points, one point per row
+        D  : Output distance matrix, indexed as ``D[i, j]``
+    """
+    # Size of inputs
+    X1_rows = X1.shape[0]
+    X2_rows = X2.shape[0]
+    X1_cols = X1.shape[1]
+
+    # Outermost parallel loop over the matrix X1
+    for i in numba.prange(X1_rows):
+        # Loop over the matrix X2
+        for j in range(X2_rows):
+            d = 0.0
+            # Compute Euclidean distance
+            for k in range(X1_cols):
+                tmp = X1[i, k] - X2[j, k]
+                d += tmp * tmp
+            # Write computed distance to distance matrix
+            D[i, j] = np.sqrt(d)
+
+
+def pairwise_distance(X1, X2, D):
+    """Run the compiled ``_pairwise_distance``; the result is written to ``D``."""
+    _pairwise_distance(X1, X2, D)
diff --git a/dpbench/benchmarks/pairwise_distance/pairwise_distance_sycl_native_ext/__init__.py b/dpbench/benchmarks/pairwise_distance/pairwise_distance_sycl_native_ext/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2022 - 2023 Intel Corporation
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/dpbench/configs/framework_info/numba_mlir.toml b/dpbench/configs/framework_info/numba_mlir.toml
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+[framework]
+simple_name = "numba_mlir"
+full_name = "numba_mlir"
+prefix = "mlir"
+class = "NumbaMlirFramework"
+sycl_device = "opencl:cpu:0"
+
+[[framework.postfixes]]
+impl_postfix = "numba_mlir_k"
+description = "Numba-mlir kernel"
+
+[[framework.postfixes]]
+impl_postfix = "numba_mlir_p"
+description = "Numba-mlir prange"
+
+[[framework.postfixes]]
+impl_postfix = "numba_mlir_n"
+description = "Numba-mlir NumPy API"
diff --git a/dpbench/infrastructure/__init__.py b/dpbench/infrastructure/__init__.py
@@ -18,6 +18,7 @@
     Framework,
     NumbaDpexFramework,
     NumbaFramework,
+    NumbaMlirFramework,
 )
 from .reporter import (
     generate_impl_summary_report,
@@ -35,6 +36,7 @@
     "Framework",
     "NumbaFramework",
     "NumbaDpexFramework",
+    "NumbaMlirFramework",
     "DpnpFramework",
     "DpcppFramework",
     "create_connection",
diff --git a/dpbench/infrastructure/frameworks/__init__.py b/dpbench/infrastructure/frameworks/__init__.py
@@ -8,12 +8,14 @@
 from .framework import Framework
 from .numba_dpex_framework import NumbaDpexFramework
 from .numba_framework import NumbaFramework
+from .numba_mlir_framework import NumbaMlirFramework
 
 __all__ = [
     "Framework",
     "NumbaFramework",
     "NumbaDpexFramework",
     "DpnpFramework",
     "DpcppFramework",
+    "NumbaMlirFramework",
     "build_framework_map",
 ]
diff --git a/dpbench/infrastructure/frameworks/fabric.py b/dpbench/infrastructure/frameworks/fabric.py
diff --git a/dpbench/infrastructure/frameworks/numba_mlir_framework.py b/dpbench/infrastructure/frameworks/numba_mlir_framework.py