Add single-precision (float32) support for numba_dpex_p benchmarks

ZzEeKkAa · ZzEeKkAa · commit 7456b8ef0d6f · 2023-06-06T16:56:27.000-04:00
diff --git a/dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_p.py b/dpbench/benchmarks/black_scholes/black_scholes_numba_dpex_p.py
@@ -10,8 +10,15 @@
 
 @dpjit
 def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
+    dtype = price.dtype
     mr = -rate
-    sig_sig_two = volatility * volatility * 2
+    sig_sig_two = volatility * volatility * dtype.type(2)
+
+    # TODO: drop these pre-cast constants once prange supports dtype
+    # https://github.com/IntelPython/numba-dpex/issues/1063
+    float025 = dtype.type(0.25)
+    float1 = dtype.type(1.0)
+    float05 = dtype.type(0.5)
 
     for i in prange(nopt):
         P = price[i]
@@ -22,14 +29,14 @@ def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
         b = T * mr
 
         z = T * sig_sig_two
-        c = 0.25 * z
-        y = 1.0 / sqrt(z)
+        c = float025 * z
+        y = float1 / sqrt(z)
 
         w1 = (a - b + c) * y
         w2 = (a - b - c) * y
 
-        d1 = 0.5 + 0.5 * erf(w1)
-        d2 = 0.5 + 0.5 * erf(w2)
+        d1 = float05 + float05 * erf(w1)
+        d2 = float05 + float05 * erf(w2)
 
         Se = exp(b) * S
 
diff --git a/dpbench/benchmarks/dbscan/dbscan_numba_dpex_p.py b/dpbench/benchmarks/dbscan/dbscan_numba_dpex_p.py
@@ -53,6 +53,11 @@ def _queue_empty(head, tail):
 def get_neighborhood(n, dim, data, eps, ind_lst, sz_lst):
     block_size = 1
     nblocks = n // block_size + int(n % block_size > 0)
+
+    # TODO: drop this pre-cast constant once prange supports dtype
+    # https://github.com/IntelPython/numba-dpex/issues/1063
+    float0 = data.dtype.type(0.0)
+
     for i in nb.prange(nblocks):
         start = i * block_size
         stop = n if i + 1 == nblocks else start + block_size
@@ -64,7 +69,7 @@ def get_neighborhood(n, dim, data, eps, ind_lst, sz_lst):
             i2 = n if ii + 1 == nblocks1 else i1 + block_size1
             for j in range(start, stop):
                 for k in range(i1, i2):
-                    dist = 0.0
+                    dist = float0
                     for m in range(dim):
                         diff = data[k * dim + m] - data[j * dim + m]
                         dist += diff * diff
diff --git a/dpbench/benchmarks/kmeans/kmeans_numba_dpex_p.py b/dpbench/benchmarks/kmeans/kmeans_numba_dpex_p.py
@@ -10,9 +10,13 @@
 # determine the euclidean distance from the cluster center to each point
 @dpex.dpjit
 def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
+    # TODO: drop this pre-cast constant once prange supports dtype
+    # https://github.com/IntelPython/numba-dpex/issues/1063
+    float1 = arrayC.dtype.type(-1)
+
     # parallel for loop
     for i0 in nb.prange(num_points):
-        minor_distance = -1
+        minor_distance = float1
         for i1 in range(num_centroids):
             dx = arrayP[i0, 0] - arrayC[i1, 0]
             dy = arrayP[i0, 1] - arrayC[i1, 1]
@@ -52,6 +56,13 @@ def updateCentroids(arrayC, arrayCsum, arrayCnumpoint, num_centroids):
         arrayC[i, 1] = arrayCsum[i, 1] / arrayCnumpoint[i]
 
 
+@dpex.dpjit
+def updateCentroids32(arrayC, arrayCsum, arrayCnumpoint, num_centroids):
+    for i in nb.prange(num_centroids):
+        arrayC[i, 0] = arrayCsum[i, 0] / nb.float32(arrayCnumpoint[i])
+        arrayC[i, 1] = arrayCsum[i, 1] / nb.float32(arrayCnumpoint[i])
+
+
 @dpex.dpjit
 def copy_arrayC(arrayC, arrayP, num_centroids):
     for i in nb.prange(num_centroids):
@@ -85,7 +96,12 @@ def kmeans_numba(
             arrayP, arrayPcluster, arrayCsum, arrayCnumpoint
         )
 
-        updateCentroids(arrayC, arrayCsum, arrayCnumpoint, num_centroids)
+        # TODO: drop this float32 dispatch once prange supports dtype
+        # https://github.com/IntelPython/numba-dpex/issues/1063
+        if isinstance(arrayC.dtype.type(0), np.float32):
+            updateCentroids32(arrayC, arrayCsum, arrayCnumpoint, num_centroids)
+        else:
+            updateCentroids(arrayC, arrayCsum, arrayCnumpoint, num_centroids)
 
     return arrayC, arrayCsum, arrayCnumpoint
 
diff --git a/dpbench/benchmarks/knn/knn_numba_dpex_p.py b/dpbench/benchmarks/knn/knn_numba_dpex_p.py
@@ -20,14 +20,18 @@ def knn(  # noqa: C901: TODO: can we simplify logic?
     votes_to_classes,
     data_dim,
 ):
+    # TODO: drop this pre-cast constant once prange supports dtype
+    # https://github.com/IntelPython/numba-dpex/issues/1063
+    float0 = x_train.dtype.type(0.0)
+
     for i in nb.prange(test_size):
         queue_neighbors = np.empty(shape=(k, 2))
 
         for j in range(k):
             x1 = x_train[j]
             x2 = x_test[i]
 
-            distance = 0.0
+            distance = float0
             for jj in range(data_dim):
                 diff = x1[jj] - x2[jj]
                 distance += diff * diff
@@ -54,7 +58,7 @@ def knn(  # noqa: C901: TODO: can we simplify logic?
             x1 = x_train[j]
             x2 = x_test[i]
 
-            distance = 0.0
+            distance = float0
             for jj in range(data_dim):
                 diff = x1[jj] - x2[jj]
                 distance += diff * diff
@@ -84,7 +88,7 @@ def knn(  # noqa: C901: TODO: can we simplify logic?
             v_to_c_i[int(queue_neighbors[j, 1])] += 1
 
         max_ind = 0
-        max_value = 0
+        max_value = float0
 
         for j in range(classes_num):
             if v_to_c_i[j] > max_value:
diff --git a/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_dpex_p.py b/dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_dpex_p.py
@@ -22,11 +22,15 @@ def pairwise_distance(X1, X2, D):
     X2_rows = X2.shape[0]
     X1_cols = X1.shape[1]
 
+    # TODO: drop this pre-cast constant once prange supports dtype
+    # https://github.com/IntelPython/numba-dpex/issues/1063
+    float0 = X1.dtype.type(0.0)
+
     # Outermost parallel loop over the matrix X1
     for i in nb.prange(X1_rows):
         # Loop over the matrix X2
         for j in range(X2_rows):
-            d = 0.0
+            d = float0
             # Compute exclidean distance
             for k in range(X1_cols):
                 tmp = X1[i, k] - X2[j, k]
diff --git a/dpbench/benchmarks/rambo/rambo_numba_dpex_p.py b/dpbench/benchmarks/rambo/rambo_numba_dpex_p.py
@@ -9,11 +9,17 @@
 
 @dpjit
 def rambo(nevts, nout, C1, F1, Q1, output):
+    # TODO: drop these pre-cast constants once prange supports dtype
+    # https://github.com/IntelPython/numba-dpex/issues/1063
+    float1 = C1.dtype.type(1.0)
+    float2 = C1.dtype.type(2.0)
+    floatPi = C1.dtype.type(np.pi)
+
     for i in nb.prange(nevts):
         for j in range(nout):
-            C = 2.0 * C1[i, j] - 1.0
-            S = np.sqrt(1 - np.square(C))
-            F = 2.0 * np.pi * F1[i, j]
+            C = float2 * C1[i, j] - float1
+            S = np.sqrt(float1 - np.square(C))
+            F = float2 * floatPi * F1[i, j]
             Q = -np.log(Q1[i, j])
 
             output[i, j, 0] = Q