Skip to content

Commit 0377c9e

Browse files
author
Diptorup Deb
authored
Merge pull request #263 from Hardcode84/f64-fixes
numba-mlir f32 devices support
2 parents 6ca2075 + 742369e commit 0377c9e

12 files changed

+21
-21
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,11 @@ SPDX-License-Identifier: Apache-2.0
9898

9999

100100
```bash
101-
$ dpbench -b black_scholes -i numba_mlir_k s run
101+
$ dpbench -b black_scholes -i numba_mlir_k run
102102
```
103103

104104
To run `black_scholes` with all three `numba-mlir` implementations:
105105

106106
```bash
107-
$ dpbench -b black_scholes -i numba_mlir_k,numba_mlir_n,numba_mlir_p s run
107+
$ dpbench -b black_scholes -i numba_mlir_k,numba_mlir_n,numba_mlir_p run
108108
```

dpbench/benchmarks/black_scholes/black_scholes_numba_mlir_k.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numba_mlir.kernel as nb
88

99

10-
@nb.kernel
10+
@nb.kernel(gpu_fp64_truncate="auto")
1111
def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put):
1212
mr = -rate
1313
sig_sig_two = volatility * volatility * 2

dpbench/benchmarks/black_scholes/black_scholes_numba_mlir_n.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def _nberf(x):
1313
return erf(x)
1414

1515

16-
@nb.njit(parallel=True, fastmath=True)
16+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
1717
def _black_scholes(price, strike, t, rate, volatility, call, put):
1818
mr = -rate
1919
sig_sig_two = volatility * volatility * 2

dpbench/benchmarks/black_scholes/black_scholes_numba_mlir_p.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010

1111
# blackscholes implemented as a parallel loop using numba.prange
12-
@nb.njit(parallel=True, fastmath=True)
12+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
1313
def _black_scholes(nopt, price, strike, t, rate, volatility, call, put):
1414
mr = -rate
1515
sig_sig_two = volatility * volatility * 2

dpbench/benchmarks/kmeans/kmeans_numba_mlir_k.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
atomic_add = nb.atomic.add
1010

1111

12-
@nb.kernel
12+
@nb.kernel(gpu_fp64_truncate="auto")
1313
def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
1414
idx = nb.get_global_id(0)
1515
# if idx < num_points: # why was it removed?
@@ -23,15 +23,15 @@ def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
2323
arrayPcluster[idx] = i
2424

2525

26-
@nb.kernel
26+
@nb.kernel(gpu_fp64_truncate="auto")
2727
def calCentroidsSum1(arrayCsum, arrayCnumpoint):
2828
i = nb.get_global_id(0)
2929
arrayCsum[i, 0] = 0
3030
arrayCsum[i, 1] = 0
3131
arrayCnumpoint[i] = 0
3232

3333

34-
@nb.kernel
34+
@nb.kernel(gpu_fp64_truncate="auto")
3535
def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
3636
i = nb.get_global_id(0)
3737
ci = arrayPcluster[i]
@@ -40,14 +40,14 @@ def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
4040
atomic_add(arrayCnumpoint, ci, 1)
4141

4242

43-
@nb.kernel
43+
@nb.kernel(gpu_fp64_truncate="auto")
4444
def updateCentroids(arrayC, arrayCsum, arrayCnumpoint, num_centroids):
4545
i = nb.get_global_id(0)
4646
arrayC[i, 0] = arrayCsum[i, 0] / arrayCnumpoint[i]
4747
arrayC[i, 1] = arrayCsum[i, 1] / arrayCnumpoint[i]
4848

4949

50-
@nb.kernel
50+
@nb.kernel(gpu_fp64_truncate="auto")
5151
def copy_arrayC(arrayC, arrayP):
5252
i = nb.get_global_id(0)
5353
arrayC[i, 0] = arrayP[i, 0]

dpbench/benchmarks/kmeans/kmeans_numba_mlir_p.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212

1313
# determine the euclidean distance from the cluster center to each point
14-
@nb.njit
14+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
1515
def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
1616
# parallel for loop
1717
for i0 in numba.prange(num_points):
@@ -27,7 +27,7 @@ def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
2727

2828

2929
# assign points to cluster
30-
@nb.njit
30+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
3131
def calCentroidsSum(
3232
arrayP, arrayPcluster, arrayCsum, arrayCnumpoint, num_points, num_centroids
3333
):
@@ -38,7 +38,7 @@ def calCentroidsSum(
3838
arrayCnumpoint[i] = 0
3939

4040

41-
@nbk.kernel
41+
@nbk.kernel(gpu_fp64_truncate="auto")
4242
def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
4343
i = nbk.get_global_id(0)
4444
ci = arrayPcluster[i]
@@ -48,14 +48,14 @@ def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
4848

4949

5050
# update the centroids array after computation
51-
@nb.njit
51+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
5252
def updateCentroids(arrayC, arrayCsum, arrayCnumpoint, num_centroids):
5353
for i in numba.prange(num_centroids):
5454
arrayC[i, 0] = arrayCsum[i, 0] / arrayCnumpoint[i]
5555
arrayC[i, 1] = arrayCsum[i, 1] / arrayCnumpoint[i]
5656

5757

58-
@nb.njit
58+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
5959
def copy_arrayC(arrayC, arrayP, num_centroids):
6060
for i in numba.prange(num_centroids):
6161
arrayC[i, 0] = arrayP[i, 0]

dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_mlir_k.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77

88

9-
@nb.kernel
9+
@nb.kernel(gpu_fp64_truncate="auto")
1010
def _pairwise_distance_kernel(X1, X2, D):
1111
i = nb.get_global_id(0)
1212

dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_mlir_n.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77

88

9-
@nb.njit(parallel=True, fastmath=True)
9+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
1010
def _pairwise_distance(X1, X2, D):
1111
x1 = np.sum(np.square(X1), axis=1)
1212
x2 = np.sum(np.square(X2), axis=1)

dpbench/benchmarks/pairwise_distance/pairwise_distance_numba_mlir_p.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88

99

10-
@nb.njit(parallel=True, fastmath=True)
10+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
1111
def _pairwise_distance(X1, X2, D):
1212
"""Naïve pairwise distance impl - take an array representing M points in N
1313
dimensions, and return the M x M matrix of Euclidean distances

dpbench/benchmarks/rambo/rambo_numba_mlir_k.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numba_mlir.kernel as nb
88

99

10-
@nb.kernel
10+
@nb.kernel(gpu_fp64_truncate="auto")
1111
def _rambo(C1, F1, Q1, nout, output):
1212
i = nb.get_global_id(0)
1313
for j in range(nout):

0 commit comments

Comments
 (0)