Skip to content

Commit 9bb68d2

Browse files
Hardcode84 authored and Diptorup Deb committed
kmeans f64 emulation
1 parent 672a111 commit 9bb68d2

File tree

2 files changed

+10
-10
lines changed

2 files changed

+10
-10
lines changed

dpbench/benchmarks/kmeans/kmeans_numba_mlir_k.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
atomic_add = nb.atomic.add
1010

1111

12-
@nb.kernel
12+
@nb.kernel(gpu_fp64_truncate="auto")
1313
def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
1414
idx = nb.get_global_id(0)
1515
# if idx < num_points:  # why was this bounds check removed?
@@ -23,15 +23,15 @@ def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
2323
arrayPcluster[idx] = i
2424

2525

26-
@nb.kernel
26+
@nb.kernel(gpu_fp64_truncate="auto")
2727
def calCentroidsSum1(arrayCsum, arrayCnumpoint):
2828
i = nb.get_global_id(0)
2929
arrayCsum[i, 0] = 0
3030
arrayCsum[i, 1] = 0
3131
arrayCnumpoint[i] = 0
3232

3333

34-
@nb.kernel
34+
@nb.kernel(gpu_fp64_truncate="auto")
3535
def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
3636
i = nb.get_global_id(0)
3737
ci = arrayPcluster[i]
@@ -40,14 +40,14 @@ def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
4040
atomic_add(arrayCnumpoint, ci, 1)
4141

4242

43-
@nb.kernel
43+
@nb.kernel(gpu_fp64_truncate="auto")
4444
def updateCentroids(arrayC, arrayCsum, arrayCnumpoint, num_centroids):
4545
i = nb.get_global_id(0)
4646
arrayC[i, 0] = arrayCsum[i, 0] / arrayCnumpoint[i]
4747
arrayC[i, 1] = arrayCsum[i, 1] / arrayCnumpoint[i]
4848

4949

50-
@nb.kernel
50+
@nb.kernel(gpu_fp64_truncate="auto")
5151
def copy_arrayC(arrayC, arrayP):
5252
i = nb.get_global_id(0)
5353
arrayC[i, 0] = arrayP[i, 0]

dpbench/benchmarks/kmeans/kmeans_numba_mlir_p.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212

1313
# determine the euclidean distance from the cluster center to each point
14-
@nb.njit
14+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
1515
def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
1616
# parallel for loop
1717
for i0 in numba.prange(num_points):
@@ -27,7 +27,7 @@ def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
2727

2828

2929
# assign points to cluster
30-
@nb.njit
30+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
3131
def calCentroidsSum(
3232
arrayP, arrayPcluster, arrayCsum, arrayCnumpoint, num_points, num_centroids
3333
):
@@ -38,7 +38,7 @@ def calCentroidsSum(
3838
arrayCnumpoint[i] = 0
3939

4040

41-
@nbk.kernel
41+
@nbk.kernel(gpu_fp64_truncate="auto")
4242
def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
4343
i = nbk.get_global_id(0)
4444
ci = arrayPcluster[i]
@@ -48,14 +48,14 @@ def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
4848

4949

5050
# update the centroids array after computation
51-
@nb.njit
51+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
5252
def updateCentroids(arrayC, arrayCsum, arrayCnumpoint, num_centroids):
5353
for i in numba.prange(num_centroids):
5454
arrayC[i, 0] = arrayCsum[i, 0] / arrayCnumpoint[i]
5555
arrayC[i, 1] = arrayCsum[i, 1] / arrayCnumpoint[i]
5656

5757

58-
@nb.njit
58+
@nb.njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
5959
def copy_arrayC(arrayC, arrayP, num_centroids):
6060
for i in numba.prange(num_centroids):
6161
arrayC[i, 0] = arrayP[i, 0]

0 commit comments

Comments
 (0)