1111
1212
1313# determine the Euclidean distance from the cluster center to each point
14- @nb .njit
14+ @nb .njit ( parallel = True , fastmath = True , gpu_fp64_truncate = "auto" )
1515def groupByCluster (arrayP , arrayPcluster , arrayC , num_points , num_centroids ):
1616 # parallel for loop
1717 for i0 in numba .prange (num_points ):
@@ -27,7 +27,7 @@ def groupByCluster(arrayP, arrayPcluster, arrayC, num_points, num_centroids):
2727
2828
2929# assign points to cluster
30- @nb .njit
30+ @nb .njit ( parallel = True , fastmath = True , gpu_fp64_truncate = "auto" )
3131def calCentroidsSum (
3232 arrayP , arrayPcluster , arrayCsum , arrayCnumpoint , num_points , num_centroids
3333):
@@ -38,7 +38,7 @@ def calCentroidsSum(
3838 arrayCnumpoint [i ] = 0
3939
4040
41- @nbk .kernel
41+ @nbk .kernel ( gpu_fp64_truncate = "auto" )
4242def calCentroidsSum2 (arrayP , arrayPcluster , arrayCsum , arrayCnumpoint ):
4343 i = nbk .get_global_id (0 )
4444 ci = arrayPcluster [i ]
@@ -48,14 +48,14 @@ def calCentroidsSum2(arrayP, arrayPcluster, arrayCsum, arrayCnumpoint):
4848
4949
5050# update the centroids array after computation
51- @nb .njit
51+ @nb .njit ( parallel = True , fastmath = True , gpu_fp64_truncate = "auto" )
def updateCentroids(arrayC, arrayCsum, arrayCnumpoint, num_centroids):
    """Overwrite each centroid with the mean of the sums accumulated for it.

    For every index ``i`` in ``range(num_centroids)``, columns 0 and 1 of
    ``arrayC[i]`` are set to ``arrayCsum[i, col] / arrayCnumpoint[i]``.
    A centroid whose count is zero is left unchanged rather than divided
    by zero.

    Args:
        arrayC: 2-D array of centroid coordinates, updated in place
            (columns 0 and 1 are written).
        arrayCsum: 2-D array holding the per-centroid coordinate sums
            (columns 0 and 1 are read).
        arrayCnumpoint: 1-D array holding the per-centroid divisor
            (presumably the number of assigned points; confirm against
            the caller).
        num_centroids: number of leading rows to update.

    Returns:
        None. ``arrayC`` is modified in place.
    """
    for i in numba.prange(num_centroids):
        count = arrayCnumpoint[i]
        # A zero count has no defined mean. Dividing anyway would either
        # raise or yield NaN/inf (undefined under fast-math), so keep the
        # previous centroid position for that row instead.
        if count != 0:
            arrayC[i, 0] = arrayCsum[i, 0] / count
            arrayC[i, 1] = arrayCsum[i, 1] / count
5656
5757
58- @nb .njit
58+ @nb .njit ( parallel = True , fastmath = True , gpu_fp64_truncate = "auto" )
5959def copy_arrayC (arrayC , arrayP , num_centroids ):
6060 for i in numba .prange (num_centroids ):
6161 arrayC [i , 0 ] = arrayP [i , 0 ]
0 commit comments