Skip to content

Commit ad00a99

Browse files
committed
Merge pull request #199 from arrayfire/arrayfire-release-test
Fixing errors when using multiple contexts
2 parents c2e7334 + bef2f6b commit ad00a99

15 files changed

+443
-324
lines changed

src/library/blas/AutoGemm/AutoGemm.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -38,13 +38,12 @@
3838
if args.output:
3939
Common.setOutputPath(args.output)
4040
else:
41-
print "AutoGemm.py: Warning: No output path specified; default is working directory."
41+
print("AutoGemm.py: Warning: No output path specified; default is working directory.")
4242

43-
print "AutoGemm.py: using OpenCL " + args.clCompilerVersion + " compiler"
43+
print("AutoGemm.py: using OpenCL " + args.clCompilerVersion + " compiler")
4444
Common.setClCompilerVersion(args.clCompilerVersion)
4545
AutoGemmParameters.setArchitecture(args.architecture)
4646

4747
KernelOpenCL.writeOpenCLKernels()
4848
KernelSelection.writeKernelSelection()
4949
Includes.writeIncludes()
50-

src/library/blas/AutoGemm/AutoGemmParameters.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -127,20 +127,20 @@ def getTilesForPrecision(precision):
127127
validTiles = sizeData[2]
128128
# add valid tiles
129129
for tileParams in validTiles:
130-
#print tileParams
130+
#print(tileParams)
131131
tile.workGroupNumRows = tileParams[0]
132132
tile.workGroupNumCols = tileParams[1]
133133
tile.microTileNumRows = tileParams[2]
134134
tile.microTileNumCols = tileParams[3]
135135
tile.macroTileNumRows = tile.workGroupNumRows*tile.microTileNumRows
136136
tile.macroTileNumCols = tile.workGroupNumCols*tile.microTileNumCols
137-
#print tile.getName()
137+
#print(tile.getName())
138138
for unroll in unrolls[precision]:
139139
tile.unroll = unroll
140140
if tile.isValid():
141141
tiles.append( copy.copy(tile) )
142142
else:
143-
print tile.getName() + " - SKIPPING - "
143+
print(tile.getName() + " - SKIPPING - ")
144144

145145
# add fallback tile
146146
tile.workGroupNumRows = fallbackTile[0]
@@ -154,7 +154,7 @@ def getTilesForPrecision(precision):
154154
if tile.isValid():
155155
tiles.append( copy.copy(tile) )
156156
else:
157-
print tile.getName() + " - SKIPPING - "
157+
print(tile.getName() + " - SKIPPING - ")
158158

159159
setTiles = set(tiles)
160160
tiles = list( setTiles )

src/library/blas/AutoGemm/Includes.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -425,7 +425,7 @@ def writeToFile(self):
425425
# Write Includes
426426
################################################################################
427427
def writeIncludes():
428-
print "AutoGemm.py: Generating include files."
428+
print("AutoGemm.py: Generating include files.")
429429
if not os.path.exists( Common.getIncludePath() ):
430430
os.makedirs( Common.getIncludePath() )
431431

@@ -490,5 +490,5 @@ def writeIncludes():
490490
if len(sys.argv) == 2:
491491
Common.setOutputPath(sys.argv[1])
492492
else:
493-
print "Warning: No output path specified; default is working directory."
493+
print("Warning: No output path specified; default is working directory.")
494494
writeIncludes()

src/library/blas/AutoGemm/KernelOpenCL.py

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,13 @@ def makeOpenCLKernelString(kernel):
2525
kStr += "/* %s */" % kernel.getName()
2626
kStr += endLine
2727

28+
####################################
29+
# Double precision pragma
30+
prec = kernel.getName()[0].lower()
31+
if prec == "d" or prec == "z":
32+
kStr += endLine
33+
kStr += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" + endLine
34+
2835
####################################
2936
# kernel parameters
3037
kStr += endLine
@@ -168,14 +175,14 @@ def makeOpenCLKernelString(kernel):
168175
kStr += endLine
169176
kStr += "/* %dx%d micro-tile */%s" % (kernel.microTileNumRows, kernel.microTileNumCols, endLine)
170177
kStr += "#define MICRO_TILE \\\\" + endLine
171-
for a in range(0, kernel.microTileNumRows):
178+
for a in range(0, int(kernel.microTileNumRows)):
172179
kStr += " rA[%d] = localA[offA + %d*WG_NUM_ROWS]; \\\\%s" % (a, a, endLine)
173-
for b in range(0, kernel.microTileNumCols):
180+
for b in range(0, int(kernel.microTileNumCols)):
174181
kStr += " rB[%d] = localB[offB + %d*WG_NUM_COLS]; \\\\%s" % (b, b, endLine)
175182
kStr += " offA += (MACRO_TILE_NUM_ROWS+LOCAL_COL_PAD); \\\\" + endLine
176183
kStr += " offB += (MACRO_TILE_NUM_COLS+LOCAL_ROW_PAD); \\\\" + endLine
177-
for a in range(0, kernel.microTileNumRows):
178-
for b in range(0, kernel.microTileNumCols):
184+
for a in range(0, int(kernel.microTileNumRows)):
185+
for b in range(0, int(kernel.microTileNumCols)):
179186
kStr += " TYPE_MAD(rA[%d],rB[%d],rC[%d][%d]); \\\\%s" % (a, b, a, b, endLine)
180187
kStr += " mem_fence(CLK_LOCAL_MEM_FENCE);" + endLine
181188
kStr += endLine
@@ -365,7 +372,7 @@ def makeOpenCLKernelString(kernel):
365372
zeroString = "(double2)(0.0, 0.0)"
366373
else:
367374
zeroString = "0.0"
368-
for a in range(0, numALoads):
375+
for a in range(0, int(numALoads)):
369376
kStr += " lA[ %d*localAStride ] = " % a
370377
if kernel.isRowKernel():
371378
kStr += "( globalARow(%d) >= M) ? %s : " % ( a, zeroString )
@@ -378,7 +385,7 @@ def makeOpenCLKernelString(kernel):
378385
kStr += "A[ GET_GLOBAL_INDEX_A( globalARow(%d), globalACol(%d) ) ];%s" % (numALoads, numALoads, endLine)
379386
kStr += " }" + endLine
380387

381-
for b in range(0, numBLoads):
388+
for b in range(0, int(numBLoads)):
382389
kStr += " lB[ %d*localBStride ] = " % b
383390
if kernel.isColKernel():
384391
kStr += "( globalBCol(%d) >= N) ? %s : " % ( b, zeroString )
@@ -399,7 +406,7 @@ def makeOpenCLKernelString(kernel):
399406
# do mads
400407
kStr += endLine
401408
kStr += " /* do mads */" + endLine
402-
for u in range(0, kernel.unroll):
409+
for u in range(0, int(kernel.unroll)):
403410
kStr += " MICRO_TILE" + endLine
404411

405412
####################################
@@ -437,8 +444,8 @@ def makeOpenCLKernelString(kernel):
437444
if kernel.precision=="z":
438445
kStr += " double type_mad_tmp;" + endLine
439446

440-
for a in range(0, kernel.microTileNumRows):
441-
for b in range(0, kernel.microTileNumCols):
447+
for a in range(0, int(kernel.microTileNumRows)):
448+
for b in range(0, int(kernel.microTileNumCols)):
442449
if kernel.isRowKernel():
443450
kStr += " if (globalCRow+%d*WG_NUM_ROWS < M)" % a
444451
if kernel.isColKernel():
@@ -534,7 +541,7 @@ def writeOpenCLKernels():
534541
cornerKernel.macroTileNumCols = 1
535542
writeOpenCLKernelToFile(cornerKernel)
536543
numKernels += 4
537-
print "AutoGemm.py: generated %d kernels" % numKernels
544+
print("AutoGemm.py: generated %d kernels" % numKernels)
538545

539546

540547

@@ -583,5 +590,4 @@ def writeOpenCLKernels():
583590

584591
kernelName = kernel.getName()
585592
kernelFileName = Common.getKernelSourcePath() + kernelName +"_src.cpp"
586-
print "kernel \"%s\" written to %s" % (kernelName, kernelFileName)
587-
593+
print("kernel \"%s\" written to %s" % (kernelName, kernelFileName))

src/library/blas/AutoGemm/KernelParameters.py

Lines changed: 29 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -44,15 +44,32 @@ def __str__(self):
4444
def __repr__(self):
4545
return self.getName()
4646

47+
def __lt__(self, other):
48+
return self.getName() < other.getName()
49+
50+
def __cmp__(self, other):
51+
# Python3 should ignore this method
52+
# This is needed for python2 for proper comparison
53+
try:
54+
return cmp(self.getName(), other.getName())
55+
except:
56+
self_name = self.getName()
57+
other_name = other.getName()
58+
if (self_name < other_name):
59+
return -1
60+
elif (self_name == other_name):
61+
return 0
62+
else:
63+
return 1
4764

4865
def printAttributes(self):
49-
print "workGroupNumRows = %d" % self.workGroupNumRows
50-
print "workGroupNumCols = %d" % self.workGroupNumCols
51-
print "microTileNumRows = %d" % self.microTileNumRows
52-
print "microTileNumCols = %d" % self.microTileNumCols
53-
print "macroTileNumRows = %d" % self.macroTileNumRows
54-
print "macroTileNumCols = %d" % self.macroTileNumCols
55-
print "unroll = %d" % self.unroll
66+
print("workGroupNumRows = %d" % self.workGroupNumRows)
67+
print("workGroupNumCols = %d" % self.workGroupNumCols)
68+
print("microTileNumRows = %d" % self.microTileNumRows)
69+
print("microTileNumCols = %d" % self.microTileNumCols)
70+
print("macroTileNumRows = %d" % self.macroTileNumRows)
71+
print("macroTileNumCols = %d" % self.macroTileNumCols)
72+
print("unroll = %d" % self.unroll)
5673

5774
##############################################################################
5875
# Tile - get Multiples
@@ -188,11 +205,11 @@ def __init__(self):
188205
self.beta = -1 # 0, 1
189206

190207
def printAttributes(self):
191-
print "precision = " + self.precision
192-
print "order = " + self.order
193-
print "transA = " + self.transA
194-
print "transB = " + self.transB
195-
print "beta = %d" % self.beta
208+
print("precision = " + self.precision)
209+
print("order = " + self.order)
210+
print("transA = " + self.transA)
211+
print("transB = " + self.transB)
212+
print("beta = %d" % self.beta)
196213

197214
##############################################################################
198215
# NonTile - get Name
@@ -250,4 +267,3 @@ def getColName(self):
250267
def getCornerName(self):
251268
return NonTileParameters.getName(self) \
252269
+ "_" + TileParameters.getCornerName(self)
253-

src/library/blas/AutoGemm/KernelSelection.py

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,10 @@ def tileInRange( tileMin, tileMax, rangeMin, rangeMax):
1717
valid = True
1818
else:
1919
valid = False
20-
#print "Range [%4ux%4u]: [%4u,%4u] is %s b/c" \
21-
# % (rangeMin, rangeMax, tileMin, tileMax, "valid" if valid else "INVALID" )
22-
#print "if ( %i<0 or (%u >= %u and %u>0) and %u <= %u" \
23-
# %( tileMax, tileMax, rangeMax, rangeMax, tileMin, rangeMin )
20+
#print("Range [%4ux%4u]: [%4u,%4u] is %s b/c" \
21+
# % (rangeMin, rangeMax, tileMin, tileMax, "valid" if valid else "INVALID" ))
22+
#print("if ( %i<0 or (%u >= %u and %u>0) and %u <= %u" \
23+
# %( tileMax, tileMax, rangeMax, rangeMax, tileMin, rangeMin ))
2424
return valid
2525

2626

@@ -159,15 +159,15 @@ def __init__( \
159159
####################################
160160
# order
161161
for order in orderList:
162-
#print precision + "gemm" + "_" + order
162+
#print(precision + "gemm" + "_" + order)
163163
kernel.order = order
164164
self.logic += indent(1) + "if (order == " + order + ") {\n"
165165
transList = transDict[precision]
166166

167167
####################################
168168
# transA
169169
for transA in transList:
170-
#print precision + "gemm" + "_" + order + "_" + transA
170+
#print(precision + "gemm" + "_" + order + "_" + transA)
171171
kernel.transA = transA
172172
self.logic += indent(2) + "if (transA == "
173173
if transA == "N":
@@ -194,7 +194,7 @@ def __init__( \
194194
####################################
195195
# beta
196196
for beta in betaList:
197-
#print precision + "gemm" + "_" + order + "_" + transA + "_" + transB + "_B" + str(beta)
197+
#print(precision + "gemm" + "_" + order + "_" + transA + "_" + transB + "_B" + str(beta))
198198
kernel.beta = beta
199199
self.logic += indent(4) + "if ( "
200200
if beta == 0:
@@ -212,7 +212,7 @@ def __init__( \
212212
fallbackTile = sizeEvent[1]
213213
validTiles = sizeEvent[2]
214214
self.logic += indent(5)+"if ( M*N >= "+str(sizeMin)+"*"+str(sizeMin) + ") {\n"
215-
#print precision + "gemm" + "_" + order + "_" + transA + "_" + transB + "_B" + str(beta) + "_" + str(sizeMin) + "->" + str(sizeMax)
215+
#print(precision + "gemm" + "_" + order + "_" + transA + "_" + transB + "_B" + str(beta) + "_" + str(sizeMin) + "->" + str(sizeMax))
216216

217217
####################################
218218
# valid tiles
@@ -234,7 +234,7 @@ def __init__( \
234234
####################################
235235
# fallback tile - TODO all tiles being added
236236
self.logic += indent(6)+"// fallback tile\n"
237-
#print "\nFallback[%i, %i]"%(sizeMin, sizeMax)
237+
#print("\nFallback[%i, %i]"%(sizeMin, sizeMax))
238238
kernel.workGroupNumRows = fallbackTile[0]
239239
kernel.workGroupNumCols = fallbackTile[1]
240240
kernel.microTileNumRows = fallbackTile[2]
@@ -387,7 +387,7 @@ def __init__(self):
387387
self.betaInitialized = False
388388

389389
def newPrecision(self, precision ):
390-
#print "KernelSelectionSpecific: " + precision + "gemm"
390+
#print("KernelSelectionSpecific: " + precision + "gemm")
391391
if self.precisionInitialized:
392392
self.logic += self.zeroIndent+self.tab+self.tab + "}\n" # 2 tabs
393393
self.logic += self.zeroIndent+self.tab + "}\n" # 1 tab
@@ -621,7 +621,7 @@ def writeToFile(self):
621621
# Main
622622
################################################################################
623623
def writeKernelSelection():
624-
print "AutoGemm.py: Generating kernel selection."
624+
print("AutoGemm.py: Generating kernel selection.")
625625
if not os.path.exists( Common.getIncludePath() ):
626626
os.makedirs( Common.getIncludePath() )
627627

@@ -680,5 +680,4 @@ def writeKernelSelection():
680680
AutoGemmParameters.setArchitecture(sys.argv[2])
681681
writeKernelSelection()
682682
else:
683-
print "USAGE: python KernelSelection.py output_path architecture"
684-
683+
print("USAGE: python KernelSelection.py output_path architecture")

src/library/blas/AutoGemm/KernelsToPreCompile.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
################################################################################
1010

1111
def writeOfflineCompilation(args):
12-
print "AutoGemm.py: Generating list of kernels to pre-compile."
12+
print("AutoGemm.py: Generating list of kernels to pre-compile.")
1313
if not os.path.exists( Common.getIncludePath() ):
1414
os.makedirs( Common.getIncludePath() )
1515

@@ -57,7 +57,7 @@ def writeOfflineCompilation(args):
5757
ocFile.write( fileStr )
5858
ocFile.close()
5959
count *= 4
60-
print "AutoGemm.py: %u kernels will be pre-compiled." % count
60+
print("AutoGemm.py: %u kernels will be pre-compiled." % count)
6161

6262

6363
################################################################################
@@ -76,7 +76,7 @@ def writeOfflineCompilation(args):
7676
if args.output:
7777
Common.setOutputPath(args.output)
7878
else:
79-
print "Warning: No output path specified; default is working directory."
79+
print("Warning: No output path specified; default is working directory.")
8080

8181
# write offline compilation header
8282
if args.precisions is None:
@@ -88,4 +88,3 @@ def writeOfflineCompilation(args):
8888
if args.betas is None:
8989
args.betas = []
9090
writeOfflineCompilation(args)
91-

src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B0_MX048_NX048_KX08_src.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ const unsigned int dgemm_Col_NN_B0_MX048_NX048_KX08_microTileNumCols = 6;
1818
const unsigned int dgemm_Col_NN_B0_MX048_NX048_KX08_unroll = 8;
1919

2020
const char * const dgemm_Col_NN_B0_MX048_NX048_KX08_src = STRINGIFY(
21+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n
2122

2223
#define M6x6 \
2324
rA[0] = lA[offA + 0];\

src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B1_MX048_NX048_KX08_src.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ const unsigned int dgemm_Col_NN_B1_MX048_NX048_KX08_microTileNumCols = 6;
1818
const unsigned int dgemm_Col_NN_B1_MX048_NX048_KX08_unroll = 8;
1919

2020
const char * const dgemm_Col_NN_B1_MX048_NX048_KX08_src = STRINGIFY(
21+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n
2122

2223
#define M6x6 \
2324
rA[0] = lA[offA + 0]; \

src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B0_MX048_NX048_KX08_src.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ const unsigned int dgemm_Col_NT_B0_MX048_NX048_KX08_microTileNumCols = 6;
1818
const unsigned int dgemm_Col_NT_B0_MX048_NX048_KX08_unroll = 8;
1919

2020
const char * const dgemm_Col_NT_B0_MX048_NX048_KX08_src = STRINGIFY(
21+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n
2122
\n
2223
\ntypedef union _GPtr {
2324
\n __global float *f;

0 commit comments

Comments
 (0)