Skip to content

Commit 8505291

Browse files
committed
Merge branch 'master' of https://github.com/EJain-Dev/CLBlast
2 parents 25c8daa + d176318 commit 8505291

File tree

79 files changed

+1000
-205
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+1000
-205
lines changed

doc/tuning.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a
5555
- GeForce GTX 1650
5656
- GeForce GTX 1650 Ti
5757
- GeForce GTX 1650 Super
58+
- GeForce RTX 2050
5859
- GeForce RTX 2060
5960
- GeForce RTX 2070 with Max-Q
6061
- GeForce RTX 2070 Super
@@ -72,6 +73,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a
7273
- GeForce RTX 3060
7374
- GeForce RTX 3060 Laptop
7475
- GeForce RTX 3070
76+
- GeForce RTX 3070 Laptop
7577
- GeForce RTX 3070 Ti Laptop
7678
- GeForce RTX 3080
7779
- GeForce RTX 3080 Laptop
@@ -88,6 +90,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a
8890
- GeForce RTX 4080
8991
- GeForce RTX 4090
9092
- SM 12.0:
93+
- GeForce RTX 5070 Ti
9194
- GeForce RTX 5080
9295
* AMD GPUs:
9396
- Turks:
@@ -115,6 +118,8 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a
115118
- Radeon RX 590 GME
116119
- Vega:
117120
- Radeon RX Vega
121+
- gfx90c:
122+
- Radeon Graphics
118123
- gfx902:
119124
- Radeon RX Vega
120125
- Radeon RX Vega 10
@@ -139,6 +144,8 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a
139144
- gfx1035:
140145
- Radeon 680M
141146
- Ryzen 4600G APU
147+
- gfx1036:
148+
- Radeon Graphics
142149
- gfx1100:
143150
- Radeon RX 7900 XTX
144151
- gfx1101:
@@ -159,10 +166,12 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a
159166
- HD Graphics Haswell Ultrabook GT2 Mobile
160167
- HD Graphics IvyBridge M GT2
161168
- HD Graphics Skylake ULT GT2
169+
- UHD Graphics
162170
- UHD Graphics 620
163171
- UHD Graphics 630
164172
- UHD Graphics 770
165173
- Iris
174+
- Iris Plus Graphics 640
166175
- Iris Pro
167176
- Iris Xe Graphics
168177
- RaptorLake-S Mobile Graphics
@@ -192,6 +201,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a
192201
- Qualcomm Adreno 640 GPU
193202
- Qualcomm Adreno 650 GPU
194203
- Qualcomm Adreno 730 GPU
204+
- Qualcomm Adreno 735
195205
- Qualcomm Adreno 740 GPU
196206
- Intel MIC
197207
- Imagination PowerVR B-Series BXE-4-32

scripts/database/database/clblast.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def get_cpp_device_vendor(vendor, device_type):
9292

9393
def get_cpp_family_includes(family, precisions):
9494
result = "\n"
95-
result += "#include \"database/kernels/%s/%s.hpp\"\n" % (family, family)
95+
result += "#include \"database/kernels/%s/%s.hpp\"\n\n" % (family, family)
9696
for precision in precisions:
9797
result += "#include \"database/kernels/%s/%s_%s.hpp\"\n" % (family, family, precision)
9898
return result

src/database/kernels/copy/copy_16.hpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ const DatabaseEntry CopyHalf = {
5858
{ Name{"AMD Radeon(TM) Graphics "}, Params{ 16, 16, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
5959
{ kDeviceNameDefault , Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
6060
} },
61+
{ "gfx1036", {
62+
{ Name{"AMD Radeon Graphics "}, Params{ 8, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
63+
{ kDeviceNameDefault , Params{ 8, 16, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
64+
} },
6165
{ "gfx1100", {
6266
{ Name{"Radeon RX 7900 XTX "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
6367
{ kDeviceNameDefault , Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -119,11 +123,13 @@ const DatabaseEntry CopyHalf = {
119123
{ Name{"Intel(R) HD Graphics 5500 BroadWell U-Processor GT"}, Params{ 8, 16, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
120124
{ Name{"Intel(R) HD Graphics 620 "}, Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
121125
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 8, 32, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
122-
{ Name{"Intel(R) Iris(R) Xe Graphics "}, Params{ 8, 8, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
126+
{ Name{"Intel(R) Iris(R) Plus Graphics 640 "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
127+
{ Name{"Intel(R) Iris(R) Xe Graphics "}, Params{ 8, 32, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
123128
{ Name{"Intel(R) RaptorLake-S Mobile Graphics Controller "}, Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
129+
{ Name{"Intel(R) UHD Graphics "}, Params{ 8, 16, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
124130
{ Name{"Intel(R) UHD Graphics 620 "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
125131
{ Name{"Intel(R) UHD Graphics 770 "}, Params{ 16, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
126-
{ kDeviceNameDefault , Params{ 32, 8, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
132+
{ kDeviceNameDefault , Params{ 8, 16, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
127133
} },
128134
}
129135
},
@@ -145,6 +151,10 @@ const DatabaseEntry CopyHalf = {
145151
{ Name{"QUALCOMM Adreno(TM) "}, Params{ 32, 16, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
146152
{ kDeviceNameDefault , Params{ 32, 16, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
147153
} },
154+
{ "OpenCL C 3.0 Adreno(TM) 735", {
155+
{ Name{"QUALCOMM Adreno(TM) 735 "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
156+
{ kDeviceNameDefault , Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
157+
} },
148158
{ "OpenCL C 3.0 Adreno(TM) 740", {
149159
{ Name{"QUALCOMM Adreno(TM) "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
150160
{ kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -154,7 +164,7 @@ const DatabaseEntry CopyHalf = {
154164
{ // Default
155165
kDeviceTypeAll, "default", {
156166
{ "default", {
157-
{ kDeviceNameDefault , Params{ 16, 16, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
167+
{ kDeviceNameDefault , Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
158168
} },
159169
}
160170
},

src/database/kernels/copy/copy_32.hpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,10 @@ const DatabaseEntry CopySingle = {
9191
{ Name{"AMD Radeon(TM) Graphics "}, Params{ 8, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
9292
{ kDeviceNameDefault , Params{ 32, 8, 1, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
9393
} },
94+
{ "gfx1036", {
95+
{ Name{"AMD Radeon Graphics "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
96+
{ kDeviceNameDefault , Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
97+
} },
9498
{ "gfx1100", {
9599
{ Name{"Radeon RX 7900 XTX "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
96100
{ kDeviceNameDefault , Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -187,13 +191,15 @@ const DatabaseEntry CopySingle = {
187191
{ Name{"Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile "}, Params{ 32, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
188192
{ Name{"Intel(R) HD Graphics IvyBridge M GT2 "}, Params{ 16, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
189193
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 8, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
190-
{ Name{"Intel(R) Iris(R) Xe Graphics "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
194+
{ Name{"Intel(R) Iris(R) Plus Graphics 640 "}, Params{ 8, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
195+
{ Name{"Intel(R) Iris(R) Xe Graphics "}, Params{ 32, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
191196
{ Name{"Intel(R) RaptorLake-S Mobile Graphics Controller "}, Params{ 8, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
197+
{ Name{"Intel(R) UHD Graphics "}, Params{ 8, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
192198
{ Name{"Intel(R) UHD Graphics 620 "}, Params{ 8, 32, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
193199
{ Name{"Intel(R) UHD Graphics 770 "}, Params{ 32, 8, 2, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
194200
{ Name{"Iris "}, Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
195201
{ Name{"Iris Pro "}, Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
196-
{ kDeviceNameDefault , Params{ 8, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
202+
{ kDeviceNameDefault , Params{ 16, 16, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
197203
} },
198204
}
199205
},
@@ -209,8 +215,9 @@ const DatabaseEntry CopySingle = {
209215
{ // NVIDIA GPUs
210216
kDeviceTypeGPU, "NVIDIA", {
211217
{ "SM12.0", {
218+
{ Name{"NVIDIA GeForce RTX 5070 Ti "}, Params{ 8, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
212219
{ Name{"NVIDIA GeForce RTX 5080 "}, Params{ 8, 8, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
213-
{ kDeviceNameDefault , Params{ 8, 8, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
220+
{ kDeviceNameDefault , Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
214221
} },
215222
{ "SM2.0", {
216223
{ Name{"GeForce GTX 480 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -289,19 +296,20 @@ const DatabaseEntry CopySingle = {
289296
{ kDeviceNameDefault , Params{ 8, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
290297
} },
291298
{ "SM8.6", {
292-
{ Name{"NVIDIA GeForce RTX 2050 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
293-
{ Name{"NVIDIA GeForce RTX 3050 Laptop GPU "}, Params{ 8, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
294-
{ Name{"NVIDIA GeForce RTX 3050 Ti Laptop GPU "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
299+
{ Name{"NVIDIA GeForce RTX 2050 "}, Params{ 32, 32, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
300+
{ Name{"NVIDIA GeForce RTX 3050 Laptop GPU "}, Params{ 8, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
301+
{ Name{"NVIDIA GeForce RTX 3050 Ti Laptop GPU "}, Params{ 8, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
295302
{ Name{"NVIDIA GeForce RTX 3060 "}, Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
296303
{ Name{"NVIDIA GeForce RTX 3060 Laptop GPU "}, Params{ 16, 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
297304
{ Name{"NVIDIA GeForce RTX 3070 "}, Params{ 8, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
305+
{ Name{"NVIDIA GeForce RTX 3070 Laptop GPU "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
298306
{ Name{"NVIDIA GeForce RTX 3070 Ti Laptop GPU "}, Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
299307
{ Name{"NVIDIA GeForce RTX 3080 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
300308
{ Name{"NVIDIA GeForce RTX 3080 Laptop GPU "}, Params{ 8, 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
301309
{ Name{"NVIDIA GeForce RTX 3080 Ti "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
302310
{ Name{"NVIDIA GeForce RTX 3090 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
303311
{ Name{"NVIDIA RTX A6000 "}, Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
304-
{ kDeviceNameDefault , Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
312+
{ kDeviceNameDefault , Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
305313
} },
306314
{ "SM8.9", {
307315
{ Name{"NVIDIA GeForce RTX 4050 Laptop GPU "}, Params{ 32, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
@@ -337,6 +345,10 @@ const DatabaseEntry CopySingle = {
337345
{ Name{"QUALCOMM Adreno(TM) "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
338346
{ kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
339347
} },
348+
{ "OpenCL C 3.0 Adreno(TM) 735", {
349+
{ Name{"QUALCOMM Adreno(TM) 735 "}, Params{ 32, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
350+
{ kDeviceNameDefault , Params{ 32, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
351+
} },
340352
{ "OpenCL C 3.0 Adreno(TM) 740", {
341353
{ Name{"QUALCOMM Adreno(TM) "}, Params{ 16, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
342354
{ kDeviceNameDefault , Params{ 16, 32, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },

0 commit comments

Comments
 (0)