@@ -20,24 +20,29 @@ void foo() {
20
20
const double *alpha_d;
21
21
const float2 *alpha_c;
22
22
const double2 *alpha_z;
23
+ const half *alpha_h;
23
24
const float *A_s;
24
25
const double *A_d;
25
26
const float2 *A_c;
26
27
const double2 *A_z;
28
+ const half *A_h;
27
29
int64_t lda;
28
30
const float *B_s;
29
31
const double *B_d;
30
32
const float2 *B_c;
31
33
const double2 *B_z;
34
+ const half *B_h;
32
35
int64_t ldb;
33
36
const float *beta_s;
34
37
const double *beta_d;
35
38
const float2 *beta_c;
36
39
const double2 *beta_z;
40
+ const half *beta_h;
37
41
float *C_s;
38
42
double *C_d;
39
43
float2 *C_c;
40
44
double2 *C_z;
45
+ half *C_h;
41
46
float *C1_s;
42
47
double *C1_d;
43
48
float2 *C1_c;
@@ -142,26 +147,38 @@ void foo() {
142
147
double *y_d;
143
148
float2 *y_c;
144
149
double2 *y_z;
145
- status = cublasSgemv_64 (handle, transa, m, n, alpha_s, A_s, lda, x_s, incx, beta_s, y_s, incy);
146
- status = cublasDgemv_64 (handle, transa, m, n, alpha_d, A_d, lda, x_d, incx, beta_d, y_d, incy);
147
- status = cublasCgemv_64 (handle, transa, m, n, alpha_c, A_c, lda, x_c, incx, beta_c, y_c, incy);
148
- status = cublasZgemv_64 (handle, transa, m, n, alpha_z, A_z, lda, x_z, incx, beta_z, y_z, incy);
150
+ status = cublasSgemv_64 (handle, transa, m, n, alpha_s, A_s, lda, x_s, incx,
151
+ beta_s, y_s, incy);
152
+ status = cublasDgemv_64 (handle, transa, m, n, alpha_d, A_d, lda, x_d, incx,
153
+ beta_d, y_d, incy);
154
+ status = cublasCgemv_64 (handle, transa, m, n, alpha_c, A_c, lda, x_c, incx,
155
+ beta_c, y_c, incy);
156
+ status = cublasZgemv_64 (handle, transa, m, n, alpha_z, A_z, lda, x_z, incx,
157
+ beta_z, y_z, incy);
149
158
150
159
int64_t kl, ku;
151
- status = cublasSgbmv_64 (handle, transa, m, n, kl, ku, alpha_s, A_s, lda, x_s, incx, beta_s, y_s, incy);
152
- status = cublasDgbmv_64 (handle, transa, m, n, kl, ku, alpha_d, A_d, lda, x_d, incx, beta_d, y_d, incy);
153
- status = cublasCgbmv_64 (handle, transa, m, n, kl, ku, alpha_c, A_c, lda, x_c, incx, beta_c, y_c, incy);
154
- status = cublasZgbmv_64 (handle, transa, m, n, kl, ku, alpha_z, A_z, lda, x_z, incx, beta_z, y_z, incy);
160
+ status = cublasSgbmv_64 (handle, transa, m, n, kl, ku, alpha_s, A_s, lda, x_s,
161
+ incx, beta_s, y_s, incy);
162
+ status = cublasDgbmv_64 (handle, transa, m, n, kl, ku, alpha_d, A_d, lda, x_d,
163
+ incx, beta_d, y_d, incy);
164
+ status = cublasCgbmv_64 (handle, transa, m, n, kl, ku, alpha_c, A_c, lda, x_c,
165
+ incx, beta_c, y_c, incy);
166
+ status = cublasZgbmv_64 (handle, transa, m, n, kl, ku, alpha_z, A_z, lda, x_z,
167
+ incx, beta_z, y_z, incy);
155
168
156
169
status = cublasStrmv_64 (handle, uplo, transa, diag, n, A_s, lda, y_s, incy);
157
170
status = cublasDtrmv_64 (handle, uplo, transa, diag, n, A_d, lda, y_d, incy);
158
171
status = cublasCtrmv_64 (handle, uplo, transa, diag, n, A_c, lda, y_c, incy);
159
172
status = cublasZtrmv_64 (handle, uplo, transa, diag, n, A_z, lda, y_z, incy);
160
173
161
- status = cublasStbmv_64 (handle, uplo, transa, diag, n, k, A_s, lda, y_s, incy);
162
- status = cublasDtbmv_64 (handle, uplo, transa, diag, n, k, A_d, lda, y_d, incy);
163
- status = cublasCtbmv_64 (handle, uplo, transa, diag, n, k, A_c, lda, y_c, incy);
164
- status = cublasZtbmv_64 (handle, uplo, transa, diag, n, k, A_z, lda, y_z, incy);
174
+ status =
175
+ cublasStbmv_64 (handle, uplo, transa, diag, n, k, A_s, lda, y_s, incy);
176
+ status =
177
+ cublasDtbmv_64 (handle, uplo, transa, diag, n, k, A_d, lda, y_d, incy);
178
+ status =
179
+ cublasCtbmv_64 (handle, uplo, transa, diag, n, k, A_c, lda, y_c, incy);
180
+ status =
181
+ cublasZtbmv_64 (handle, uplo, transa, diag, n, k, A_z, lda, y_z, incy);
165
182
166
183
status = cublasStpmv_64 (handle, uplo, transa, diag, n, A_s, y_s, incy);
167
184
status = cublasDtpmv_64 (handle, uplo, transa, diag, n, A_d, y_d, incy);
@@ -178,47 +195,107 @@ void foo() {
178
195
status = cublasCtpsv_64 (handle, uplo, transa, diag, n, A_c, y_c, incy);
179
196
status = cublasZtpsv_64 (handle, uplo, transa, diag, n, A_z, y_z, incy);
180
197
181
- status = cublasSgemm_64 (handle, transa, transb, m, n, k, alpha_s, A_s, lda, B_s, ldb, beta_s, C_s, ldc);
182
- status = cublasDgemm_64 (handle, transa, transb, m, n, k, alpha_d, A_d, lda, B_d, ldb, beta_d, C_d, ldc);
183
- status = cublasCgemm_64 (handle, transa, transb, m, n, k, alpha_c, A_c, lda, B_c, ldb, beta_c, C_c, ldc);
184
- status = cublasZgemm_64 (handle, transa, transb, m, n, k, alpha_z, A_z, lda, B_z, ldb, beta_z, C_z, ldc);
185
-
186
- status = cublasSsyrk_64 (handle, uplo, transa, n, k, alpha_s, A_s, lda, beta_s, C_s, ldc);
187
- status = cublasDsyrk_64 (handle, uplo, transa, n, k, alpha_d, A_d, lda, beta_d, C_d, ldc);
188
- status = cublasCsyrk_64 (handle, uplo, transa, n, k, alpha_c, A_c, lda, beta_c, C_c, ldc);
189
- status = cublasZsyrk_64 (handle, uplo, transa, n, k, alpha_z, A_z, lda, beta_z, C_z, ldc);
190
-
191
- status = cublasSsymm_64 (handle, side, uplo, m, n, alpha_s, A_s, lda, B_s, ldb, beta_s, C_s, ldc);
192
- status = cublasDsymm_64 (handle, side, uplo, m, n, alpha_d, A_d, lda, B_d, ldb, beta_d, C_d, ldc);
193
- status = cublasCsymm_64 (handle, side, uplo, m, n, alpha_c, A_c, lda, B_c, ldb, beta_c, C_c, ldc);
194
- status = cublasZsymm_64 (handle, side, uplo, m, n, alpha_z, A_z, lda, B_z, ldb, beta_z, C_z, ldc);
195
-
196
- status = cublasStrsm_64 (handle, side, uplo, transa, diag, m, n, alpha_s, A_s, lda, C_s, ldc);
197
- status = cublasDtrsm_64 (handle, side, uplo, transa, diag, m, n, alpha_d, A_d, lda, C_d, ldc);
198
- status = cublasCtrsm_64 (handle, side, uplo, transa, diag, m, n, alpha_c, A_c, lda, C_c, ldc);
199
- status = cublasZtrsm_64 (handle, side, uplo, transa, diag, m, n, alpha_z, A_z, lda, C_z, ldc);
200
-
201
- status = cublasChemm_64 (handle, side, uplo, m, n, alpha_c, A_c, lda, B_c, ldb, beta_c, C_c, ldc);
202
- status = cublasZhemm_64 (handle, side, uplo, m, n, alpha_z, A_z, lda, B_z, ldb, beta_z, C_z, ldc);
203
-
204
- status = cublasCherk_64 (handle, uplo, transa, n, k, alpha_s, A_c, lda, beta_s, C_c, ldc);
205
- status = cublasZherk_64 (handle, uplo, transa, n, k, alpha_d, A_z, lda, beta_d, C_z, ldc);
206
-
207
- status = cublasSsyr2k_64 (handle, uplo, transa, n, k, alpha_s, A_s, lda, B_s, ldb, beta_s, C_s, ldc);
208
- status = cublasDsyr2k_64 (handle, uplo, transa, n, k, alpha_d, A_d, lda, B_d, ldb, beta_d, C_d, ldc);
209
- status = cublasCsyr2k_64 (handle, uplo, transa, n, k, alpha_c, A_c, lda, B_c, ldb, beta_c, C_c, ldc);
210
- status = cublasZsyr2k_64 (handle, uplo, transa, n, k, alpha_z, A_z, lda, B_z, ldb, beta_z, C_z, ldc);
211
-
212
- status = cublasCher2k_64 (handle, uplo, transa, n, k, alpha_c, A_c, lda, B_c, ldb, beta_s, C_c, ldc);
213
- status = cublasZher2k_64 (handle, uplo, transa, n, k, alpha_z, A_z, lda, B_z, ldb, beta_d, C_z, ldc);
214
-
215
- status = cublasSgeam_64 (handle, transa, transb, m, n, alpha_s, A_s, lda, beta_s, B_s, ldb, C_s, ldc);
216
- status = cublasDgeam_64 (handle, transa, transb, m, n, alpha_d, A_d, lda, beta_d, B_d, ldb, C_d, ldc);
217
- status = cublasCgeam_64 (handle, transa, transb, m, n, alpha_c, A_c, lda, beta_c, B_c, ldb, C_c, ldc);
218
- status = cublasZgeam_64 (handle, transa, transb, m, n, alpha_z, A_z, lda, beta_z, B_z, ldb, C_z, ldc);
198
+ status = cublasSgemm_64 (handle, transa, transb, m, n, k, alpha_s, A_s, lda,
199
+ B_s, ldb, beta_s, C_s, ldc);
200
+ status = cublasDgemm_64 (handle, transa, transb, m, n, k, alpha_d, A_d, lda,
201
+ B_d, ldb, beta_d, C_d, ldc);
202
+ status = cublasCgemm_64 (handle, transa, transb, m, n, k, alpha_c, A_c, lda,
203
+ B_c, ldb, beta_c, C_c, ldc);
204
+ status = cublasZgemm_64 (handle, transa, transb, m, n, k, alpha_z, A_z, lda,
205
+ B_z, ldb, beta_z, C_z, ldc);
206
+ status = cublasHgemm_64 (handle, transa, transb, m, n, k, alpha_h, A_h, lda,
207
+ B_h, ldb, beta_h, C_h, ldc);
208
+
209
+ status = cublasCgemm3m_64 (handle, transa, transb, m, n, k, alpha_c, A_c, lda,
210
+ B_c, ldb, beta_c, C_c, ldc);
211
+ status = cublasZgemm3m_64 (handle, transa, transb, m, n, k, alpha_z, A_z, lda,
212
+ B_z, ldb, beta_z, C_z, ldc);
213
+
214
+ status = cublasSsyrk_64 (handle, uplo, transa, n, k, alpha_s, A_s, lda, beta_s,
215
+ C_s, ldc);
216
+ status = cublasDsyrk_64 (handle, uplo, transa, n, k, alpha_d, A_d, lda, beta_d,
217
+ C_d, ldc);
218
+ status = cublasCsyrk_64 (handle, uplo, transa, n, k, alpha_c, A_c, lda, beta_c,
219
+ C_c, ldc);
220
+ status = cublasZsyrk_64 (handle, uplo, transa, n, k, alpha_z, A_z, lda, beta_z,
221
+ C_z, ldc);
222
+
223
+ status = cublasSsymm_64 (handle, side, uplo, m, n, alpha_s, A_s, lda, B_s, ldb,
224
+ beta_s, C_s, ldc);
225
+ status = cublasDsymm_64 (handle, side, uplo, m, n, alpha_d, A_d, lda, B_d, ldb,
226
+ beta_d, C_d, ldc);
227
+ status = cublasCsymm_64 (handle, side, uplo, m, n, alpha_c, A_c, lda, B_c, ldb,
228
+ beta_c, C_c, ldc);
229
+ status = cublasZsymm_64 (handle, side, uplo, m, n, alpha_z, A_z, lda, B_z, ldb,
230
+ beta_z, C_z, ldc);
231
+
232
+ status = cublasStrsm_64 (handle, side, uplo, transa, diag, m, n, alpha_s, A_s,
233
+ lda, C_s, ldc);
234
+ status = cublasDtrsm_64 (handle, side, uplo, transa, diag, m, n, alpha_d, A_d,
235
+ lda, C_d, ldc);
236
+ status = cublasCtrsm_64 (handle, side, uplo, transa, diag, m, n, alpha_c, A_c,
237
+ lda, C_c, ldc);
238
+ status = cublasZtrsm_64 (handle, side, uplo, transa, diag, m, n, alpha_z, A_z,
239
+ lda, C_z, ldc);
240
+
241
+ status = cublasChemm_64 (handle, side, uplo, m, n, alpha_c, A_c, lda, B_c, ldb,
242
+ beta_c, C_c, ldc);
243
+ status = cublasZhemm_64 (handle, side, uplo, m, n, alpha_z, A_z, lda, B_z, ldb,
244
+ beta_z, C_z, ldc);
245
+
246
+ status = cublasCherk_64 (handle, uplo, transa, n, k, alpha_s, A_c, lda, beta_s,
247
+ C_c, ldc);
248
+ status = cublasZherk_64 (handle, uplo, transa, n, k, alpha_d, A_z, lda, beta_d,
249
+ C_z, ldc);
250
+
251
+ status = cublasSsyr2k_64 (handle, uplo, transa, n, k, alpha_s, A_s, lda, B_s,
252
+ ldb, beta_s, C_s, ldc);
253
+ status = cublasDsyr2k_64 (handle, uplo, transa, n, k, alpha_d, A_d, lda, B_d,
254
+ ldb, beta_d, C_d, ldc);
255
+ status = cublasCsyr2k_64 (handle, uplo, transa, n, k, alpha_c, A_c, lda, B_c,
256
+ ldb, beta_c, C_c, ldc);
257
+ status = cublasZsyr2k_64 (handle, uplo, transa, n, k, alpha_z, A_z, lda, B_z,
258
+ ldb, beta_z, C_z, ldc);
259
+
260
+ status = cublasCher2k_64 (handle, uplo, transa, n, k, alpha_c, A_c, lda, B_c,
261
+ ldb, beta_s, C_c, ldc);
262
+ status = cublasZher2k_64 (handle, uplo, transa, n, k, alpha_z, A_z, lda, B_z,
263
+ ldb, beta_d, C_z, ldc);
264
+
265
+ status = cublasSgeam_64 (handle, transa, transb, m, n, alpha_s, A_s, lda,
266
+ beta_s, B_s, ldb, C_s, ldc);
267
+ status = cublasDgeam_64 (handle, transa, transb, m, n, alpha_d, A_d, lda,
268
+ beta_d, B_d, ldb, C_d, ldc);
269
+ status = cublasCgeam_64 (handle, transa, transb, m, n, alpha_c, A_c, lda,
270
+ beta_c, B_c, ldb, C_c, ldc);
271
+ status = cublasZgeam_64 (handle, transa, transb, m, n, alpha_z, A_z, lda,
272
+ beta_z, B_z, ldb, C_z, ldc);
219
273
220
274
status = cublasSdgmm_64 (handle, side, m, n, A_s, lda, B_s, ldb, C_s, ldc);
221
275
status = cublasDdgmm_64 (handle, side, m, n, A_d, lda, B_d, ldb, C_d, ldc);
222
276
status = cublasCdgmm_64 (handle, side, m, n, A_c, lda, B_c, ldb, C_c, ldc);
223
277
status = cublasZdgmm_64 (handle, side, m, n, A_z, lda, B_z, ldb, C_z, ldc);
278
+
279
+ status = cublasStrmm_64 (handle, side, uplo, transa, diag, m, n, alpha_s, A_s,
280
+ lda, B_s, ldb, C_s, ldc);
281
+ status = cublasDtrmm_64 (handle, side, uplo, transa, diag, m, n, alpha_d, A_d,
282
+ lda, B_d, ldb, C_d, ldc);
283
+ status = cublasCtrmm_64 (handle, side, uplo, transa, diag, m, n, alpha_c, A_c,
284
+ lda, B_c, ldb, C_c, ldc);
285
+ status = cublasZtrmm_64 (handle, side, uplo, transa, diag, m, n, alpha_z, A_z,
286
+ lda, B_z, ldb, C_z, ldc);
287
+
288
+ status = cublasSsyrkx_64 (handle, uplo, transa, n, k, alpha_s, A_s, lda, B_s,
289
+ ldb, beta_s, C_s, ldc);
290
+ status = cublasDsyrkx_64 (handle, uplo, transa, n, k, alpha_d, A_d, lda, B_d,
291
+ ldb, beta_d, C_d, ldc);
292
+ status = cublasCsyrkx_64 (handle, uplo, transa, n, k, alpha_c, A_c, lda, B_c,
293
+ ldb, beta_c, C_c, ldc);
294
+ status = cublasZsyrkx_64 (handle, uplo, transa, n, k, alpha_z, A_z, lda, B_z,
295
+ ldb, beta_z, C_z, ldc);
296
+
297
+ status = cublasCherkx_64 (handle, uplo, transa, n, k, alpha_c, A_c, lda, B_c,
298
+ ldb, beta_s, C_c, ldc);
299
+ status = cublasZherkx_64 (handle, uplo, transa, n, k, alpha_z, A_z, lda, B_z,
300
+ ldb, beta_d, C_z, ldc);
224
301
}
0 commit comments