// Example 2. Application Using C and cuBLAS: 0-based indexing
// -----------------------------------------------------------
#include <cuda_runtime.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "cublas_v2.h"
/* Matrix dimensions used by this example: M rows by N columns. */
#define M 6
#define N 5

/*
 * Maps a 0-based (row i, column j) pair to a linear offset in a column-major
 * array whose leading dimension (distance between consecutive columns) is ld.
 * Every argument is parenthesized so expressions such as IDX2C(r + 1, c, ld)
 * expand correctly. There must be no space between the macro name and '(':
 * otherwise the preprocessor defines an object-like macro instead.
 */
#define IDX2C(i, j, ld) (((j) * (ld)) + (i))

12- static __inline__ void modify (cublasHandle_t handle, float *m, int ldm, int n, int p, int q, float alpha, float beta){
13- cublasSscal (handle, n-q, &alpha, &m[IDX2C (p,q,ldm)], ldm);
14- cublasSscal (handle, ldm-p, &beta, &m[IDX2C (p,q,ldm)], 1 );
13+ static __inline__ void modify (cublasHandle_t handle, float * m, int ldm, int n,
14+ int p, int q, float alpha, float beta) {
15+ cublasSscal (handle, n - q, &alpha, &m[IDX2C (p, q, ldm)], ldm);
16+ cublasSscal (handle, ldm - p, &beta, &m[IDX2C (p, q, ldm)], 1 );
1517}
1618
17- int main (void ){
18- cudaError_t cudaStat;
19- cublasStatus_t stat;
20- cublasHandle_t handle;
21- int i, j;
22- float * devPtrA;
23- float * a = 0 ;
24- a = (float *)malloc (M * N * sizeof (*a));
25- if (!a) {
26- printf (" host memory allocation failed" );
27- return EXIT_FAILURE;
28- }
29- for (j = 0 ; j < N; j++) {
30- for (i = 0 ; i < M; i++) {
31- a[IDX2C (i,j,M)] = (float )(i * N + j + 1 );
32- }
33- }
34- cudaStat = cudaMalloc ((void **)&devPtrA, M*N*sizeof (*a));
35- if (cudaStat != cudaSuccess) {
36- printf (" device memory allocation failed" );
37- free (a);
38- return EXIT_FAILURE;
39- }
40- stat = cublasCreate (&handle);
41- if (stat != CUBLAS_STATUS_SUCCESS) {
42- printf (" CUBLAS initialization failed\n " );
43- free (a);
44- cudaFree (devPtrA);
45- return EXIT_FAILURE;
19+ int main (void ) {
20+ cudaError_t cudaStat;
21+ cublasStatus_t stat;
22+ cublasHandle_t handle;
23+ int i, j;
24+ float * devPtrA;
25+ float * a = 0 ;
26+ a = (float *)malloc (M * N * sizeof (*a));
27+ if (!a) {
28+ printf (" host memory allocation failed" );
29+ return EXIT_FAILURE;
30+ }
31+ for (j = 0 ; j < N; j++) {
32+ for (i = 0 ; i < M; i++) {
33+ a[IDX2C (i, j, M)] = (float )(i * N + j + 1 );
4634 }
47- stat = cublasSetMatrix (M, N, sizeof (*a), a, M, devPtrA, M);
48- if (stat != CUBLAS_STATUS_SUCCESS) {
49- printf (" data download failed" );
50- free (a);
51- cudaFree (devPtrA);
52- cublasDestroy (handle);
53- return EXIT_FAILURE;
54- }
55- modify (handle, devPtrA, M, N, 1 , 2 , 16 .0f , 12 .0f );
56- stat = cublasGetMatrix (M, N, sizeof (*a), devPtrA, M, a, M);
57- if (stat != CUBLAS_STATUS_SUCCESS) {
58- printf (" data upload failed" );
59- free (a);
60- cudaFree (devPtrA);
61- cublasDestroy (handle);
62- return EXIT_FAILURE;
63- }
64- cudaFree (devPtrA);
35+ }
36+ cudaStat = cudaMalloc ((void **)&devPtrA, M * N * sizeof (*a));
37+ if (cudaStat != cudaSuccess) {
38+ printf (" device memory allocation failed" );
39+ free (a);
40+ return EXIT_FAILURE;
41+ }
42+ stat = cublasCreate (&handle);
43+ if (stat != CUBLAS_STATUS_SUCCESS) {
44+ printf (" CUBLAS initialization failed\n " );
45+ free (a);
46+ cudaFree (devPtrA);
47+ return EXIT_FAILURE;
48+ }
49+ stat = cublasSetMatrix (M, N, sizeof (*a), a, M, devPtrA, M);
50+ if (stat != CUBLAS_STATUS_SUCCESS) {
51+ printf (" data download failed" );
52+ free (a);
53+ cudaFree (devPtrA);
6554 cublasDestroy (handle);
66- for (j = 0 ; j < N; j++) {
67- for (i = 0 ; i < M; i++) {
68- printf ( " %7 .0f" , a[ IDX2C (i,j,M)] );
69- }
70- printf ( " \n " );
71- }
55+ return EXIT_FAILURE;
56+ }
57+ modify (handle, devPtrA, M, N, 1 , 2 , 16 .0f , 12 . 0f );
58+ stat = cublasGetMatrix (M, N, sizeof (*a), devPtrA, M, a, M);
59+ if (stat != CUBLAS_STATUS_SUCCESS) {
60+ printf ( " data upload failed " );
7261 free (a);
73- return EXIT_SUCCESS;
62+ cudaFree (devPtrA);
63+ cublasDestroy (handle);
64+ return EXIT_FAILURE;
65+ }
66+ cudaFree (devPtrA);
67+ cublasDestroy (handle);
68+ for (j = 0 ; j < N; j++) {
69+ for (i = 0 ; i < M; i++) {
70+ printf (" %7.0f" , a[IDX2C (i, j, M)]);
71+ }
72+ printf (" \n " );
73+ }
74+ free (a);
75+ return EXIT_SUCCESS;
7476}
0 commit comments