|
17 | 17 | ! |
18 | 18 | ! Global variables |
19 | 19 | ! |
| 20 | +#ifdef TARGET_X8664 |
20 | 21 | integer*8 :: mra, ncb, kab, lda, ldb, ldc |
| 22 | +#else |
| 23 | + integer :: mra, ncb, kab, lda, ldb, ldc |
| 24 | +#endif |
21 | 25 | complex*16, dimension( lda, * )::a |
22 | 26 | complex*16, dimension( ldb, * )::b |
23 | 27 | complex*16, dimension( ldc, * )::c |
|
26 | 30 | ! |
27 | 31 | ! local variables |
28 | 32 | ! |
| 33 | +#ifdef TARGET_X8664 |
29 | 34 | integer*8 :: colsa, rowsa, rowsb, colsb |
30 | 35 | integer*8 :: i, j, jb, k, ak, bk, jend |
31 | 36 | integer*8 :: ar, ar_sav, ac, ac_sav, br, bc |
|
36 | 41 | integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end |
37 | 42 | integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end |
38 | 43 | integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav |
| 44 | +#else |
| 45 | + integer :: colsa, rowsa, rowsb, colsb |
| 46 | + integer :: i, j, jb, k, ak, bk, jend |
| 47 | + integer :: ar, ar_sav, ac, ac_sav, br, bc |
| 48 | + integer :: ndxa, ndxasav |
| 49 | + integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3 |
| 50 | + integer :: colachunk, colachunks, colbchunk, colbchunks |
| 51 | + integer :: rowchunk, rowchunks |
| 52 | + integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end |
| 53 | + integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end |
| 54 | + integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav |
| 55 | +#endif |
39 | 56 | integer :: ta, tb |
40 | 57 | complex*16 :: temp, temp0, temp1, temp2, temp3 |
41 | 58 | real*8 :: temprr0, temprr1, temprr2, temprr3 |
|
52 | 69 | complex*16, allocatable, dimension(:) :: buffera, bufferb |
53 | 70 |
|
54 | 71 | !Minimum number of multiplications needed to activate the blocked optimization. |
| 72 | +#ifdef TARGET_X8664 |
55 | 73 | integer, parameter :: min_blocked_mult = 15000 |
| 74 | +#elif TARGET_LINUX_POWER |
| 75 | + integer, parameter :: min_blocked_mult = 15000 !Complex calculations not vectorized on OpenPower. |
| 76 | +#else |
| 77 | + #warning untuned matrix multiplication parameter |
| 78 | + integer, parameter :: min_blocked_mult = 15000 |
| 79 | +#endif |
56 | 80 |
|
57 | 81 | #undef DCMPLX |
58 | 82 | #define DCMPLX(r,i) cmplx(r,i,kind=8) |
0 commit comments