Skip to content

Commit 1dce76d

Browse files
authored
Merge pull request #597 from ThePortlandGroup/nv_stage
Pull 2018-09-30T14-55 Recent NVIDIA Changes
2 parents 498be06 + 1af4225 commit 1dce76d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+2545
-3325
lines changed

runtime/flang/directives.h

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
#define F3 % xmm2
5050
#define F4 % xmm3
5151

52-
#else
52+
#elif defined(LINUX_ELF) || defined(TARGET_LINUX_X86) || defined(TARGET_LINUX_X8664)
5353
#define ENT(n) n
5454
#define ALN_WORD .align 4
5555
#define ALN_FUNC .align 16
@@ -71,6 +71,31 @@
7171
#define F3 % xmm2
7272
#define F4 % xmm3
7373

74+
#elif defined(TARGET_OSX_X8664)
75+
#define ENT(n) ASM_CONCAT(_,n)
76+
#define ALN_WORD .align 2
77+
#define ALN_FUNC .align 4
78+
#define ALN_DBLE .align 3
79+
#define ALN_QUAD .align 4
80+
#define ELF_FUNC(s)
81+
#define ELF_OBJ(s)
82+
#define ELF_SIZE(s)
83+
#define AS_VER
84+
#define I1 % rdi
85+
#define I1W % edi
86+
#define I2 % rsi
87+
#define I2W % esi
88+
#define I3 % rdx
89+
#define I3W % edx
90+
#define I4 % rcx
91+
#define F1 % xmm0
92+
#define F2 % xmm1
93+
#define F3 % xmm2
94+
#define F4 % xmm3
95+
96+
#else
97+
#error X8664 TARGET platform not defined.
98+
#error TARGET must be one of TARGET_LINUX_X8664, TARGET_OSX_X8664, or TARGET_WIN_X8664.
7499
#endif
75100

76101
/* macros for handling pic and non-pic code */

runtime/flang/fmtconv.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 1995-2017, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -563,6 +563,7 @@ __fortio_fmt_g(__BIGREAL_T val, int w, int d, int e, int sf, int type,
563563
{
564564
int sign_char;
565565
int newd;
566+
#if defined(TARGET_X8664)
566567
/*
567568
* the following guarded IF may look like a no-op, but is
568569
* needed when val is a denorm and DAZ is enabled. In this case, the
@@ -580,6 +581,7 @@ __fortio_fmt_g(__BIGREAL_T val, int w, int d, int e, int sf, int type,
580581
((int *)&val)[1] |= 0x80000000;
581582
}
582583
}
584+
#endif
583585
field_overflow = FALSE;
584586
/*
585587
fp_canon(val, type, round);

runtime/flang/fortDt.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,10 @@ typedef __INT_T dtype;
307307
* which can be either a 64-bit or 32-bit type depending on DESC_I8
308308
*/
309309

310+
#if defined(TARGET_X8664)
310311
#define __NELEM_T __INT8_T
312+
#else
313+
#define __NELEM_T __INT_T
314+
#endif
311315

312316
#endif /*_PGHPF_TYPES_H_*/

runtime/flang/ftncharsup.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 1993-2018, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -384,8 +384,13 @@ Ftn_str_free(char **first)
384384

385385
#define __HAVE_LONGLONG_T
386386

387+
#if defined(LINUX8664) || defined(OSX8664)
387388
typedef long _LONGLONG_T;
388389
typedef unsigned long _ULONGLONG_T;
390+
#else
391+
typedef long long _LONGLONG_T;
392+
typedef unsigned long long _ULONGLONG_T;
393+
#endif
389394

390395
/* ***********************************************************************/
391396
/** \brief

runtime/flang/ftni64.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 1997-2018, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -24,8 +24,13 @@
2424

2525
#define __HAVE_LONGLONG_T
2626

27+
#if defined(LINUX8664) || defined(OSX8664)
2728
typedef long _LONGLONG_T;
2829
typedef unsigned long _ULONGLONG_T;
30+
#else
31+
typedef long long _LONGLONG_T;
32+
typedef unsigned long long _ULONGLONG_T;
33+
#endif
2934

3035
/* now defined if BaseTsd10.h included */
3136
typedef int INT64[2];
@@ -44,6 +49,7 @@ typedef union {
4449
_LONGLONG_T lv;
4550
} INT64D;
4651

52+
#if defined(LINUX8664) || defined(OSX8664)
4753
#define __I8RET_T long
4854
#define UTL_I_I64RET(m, l) \
4955
{ \
@@ -52,3 +58,18 @@ typedef union {
5258
I64_LSH(int64d.i) = l; \
5359
return int64d.lv; \
5460
}
61+
#elif defined(WIN64)
62+
/* Someday, should only care if TM_I8 is defined */
63+
#define __I8RET_T long long
64+
#define UTL_I_I64RET(m, l) \
65+
{ \
66+
INT64D int64d; \
67+
I64_MSH(int64d.i) = m; \
68+
I64_LSH(int64d.i) = l; \
69+
return int64d.lv; \
70+
}
71+
#else
72+
#define __I8RET_T void
73+
#define UTL_I_I64RET __utl_i_i64ret
74+
extern VOID UTL_I_I64RET();
75+
#endif

runtime/flang/ftnncharsup.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 1993-2018, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -160,8 +160,13 @@ int a2_len; /* length of a2 */
160160

161161
#define __HAVE_LONGLONG_T
162162

163+
#if defined(LINUX8664) || defined(OSX8664)
163164
typedef long _LONGLONG_T;
164165
typedef unsigned long _ULONGLONG_T;
166+
#else
167+
typedef long long _LONGLONG_T;
168+
typedef unsigned long long _ULONGLONG_T;
169+
#endif
165170

166171

167172
/* ***********************************************************************/

runtime/flang/miscsup_com.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,7 +816,11 @@ ENTFTN(SYSCLK, sysclk)(__STAT_T *count, __STAT_T *count_rate,
816816

817817
if (resol == 0) {
818818
int def;
819+
#if defined(TARGET_X8664)
819820
def = 1000000;
821+
#else
822+
def = sizeof(__STAT_T) < 8 ? 1000 : 1000000;
823+
#endif
820824
resol = __fort_getoptn("-system_clock_rate", def);
821825
if (resol <= 0)
822826
__fort_abort("invalid value given for system_clock rate");
@@ -2880,6 +2884,7 @@ ENTF90(TRIMA, trima)
28802884
i = CLEN(expr);
28812885
while (i > 0) {
28822886
if (CADR(expr)[i - 1] != ' ') {
2887+
#if defined(TARGET_X8664)
28832888
if (i <= 11) {
28842889
int *rptr = ((int *)CADR(res));
28852890
int *eptr = ((int *)CADR(expr));
@@ -2899,6 +2904,11 @@ ENTF90(TRIMA, trima)
28992904
}
29002905
rcptr = (char *)rptr;
29012906
ecptr = (char *)eptr;
2907+
#else
2908+
if (i <= 3) {
2909+
rcptr = ((char *)CADR(res));
2910+
ecptr = ((char *)CADR(expr));
2911+
#endif
29022912
j = i & 3;
29032913
if (j > 2)
29042914
*rcptr++ = *ecptr++;
@@ -4823,7 +4833,11 @@ ENTF90(SPACINGD, spacingd)(__REAL8_T *d)
48234833

48244834
#ifndef DESC_I8
48254835

4836+
#if defined(TARGET_X8664)
48264837
typedef __INT8_T SZ_T;
4838+
#else
4839+
typedef __INT4_T SZ_T;
4840+
#endif
48274841

48284842
#undef _MZERO
48294843
#define _MZERO(n, t) \

runtime/flang/pgf90_mmul_cmplx16.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@
1717
!
1818
! Global variables
1919
!
20+
#ifdef TARGET_X8664
2021
integer*8 :: mra, ncb, kab, lda, ldb, ldc
22+
#else
23+
integer :: mra, ncb, kab, lda, ldb, ldc
24+
#endif
2125
complex*16, dimension( lda, * )::a
2226
complex*16, dimension( ldb, * )::b
2327
complex*16, dimension( ldc, * )::c
@@ -26,6 +30,7 @@
2630
!
2731
! local variables
2832
!
33+
#ifdef TARGET_X8664
2934
integer*8 :: colsa, rowsa, rowsb, colsb
3035
integer*8 :: i, j, jb, k, ak, bk, jend
3136
integer*8 :: ar, ar_sav, ac, ac_sav, br, bc
@@ -36,6 +41,18 @@
3641
integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
3742
integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
3843
integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
44+
#else
45+
integer :: colsa, rowsa, rowsb, colsb
46+
integer :: i, j, jb, k, ak, bk, jend
47+
integer :: ar, ar_sav, ac, ac_sav, br, bc
48+
integer :: ndxa, ndxasav
49+
integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3
50+
integer :: colachunk, colachunks, colbchunk, colbchunks
51+
integer :: rowchunk, rowchunks
52+
integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
53+
integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
54+
integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
55+
#endif
3956
integer :: ta, tb
4057
complex*16 :: temp, temp0, temp1, temp2, temp3
4158
real*8 :: temprr0, temprr1, temprr2, temprr3
@@ -52,7 +69,14 @@
5269
complex*16, allocatable, dimension(:) :: buffera, bufferb
5370

5471
! Minimum number of multiplications needed to activate the blocked optimization.
72+
#ifdef TARGET_X8664
5573
integer, parameter :: min_blocked_mult = 15000
74+
#elif TARGET_LINUX_POWER
75+
integer, parameter :: min_blocked_mult = 15000 !Complex calculations not vectorized on OpenPower.
76+
#else
77+
#warning untuned matrix multiplication parameter
78+
integer, parameter :: min_blocked_mult = 15000
79+
#endif
5680

5781
#undef DCMPLX
5882
#define DCMPLX(r,i) cmplx(r,i,kind=8)

runtime/flang/pgf90_mmul_cmplx8.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@
1717
!
1818
! Global variables
1919
!
20+
#ifdef TARGET_X8664
2021
integer*8 :: mra, ncb, kab, lda, ldb, ldc
22+
#else
23+
integer :: mra, ncb, kab, lda, ldb, ldc
24+
#endif
2125
complex*8, dimension( lda, * )::a
2226
complex*8, dimension( ldb, * )::b
2327
complex*8, dimension( ldc, * )::c
@@ -26,6 +30,7 @@
2630
!
2731
! local variables
2832
!
33+
#ifdef TARGET_X8664
2934
integer*8 :: colsa, rowsa, rowsb, colsb
3035
integer*8 :: i, j, jb, k, ak, bk, jend
3136
integer*8 :: ar, ar_sav, ac, ac_sav, br, bc
@@ -36,6 +41,18 @@
3641
integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
3742
integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
3843
integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
44+
#else
45+
integer :: colsa, rowsa, rowsb, colsb
46+
integer :: i, j, jb, k, ak, bk, jend
47+
integer :: ar, ar_sav, ac, ac_sav, br, bc
48+
integer :: ndxa, ndxasav
49+
integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3
50+
integer :: colachunk, colachunks, colbchunk, colbchunks
51+
integer :: rowchunk, rowchunks
52+
integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
53+
integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
54+
integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
55+
#endif
3956
integer :: ta, tb
4057
complex*8 :: temp, temp0, temp1, temp2, temp3
4158
real*4 :: temprr0, temprr1, temprr2, temprr3
@@ -52,5 +69,12 @@
5269
complex*8, allocatable, dimension(:) :: buffera, bufferb
5370

5471
! Minimum number of multiplications needed to activate the blocked optimization.
72+
#ifdef TARGET_X8664
5573
integer, parameter :: min_blocked_mult = 1750
74+
#elif TARGET_LINUX_POWER
75+
integer, parameter :: min_blocked_mult = 1750 !Complex calculations not vectorized on OpenPower.
76+
#else
77+
#warning untuned matrix multiplication parameter
78+
integer, parameter :: min_blocked_mult = 1750
79+
#endif
5680

runtime/flang/pgf90_mmul_real4.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,11 @@
2121
!
2222
! Global variables
2323
!
24+
#ifdef TARGET_X8664
2425
integer*8 :: mra, ncb, kab, lda, ldb, ldc
26+
#else
27+
integer :: mra, ncb, kab, lda, ldb, ldc
28+
#endif
2529
real*4, dimension( lda, * )::a
2630
real*4, dimension( ldb, * )::b
2731
real*4, dimension( ldc, * )::c
@@ -30,6 +34,7 @@
3034
!
3135
! local variables
3236
!
37+
#ifdef TARGET_X8664
3338
integer*8 :: colsa, rowsa, rowsb, colsb
3439
integer*8 :: i, j, jb, k, ak, bk, jend
3540
integer*8 :: ar, ar_sav, ac, ac_sav, br, bc
@@ -40,6 +45,18 @@
4045
integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
4146
integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
4247
integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
48+
#else
49+
integer :: colsa, rowsa, rowsb, colsb
50+
integer :: i, j, jb, k, ak, bk, jend
51+
integer :: ar, ar_sav, ac, ac_sav, br, bc
52+
integer :: ndxa, ndxasav
53+
integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3
54+
integer :: colachunk, colachunks, colbchunk, colbchunks
55+
integer :: rowchunk, rowchunks
56+
integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
57+
integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
58+
integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
59+
#endif
4360
real*4 :: temp, temp0, temp1, temp2, temp3
4461
real*4 :: bufatemp, bufbtemp
4562
real*8 :: time_start, time_end, ttime, all_time
@@ -50,4 +67,11 @@
5067
real*4, allocatable, dimension(:) :: buffera, bufferb
5168

5269
! Minimum number of multiplications needed to activate the blocked optimization.
70+
#ifdef TARGET_X8664
5371
integer, parameter :: min_blocked_mult = 5000
72+
#elif TARGET_LINUX_POWER
73+
integer, parameter :: min_blocked_mult = 10000
74+
#else
75+
#warning untuned matrix multiplication parameter
76+
integer, parameter :: min_blocked_mult = 5000
77+
#endif

0 commit comments

Comments
 (0)