Skip to content

Commit a0a67c5

Browse files
authored
Merge pull request #794 from ThePortlandGroup/nv_stage
Pull 2019-09-03T14-15 Recent NVIDIA Changes
2 parents 7e96858 + 7a7aae1 commit a0a67c5

File tree

16 files changed

+192
-38
lines changed

16 files changed

+192
-38
lines changed

runtime/libpgmath/lib/common/atan/atan_d_vec.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ vdouble __attribute__((noinline)) atan_d_vec(vdouble const x) {
105105

106106
vdouble result = vsel_vd_vo_vd_vd(f_big, result_f_big, result_not_f_big);
107107

108+
result = vreinterpret_vd_vm(vreinterpret_vm_vd(result) | vreinterpret_vm_vd(ans_sgn));
109+
108110
return result;
109111
}
110-

runtime/libpgmath/lib/common/atan/fd_atan_scalar.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ double __attribute__((noinline)) atan_d_scalar(double x) {
9292

9393
double result_d = FMA(x2 * xReduced, poly, xReduced);
9494

95+
result_d = copysign(result_d, x);
96+
9597
return result_d;
9698
}
97-

runtime/libpgmath/lib/common/atanf/atan_vec.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,5 +72,8 @@ vfloat __attribute__((noinline)) atan_vec(vfloat const x) {
7272

7373
vfloat result = vsel_vf_vo_vf_vf(x_big, result_x_big, result_not_x_big);
7474

75+
//Make sure atanf(-0.0f) = -0.0f:
76+
result = vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(result), vreinterpret_vm_vf(ans_sgn)));
77+
7578
return result;
7679
}

runtime/libpgmath/lib/common/atanf/fs_atan_scalar.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,5 +76,8 @@ float __attribute__((noinline)) atan_scalar(const float x) {
7676

7777
float result_d = FMAF(x2 * xReduced, poly, xReduced);
7878

79+
//This fixes atanf(-0.0) = -0.0, seemingly without slowing down the code
80+
result_d = copysignf(result_d, x);
81+
7982
return result_d;
8083
}

runtime/libpgmath/lib/x86_64/math_tables/mth_atandefs.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ MTHINTRIN(atan , sv8m , avxfma4 , __fs_atan_8_mn , __rs_atan_8_mn
5252
MTHINTRIN(atan , dv4m , avxfma4 , __fd_atan_4_mn , __rd_atan_4_mn , __pd_atan_4_mn ,__math_dispatch_error)
5353

5454
MTHINTRIN(atan , ss , avx2 , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
55-
MTHINTRIN(atan , ds , avx2 , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
55+
MTHINTRIN(atan , ds , avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
5656
MTHINTRIN(atan , sv4 , avx2 , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
5757
MTHINTRIN(atan , dv2 , avx2 , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
5858
MTHINTRIN(atan , sv8 , avx2 , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)
@@ -63,7 +63,7 @@ MTHINTRIN(atan , sv8m , avx2 , __fs_atan_8_mn , __rs_atan_8_mn
6363
MTHINTRIN(atan , dv4m , avx2 , __fd_atan_4_mn , __rd_atan_4_mn , __pd_atan_4_mn ,__math_dispatch_error)
6464

6565
MTHINTRIN(atan , ss , avx512knl , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
66-
MTHINTRIN(atan , ds , avx512knl , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
66+
MTHINTRIN(atan , ds , avx512knl , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
6767
MTHINTRIN(atan , sv4 , avx512knl , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
6868
MTHINTRIN(atan , dv2 , avx512knl , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
6969
MTHINTRIN(atan , sv8 , avx512knl , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)
@@ -78,7 +78,7 @@ MTHINTRIN(atan , sv16m, avx512knl , __fs_atan_16_mn , __rs_atan_16_mn
7878
MTHINTRIN(atan , dv8m , avx512knl , __fd_atan_8_mn , __rd_atan_8_mn , __pd_atan_8_mn ,__math_dispatch_error)
7979

8080
MTHINTRIN(atan , ss , avx512 , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
81-
MTHINTRIN(atan , ds , avx512 , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
81+
MTHINTRIN(atan , ds , avx512 , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
8282
MTHINTRIN(atan , sv4 , avx512 , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
8383
MTHINTRIN(atan , dv2 , avx512 , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
8484
MTHINTRIN(atan , sv8 , avx512 , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)

tools/flang1/flang1exe/semant.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2748,6 +2748,8 @@ semant1(int rednum, SST *top)
27482748
* <end stmt> ::= <END stmt> |
27492749
*/
27502750
case END_STMT1:
2751+
if (gbl.rutype == RU_SUBR || gbl.rutype == RU_FUNC)
2752+
defer_arg_chk(SPTR_NULL, SPTR_NULL, SPTR_NULL, 0, 0, true);
27512753
if (sem.interface && !gbl.rutype)
27522754
error(310, 3, gbl.lineno, "Missing ENDINTERFACE statement", CNULL);
27532755
else if (sem.which_pass)
@@ -2798,6 +2800,7 @@ semant1(int rednum, SST *top)
27982800
* <end stmt> ::= ENDFUNCTION <opt ident> |
27992801
*/
28002802
case END_STMT3:
2803+
defer_arg_chk(SPTR_NULL, SPTR_NULL, SPTR_NULL, 0, 0, true);
28012804
submod_proc_endfunc:
28022805
fix_iface(gbl.currsub);
28032806
if (sem.which_pass && !sem.interface) {
@@ -2889,6 +2892,7 @@ semant1(int rednum, SST *top)
28892892
* <end stmt> ::= ENDSUBROUTINE <opt ident> |
28902893
*/
28912894
case END_STMT6:
2895+
defer_arg_chk(SPTR_NULL, SPTR_NULL, SPTR_NULL, 0, 0, true);
28922896
fix_iface(gbl.currsub);
28932897
if (sem.which_pass && !sem.interface) {
28942898
fix_class_args(gbl.currsub);

tools/flang1/flang1exe/semant.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1619,6 +1619,8 @@ LOGICAL chk_arguments(int, int, ITEM *, char *, int, int, int, int *);
16191619
LOGICAL ignore_tkr(int, int);
16201620
LOGICAL ignore_tkr_all(int);
16211621
int iface_intrinsic(int);
1622+
void defer_arg_chk(SPTR formal, SPTR actual, SPTR subprog,
1623+
cmp_interface_flags, int lineno, bool performChk);
16221624
/* end semfunc2.c */
16231625

16241626
/* semgnr.c */

tools/flang1/flang1exe/semfunc2.c

Lines changed: 102 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1994-2018, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 1994-2019, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -1986,6 +1986,84 @@ sum_scatter_args(ITEM *list, int cnt)
19861986

19871987
/*---------------------------------------------------------------------*/
19881988

1989+
/** \brief Process information for deferred interface argument checking in
1990+
* the compat_arg_lists() function below.
1991+
*
1992+
* If the performChk argument is false, then we save the information
1993+
* (defer the check). If performChk argument is true, then we perform
1994+
* the argument checking. Note: If performChk is true, then the other
1995+
* arguments are ignored.
* Saved checks are only performed when sem.which_pass == 1, and only for
* entries whose saved subprogram name equals the name of gbl.currsub. An
* incompatible pair is reported through error(74, ...) at the saved line
* number. Every entry that has been checked is removed from the list.
1996+
*
1997+
* \param formal is the symbol table pointer of the dummy/formal argument.
* Only its name is saved; the symbol is looked up again by name
* when the check is performed.
1998+
* \param actual is the symbol table pointer of the actual argument.
* \param subprog is the symbol whose name is saved with the check; the
* check runs once gbl.currsub has the same name.
1999+
* \param flags are comparison flags that enable/disable certain checks
2000+
* \param lineno is the source line number for the deferred check
2001+
* \param performChk is false to defer checks and true to perform the checks.
2002+
*/
2003+
void
2004+
defer_arg_chk(SPTR formal, SPTR actual, SPTR subprog,
2005+
cmp_interface_flags flags, int lineno, bool performChk)
2006+
{
2007+
2008+
/* One pending check. The formal and the subprogram are stored as copies
 * of their names, the actual is stored as a symbol table pointer. */
typedef struct chkList {
2009+
char *formal;
2010+
SPTR actual;
2011+
char *subprog;
2012+
cmp_interface_flags flags;
2013+
int lineno;
2014+
struct chkList * next;
2015+
}CHKLIST;
2016+
2017+
/* Head of the pending-check list; static, so it persists between calls. */
static CHKLIST *list = NULL;
2018+
CHKLIST *ptr, *prev;
2019+
2020+
if (!performChk) {
2021+
/* Add a deferred check to the list */
2022+
NEW(ptr, CHKLIST, sizeof(CHKLIST));
2023+
NEW(ptr->formal, char, strlen(SYMNAME(formal))+1);
2024+
strcpy(ptr->formal, SYMNAME(formal));
2025+
ptr->actual = actual;
2026+
NEW(ptr->subprog, char, strlen(SYMNAME(subprog))+1);
2027+
strcpy(ptr->subprog, SYMNAME(subprog));
2028+
ptr->flags = flags;
2029+
ptr->lineno = lineno;
2030+
/* Push the new entry onto the front of the list. */
ptr->next = list;
2031+
list = ptr;
2032+
} else if (sem.which_pass == 1) {
2033+
/* Walk the list; the loop advances ptr itself because a matching entry
 * is unlinked while iterating. */
for(prev = ptr = list; ptr != NULL; ) {
2034+
if (strcmp(SYMNAME(gbl.currsub),ptr->subprog) == 0) {
2035+
/* perform argument check */
2036+
/* The formal parameter is reused as a local here: the symbol is
 * obtained from the saved name via getsym(). */
formal = getsym(ptr->formal, strlen(ptr->formal));
2037+
if (!compatible_characteristics(formal, ptr->actual, ptr->flags)) {
2038+
char details[1000];
2039+
sprintf(details, "- arguments of %s and %s do not agree",
2040+
SYMNAME(ptr->actual), ptr->formal);
2041+
error(74, 3, ptr->lineno, ptr->subprog, details);
2042+
}
2043+
/* prev == ptr only while ptr is still the head of the list, so this
 * branch removes the head and moves the list pointer forward. */
if (prev == ptr) {
2044+
prev = ptr->next;
2045+
FREE(ptr->formal);
2046+
FREE(ptr->subprog);
2047+
FREE(ptr);
2048+
list = ptr = prev;
2049+
} else {
2050+
/* Interior entry: bypass it from its predecessor, then continue with
 * the entry that followed it. */
prev->next = ptr->next;
2051+
FREE(ptr->formal);
2052+
FREE(ptr->subprog);
2053+
FREE(ptr);
2054+
ptr = prev->next;
2055+
}
2056+
} else {
2057+
/* Entry belongs to a different subprogram name; keep it and move on. */
prev = ptr;
2058+
ptr = ptr->next;
2059+
}
2060+
}
2061+
}
2062+
2063+
}
2064+
2065+
2066+
19892067
/** \brief For arguments that are subprograms, check that their argument lists
19902068
* are compatible.
19912069
*/
@@ -1995,23 +2073,40 @@ compat_arg_lists(int formal, int actual)
19952073
int paramct;
19962074
int fdscptr, adscptr;
19972075
int i;
2076+
bool func_chk;
2077+
cmp_interface_flags flags;
19982078

19992079
/* TODO: Not checking certain cases for now. */
20002080
if (STYPEG(actual) == ST_INTRIN || STYPEG(actual) == ST_GENERIC)
20012081
return TRUE;
20022082

2003-
if (STYPEG(formal) == ST_PROC && STYPEG(actual) == ST_PROC && FVALG(formal) &&
2004-
FVALG(actual) &&
2005-
!compatible_characteristics(formal, actual,
2006-
(IGNORE_ARG_NAMES | RELAX_STYPE_CHK |
2007-
RELAX_POINTER_CHK | RELAX_PURE_CHK_2))) {
2083+
flags = (IGNORE_ARG_NAMES | RELAX_STYPE_CHK | RELAX_POINTER_CHK |
2084+
RELAX_PURE_CHK_2);
2085+
func_chk = (STYPEG(formal) == ST_PROC && STYPEG(actual) == ST_PROC &&
2086+
FVALG(formal) && FVALG(actual));
2087+
2088+
if (func_chk && resolve_sym_aliases(SCOPEG(SCOPEG(formal))) == gbl.currsub){
2089+
flags |= DEFER_IFACE_CHK;
2090+
}
2091+
2092+
if (func_chk && !compatible_characteristics(formal, actual, flags)) {
20082093
return FALSE;
20092094
}
20102095

2096+
if (flags & DEFER_IFACE_CHK) {
2097+
/* We are calling an internal subprogram. We need to defer the
2098+
* check on the procedure dummy argument until we have seen the
2099+
* internal subprogram.
2100+
*/
2101+
defer_arg_chk(formal, actual, SCOPEG(formal), (flags ^ DEFER_IFACE_CHK),
2102+
gbl.lineno, false);
2103+
}
2104+
20112105
fdscptr = DPDSCG(formal);
20122106
adscptr = DPDSCG(actual);
2013-
if (fdscptr == 0 || adscptr == 0)
2107+
if (fdscptr == 0 || adscptr == 0 || (flags & DEFER_IFACE_CHK)) {
20142108
return TRUE; /* No dummy parameter descriptor; can't check. */
2109+
}
20152110
paramct = PARAMCTG(formal);
20162111
if (PARAMCTG(actual) != paramct)
20172112
return FALSE;

tools/flang1/flang1exe/semutil2.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8521,10 +8521,10 @@ eval_merge(ACL *arg, DTYPE dtype)
85218521
return result;
85228522
}
85238523

8524-
/* Compare two constant ACLs. Return x > y or x < y depending on want_greater.
8524+
/* Compare two constant ACLs. Return x > y or x < y depending on want_max.
85258525
*/
85268526
static bool
8527-
cmp_acl(DTYPE dtype, ACL *x, ACL *y, bool want_greater, bool back)
8527+
cmp_acl(DTYPE dtype, ACL *x, ACL *y, bool want_max, bool back)
85288528
{
85298529
int cmp;
85308530
switch (DTY(dtype)) {
@@ -8535,10 +8535,12 @@ cmp_acl(DTYPE dtype, ACL *x, ACL *y, bool want_greater, bool back)
85358535
case TY_BINT:
85368536
case TY_SINT:
85378537
case TY_INT:
8538-
if (back && want_greater) {
8539-
cmp = x->conval >= y->conval ? 1 : -1;
8538+
if (x->conval == y->conval) {
8539+
cmp = 0;
8540+
} else if (x->conval > y->conval) {
8541+
cmp = 1;
85408542
} else {
8541-
cmp = x->conval > y->conval ? 1 : -1;
8543+
cmp = -1;
85428544
}
85438545
break;
85448546
case TY_REAL:
@@ -8553,9 +8555,9 @@ cmp_acl(DTYPE dtype, ACL *x, ACL *y, bool want_greater, bool back)
85538555
return false;
85548556
}
85558557
if (back) {
8556-
return want_greater ? cmp >= 0 : cmp <= 0;
8558+
return want_max ? cmp >= 0 : cmp <= 0;
85578559
} else {
8558-
return want_greater ? cmp > 0 : cmp < 0;
8560+
return want_max ? cmp > 0 : cmp < 0;
85598561
}
85608562
}
85618563

@@ -8730,6 +8732,7 @@ do_eval_minval_or_maxval(INDEX *index, DTYPE elem_dt, DTYPE loc_dt, ACL *elems,
87308732
if (!want_val) {
87318733
for (i = 0; i < locs_size; i++) {
87328734
ACL *elem = GET_ACL(15);
8735+
BZERO(elem, ACL, 1);
87338736
elem->id = AC_CONST;
87348737
elem->dtype = loc_dt;
87358738
elem->is_const = true;

tools/flang1/flang1exe/symtab.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2608,7 +2608,8 @@ compatible_characteristics(int psptr, int psptr2, cmp_interface_flags flag)
26082608
return false;
26092609
}
26102610

2611-
if (STYPEG(psptr) == ST_PROC && STYPEG(psptr2) == ST_PROC) {
2611+
if (STYPEG(psptr) == ST_PROC && STYPEG(psptr2) == ST_PROC &&
2612+
(flag & DEFER_IFACE_CHK) == 0) {
26122613
if (!cmp_interfaces_strict(psptr, psptr2, (flag | CMP_OPTARG))) {
26132614
return false;
26142615
}
@@ -2718,15 +2719,15 @@ cmp_interfaces_strict(SPTR sym1, SPTR sym2, cmp_interface_flags flag)
27182719

27192720
for (j = i = 0; i < paramct; ++i) {
27202721
psptr = aux.dpdsc_base[dpdsc + i];
2721-
if (CCSYMG(psptr)) {
2722+
if (CCSYMG(psptr) && CLASSG(psptr)) {
27222723
++j;
27232724
}
27242725
}
27252726
paramct -= j;
27262727

27272728
for (j = i = 0; i < paramct2; ++i) {
27282729
psptr2 = aux.dpdsc_base[dpdsc2 + i];
2729-
if (CCSYMG(psptr2)) {
2730+
if (CCSYMG(psptr2) && CLASSG(psptr2)) {
27302731
++j;
27312732
}
27322733
}

0 commit comments

Comments
 (0)