Skip to content

Commit d5db06d

Browse files
authored
Merge pull request #690 from ThePortlandGroup/nv_stage
Pull 2019-03-20T10-58 Recent NVIDIA Changes
2 parents cc04c6e + 5670a43 commit d5db06d

File tree

15 files changed

+203
-162
lines changed

15 files changed

+203
-162
lines changed

runtime/flang/format-double.c

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -218,6 +218,22 @@ div_by_billion(uint32_t le_x[32], int *words)
218218
return remainder;
219219
}
220220

221+
static inline uint64_t
222+
double_to_uint64 (double x) {
223+
224+
#if defined(TARGET_LLVM) && defined(TARGET_LINUX_X8664)
225+
/*
226+
* LLVM emulates 'vcvttsd2usi' (a new AVX-512F instruction) with 'vcvttsd2si'
227+
* on non AVX-512F machines to cast double to unsigned long. With -Ktrap=fp
228+
* option, this generates a floating point exception when the converted number
229+
* is >= 9223372036854775808 (1<<63).
230+
*/
231+
if (x >= SIGN_BIT)
232+
return (uint64_t) (x - SIGN_BIT) + SIGN_BIT;
233+
#endif
234+
return (uint64_t) x;
235+
}
236+
221237
/*
222238
* Convert a nonnegative integer represented as a double
223239
* into a sequence of decimal digit characters ('0' to '9').
@@ -234,7 +250,7 @@ format_int_part(char *buff, int width, double x)
234250
* arithmetic below.
235251
*/
236252
if (x <= MAX_EXACTLY_REPRESENTABLE_UINT64) {
237-
out = reversed_uint64(out, buff, x);
253+
out = reversed_uint64(out, buff, double_to_uint64(x));
238254
if (!out)
239255
return width + 1; /* overflow */
240256
} else {
@@ -361,7 +377,7 @@ format_fraction(char buff[MAX_FRACTION_SIGNIFICANT_DECIMAL_DIGITS],
361377
return;
362378
}
363379

364-
absx -= (uint64_t) absx;
380+
absx -= double_to_uint64(absx);
365381
if (absx == 0.0) {
366382
fill(buff, '0', width);
367383
return;
@@ -453,7 +469,7 @@ fraction_digits(char buff[MAX_FRACTION_SIGNIFICANT_DECIMAL_DIGITS],
453469

454470
if (absx >= MIN_ENTIRELY_INTEGER)
455471
return -1;
456-
absx -= (uint64_t) absx;
472+
absx -= double_to_uint64(absx);
457473
if (absx == 0.0)
458474
return -1;
459475

@@ -657,7 +673,7 @@ F_format(char *output_buffer, int width,
657673
/* |x| is an integer (no bits worth < 2**0) */
658674
fill(frac, '0', frac_digits);
659675
} else {
660-
uint64_t int_absx = (uint64_t) absx;
676+
uint64_t int_absx = double_to_uint64(absx);
661677
int next_digit_for_rounding = 0;
662678
bool is_inexact = false;
663679
format_fraction(frac, &next_digit_for_rounding, &is_inexact,
@@ -885,7 +901,7 @@ ED_format(char *out_buffer, int width, const struct formatting_control *control,
885901
} else if (frac_part_digits < 0) {
886902
expo = int_part_digits;
887903
is_inexact = absx < MAX_EXACTLY_REPRESENTABLE_UINT64 &&
888-
absx != (uint64_t) absx;
904+
absx != double_to_uint64(absx);
889905
while (int_part_digits > significant_digits) {
890906
is_inexact |= next_digit_for_rounding != 0;
891907
next_digit_for_rounding = payload[--int_part_digits] - '0';

runtime/flang/type.c

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2010-2018, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 2010-2019, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -30,6 +30,8 @@ static struct type_desc *I8(__f03_ty_to_id)[];
3030
void ENTF90(SET_INTRIN_TYPE, set_intrin_type)(F90_Desc *dd,
3131
__INT_T intrin_type);
3232

33+
static TYPE_DESC * get_parent_pointer(TYPE_DESC *src_td, __INT_T level);
34+
3335
#define ARG1_PTR 0x1
3436
#define ARG1_ALLOC 0x2
3537
#define ARG2_PTR 0x4
@@ -135,8 +137,7 @@ ENTF90(EXTENDS_TYPE_OF, extends_type_of)
135137
return GET_DIST_TRUE_LOG;
136138

137139
if (atd->obj.level > btd->obj.level) {
138-
__INT_T offset = (btd->obj.level + 1) * sizeof(__POINT_T);
139-
TYPE_DESC *parent = *((TYPE_DESC **)(((char *)atd) - offset));
140+
TYPE_DESC *parent = get_parent_pointer(atd, btd->obj.level+1);
140141
if (btd == parent)
141142
return GET_DIST_TRUE_LOG;
142143
}
@@ -245,8 +246,7 @@ ENTF90(KEXTENDS_TYPE_OF, kextends_type_of)
245246
return GET_DIST_TRUE_LOG;
246247

247248
if (atd->obj.level > btd->obj.level) {
248-
__INT_T offset = (btd->obj.level + 1) * sizeof(__POINT_T);
249-
TYPE_DESC *parent = *((TYPE_DESC **)(((char *)atd) - offset));
249+
TYPE_DESC *parent = get_parent_pointer(atd, btd->obj.level+1);
250250
if (btd == parent)
251251
return GET_DIST_TRUE_LOG;
252252
}
@@ -310,6 +310,50 @@ ENTF90(KGET_OBJECT_SIZE, kget_object_size)(F90_Desc *d)
310310
return (__INT8_T)(td ? td->obj.size : od->size);
311311
}
312312

313+
/** \brief Returns a type descriptor pointer of a specified ancestor of
314+
* a type descriptor.
315+
*
316+
* \param src_td is the type descriptor used to locate the ancestor type
317+
* descriptor.
318+
* \param level specifies the hierarchical position in the inheritance graph
319+
* of the desired ancestor type descriptor. To find its immediate
320+
* parent, specify a level equal to src_td's level.
321+
*
322+
* \return a type descriptor representing the ancestor or NULL if there is no
323+
* ancestor.
324+
*/
325+
static TYPE_DESC *
326+
get_parent_pointer(TYPE_DESC *src_td, __INT_T level)
327+
{
328+
329+
__INT_T offset, src_td_level;
330+
TYPE_DESC *parent;
331+
332+
if (level <= 0 || src_td == NULL)
333+
return NULL;
334+
335+
src_td_level = src_td->obj.level;
336+
if (src_td_level < 0 || level > src_td_level)
337+
return NULL;
338+
339+
if (src_td->parents != NULL) {
340+
/* The parents field is filled in, so use it to get the desired parent */
341+
offset = (src_td_level - level) * sizeof(__POINT_T);
342+
parent = *((TYPE_DESC **)(((char *)src_td->parents) + offset));
343+
} else {
344+
/* The parents field is not filled in, so find the parent from the
345+
* src_td base pointer. The parents field is not filled in
346+
* when a type descriptor is created with an older compiler.
347+
* Note: This method does not always work if the type descriptor is
348+
* defined in a shared library.
349+
*/
350+
offset = level * sizeof(__POINT_T);
351+
parent = *((TYPE_DESC **)(((char *)src_td) - offset));
352+
}
353+
354+
return parent;
355+
356+
}
313357
static void
314358
process_final_procedures(char *area, F90_Desc *sd)
315359
{
@@ -408,8 +452,9 @@ process_final_procedures(char *area, F90_Desc *sd)
408452

409453
if (((F90_Desc *)src_td)->tag == __POLY && src_td->obj.level > 0) {
410454
/* process parent finals */
411-
__INT_T offset = (src_td->obj.level) * sizeof(__POINT_T);
412-
TYPE_DESC *parent = *((TYPE_DESC **)(((char *)src_td) - offset));
455+
TYPE_DESC *parent = get_parent_pointer(src_td, src_td->obj.level);
456+
457+
413458

414459
if (rank > 0) {
415460
int i;
@@ -910,14 +955,12 @@ void I8(__fort_dump_type)(TYPE_DESC *d)
910955
fprintf(__io_stderr(), "Size: %d\n", d->obj.size);
911956
fprintf(__io_stderr(), "Type Descriptor:\n\t'%s'\n", d->name);
912957
if (d->obj.level > 0) {
913-
TYPE_DESC *parent;
914958
__INT_T offset, level;
915959
fprintf(__io_stderr(), "(Child Type)\n");
916960
fprintf(__io_stderr(), "Parent Descriptor%s\n",
917961
(d->obj.level == 1) ? ":" : "s:");
918962
for (level = d->obj.level - 1; level >= 0; --level) {
919-
offset = (level + 1) * sizeof(__POINT_T);
920-
TYPE_DESC *parent = *((TYPE_DESC **)(((char *)d) - offset));
963+
TYPE_DESC *parent = get_parent_pointer(d, level+1);
921964
fprintf(__io_stderr(), "\t'%s'\n", parent->name);
922965
}
923966

runtime/flang/type.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2010-2018, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 2010-2019, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -120,7 +120,7 @@ struct object_desc {
120120
struct type_desc /* extends(OBJECT_DESC) */ {
121121
OBJECT_DESC obj; /**< parent object_desc */
122122
VTABLE(func_table); /**< pointer to virtual function table */
123-
VTABLE(constructor); /**< reserved */
123+
POINT(TYPE_DESC, parents); /**< pointer to parent type descriptor list */
124124
FINAL_TABLE(finals); /**< pointer to final procedures table */
125125
POINT(LAYOUT_DESC, layout); /**< pointer to layout descriptor */
126126
char name[MAX_TYPE_NAME + 1];/**< null terminated user defined name of type */

runtime/libpgmath/lib/common/dispatch.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,6 @@
5353
*/
5454

5555
#if defined(TARGET_WIN_X8664)
56-
/*
57-
* Defining CPP object macro _NO_CRT_STDIO_INLINE prevents the Visual Studio
58-
* header files from generating local versions of printf(), fprintf() and
59-
* others.
60-
*/
61-
62-
#define _NO_CRT_STDIO_INLINE
63-
6456
/*
6557
* The Windows system header files are missing the argument list in the
6658
* following function declarations. Without the argument list, albeit void,

runtime/libpgmath/lib/x86_64/x86id.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -964,8 +964,20 @@ X86IDFN(init_hw_features)(uint32_t old_hw_features)
964964
* Abort and avoid infinite loop since nothing is going to change.
965965
*/
966966

967+
#if defined(TARGET_WIN_X8664) && ! defined(_NO_CRT_STDIO_INLINE)
968+
/*
969+
* Exception! Windows - building x86id.obj for libcpuid.lib:
970+
* It is unclear why fprintf() can't be used when x86id.c is being
971+
* compiled for libcpuid.lib.
972+
*/
973+
967974
printf("Error: %s called twice with hw_features=%#x\n", __func__,
968975
X86IDFN(hw_features));
976+
#else
977+
// All other architectures/platforms/libraries can safely use fprintf().
978+
fprintf(stderr, "Error: %s called twice with hw_features=%#x\n", __func__,
979+
X86IDFN(hw_features));
980+
#endif
969981
exit(EXIT_FAILURE); // XXX XXX - should be __abort(1, "some string");
970982

971983
}/* init_hw_features */

tools/flang1/flang1exe/lowerexp.c

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997-2018, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 1997-2019, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -2296,6 +2296,8 @@ lower_function(int ast)
22962296
/* prefix: J K */
22972297
#define in_J_K 0x0530000
22982298
/* prefix: none A D */
2299+
#define in_R_D 0x0003300
2300+
/* prefix: R D */
22992301
#define in_r_D 0x0001300
23002302
/* prefix: R D C CD */
23012303
#define in_R_D_C_CD 0x0001333
@@ -2626,6 +2628,9 @@ intrinsic_arg_dtype(int intr, int ast, int args, int nargs)
26262628
case I_ANINT:
26272629
case I_DNINT:
26282630

2631+
case I_CEILING:
2632+
case I_FLOOR:
2633+
26292634
case I_CONJG:
26302635
case I_DCONJG:
26312636

@@ -2873,8 +2878,6 @@ intrinsic_arg_dtype(int intr, int ast, int args, int nargs)
28732878
case I_SIZE:
28742879
case I_LBOUND:
28752880
case I_UBOUND:
2876-
case I_CEILING:
2877-
case I_FLOOR:
28782881
case I_MODULO:
28792882
case I_EXPONENT:
28802883
case I_FRACTION:
@@ -3071,6 +3074,7 @@ lower_intrinsic(int ast)
30713074
nargs = A_ARGCNTG(ast);
30723075
args = A_ARGSG(ast);
30733076
intr = A_OPTYPEG(ast);
3077+
30743078
if (intr != NEW_INTRIN) {
30753079
symfunc = EXTSYMG(intast_sym[intr]);
30763080
} else {
@@ -3108,6 +3112,8 @@ lower_intrinsic(int ast)
31083112
case I_DINT:
31093113
case I_ANINT:
31103114
case I_DNINT:
3115+
case I_FLOOR:
3116+
case I_CEILING:
31113117
nargs = 1;
31123118
}
31133119
if (argdtype >= 0) {
@@ -3745,6 +3751,15 @@ lower_intrinsic(int ast)
37453751
}
37463752
break;
37473753

3754+
case I_CEILING:
3755+
dtype = A_NDTYPEG(ast);
3756+
ilm = intrin_name("CEIL", ast, in_R_D);
3757+
break;
3758+
case I_FLOOR:
3759+
dtype = A_NDTYPEG(ast);
3760+
ilm = intrin_name("FLOOR", ast, in_R_D);
3761+
break;
3762+
37483763
case I_AINT:
37493764
case I_DINT:
37503765
dtype = A_NDTYPEG(ast);
@@ -4059,21 +4074,6 @@ lower_intrinsic(int ast)
40594074
A_ILMP(ast, ilm);
40604075
return ilm;
40614076

4062-
case I_CEILING:
4063-
case I_FLOOR:
4064-
/*
4065-
* see semfunc.c for the spelling of the function name.
4066-
*/
4067-
dtype = A_NDTYPEG(ast);
4068-
symfunc = A_SPTRG(A_LOPG(ast));
4069-
for (i = 0; i < nargs; ++i) {
4070-
ilm = lower_ilm(ARGT_ARG(args, i));
4071-
ilm = plower("oi", "DPVAL", ilm);
4072-
intrinsic_args[i] = ilm;
4073-
}
4074-
ilm = plower("onsm", ltyped("FUNC", dtype), nargs, symfunc);
4075-
break;
4076-
40774077
case I_MODULO:
40784078
/*
40794079
* see semfunc.c for the spelling of the function name.

tools/flang1/flang1exe/pointsto.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2006-2018, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 2006-2019, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -4247,7 +4247,7 @@ points_to(void)
42474247
if (head.stg_size > 1000000) {
42484248
/* abort */
42494249
Trace(("pointer target analysis is too expensive, abort\n"));
4250-
fini_points_to_anal();
4250+
fini_points_to_prop();
42514251
return;
42524252
}
42534253
STG_ALLOC(head, head.stg_size);

tools/flang1/flang1exe/semant.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ semant_init(int noparse)
549549
sem.expect_dist_do = FALSE;
550550
sem.expect_acc_do = 0;
551551
sem.collapsed_acc_do = 0;
552+
sem.seq_acc_do = 0;
552553
sem.expect_cuf_do = 0;
553554
sem.close_pdo = FALSE;
554555
sem.is_hpf = FALSE;
@@ -1045,6 +1046,7 @@ semant1(int rednum, SST *top)
10451046
sem.expect_dist_do = FALSE;
10461047
sem.expect_acc_do = 0;
10471048
sem.collapsed_acc_do = 0;
1049+
sem.seq_acc_do = 0;
10481050
sem.expect_cuf_do = 0;
10491051
sem.collapse = sem.collapse_depth = 0;
10501052
}

tools/flang1/flang1exe/semant.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ typedef struct { /* DO-IF stack entries */
180180
int count; /* var=triplet control count -- outermost=1 */
181181
int kind; /* temp: 1) curr locality kind; 2) loop component kind */
182182
bool no_default; /* loop has a DEFAULT(NONE) locality spec? */
183+
int popindex; /* do pop the index symbol */
183184
int block_sym; /* loop body block sym */
184185
int syms; /* list of index, local, local_init, and shared syms */
185186
int last_sym; /* last sym in syms list */
@@ -378,6 +379,7 @@ typedef struct { /* DO-IF stack entries */
378379
#define DI_TOP_LABEL(d) sem.doif_base[d].u.u1.top_label
379380
#define DI_DO_AST(d) sem.doif_base[d].u.u1.ast
380381
#define DI_DOINFO(d) sem.doif_base[d].u.u1.doinfo
382+
#define DI_DO_POPINDEX(d) sem.doif_base[d].u.u1.popindex
381383
#define DI_CONC_SYMAVL(d) sem.doif_base[d].u.u1.symavl
382384
#define DI_CONC_COUNT(d) sem.doif_base[d].u.u1.count
383385
#define DI_CONC_KIND(d) sem.doif_base[d].u.u1.kind
@@ -1159,6 +1161,7 @@ typedef struct {
11591161
int doif_size; /* size in records of DOIF stack area. */
11601162
DOIF *doif_base; /* base pointer for DOIF stack area. */
11611163
int doif_depth; /* current DO-IF nesting level */
1164+
SPTR index_sym_to_pop; /* DO index symbol to pop off hash link at end of loop */
11621165
SPTR doconcurrent_symavl; /* stb.stg_avail value at start of do concurrent */
11631166
DTYPE doconcurrent_dtype; /* explicit do concurrent index data type */
11641167
int eqvlist; /* head of list of equivalences */
@@ -1300,6 +1303,7 @@ typedef struct {
13001303
* needs to be a DO.
13011304
*/
13021305
int collapsed_acc_do; /* value of collapse clause for acc loop */
1306+
int seq_acc_do; /* acc loop with 'seq' clause */
13031307
int expect_cuf_do; /* next statement after CUF KERNELS needs to be a DO. */
13041308
LOGICAL close_pdo; /* A DO loop for a PDO, PARALLELDO, or DOACROSS
13051309
* has been processed and its removal from the

0 commit comments

Comments
 (0)