Skip to content

Commit 86b315a

Browse files
committed
Changes for the IBM XL compiler.
1 parent e226540 commit 86b315a

15 files changed

+177
-41
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ if( NOT is_submodule )
2020
option( ENABLE_BENCHMARKS "Builds benchmarks" ON )
2121
option( ENABLE_DOCS "Builds documentation" ON )
2222

23+
option( ENABLE_UMPIRE "Build with UMPIRE" ON )
2324
option( ENABLE_CHAI "Build with CHAI" ON )
2425
option( ENABLE_MPI "Build with MPI" ON )
2526
option( ENABLE_CUDA "Build with CUDA" OFF )

cmake/SetupTPL.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ if(ENABLE_CHAI)
6868
get_target_property(CHAI_LINK_LIBRARIES chai INTERFACE_LINK_LIBRARIES)
6969
list(REMOVE_ITEM CHAI_LINK_LIBRARIES RAJA)
7070
set_target_properties(chai
71-
PROPERTIES INTERFACE_LINK_LIBRARIES ${CHAI_LINK_LIBRARIES})
71+
PROPERTIES INTERFACE_LINK_LIBRARIES "${CHAI_LINK_LIBRARIES}")
7272

7373
set(thirdPartyLibs ${thirdPartyLibs} chai)
7474
else()
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Host-config (initial cache) for an IBM XL + CUDA toolchain; every setting below is a cache entry.
# NOTE(review): the CONFIG_NAME value looks mangled by e-mail obfuscation in this capture
# (a "name@version" string is presumably intended) -- restore it from the repository.
set(CONFIG_NAME "[email protected]" CACHE PATH "")
2+
3+
# Left commented out; presumably meant to be enabled and pointed at a RAJA installation.
# set(RAJA_DIR PATH/TO/RAJA CACHE PATH "")
4+
5+
# Third-party toggles: Umpire, CHAI and Caliper are switched off, USE_ADDR2LINE is switched on.
set(ENABLE_UMPIRE OFF CACHE BOOL "")
6+
set(ENABLE_CHAI OFF CACHE BOOL "")
7+
set(ENABLE_CALIPER OFF CACHE BOOL "")
8+
set(USE_ADDR2LINE ON CACHE BOOL "")
9+
10+
# C options
# The per-configuration flags in this section configure the C compiler (CMAKE_C_FLAGS_<CONFIG>);
# the C++ flags are set separately in the "C++ options" section. The XL options that apply to C++
# only (-qlanglvl=extended0x, -qnoeh) are intentionally left out of the C flag sets.
11+
set(CMAKE_C_COMPILER /usr/tce/packages/xl/xl-2020.08.13-cuda-11.0.2/bin/xlc CACHE PATH "")
12+
set(CMAKE_C_FLAGS_RELEASE "-O3 -qxlcompatmacros -qalias=noansi -qsmp=omp -qhot -qsuppress=1500-029 -qsuppress=1500-036" CACHE STRING "")
13+
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -qxlcompatmacros -qalias=noansi -qsmp=omp -qhot -qsuppress=1500-029 -qsuppress=1500-036" CACHE STRING "")
14+
set(CMAKE_C_FLAGS_DEBUG "-O0 -g -qsmp=omp:noopt " CACHE STRING "")
15+
# Linker flags for executables: tolerate multiply-defined symbols (-z muldefs).
set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,muldefs" CACHE STRING "")
16+
17+
# C++ options
18+
set(CMAKE_CXX_COMPILER /usr/tce/packages/xl/xl-2020.08.13-cuda-11.0.2/bin/xlC CACHE PATH "")
19+
# Release and RelWithDebInfo use the same XL option set and differ only in "-O3" versus "-O2 -g".
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qsmp=omp -qhot -qnoeh -qsuppress=1500-029 -qsuppress=1500-036" CACHE STRING "")
20+
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qsmp=omp -qhot -qnoeh -qsuppress=1500-029 -qsuppress=1500-036" CACHE STRING "")
21+
# Debug builds are unoptimized (-O0 -g) and request OpenMP through -qsmp=omp:noopt.
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -qsmp=omp:noopt " CACHE STRING "")
22+
# BLT_CXX_STANDARD is set to 14, matching CMAKE_CUDA_STANDARD in the Cuda options.
set(BLT_CXX_STANDARD 14 CACHE STRING "")
23+
24+
# OpenMP options
# FORCE makes this entry replace any ENABLE_OPENMP value already present in the cache.
# NOTE(review): presumably forced because the XL flag sets in this file pass -qsmp=omp -- confirm.
25+
set(ENABLE_OPENMP ON CACHE BOOL "" FORCE)
26+
27+
# MPI options
28+
set(ENABLE_MPI ON CACHE BOOL "")
29+
# MPI_ROOT ends with a trailing slash, so the wrapper paths derived from it below contain "//bin".
set(MPI_ROOT /usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-xl-2020.08.13-cuda-11.0.2/ CACHE PATH "")
30+
# The MPI compiler wrappers are located relative to MPI_ROOT.
set(MPI_C_COMPILER ${MPI_ROOT}/bin/mpicc CACHE PATH "")
31+
set(MPI_CXX_COMPILER ${MPI_ROOT}/bin/mpicxx CACHE PATH "")
32+
# MPI launcher is lrun, with -n as the process-count flag.
set(MPIEXEC lrun CACHE STRING "")
33+
set(MPIEXEC_NUMPROC_FLAG -n CACHE STRING "")
34+
# NOTE(review): presumably makes every test run through MPIEXEC -- confirm against the build system.
set(ENABLE_WRAP_ALL_TESTS_WITH_MPIEXEC ON CACHE BOOL "")
35+
36+
# Cuda options
37+
set(ENABLE_CUDA ON CACHE BOOL "")
38+
set(CUDA_TOOLKIT_ROOT_DIR /usr/tce/packages/cuda/cuda-11.0.2 CACHE STRING "")
39+
# nvcc uses the MPI C++ wrapper (MPI_CXX_COMPILER from the MPI options) as its host compiler.
set(CMAKE_CUDA_HOST_COMPILER ${MPI_CXX_COMPILER} CACHE STRING "")
40+
set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc CACHE STRING "")
41+
# CUDA_ARCH is spliced into CMAKE_CUDA_FLAGS below as the -arch value.
set(CUDA_ARCH sm_70 CACHE STRING "")
42+
set(CMAKE_CUDA_STANDARD 14 CACHE STRING "")
43+
# Flags common to every configuration; the listed warning classes are promoted to errors via -Werror.
set(CMAKE_CUDA_FLAGS "-restrict -arch ${CUDA_ARCH} --expt-extended-lambda -Werror cross-execution-space-call,reorder,deprecated-declarations" CACHE STRING "")
44+
# Each XL host-compiler option is forwarded to the host compiler with its own -Xcompiler prefix.
set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG -Xcompiler -DNDEBUG -Xcompiler -O3 -Xcompiler -qxlcompatmacros -Xcompiler -qalias=noansi -Xcompiler -qsmp=omp -Xcompiler -qhot -Xcompiler -qnoeh -Xcompiler -qsuppress=1500-029 -Xcompiler -qsuppress=1500-036" CACHE STRING "")
45+
# RelWithDebInfo is the release flag set with "-g -lineinfo" prepended.
set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo ${CMAKE_CUDA_FLAGS_RELEASE}" CACHE STRING "")
46+
set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 -Xcompiler -O0" CACHE STRING "")
47+
48+
# Uncomment this line to make nvcc output register usage for each kernel.
49+
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --resource-usage" CACHE STRING "" FORCE)
50+
51+
# GTEST options
# Death tests are switched off and GoogleTest's pthreads usage is disabled for this configuration.
52+
set(ENABLE_GTEST_DEATH_TESTS OFF CACHE BOOL "")
53+
set(gtest_disable_pthreads ON CACHE BOOL "")
54+
55+
# Documentation options
# Turns off the documentation build, which the top-level option( ENABLE_DOCS ... ) defaults to ON.
56+
set(ENABLE_DOCS OFF CACHE BOOL "")

src/Array.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ class Array : public ArrayView< T,
210210
template< typename ... DIMS >
211211
LVARRAY_HOST_DEVICE
212212
std::enable_if_t< sizeof ... ( DIMS ) == NDIM && typeManipulation::all_of_t< std::is_integral< DIMS > ... >::value >
213-
resize( DIMS... newDims )
213+
resize( DIMS const ... newDims )
214214
{
215215
static_assert( sizeof ... ( DIMS ) == NDIM, "The number of arguments provided does not equal NDIM!" );
216216
INDEX_TYPE const oldSize = this->size();

src/ArraySlice.hpp

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,29 +12,27 @@
1212

1313
#pragma once
1414

15-
#ifndef NDEBUG
16-
#ifndef __APPLE__
15+
#if !defined( NDEBUG ) && !defined( __APPLE__ ) && !defined( __ibmxl__ )
1716
/**
1817
* @brief Add GDB pretty printers from the given script.
1918
* @param script_name The python script that contains the gdb hooks.
2019
* @note Taken from https://sourceware.org/gdb/onlinedocs/gdb/dotdebug_005fgdb_005fscripts-section.html
2120
*/
2221
#define DEFINE_GDB_PY_SCRIPT( script_name ) \
2322
asm (".pushsection \".debug_gdb_scripts\", \"MS\",@progbits,1\n \
24-
.byte 1 /* Python */\n \
25-
.asciz \"" script_name "\"\n \
26-
.popsection \n" )
27-
#else
23+
.byte 1 /* Python */\n \
24+
.asciz \"" script_name "\"\n \
25+
.popsection \n" );
26+
#else
2827
/**
2928
* @brief No-op fallback for builds where the GDB script section is not emitted (NDEBUG, IBM XL, or OSX, where this hasn't been done yet).
3029
* @param script_name The python script that contains the gdb hooks.
3130
*/
3231
#define DEFINE_GDB_PY_SCRIPT( script_name )
33-
#endif
32+
#endif
3433

3534
/// Point GDB at the scripts/gdb-printers.py
36-
DEFINE_GDB_PY_SCRIPT( "scripts/gdb-printers.py" );
37-
#endif
35+
DEFINE_GDB_PY_SCRIPT( "scripts/gdb-printers.py" )
3836

3937
// Source includes
4038
#include "LvArrayConfig.hpp"
@@ -171,7 +169,19 @@ class ArraySlice
171169
*/
172170
LVARRAY_HOST_DEVICE inline constexpr
173171
INDEX_TYPE size() const noexcept
174-
{ return indexing::multiplyAll< NDIM >( m_dims ); }
172+
{
173+
#if defined( __ibmxl__ )
174+
// Note: This used to be done with a recursive template but XL-release would produce incorrect results.
175+
// Specifically in exampleArray it would return an "old" size even after being updated, strange.
176+
INDEX_TYPE val = m_dims[ 0 ];
177+
for( int i = 1; i < NDIM; ++i )
178+
{ val *= m_dims[ i ]; }
179+
180+
return val;
181+
#else
182+
return indexing::multiplyAll< NDIM >( m_dims );
183+
#endif
184+
}
175185

176186
/**
177187
* @return Return the length of the given dimension.

src/ArrayView.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,19 @@ class ArrayView
303303
*/
304304
LVARRAY_HOST_DEVICE inline
305305
INDEX_TYPE size() const noexcept
306-
{ return indexing::multiplyAll< NDIM >( m_dims ); }
306+
{
307+
#if defined( __ibmxl__ )
308+
// Note: This used to be done with a recursive template but XL-release would produce incorrect results.
309+
// Specifically in exampleArray it would return an "old" size even after being updated, strange.
310+
INDEX_TYPE val = m_dims[ 0 ];
311+
for( int i = 1; i < NDIM; ++i )
312+
{ val *= m_dims[ i ]; }
313+
314+
return val;
315+
#else
316+
return indexing::multiplyAll< NDIM >( m_dims.data );
317+
#endif
318+
}
307319

308320
/**
309321
* @return Return the length of the given dimension.

src/Macros.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@
219219
* @param lhs expression to be evaluated and used as left-hand side in comparison
220220
* @param rhs expression to be evaluated and used as right-hand side in comparison
221221
*/
222-
#define LVARRAY_ERROR_IF_EQ( lhs, rhs ) LVARRAY_ERROR_IF_NE_MSG( lhs, rhs, "" )
222+
#define LVARRAY_ERROR_IF_EQ( lhs, rhs ) LVARRAY_ERROR_IF_EQ_MSG( lhs, rhs, "" )
223223

224224
/**
225225
* @brief Raise a hard error if two values are not equal.

src/limits.hpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,35 @@ struct NumericLimits : public std::numeric_limits< T >
5151
static constexpr T denorm_min = std::numeric_limits< T >::denorm_min();
5252
};
5353

54+
/**
55+
* @struct NumericLimitsNC
56+
* @brief The same as @c NumericLimits except the entries are not static or constexpr.
57+
* @details This is useful for solving "undefined reference" errors that pop up often in lambdas.
* @note The entries are per-instance data members, so a value is read from an object,
*   for example @code NumericLimitsNC< T >{}.epsilon @endcode
* @note Each data member hides the static member function of the same name inherited
*   from @c std::numeric_limits< T >.
58+
* @tparam T the numeric type to query.
59+
*/
60+
template< typename T >
61+
struct NumericLimitsNC : public std::numeric_limits< T >
62+
{
63+
/// The smallest finite value T can hold.
64+
T const min = std::numeric_limits< T >::min();
65+
/// The lowest finite value T can hold.
66+
T const lowest = std::numeric_limits< T >::lowest();
67+
/// The largest finite value T can hold.
68+
T const max = std::numeric_limits< T >::max();
69+
/// The difference between 1.0 and the next representable value (if T is floating point).
70+
T const epsilon = std::numeric_limits< T >::epsilon();
71+
/// The maximum rounding error (if T is a floating point).
72+
T const round_error = std::numeric_limits< T >::round_error();
73+
/// A positive infinity value (if T is a floating point).
74+
T const infinity = std::numeric_limits< T >::infinity();
75+
/// A quiet NaN (if T is a floating point).
76+
T const quiet_NaN = std::numeric_limits< T >::quiet_NaN();
77+
/// A signaling NaN (if T is a floating point).
78+
T const signaling_NaN = std::numeric_limits< T >::signaling_NaN();
79+
/// The smallest positive subnormal value (if T is a floating point).
80+
T const denorm_min = std::numeric_limits< T >::denorm_min();
81+
};
82+
5483
namespace internal
5584
{
5685

src/sortedArrayManipulation.hpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -314,9 +314,21 @@ std::ptrdiff_t removeDuplicates( ITER first, ITER const last, Compare && comp=Co
314314
}
315315

316316
std::ptrdiff_t numUnique = 1;
317+
317318
ITER next = first;
318-
++next;
319-
while( next != last )
319+
320+
/**
321+
* For whatever reason the standard approach doesn't work with XL in release on device.
322+
* It does some really strange things, for example `last - next == 0` and they can have identical
323+
* values but `next != last`. If you print out the array each iteration it works as expected. I even
324+
* tried substituting the example code for std::unique from cppreference and that exhibited the same problem.
325+
* My guess is it's most likely a compiler bug.
326+
*/
327+
#if defined( __ibmxl__ ) && defined( __CUDA_ARCH__ )
328+
while( arrayManipulation::iterDistance( ++next, last ) > 0 )
329+
#else
330+
while( ++next != last )
331+
#endif
320332
{
321333
if( comp( *first, *next ) )
322334
{
@@ -327,8 +339,6 @@ std::ptrdiff_t removeDuplicates( ITER first, ITER const last, Compare && comp=Co
327339
*first = std::move( *next );
328340
}
329341
}
330-
331-
++next;
332342
}
333343

334344
return numUnique;

unitTests/testMath.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@ struct TestMath : public ::testing::Test
5555
void sqrtAndInvSqrt()
5656
{
5757
using FloatingPoint = decltype( math::sqrt( T() ) );
58-
FloatingPoint const epsilon = NumericLimits< FloatingPoint >::epsilon;
59-
forall< POLICY >( 1, [epsilon] LVARRAY_HOST_DEVICE ( int )
58+
forall< POLICY >( 1, [] LVARRAY_HOST_DEVICE ( int )
6059
{
60+
FloatingPoint const epsilon = NumericLimitsNC< FloatingPoint >{}.epsilon;
6161

6262
T a = 5 * 5;
6363
PORTABLE_EXPECT_EQ( math::sqrt( a ), 5.0 );
@@ -112,10 +112,11 @@ struct TestMathFloatingPointOnly : public ::testing::Test
112112

113113
void trig()
114114
{
115-
FloatingPoint const epsilon = NumericLimits< FloatingPoint >::epsilon;
116-
forall< POLICY >( 1, [epsilon] LVARRAY_HOST_DEVICE ( int )
115+
forall< POLICY >( 1, [] LVARRAY_HOST_DEVICE ( int )
117116
{
118-
FloatingPoint coords[ 2 ][ 3 ] = { { 1, 2, 1.10714871779409050301 },
117+
FloatingPoint const epsilon = NumericLimitsNC< FloatingPoint >{}.epsilon;
118+
119+
FloatingPoint const coords[ 2 ][ 3 ] = { { 1, 2, 1.10714871779409050301 },
119120
{ 4, -1, -0.24497866312686415417 } };
120121

121122
for( int i = 0; i < 2; ++i )
@@ -140,7 +141,12 @@ struct TestMathFloatingPointOnly : public ::testing::Test
140141

141142
PORTABLE_EXPECT_NEAR( math::abs( math::asin( sinTheta ) ), math::abs( theta ), 1.1 * epsilon );
142143
PORTABLE_EXPECT_NEAR( math::abs( math::acos( cosTheta ) ), math::abs( theta ), 1.1 * epsilon );
144+
145+
#if defined( __ibmxl__ ) && !defined( __CUDA_ARCH__ )
146+
PORTABLE_EXPECT_NEAR( math::atan2( y, x ), theta, 1.1 * epsilon );
147+
#else
143148
PORTABLE_EXPECT_NEAR( math::atan2( y, x ), theta, epsilon );
149+
#endif
144150
}
145151
} );
146152
}

0 commit comments

Comments
 (0)